Skip to content
Snippets Groups Projects
Commit bbc74d28 authored by elabaron's avatar elabaron
Browse files

src/RNAseq.nf : adapt htseq and add post genomic alignement

parent a27d714c
Branches
No related tags found
No related merge requests found
......@@ -104,6 +104,10 @@ profiles {
container = "lbmc/htseq:0.11.2"
cpus = 1
}
withName: hisat2_postGenomic {
cpus = 4
container = "lbmc/hisat2:2.1.0"
}
withName: multiqc {
container = "ewels/multiqc:1.9"
cpus = 1
......
......@@ -5,18 +5,24 @@
params.fastq_raw = "data/fastq/*{_R1,_R2}.fastq.gz"
params.output = "results"
params.do_fastqc = true
params.script_cov = "src/norm_coverage.sh"
//params.script_cov = "src/norm_coverage.sh"
params.filter = "data/filter/human_rRNA_tRNA/*.bt2"
params.index_genome = "data/genome/*.ht2"
params.do_dedup = true
params.gtf = "data/annotation/*.gtf"
params.index_postgenome = "data/post_genome/*.ht2"
params.do_postgenome = true
log.info "input raw : ${params.fastq_raw}"
log.info "outut directory : ${params.output}"
log.info "filter index files : ${params.filter}"
log.info "genome index : ${params.index_genome}"
log.info "gtf file : ${params.gtf}"
log.info "post-genome index : ${params.index_postgenome}"
log.info ""
log.info "do fastqc ? : ${params.do_fastqc}"
log.info "do deduplication ? : ${params.do_dedup}"
log.info "do post genome alignement ? : ${params.do_postgenome}"
log.info ""
Channel
......@@ -35,6 +41,16 @@ Channel
.ifEmpty { error "Cannot find any index files matching: ${params.index_genome}" }
.set { GENOME_INDEX }
Channel
.fromPath( params.gtf )
.ifEmpty { error "Cannot find any gtf file matching: ${params.gtf}" }
.set { GTF_FILE }
Channel
.fromPath ( params.index_genome )
.ifEmpty { error "Cannot find any index files matching: ${params.index_genome}" }
.set { POSTGENOME_INDEX }
/* Fastqc of raw input */
process fastqc_raw {
......@@ -66,7 +82,9 @@ process trimming {
output:
set file_id, "*cut_{R1,R2}.fastq.gz" into CUTADAPT_OUTPUT
file "*.txt" into CUTADAPT_LOG
file "*first_report.txt" into CUTADAPT_LOG
file "*{second,third}_report.txt" into CUTADAPT_LOG_2
script:
"""
......@@ -79,7 +97,7 @@ process trimming {
${reads[0]} ${reads[1]} > ${file_id}_first_report.txt
cutadapt -j ${task.cpus} \
-u -10 \
-a "A{100}" \
-o ${file_id}_cut_R1.fastq.gz \
${file_id}_tmp_R1.fastq.gz \
> ${file_id}_second_report.txt
......@@ -140,13 +158,13 @@ process rRNA_removal {
bowtie2 --sensitive -p ${task.cpus} -x ${index_id} \
-1 ${reads[0]} -2 ${reads[1]} --no-unal \
--un-conc-gz ${file_id}_R%.fastq.gz 2> \
${file_id}_bowtie2_report.txt | samtools view -bS - \
${file_id}_filter.txt | samtools view -bS - \
| samtools sort -@ ${task.cpus} -o ${file_id}.filter.bam \
&& samtools index ${file_id}.filter.bam \
&& samtools idxstats ${file_id}.filter.bam > \
${file_id}.filter.stats
if grep -q "Error " ${file_id}_bowtie2_report.txt; then
if grep -q "Error " ${file_id}_filter.txt; then
exit 1
fi
"""
......@@ -185,7 +203,7 @@ process hisat2_human {
file index from GENOME_INDEX.toList()
output:
file "*.fastq.gz" into HISAT_UNALIGNED
set file_id, "*.fastq.gz" into HISAT_UNALIGNED
set file_id, "*.{bam,bam.bai}" into HISAT_ALIGNED
file "*.txt" into HISAT_LOG
......@@ -203,12 +221,12 @@ hisat2 -x ${index_id} \
-2 ${fastq_filtred[1]} \
--un-conc-gz ${file_id}_notaligned_R%.fastq.gz \
--rna-strandness 'F' \
2> ${file_id}_hisat2_hg38.txt \
2> ${file_id}.txt \
| samtools view -bS -F 4 - \
| samtools sort -@ ${task.cpus} -o ${file_id}.bam \
&& samtools index ${file_id}.bam
if grep -q "ERR" ${file_id}_hisat2_hg38.txt; then
if grep -q "ERR" ${file_id}.txt; then
exit 1
fi
"""
......@@ -248,7 +266,7 @@ process dedup_genome {
set file_id, file(bam) from HISAT_ALIGNED_DEDUP
output:
set file_id, "*.bam" into DEDUP_GENOME
set file_id, "*.{bam, bai}" into DEDUP_GENOME
file "*.log" into DEDUP_LOG
when:
......@@ -265,19 +283,16 @@ if (! params.do_dedup) {
HISAT_ALIGNED_DEDUP.into{DEDUP_GENOME}
}
/* HTseq
/* HTseq */
process sort_bam {
tag "$file_id"
input:
set file_id, file(bam) from sorted_bam_htseq
set file_id, file(bam) from DEDUP_GENOME
output:
set file_id, "*_htseq.bam" into sorted_bam_files_2
set file_id, "*_htseq.bam" into SORTED_NAME_GENOME
script:
"""
......@@ -285,24 +300,16 @@ samtools sort -@ ${task.cpus} -n -O BAM -o ${file_id}_htseq.bam ${bam[0]}
"""
}
params.gtf = "$baseDir/data/annotation/*.gtf"
log.info "gtf files : ${params.gtf}"
Channel
.fromPath( params.gtf )
.ifEmpty { error "Cannot find any gtf file matching: ${params.gtf}" }
.set { gtf_file }
process counting {
tag "$file_id"
publishDir "${params.output}/04_HTseq/", mode: 'copy'
input:
set file_id, file(bam) from sorted_bam_files_2
file gtf from gtf_file.toList()
set file_id, file(bam) from SORTED_NAME_GENOME
file gtf from GTF_FILE.toList()
output:
file "*.count" into count_files
file "*.count" into HTSEQ_COUNT
script:
"""
......@@ -322,11 +329,55 @@ htseq-count ${bam[0]} ${gtf} \
-t exon \
-i gene_id \
-f bam \
> ${file_id}_exon.count
> ${file_id}.count
"""
}
/* HISAT2 POST_GENOMIC */
process hisat2_postGenomic {
tag "$file_id"
publishDir "${params.output}/05_post_genome_hisat2/", mode: 'copy'
input:
set file_id, file(fastq_unaligned) from HISAT_UNALIGNED
file index2 from POSTGENOME_INDEX.toList()
output:
set file_id, "*.{bam,bam.bai}" into POSTGENOME_ALIGNED
file "*.txt" into POSTGENOME_LOG
when:
params.do_postgenome
script:
index2_id = index2[0]
for (index2_file in index2) {
if (index2_file =~ /.*\.1\.ht2/ && !(index2_file =~ /.*\.rev\.1\.ht2/)) {
index2_id = ( index2_file =~ /(.*)\.1\.ht2/)[0][1]
}
}
"""
hisat2 -x ${index2_id} \
-p ${task.cpus} \
-1 ${fastq_unaligned[0]} \
-2 ${fastq_unaligned[1]} \
--rna-strandness 'F' \
2> ${file_id}.txt \
| samtools view -bS -F 4 - \
| samtools sort -@ ${task.cpus} -o ${file_id}.bam \
&& samtools index ${file_id}.bam
if grep -q "ERR" ${file_id}.txt; then
exit 1
fi
"""
}
POSTGENOME_ALIGNED.into{POSTGENOME_ALIGNED_FASTQC;
POSTGENOME_ALIGNED_DEDUP}
/*
Channel
.fromFilePairs(params.script_cov)
.ifEmpty { error "Cannot find any file matching: ${params.script_cov}" }
......@@ -368,7 +419,8 @@ process multiqc {
file ('fastqc/*') from OUTPUT_FASTQC_FILTER.collect().ifEmpty([])
file ('*') from HISAT_LOG.collect().ifEmpty([])
file ('fastqc/*') from OUTPUT_FASTQC_GENOME.collect().ifEmpty([])
file ('*') from HTSEQ_COUNT.collect().ifEmpty([])
file ('*') from POSTGENOME_LOG.collect().ifEmpty([])
output:
file "multiqc_report.html" into multiqc_report
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment