Skip to content
Snippets Groups Projects
Commit 452f8e2e authored by elabaron's avatar elabaron
Browse files

src/RNAseq.nf : debeug cutadapt, improve filtering, add fastqc cut&filt

parent b290fa21
Branches
No related tags found
No related merge requests found
...@@ -72,6 +72,14 @@ profiles { ...@@ -72,6 +72,14 @@ profiles {
container = "lbmc/fastqc:0.11.5" container = "lbmc/fastqc:0.11.5"
cpus = 4 cpus = 4
} }
withName: fastqc_cut {
container = "lbmc/fastqc:0.11.5"
cpus = 4
}
withName: fastqc_filter {
container = "lbmc/fastqc:0.11.5"
cpus = 4
}
withName: adaptor_removal { withName: adaptor_removal {
container = "lbmc/cutadapt:2.1" container = "lbmc/cutadapt:2.1"
cpus = 4 cpus = 4
......
...@@ -6,12 +6,12 @@ params.fastq_raw = "data/fastq/*{_R1,_R2}.fastq.gz" ...@@ -6,12 +6,12 @@ params.fastq_raw = "data/fastq/*{_R1,_R2}.fastq.gz"
params.output = "results" params.output = "results"
params.do_fastqc = true params.do_fastqc = true
params.script_cov = "src/norm_coverage.sh" params.script_cov = "src/norm_coverage.sh"
params.indexrRNA = "data/filter/human_rRNA_tRNA/*.bt2" params.filter = "data/filter/human_rRNA_tRNA/*.bt2"
log.info "input raw : ${params.fastq_raw}" log.info "input raw : ${params.fastq_raw}"
log.info "outut directory : ${params.output}" log.info "outut directory : ${params.output}"
log.info "do fastqc ? : ${params.do_fastqc}" log.info "do fastqc ? : ${params.do_fastqc}"
log.info "filter index files : ${params.indexrRNA}" log.info "filter index files : ${params.filter}"
Channel Channel
.fromFilePairs(params.fastq_raw) .fromFilePairs(params.fastq_raw)
...@@ -19,6 +19,11 @@ Channel ...@@ -19,6 +19,11 @@ Channel
.into {INPUT_FASTQC_RAW; .into {INPUT_FASTQC_RAW;
INPUT_CUTADAPT} INPUT_CUTADAPT}
Channel
.fromPath( params.filter )
.ifEmpty { error "Cannot find any index files matching: ${params.filter}" }
.set { FILTER_INDEX }
/* Fastqc of raw input */ /* Fastqc of raw input */
process fastqc_raw { process fastqc_raw {
...@@ -54,35 +59,65 @@ process trimming { ...@@ -54,35 +59,65 @@ process trimming {
script: script:
""" """
cutadapt -j ${task.cpus} -a "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA" -A "N{13}AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT;min_overlap=16" \ cutadapt -j ${task.cpus} \
-o ${file_id}_tmp_R1.fastq.gz -p ${file_id}_cut_R2.fastq.gz \ -a "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA" \
-A "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT" \
-o ${file_id}_tmp_R1.fastq.gz \
-p ${file_id}_tmp_R2.fastq.gz \
--minimum-length 70 \
${reads[0]} ${reads[1]} > ${file_id}_first_report.txt ${reads[0]} ${reads[1]} > ${file_id}_first_report.txt
cutadapt -j ${task.cpus} -a "A{100}" -o ${file_id}_cut_R1.fastq.gz ${file_id}_tmp_R1.fastq.gz \ cutadapt -j ${task.cpus} \
-a "A{100}" \
-u -10 \
-o ${file_id}_cut_R1.fastq.gz \
${file_id}_tmp_R1.fastq.gz \
> ${file_id}_second_report.txt > ${file_id}_second_report.txt
cutadapt -j ${task.cpus} \
-u -13 \
-o ${file_id}_cut_R2.fastq.gz \
${file_id}_tmp_R2.fastq.gz \
> ${file_id}_third_report.txt
""" """
} }
CUTADAPT_OUTPUT.into{CUTADAPT_OUTPUT_FASTQC;
CUTADAPT_OUTPUT_FILTER}
/* Fastqc of raw input */
/* rRNA and tRNA filtering process fastqc_cut {
tag "$file_id"
publishDir "${params.output}/00_fastqc/cut/", mode: 'copy'
Channel input:
.fromPath( params.indexrRNA ) set file_id, file(reads) from CUTADAPT_OUTPUT_FASTQC
.ifEmpty { error "Cannot find any index files matching: ${params.indexrRNA}" }
.set { rRNA_index_files } output:
set file_id, "*.{html,zip}" into OUTPUT_FASTQC_CUT
when:
params.do_fastqc
"""
fastqc ${file_id}* -t ${task.cpus}
"""
}
/* rRNA and tRNA filtering */
process rRNA_removal { process rRNA_removal {
tag "$file_id" tag "$file_id"
cpus 8
publishDir "${params.output}/02_rRNA_depletion/", mode: 'copy' publishDir "${params.output}/02_rRNA_depletion/", mode: 'copy'
input: input:
set file_id, file(reads) from fastq_files_cut set file_id, file(reads) from CUTADAPT_OUTPUT_FILTER
file index from rRNA_index_files.toList() file index from FILTER_INDEX.toList()
output: output:
set file_id, "*.fastq.gz" into rRNA_removed_files set file_id, "*.fastq.gz" into FILTER_FASTQ
file "*.txt" into bowtie_report set file_id, "*.bam*" into FILTER_BAM
file "*.{txt,stats}" into FILTER_LOG
script: script:
index_id = index[0] index_id = index[0]
...@@ -93,14 +128,41 @@ process rRNA_removal { ...@@ -93,14 +128,41 @@ process rRNA_removal {
} }
""" """
bowtie2 --sensitive -p ${task.cpus} -x ${index_id} \ bowtie2 --sensitive -p ${task.cpus} -x ${index_id} \
-1 ${reads[0]} -2 ${reads[1]} --un-conc-gz ${file_id}_R%.fastq.gz 2> \ -1 ${reads[0]} -2 ${reads[1]} --no-unal \
${file_id}_bowtie2_report.txt > /dev/null --un-conc-gz ${file_id}_R%.fastq.gz 2> \
${file_id}_bowtie2_report.txt | samtools view -bS - \
| samtools sort -@ ${task.cpus} -o ${file_id}.filter.bam \
&& samtools index ${file_id}.filter.bam \
&& samtools idxstats ${file_id}.filter.bam > \
${file_id}.filter.stats
if grep -q "Error " ${file_id}_bowtie2_report.txt; then if grep -q "Error " ${file_id}_bowtie2_report.txt; then
exit 1 exit 1
fi fi
""" """
} }
FILTER_FASTQ.into{FILTER_FASTQ_FASTQC;
FILTER_FASTQ_HISAT}
/* Fastqc of filtred reads */
process fastqc_filter {
tag "$file_id"
publishDir "${params.output}/00_fastqc/filter/", mode: 'copy'
input:
set file_id, file(reads) from FILTER_FASTQ_FASTQC
output:
set file_id, "*.{html,zip}" into OUTPUT_FASTQC_FILTER
when:
params.do_fastqc
"""
fastqc ${file_id}* -t ${task.cpus}
"""
}
/* mapping against human genome with hisat2 /* mapping against human genome with hisat2
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment