diff --git a/src/RNAseq_sen1D_bowtie2_SE_CCA.nf b/src/RNAseq_sen1D_bowtie2_SE_CCA.nf
new file mode 100644
index 0000000000000000000000000000000000000000..f16c1f39637f3aa42bf448aea22d091247cafc8f
--- /dev/null
+++ b/src/RNAseq_sen1D_bowtie2_SE_CCA.nf
@@ -0,0 +1,334 @@
+/*
+* cutadapt :
+* Inputs : fastq files
+* Output : fastq files
+*/
+
+/* Small RNA-seq Illumina adaptor removal NEXTflex Small RNA Seq Kit v3 */
+
+/*
+* for single-end data
+*/
+
+params.fastq = "$baseDir/data/fastq_SE/*.fastq.gz"
+log.info "fastq files : ${params.fastq}"
+
+Channel
+  .fromPath( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files }
+
+/* raw reads go both to FastQC and to adaptor removal */
+fastq_files.into{fastq_files_adaptor; fastq_files_fastq}
+
+/* FastQC report on the raw reads */
+process fastqc_fastq {
+  tag "$reads.baseName"
+  publishDir "results/fastq_SE/fastqc/raw", mode: 'copy'
+
+  input:
+  file reads from fastq_files_fastq
+
+  output:
+  file "*.{zip,html}" into fastqc_repport
+
+  script:
+"""
+fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
+"""
+}
+
+/* cutadapt: remove the 3' (-a) and 5' (-g) adaptor sequences */
+process adaptor_removal {
+  tag "$reads.baseName"
+
+  input:
+  file reads from fastq_files_adaptor
+
+  output:
+  file "*_cut.fastq.gz" into fastq_files_cut
+
+  script:
+  """
+  cutadapt -a TGGAATTCTCGGGTGCCAAGG -g CCTTGGCACCCGAGAATTCCA \
+  -o ${reads.baseName}_cut.fastq.gz \
+  ${reads} > ${reads.baseName}_report.txt
+  """
+}
+
+fastq_files_cut.into{fastq_files_cut_randombp; fastq_files_cut_fastq}
+
+/* FastQC report on the adaptor-trimmed reads */
+process fastqc_fastq_cutadapt {
+  tag "$reads.baseName"
+  publishDir "results/fastq_SE/fastqc/adaptor_removal/", mode: 'copy'
+
+  input:
+  file reads from fastq_files_cut_fastq
+
+  output:
+  file "*.{zip,html}" into cutadapt_fastqc_repport
+
+  script:
+"""
+fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
+"""
+}
+
+/* cutadapt: drop 4 bases from each end of every read (-u 4 -u -4) */
+process random_bases_4_trimming {
+  tag "$reads.baseName"
+  publishDir "results/fastq_SE/adaptor_removal/", mode: 'copy'
+
+  input:
+  file reads from fastq_files_cut_randombp
+
+  output:
+  file "*_cut4.fastq.gz" into fastq_files_cut4
+
+  script:
+  /* reads is a single file here, so it is passed as-is like in the other
+     processes rather than indexed with [0] */
+  """
+  cutadapt -u 4 -u -4 \
+  -o ${reads.baseName}_cut4.fastq.gz \
+  ${reads} > ${reads.baseName}_report.txt
+  """
+}
+
+fastq_files_cut4.into{fastq_files_trim; fastq_files_cut4_fastq}
+
+/* FastQC report on the reads after the 4-base trimming */
+process fastqc_fastq_randombp {
+  tag "$reads.baseName"
+  publishDir "results/fastq_SE/fastqc/random_bases_4_trimming/", mode: 'copy'
+
+  input:
+  file reads from fastq_files_cut4_fastq
+
+  output:
+  file "*.{zip,html}" into randombp_fastqc_repport
+
+  script:
+"""
+fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
+"""
+}
+
+/*
+* urqt :
+* Inputs : fastq files
+* Output : fastq files
+*/
+/* quality trimming */
+
+/*
+* for single-end data
+*/
+
+process trimming {
+  tag "${reads}"
+  cpus 4
+  publishDir "results/fastq_SE/trimming/", mode: 'copy'
+
+  input:
+  file reads from fastq_files_trim
+
+  output:
+  file "*_trim.fastq.gz" into fastq_files_urqt
+
+  script:
+"""
+UrQt --t 20 --m ${task.cpus} --gz \
+--in ${reads} \
+--out ${reads.baseName}_trim.fastq.gz \
+> ${reads.baseName}_trimming_report.txt
+"""
+}
+
+fastq_files_urqt.into{fastq_files_CCA; fastq_files_urqt_fastq}
+
+/* FastQC report on the quality-trimmed reads */
+process fastqc_fastq_urqt {
+  tag "$reads.baseName"
+  publishDir "results/fastq_SE/fastqc/urqt/", mode: 'copy'
+
+  input:
+  file reads from fastq_files_urqt_fastq
+
+  output:
+  file "*.{zip,html}" into urqt_fastqc_repport
+
+  script:
+"""
+fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
+"""
+}
+
+/* cutadapt: remove CCA (-a, 3' side) and TGG (-g, 5' side) */
+process CCA_removal {
+  tag "$reads.baseName"
+
+  input:
+  file reads from fastq_files_CCA
+
+  output:
+  file "*_cut_CCA.fastq.gz" into fastq_files_cut_CCA
+
+  script:
+  """
+  cutadapt -a CCA -g TGG \
+  -o ${reads.baseName}_cut_CCA.fastq.gz \
+  ${reads} > ${reads.baseName}_report.txt
+  """
+}
+
+/*
+* Bowtie2 :
+* Inputs : fastq files
+* Inputs : fasta files
+* Output : bam files
+*/
+
+/* fasta indexing */
+params.fasta = "$baseDir/data/bam/*.fasta"
+
+log.info "fasta files : ${params.fasta}"
+
+Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+  .set { fasta_file }
+
+process index_fasta {
+  tag "$fasta.baseName"
+  cpus 4
+  publishDir "results/mapping/index/", mode: 'copy'
+
+  input:
+    file fasta from fasta_file
+
+  output:
+    file "*.index*" into index_files
+
+  script:
+"""
+bowtie2-build --threads ${task.cpus} ${fasta} ${fasta.baseName}.index &> ${fasta.baseName}_bowtie2_report.txt
+
+if grep -q "Error" ${fasta.baseName}_bowtie2_report.txt; then
+  exit 1
+fi
+"""
+}
+
+/*
+* for single-end data
+*/
+
+process mapping_fastq {
+  tag "$reads.baseName"
+  cpus 4
+  publishDir "results/mapping_SE/bams/", mode: 'copy'
+
+  input:
+  file reads from fastq_files_cut_CCA
+  file index from index_files.collect()
+
+  output:
+  file "*.bam" into bam_files
+  file "*_bowtie2_report.txt" into mapping_fastq_report
+
+  script:
+  /* derive the bowtie2 index prefix from the forward ".1.bt2" file;
+     def keeps index_id local to the task instead of a script-wide global */
+  def index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/) ) {
+        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+    }
+  }
+"""
+bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \
+-U ${reads} 2> \
+${reads.baseName}_bowtie2_report.txt | \
+samtools view -Sb - > ${reads.baseName}.bam
+
+if grep -q "Error" ${reads.baseName}_bowtie2_report.txt; then
+  exit 1
+fi
+"""
+}
+
+/*
+* filtering mapping quality >= 2
+*/
+
+/* NOTE(review): ${bam} keeps its .bam extension, so the output is named
+   NAME.bam_filtered.bam; left as-is to keep published file names stable */
+process quality_filtered_bam {
+  tag "$bam.baseName"
+  cpus 2
+
+  input:
+    file bam from bam_files
+
+  output:
+    file "*_filtered.bam*" into filtered_bam_files
+  script:
+"""
+samtools view -hb -q 2 ${bam} > ${bam}_filtered.bam
+"""
+}
+
+/* MultiQC */
+
+process multiqc {
+  tag "$repport"
+  publishDir "results/fastq_SE/multiqc/", mode: 'copy'
+  cpus 1
+
+  input:
+    file repport from fastqc_repport.collect()
+    file repport_urqt from urqt_fastqc_repport.collect()
+    file repport_cutadapt from cutadapt_fastqc_repport.collect()
+    file repport_randombp from randombp_fastqc_repport.collect()
+  output:
+    file "*multiqc_*" into multiqc_report
+
+  script:
+"""
+multiqc -f .
+"""
+}
+
+/* bams sorting */
+
+process sort_bam {
+  tag "$bam.baseName"
+  cpus 4
+  publishDir "results/mapping_SE/bams/", mode: 'copy'
+  input:
+    file bam from filtered_bam_files
+
+  output:
+    file "*_sorted.bam" into sorted_bam_files
+
+  script:
+"""
+samtools sort -@ ${task.cpus} -O BAM -o ${bam.baseName}_sorted.bam ${bam}
+"""
+}
+/* bams indexing */
+
+process index_bam {
+  tag "$bam.baseName"
+  publishDir "results/mapping_SE/bams/", mode: 'copy'
+  input:
+    file bam from sorted_bam_files
+  output:
+    file "*bam*" into indexed_bam_file
+  script:
+"""
+samtools index ${bam}
+"""
+}