diff --git a/src/RNAseq_sen1D_bowtie2.nf b/src/RNAseq_sen1D_bowtie2.nf
new file mode 100644
index 0000000000000000000000000000000000000000..3b79021d9820c22173d1352013776b3d0d41d080
--- /dev/null
+++ b/src/RNAseq_sen1D_bowtie2.nf
@@ -0,0 +1,340 @@
+/*
+* cutadapt :
+* Inputs : fastq files
+* Output : fastq files
+*/
+
+/* Small RNA-seq Illumina adaptor removal NEXTflex Small RNA Seq Kit v3 */
+
+/*
+* for paired-end data
+*/
+
+// Glob selecting the paired-end fastq files (R1/R2 mates share a prefix).
+params.fastq = "$baseDir/data/fastq/*_R{1,2}.fastq.gz"
+
+log.info "fastq files : ${params.fastq}"
+
+// Emit one (pair_id, [R1, R2]) item per sample; abort early when the glob
+// matches nothing.
+Channel
+  .fromFilePairs( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files }
+
+// The raw reads feed two consumers: adaptor removal and FastQC.
+fastq_files.into{fastq_files_adaptor; fastq_files_fastq}
+
+/*
+* fastqc_fastq : FastQC on the raw reads.
+* Inputs : (pair_id, [R1, R2]) from fastq_files_fastq
+* Output : FastQC zip/html reports into fastqc_repport
+*/
+process fastqc_fastq {
+  tag "$pair_id"
+  publishDir "results/fastq/fastqc/raw", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files_fastq
+
+  output:
+  file "*.{zip,html}" into fastqc_repport
+
+  script:
+"""
+fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
+${reads[0]} ${reads[1]}
+"""
+}
+
+/*
+* adaptor_removal : cutadapt adaptor clipping, one call per mate.
+* Inputs : (pair_id, [R1, R2]) from fastq_files_adaptor
+* Output : (pair_id, *_cut_R{1,2}.fastq.gz) into fastq_files_cut
+*/
+process adaptor_removal {
+  tag "$pair_id"
+
+  input:
+  set pair_id, file(reads) from fastq_files_adaptor
+
+  output:
+  set pair_id, "*_cut_R{1,2}.fastq.gz" into fastq_files_cut
+
+  script:
+  // Each mate gets its own report file: writing both cutadapt reports to the
+  // same path would let the R2 report overwrite the R1 report.
+  """
+  cutadapt -a TGGAATTCTCGGGTGCCAAGG -g CCTTGGCACCCGAGAATTCCA \
+  -o ${pair_id}_cut_R1.fastq.gz \
+  ${reads[0]} > ${pair_id}_R1_report.txt
+
+  cutadapt -a GATCGTCGGACTGTAGAACTCTGAAC -g GTTCAGAGTTCTACAGTCCGACGATC \
+  -o ${pair_id}_cut_R2.fastq.gz \
+  ${reads[1]} > ${pair_id}_R2_report.txt
+  """
+}
+
+// Adaptor-clipped reads feed two consumers: random-base trimming and FastQC.
+fastq_files_cut.into{fastq_files_cut_randombp; fastq_files_cut_fastq}
+
+/*
+* fastqc_fastq_cutadapt : FastQC on the adaptor-clipped reads.
+* Inputs : (pair_id, [R1, R2]) from fastq_files_cut_fastq
+* Output : FastQC zip/html reports into cutadapt_fastqc_repport
+*/
+process fastqc_fastq_cutadapt {
+  tag "$pair_id"
+  publishDir "results/fastq/fastqc/adaptor_removal/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files_cut_fastq
+
+  output:
+  file "*.{zip,html}" into cutadapt_fastqc_repport
+
+  script:
+"""
+fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
+${reads[0]} ${reads[1]}
+"""
+}
+
+/*
+* random_bases_4_trimming : remove 4 bases from both ends of both mates.
+* Inputs : (pair_id, [R1, R2]) from fastq_files_cut_randombp
+* Output : (pair_id, *_cut4_R{1,2}.fastq.gz) into fastq_files_cut4
+*/
+process random_bases_4_trimming {
+  tag "$pair_id"
+  publishDir "results/fastq/adaptor_removal/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files_cut_randombp
+
+  output:
+  set pair_id, "*_cut4_R{1,2}.fastq.gz" into fastq_files_cut4
+
+  script:
+  // In cutadapt paired-end mode -u only affects R1; the upper-case -U is the
+  // R2 counterpart, so both are required to trim both mates.
+  """
+  cutadapt -u 4 -u -4 -U 4 -U -4 \
+  -o ${pair_id}_cut4_R1.fastq.gz -p ${pair_id}_cut4_R2.fastq.gz \
+  ${reads[0]} ${reads[1]} > ${pair_id}_report.txt
+  """
+}
+
+// Random-base-trimmed reads feed two consumers: quality trimming and FastQC.
+fastq_files_cut4.into{fastq_files_trim; fastq_files_cut4_fastq}
+
+/*
+* fastqc_fastq_randombp : FastQC on the random-base-trimmed reads.
+* Inputs : (pair_id, [R1, R2]) from fastq_files_cut4_fastq
+* Output : FastQC zip/html reports into randombp_fastqc_repport
+*/
+process fastqc_fastq_randombp {
+  tag "$pair_id"
+  publishDir "results/fastq/fastqc/random_bases_4_trimming/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files_cut4_fastq
+
+  output:
+  file "*.{zip,html}" into randombp_fastqc_repport
+
+  script:
+"""
+fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
+${reads[0]} ${reads[1]}
+"""
+}
+
+/*
+* urqt :
+* Inputs : fastq files
+* Output : fastq files
+*/
+/* quality trimming */
+
+/*
+* for paired-end data
+*/
+
+/*
+* trimming : UrQt quality trimming of both mates.
+* Inputs : (pair_id, [R1, R2]) from fastq_files_trim
+* Output : (pair_id, *_trim_R{1,2}.fastq.gz) into fastq_files_urqt
+*/
+process trimming {
+  // Tagged by pair_id like every other process in this pipeline.
+  tag "$pair_id"
+  cpus 4
+  publishDir "results/fastq/trimming/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files_trim
+
+  output:
+  set pair_id, "*_trim_R{1,2}.fastq.gz" into fastq_files_urqt
+
+  script:
+"""
+UrQt --t 20 --m ${task.cpus} --gz \
+--in ${reads[0]} --inpair ${reads[1]} \
+--out ${pair_id}_trim_R1.fastq.gz --outpair ${pair_id}_trim_R2.fastq.gz \
+> ${pair_id}_trimming_report.txt
+"""
+}
+
+// Quality-trimmed reads feed two consumers: alignment and FastQC.
+fastq_files_urqt.into{fastq_files_align; fastq_files_urqt_fastq}
+
+/*
+* fastqc_fastq_urqt : FastQC on the quality-trimmed reads.
+* Inputs : (pair_id, [R1, R2]) from fastq_files_urqt_fastq
+* Output : FastQC zip/html reports into urqt_fastqc_repport
+*/
+process fastqc_fastq_urqt {
+  tag "$pair_id"
+  publishDir "results/fastq/fastqc/urqt/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files_urqt_fastq
+
+  output:
+  file "*.{zip,html}" into urqt_fastqc_repport
+
+  script:
+"""
+fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
+${reads[0]} ${reads[1]}
+"""
+}
+
+/*
+* Bowtie2 :
+* Inputs : fastq files
+* Inputs : fasta files
+* Output : bam files
+*/
+
+/* fasta indexing */
+// Glob selecting the reference fasta file(s) to index.
+params.fasta = "$baseDir/data/bam/*.fasta"
+
+log.info "fasta files : ${params.fasta}"
+
+// Abort early when no reference matches the glob.
+Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+  .set { fasta_file }
+
+/*
+* index_fasta : build the bowtie2 index of the reference.
+* Inputs : fasta file from fasta_file
+* Output : <fasta basename>.index* files into index_files
+*/
+process index_fasta {
+  tag "$fasta.baseName"
+  cpus 4
+  publishDir "results/mapping/index/", mode: 'copy'
+
+  input:
+  file fasta from fasta_file
+
+  output:
+  file "*.index*" into index_files
+
+  script:
+"""
+bowtie2-build --threads ${task.cpus} ${fasta} ${fasta.baseName}.index &> ${fasta.baseName}_bowtie2_report.txt
+
+# Fail the task when the captured log mentions an error.
+if grep -q "Error" ${fasta.baseName}_bowtie2_report.txt; then
+  exit 1
+fi
+"""
+}
+
+/*
+* for paired-end data
+*/
+
+/*
+* mapping_fastq : bowtie2 alignment of the trimmed pairs, converted to BAM.
+* Inputs : (pair_id, [R1, R2]) from fastq_files_align
+* Inputs : all index files, gathered into one list
+* Output : (pair_id, *.bam) into bam_files
+* Output : *_bowtie2_report.txt into mapping_fastq_report
+*/
+process mapping_fastq {
+  tag "$pair_id"
+  cpus 4
+  publishDir "results/mapping/bams/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files_align
+  file index from index_files.toList()
+
+  output:
+  set pair_id, "*.bam" into bam_files
+  file "*_bowtie2_report.txt" into mapping_fastq_report
+
+  script:
+  // Derive the index prefix expected by `bowtie2 -x` from the forward
+  // "*.1.bt2" file, skipping "*.rev.1.bt2". Declared with `def` so the value
+  // is local to each task rather than a script-global shared between
+  // concurrently evaluated tasks.
+  def index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/) ) {
+      index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+    }
+  }
+"""
+bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \
+-1 ${reads[0]} -2 ${reads[1]} 2> \
+${pair_id}_bowtie2_report.txt | \
+samtools view -Sb - > ${pair_id}.bam
+
+# Fail the task when the captured bowtie2 log mentions an error.
+if grep -q "Error" ${pair_id}_bowtie2_report.txt; then
+  exit 1
+fi
+"""
+}
+
+/* MultiQC */
+
+/*
+* multiqc : aggregate every FastQC report into a single MultiQC report.
+* Inputs : all FastQC reports (raw, urqt, cutadapt, random-base trimming)
+* Output : *multiqc_* files into multiqc_report
+*/
+process multiqc {
+  tag "$repport"
+  publishDir "results/fastq/multiqc/", mode: 'copy'
+  // Directive form (no `=`), matching the other processes in this file.
+  cpus 1
+
+  input:
+  file repport from fastqc_repport.collect()
+  file repport_urqt from urqt_fastqc_repport.collect()
+  file repport_cutadapt from cutadapt_fastqc_repport.collect()
+  file repport_randombp from randombp_fastqc_repport.collect()
+
+  output:
+  file "*multiqc_*" into multiqc_report
+
+  script:
+"""
+multiqc -f .
+"""
+}
+
+/* BAM splitting */
+
+/*
+* split_bam : split alignments on SAM flag 0x10.
+* Inputs : (pair_id, bam) from bam_files
+* Output : (pair_id, *_forward.bam) into forward_bam_files  (flag 0x10 unset)
+* Output : (pair_id, *_reverse.bam) into reverse_bam_files  (flag 0x10 set)
+*/
+process split_bam {
+  tag "$pair_id"
+  cpus 2
+
+  input:
+  set pair_id, file(bam) from bam_files
+
+  output:
+  set pair_id, "*_forward.bam" into forward_bam_files
+  set pair_id, "*_reverse.bam" into reverse_bam_files
+
+  script:
+"""
+# The forward extraction runs in the background so both filters use the two
+# requested cpus; its PID is kept so the task cannot finish (and its outputs
+# be collected) before the forward BAM is fully written.
+samtools view -hb -F 0x10 ${bam} > ${pair_id}_forward.bam &
+forward_pid=\$!
+samtools view -hb -f 0x10 ${bam} > ${pair_id}_reverse.bam
+wait \$forward_pid
+"""
+}
+
+/* BAM sorting */
+
+/*
+* sort_bam_forward : sort the forward BAM.
+* Inputs : (pair_id, bam) from forward_bam_files
+* Output : (pair_id, *_sorted.bam) into forward_sorted_bam_files
+*/
+process sort_bam_forward {
+  tag "$pair_id"
+  cpus 4
+  publishDir "results/mapping/bams/", mode: 'copy'
+
+  input:
+  set pair_id, file(bam) from forward_bam_files
+
+  output:
+  set pair_id, "*_sorted.bam" into forward_sorted_bam_files
+
+  script:
+"""
+samtools sort -@ ${task.cpus} -O BAM -o ${pair_id}_forward_sorted.bam ${bam}
+"""
+}
+
+/*
+* sort_bam_reverse : sort the reverse BAM.
+* Inputs : (pair_id, bam) from reverse_bam_files
+* Output : (pair_id, *_sorted.bam) into reverse_sorted_bam_files
+*/
+process sort_bam_reverse {
+  tag "$pair_id"
+  cpus 4
+  publishDir "results/mapping/bams/", mode: 'copy'
+
+  input:
+  set pair_id, file(bam) from reverse_bam_files
+
+  output:
+  set pair_id, "*_sorted.bam" into reverse_sorted_bam_files
+
+  script:
+"""
+samtools sort -@ ${task.cpus} -O BAM -o ${pair_id}_reverse_sorted.bam ${bam}
+"""
+}
+
+/* BAM indexing */
+
+/*
+* index_bam_forward : index the sorted forward BAM.
+* Inputs : (pair_id, bam) from forward_sorted_bam_files
+* Output : (pair_id, *bam*) into forward_indexed_bam_file
+*/
+process index_bam_forward {
+  tag "$pair_id"
+  publishDir "results/mapping/bams/", mode: 'copy'
+
+  input:
+  set pair_id, file(bam) from forward_sorted_bam_files
+
+  output:
+  set pair_id, "*bam*" into forward_indexed_bam_file
+
+  script:
+"""
+samtools index ${bam}
+"""
+}
+
+/*
+* index_bam_reverse : index the sorted reverse BAM.
+* Inputs : (pair_id, bam) from reverse_sorted_bam_files
+* Output : (pair_id, *bam*) into reverse_indexed_bam_file
+*/
+process index_bam_reverse {
+  tag "$pair_id"
+  publishDir "results/mapping/bams/", mode: 'copy'
+
+  input:
+  set pair_id, file(bam) from reverse_sorted_bam_files
+
+  output:
+  set pair_id, "*bam*" into reverse_indexed_bam_file
+
+  script:
+"""
+samtools index ${bam}
+"""
+}