pipeline for Single-end SE sequencing

e1ef7769 · vvanoost · 133a4f03 · e1ef7769
Commit e1ef7769 authored 6 years ago by vvanoost
--- a/src/RNAseq_sen1D_bowtie2_SE.nf
+++ b/src/RNAseq_sen1D_bowtie2_SE.nf
+/*
+* cutadapt :
+* Inputs : fastq files
+* Output : fastq files
+*/
+
+/*                     Small RNA-seq Illumina adaptor removal NEXTflex Small RNA Seq Kit v3                            */
+
+/*
+* for single-end data
+*/
+
+/*
+* Glob selecting the single-end fastq files; can be overridden with --fastq.
+*/
+params.fastq = "$baseDir/data/fastq_SE/*.fastq.gz"
+log.info "fastq files : ${params.fastq}"
+
+/*
+* fromFilePairs groups files by the prefix matched by the glob and, by default,
+* only emits groups holding exactly two files. Single-end samples give one file
+* per prefix, so size: -1 (any group size) is needed for them to be emitted.
+* Each emitted item is a (pair_id, files) tuple.
+*/
+Channel
+  .fromFilePairs( params.fastq, size: -1 )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files }
+
+/* One copy feeds adaptor removal, the other the FastQC run on the raw reads. */
+fastq_files.into{fastq_files_adaptor; fastq_files_fastq}
+
+/*
+* fastqc_fastq : FastQC on the reads as they come out of fastq_files_fastq
+* Inputs : (pair_id, fastq file) tuples
+* Output : FastQC zip and html reports, copied to results/fastq_SE/fastqc/raw
+*/
+process fastqc_fastq {
+  publishDir "results/fastq_SE/fastqc/raw", mode: 'copy'
+  tag "$reads.baseName"
+
+  input:
+    set val(pair_id), file(reads) from fastq_files_fastq
+
+  output:
+    file("*.{zip,html}") into fastqc_repport
+
+  script:
+"""
+fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
+"""
+}
+
+/*
+* adaptor_removal : cutadapt run given one adaptor sequence with -a and its
+*                   reverse complement with -g
+* Inputs : (pair_id, fastq file) tuples from fastq_files_adaptor
+* Output : (pair_id, trimmed fastq) tuples; cutadapt's stdout is redirected to
+*          a _report.txt file that stays in the task work directory
+*/
+process adaptor_removal {
+  tag "$reads.baseName"
+
+  input:
+    set val(pair_id), file(reads) from fastq_files_adaptor
+
+  output:
+    set val(pair_id), file("*_cut.fastq.gz") into fastq_files_cut
+
+  script:
+  """
+  cutadapt -a TGGAATTCTCGGGTGCCAAGG -g CCTTGGCACCCGAGAATTCCA \
+  -o ${reads.baseName}_cut.fastq.gz \
+  ${reads} > ${reads.baseName}_report.txt
+  """
+}
+
+/* Adaptor-trimmed reads go both to the 4-base trimming step and to FastQC. */
+fastq_files_cut.into { fastq_files_cut_randombp; fastq_files_cut_fastq }
+
+/*
+* fastqc_fastq_cutadapt : FastQC on the reads produced by adaptor_removal
+* Inputs : (pair_id, fastq file) tuples from fastq_files_cut_fastq
+* Output : FastQC zip and html reports, copied to
+*          results/fastq_SE/fastqc/adaptor_removal
+*/
+process fastqc_fastq_cutadapt {
+  publishDir "results/fastq_SE/fastqc/adaptor_removal/", mode: 'copy'
+  tag "$reads.baseName"
+
+  input:
+    set val(pair_id), file(reads) from fastq_files_cut_fastq
+
+  output:
+    file("*.{zip,html}") into cutadapt_fastqc_repport
+
+  script:
+"""
+fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
+"""
+}
+
+/*
+* random_bases_4_trimming : cutadapt run with -u 4 and -u -4 on the
+*                           adaptor-trimmed reads
+* Inputs : (pair_id, fastq file) tuples from fastq_files_cut_randombp
+* Output : (pair_id, trimmed fastq) tuples, the fastq being copied to
+*          results/fastq_SE/adaptor_removal
+*/
+process random_bases_4_trimming {
+  tag "$reads.baseName"
+  publishDir "results/fastq_SE/adaptor_removal/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files_cut_randombp
+
+  output:
+  set pair_id, "*_cut4.fastq.gz" into fastq_files_cut4
+
+  script:
+  // reads is a single file here (its baseName is used on the same command),
+  // so it is passed as-is rather than indexed like a paired-end list.
+  """
+  cutadapt -u 4 -u -4 \
+  -o ${reads.baseName}_cut4.fastq.gz \
+  ${reads} > ${reads.baseName}_report.txt
+  """
+}
+
+/* Trimmed reads go both to quality trimming and to FastQC. */
+fastq_files_cut4.into{fastq_files_trim; fastq_files_cut4_fastq}
+
+/*
+* fastqc_fastq_randombp : FastQC on the reads produced by
+*                         random_bases_4_trimming
+* Inputs : (pair_id, fastq file) tuples from fastq_files_cut4_fastq
+* Output : FastQC zip and html reports, copied to
+*          results/fastq_SE/fastqc/random_bases_4_trimming
+*/
+process fastqc_fastq_randombp {
+  publishDir "results/fastq_SE/fastqc/random_bases_4_trimming/", mode: 'copy'
+  tag "$reads.baseName"
+
+  input:
+    set val(pair_id), file(reads) from fastq_files_cut4_fastq
+
+  output:
+    file("*.{zip,html}") into randombp_fastqc_repport
+
+  script:
+"""
+fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
+"""
+}
+
+/*
+* urqt :
+* Inputs : fastq files
+* Output : fastq files
+*/
+/*                      quality trimming                                     */
+
+/*
+* for single-end data
+*/
+
+/*
+* trimming : UrQt run (--t 20, gzip output) on the reads coming from
+*            fastq_files_trim, using 4 cpus
+* Inputs : (pair_id, fastq file) tuples
+* Output : (pair_id, trimmed fastq) tuples, the fastq being copied to
+*          results/fastq_SE/trimming; UrQt's stdout goes to a
+*          _trimming_report.txt file in the task work directory
+*/
+process trimming {
+  publishDir "results/fastq_SE/trimming/", mode: 'copy'
+  cpus 4
+  tag "$reads"
+
+  input:
+    set val(pair_id), file(reads) from fastq_files_trim
+
+  output:
+    set val(pair_id), file("*_trim.fastq.gz") into fastq_files_urqt
+
+  script:
+"""
+UrQt --t 20 --m ${task.cpus} --gz \
+--in ${reads} \
+--out ${reads.baseName}_trim.fastq.gz \
+> ${reads.baseName}_trimming_report.txt
+"""
+}
+
+/* Quality-trimmed reads go both to the alignment step and to FastQC. */
+fastq_files_urqt.into { fastq_files_align; fastq_files_urqt_fastq }
+
+/*
+* fastqc_fastq_urqt : FastQC on the reads produced by the trimming process
+* Inputs : (pair_id, fastq file) tuples from fastq_files_urqt_fastq
+* Output : FastQC zip and html reports, copied to results/fastq_SE/fastqc/urqt
+*/
+process fastqc_fastq_urqt {
+  publishDir "results/fastq_SE/fastqc/urqt/", mode: 'copy'
+  tag "$reads.baseName"
+
+  input:
+    set val(pair_id), file(reads) from fastq_files_urqt_fastq
+
+  output:
+    file("*.{zip,html}") into urqt_fastqc_repport
+
+  script:
+"""
+fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
+"""
+}
+
+/*
+* Bowtie2 :
+* Inputs : fastq files
+* Inputs : fasta files
+* Output : bam files
+*/
+
+/*                      fasta indexing                                     */
+/*
+* Glob selecting the reference fasta file(s) to index; can be overridden
+* with --fasta.
+*/
+params.fasta = "$baseDir/data/bam/*.fasta"
+
+log.info "fasta files : ${params.fasta}"
+
+/* The pipeline stops here when the glob matches no fasta file. */
+Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+  .set { fasta_file }
+
+/*
+* index_fasta : bowtie2-build on each fasta file, using 4 cpus
+* Inputs : fasta files from fasta_file
+* Output : every file whose name contains ".index", copied to
+*          results/mapping/index
+* The tool's stdout and stderr are captured in a report file, and the task
+* exits with status 1 when that report contains the word "Error".
+*/
+process index_fasta {
+  publishDir "results/mapping/index/", mode: 'copy'
+  cpus 4
+  tag "$fasta.baseName"
+
+  input:
+    file(fasta) from fasta_file
+
+  output:
+    file("*.index*") into index_files
+
+  script:
+"""
+bowtie2-build --threads ${task.cpus} ${fasta} ${fasta.baseName}.index &> ${fasta.baseName}_bowtie2_report.txt
+
+if grep -q "Error" ${fasta.baseName}_bowtie2_report.txt; then
+  exit 1
+fi
+"""
+}
+
+/*
+* for single-end data
+*/
+
+/*
+* mapping_fastq : bowtie2 --very-sensitive alignment of unpaired reads (-U),
+*                 piped to samtools view to write a bam file, using 4 cpus
+* Inputs : (pair_id, fastq file) tuples from fastq_files_align, plus all the
+*          index files collected into a single list
+* Output : (pair_id, bam) tuples and the bowtie2 stderr report; both are
+*          copied to results/mapping_SE/bams
+* The task exits with status 1 when the report contains the word "Error".
+*/
+process mapping_fastq {
+  publishDir "results/mapping_SE/bams/", mode: 'copy'
+  cpus 4
+  tag "$reads.baseName"
+
+  input:
+    set val(pair_id), file(reads) from fastq_files_align
+    file(index) from index_files.collect()
+
+  output:
+    set val(pair_id), file("*.bam") into bam_files
+    file("*_bowtie2_report.txt") into mapping_fastq_report
+
+  script:
+  // bowtie2 -x expects the index prefix, i.e. the name of the forward
+  // ".1.bt2" file with that suffix removed. The ".rev.1.bt2" file is excluded
+  // from the candidates; when several candidates exist the last one is used,
+  // and the first index file is the fallback when there is none.
+  def forward_hits = index.findAll { candidate ->
+    (candidate =~ /.*\.1\.bt2/) && !(candidate =~ /.*\.rev\.1\.bt2/)
+  }
+  index_id = forward_hits ? (forward_hits.last() =~ /(.*)\.1\.bt2/)[0][1] : index[0]
+"""
+bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \
+-U ${reads} 2> \
+${reads.baseName}_bowtie2_report.txt | \
+samtools view -Sb - > ${reads.baseName}.bam
+
+if grep -q "Error" ${reads.baseName}_bowtie2_report.txt; then
+  exit 1
+fi
+"""
+}
+
+/*                      MultiQC                                     */
+
+/*
+* multiqc : aggregates the FastQC reports of every step in one MultiQC run
+* Inputs : the four FastQC report channels, each collected into a single list
+*          so that the process runs once with all reports staged together
+* Output : every file or directory whose name contains "multiqc_", copied to
+*          results/fastq_SE/multiqc
+*/
+process multiqc {
+  tag "$repport"
+  publishDir "results/fastq_SE/multiqc/", mode: 'copy'
+  // Directives are written as calls without "="; "cpus = 1" is an assignment
+  // and does not set the directive.
+  cpus 1
+
+  input:
+    file repport from fastqc_repport.collect()
+    file repport_urqt from urqt_fastqc_repport.collect()
+    file repport_cutadapt from cutadapt_fastqc_repport.collect()
+    file repport_randombp from randombp_fastqc_repport.collect()
+  output:
+    file "*multiqc_*" into multiqc_report
+
+  script:
+"""
+multiqc -f .
+"""
+}
+
+/*                      bams sorting                                    */
+
+/*
+* sort_bam : samtools sort of each bam produced by mapping_fastq, using 4 cpus
+* Inputs : (pair_id, bam) tuples from bam_files
+* Output : (pair_id, sorted bam) tuples; the sorted bam is named after
+*          pair_id and copied to results/mapping_SE/bams
+*/
+process sort_bam {
+  // The only file input of this process is bam; there is no reads variable
+  // here, so the tag is built from the bam file name.
+  tag "$bam.baseName"
+  cpus 4
+  publishDir "results/mapping_SE/bams/", mode: 'copy'
+  input:
+    set pair_id, file(bam) from bam_files
+
+  output:
+    set pair_id, "*_sorted.bam" into sorted_bam_files
+
+  script:
+"""
+samtools sort -@ ${task.cpus} -O BAM -o ${pair_id}_sorted.bam ${bam}
+"""
+}
+/*                      bams indexing                                     */
+
+/*
+* index_bam : samtools index of each sorted bam
+* Inputs : (pair_id, sorted bam) tuples from sorted_bam_files
+* Output : (pair_id, files matching "*bam*") tuples, copied to
+*          results/mapping_SE/bams
+*/
+process index_bam {
+  // The only file input of this process is bam; there is no reads variable
+  // here, so the tag is built from the bam file name.
+  tag "$bam.baseName"
+  publishDir "results/mapping_SE/bams/", mode: 'copy'
+  input:
+    set pair_id, file(bam) from sorted_bam_files
+  output:
+    set pair_id, "*bam*" into indexed_bam_file
+  script:
+"""
+samtools index ${bam}
+"""
+}