add RNAseq worflow for illumina libraries

ba76a948 · elabaron · 12b1d9fb · ba76a948
Commit ba76a948 authored Jul 2, 2020 by elabaron
--- a/src/RNAseq_illumina.nf
+++ b/src/RNAseq_illumina.nf
+/*
+*	RNAseq Analysis pipeline
+*/
+
+params.fastq_raw = "data/demultiplexed/*{_R1,_R2}.fastq.gz"
+params.output = "results"
+
+Channel
+   .fromFilePairs(params.fastq_raw)
+   .ifEmpty { error "Cannot find any file matching: ${params.fastq_raw}" }
+   .set {fastq_raw_channel}
+
+/* Trimming by quality */
+
+process trimming {
+  tag "$file_id"
+  cpus 4
+  publishDir "${params.output}/01_cutadapt/", mode: 'copy'
+  echo true
+
+  input:
+  set file_id, file(reads) from fastq_raw_channel
+
+  output:
+  set file_id, "*cut_{R1,R2}.fastq.gz" into fastq_files_cut
+  file "*.txt" into rapport_UrQt
+
+  script:
+  """
+  cutadapt -a AGATCGGAAGAGCACACGTCTGAACTCCAGTCA -A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT \
+  --minimum-length 50 \
+  -o ${file_id}_cut_R1.fastq.gz -p ${file_id}_cut_R2.fastq.gz \
+  ${reads[0]} ${reads[1]} > ${file_id}_report.txt
+
+  """
+}
+
+
+/* rRNA and tRNA filtering */
+
+params.indexrRNA = "/Xnfs/lbmcdb/Ricci_team/shared_data/genomes/human_rRNA_tRNA/*.bt2"
+
+log.info "index files : ${params.indexrRNA}"
+
+Channel
+  .fromPath( params.indexrRNA )
+  .ifEmpty { error "Cannot find any index files matching: ${params.indexrRNA}" }
+  .set { rRNA_index_files }
+
+process rRNA_removal {
+  tag "$file_id"
+  cpus 8
+  publishDir "${params.output}/02_rRNA_depletion/", mode: 'copy'
+
+  input:
+  set file_id, file(reads) from fastq_files_cut
+  file index from rRNA_index_files.toList()
+
+  output:
+  set file_id, "*.fastq.gz" into rRNA_removed_files
+  file "*.txt" into bowtie_report
+
+  script:
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+    }
+  }
+"""
+bowtie2 --sensitive -p ${task.cpus} -x ${index_id} \
+-1 ${reads[0]} -2 ${reads[1]} --un-conc-gz ${file_id}_R%.fastq.gz 2> \
+${file_id}_bowtie2_report.txt > /dev/null
+
+if grep -q "Error " ${file_id}_bowtie2_report.txt; then
+  exit 1
+fi
+"""
+}
+
+/*	mapping against human genome with hisat2 */
+
+params.index_hg38 = "/media/adminmanu/Stockage/HISAT2_index_hg38_tran/*.ht2"
+
+log.info "index : ${params.index_hg38}"
+
+
+Channel
+  .fromPath ( params.index_hg38 )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index_hg38}" }
+  .set { index_file_hg38 }
+
+process hisat2_human {
+  tag "$file_id"
+  publishDir "${params.output}/03_hisat2/", mode: 'copy'
+  errorStrategy 'finish'
+
+  input:
+    set file_id, file(fastq_filtred) from rRNA_removed_files
+    file index from index_file_hg38.toList()
+
+  output:
+    file "*.fastq.gz" into reads_non_aligned_hg38
+    set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files
+    file "*.txt" into hisat_report
+
+  script:
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
+    }
+  }
+"""
+hisat2 -x ${index_id} -p ${task.cpus} \
+-1 ${fastq_filtred[0]} -2 ${fastq_filtred[1]} \
+--un-conc-gz ${file_id}_notaligned_R%.fastq.gz \
+--rna-strandness 'F' 2> ${file_id}_hisat2_hg38.txt | samtools view -bS -F 4 -o ${file_id}.bam
+
+if grep -q "ERR" ${file_id}_hisat2_hg38.txt; then
+  exit 1
+fi
+
+samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${file_id}.bam
+samtools index ${file_id}_sorted.bam
+
+"""
+}
+
+
+/*                   HTseq                            */
+
+process sort_bam {
+  tag "$file_id"
+
+  input:
+    set file_id, file(bam) from sorted_bam_files
+
+  output:
+    set file_id, "*_htseq.bam" into sorted_bam_files_2
+
+  script:
+"""
+samtools sort -@ ${task.cpus} -n -O BAM -o ${file_id}_htseq.bam ${bam[0]}
+"""
+}
+
+params.gtf = "$baseDir/data/annotation/*.gtf"
+log.info "gtf files : ${params.gtf}"
+
+Channel
+  .fromPath( params.gtf )
+  .ifEmpty { error "Cannot find any gtf file matching: ${params.gtf}" }
+  .set { gtf_file }
+
+process counting {
+  tag "$file_id"
+  publishDir "${params.output}/04_HTseq/", mode: 'copy'
+
+  input:
+  set file_id, file(bam) from sorted_bam_files_2
+  file gtf from gtf_file.toList()
+
+  output:
+  file "*.count" into count_files
+
+  script:
+"""
+htseq-count ${bam[0]} ${gtf} \
+            --mode=intersection-nonempty \
+            -a 10 \
+            -s yes \
+            -t CDS \
+            -i gene_id \
+            -f bam \
+> ${file_id}_CDS.count
+
+htseq-count ${bam[0]} ${gtf} \
+            --mode=intersection-nonempty \
+            -a 10 \
+            -s yes \
+            -t exon \
+            -i gene_id \
+            -f bam \
+> ${file_id}_exon.count
+
+"""
+}