RNAseq.nf

/*
*	RNAseq Analysis pipeline
*/

params.input = "data/demultiplexed/*{_R1,_R2}.fastq.gz"

Channel
   .fromFilePairs(params.input)
   .ifEmpty { error "Cannot find any file matching: ${params.input}" }
   .set {input_channel}

/* Trimming by quality */

process trimming {
  tag "$file_id"
  cpus 4
  publishDir "results/RNAseq/01_cutadapt/", mode: 'copy'
  echo true

  input:
  set file_id, file(reads) from input_channel

  output:
  set file_id, "*cut_{R1,R2}.fastq.gz" into fastq_files_cut
  file "*.txt" into rapport_UrQt

  script:
  """
  cutadapt -a AGATCGGAAGAGC -A AGATCGGAAGAGC \
  -o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_tmp_R2.fastq.gz \
  ${reads[0]} ${reads[1]} > ${pair_id}_report.txt

  cutadapt -u -14 -o ${pair_id}_cut_R2.fastq.gz ${pair_id}_tmp_R2.fastq.gz \
  > ${pair_id}_cut_report.txt
  """
}


/* rRNA and tRNA filtering */

params.indexrRNA = "/Xnfs/lbmcdb/Ricci_team/shared_data/genomes/human_rRNA_tRNA/*.bt2"

log.info "index files : ${params.indexrRNA}"

Channel
  .fromPath( params.indexrRNA )
  .ifEmpty { error "Cannot find any index files matching: ${params.indexrRNA}" }
  .set { rRNA_index_files }

process rRNA_removal {
  tag "$file_id"
  cpus 8
  publishDir "results/RNAseq/U937/02_rRNA_depletion/", mode: 'copy'

  input:
  set file_id, file(reads) from fastq_files_cut
  file index from rRNA_index_files.toList()

  output:
  set file_id, "*.fastq.gz" into rRNA_removed_files
  file "*.txt" into bowtie_report

  script:
"""
bowtie2 --sensitive -p ${task.cpus} -x human_rRNA_tRNA \
-1 ${reads[0]} -2 ${reads[1]} --un-conc-gz ${file_id}_R%.fastq.gz 2> \
${file_id}_bowtie2_report.txt > /dev/null

if grep -q "Error " ${file_id}_bowtie2_report.txt; then
  exit 1
fi
"""
}

/*	mapping against human genome with hisat2 */

params.index_hg38 = "/media/adminmanu/Stockage/HISAT2_index_hg38_tran/*.ht2"

log.info "index : ${params.index_hg38}"


Channel
  .fromPath ( params.index_hg38 )
  .ifEmpty { error "Cannot find any hg38 index files matching: ${params.index_hg38}" }
  .set { index_file_hg38 }--paired

process hisat2_human {
  tag "$file_id"

  input:
    set file_id, file(fastq_filtred) from rRNA_removed_reads
    file index from index_file_hg38.toList()

  output:
    set file_id, "*.fastq.gz" into reads_non_aligned_hg38
    set file_id, "*.bam" into reads_aligned_hg38
    file "*.txt" into hisat_report

  script:
"""
hisat2 -x genome_tran -p ${task.cpus} \
-1 ${fastq_filtred[0]} -2 ${fastq_filtred[1]} \
--un-conc-gz ${file_id}_notaligned_hg38_R%.fastq.gz \
--rna-strandness 'F' \
2> ${file_id}_hisat2_hg38.txt | samtools view -bS -F 4 -o ${file_id}.bam

"""
}

/*                   sorting                             */

process index_bam {
  tag "$file_id"
  publishDir "${params.output}/03_hisat2_hg38/", mode: 'copy'

  input:
    set file_id, file(bam) from reads_aligned_hg38

  output:
    set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files

  script:
"""
samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam}
samtools index ${file_id}_sorted.bam
"""
}

sorted_bam_files.into{for_dedup;for_htseq}

/*                   deduplicating reads

params.dedup_options = "--paired"

process dedup {
  tag "$file_id"

  input:
  set file_id, file(bam) from for_dedup
  output:
  set file_id, "*dedup.bam" into dedup_bam
  file "*.txt" into dedup_report

  script:
"""
umi_tools dedup -I ${bam[0]} \
                ${params.dedup_options} \
                -S ${file_id}_dedup.bam > report.txt
"""
}

process sort_bam {
  tag "$file_id"
  publishDir "${params.output}/03_hisat2_hg38_dedup/", mode: 'copy'

  input:
    set file_id, file(bam) from dedup_bam
    file dedup from dedup_report

  output:
    set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files_2
    file "*.txt" into report_dedup

  script:
"""
samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam}
samtools index ${file_id}_sorted.bam
cat ${dedup} > ${file_id}_dedup_report.txt
"""
} */

/*                   HTseq                            */

params.gtf = "$baseDir/data/annotation/*.gtf"
log.info "gtf files : ${params.gtf}"

Channel
  .fromPath( params.gtf )
  .ifEmpty { error "Cannot find any gtf file matching: ${params.gtf}" }
  .set { gtf_file }

process counting {
  tag "$file_id"
  publishDir "${params.output}/04_HTseq/", mode: 'copy'

  input:
  set file_id, file(bam) from for_htseq
  file gtf from gtf_file.toList()

  output:
  file "*.count" into count_files

  script:
"""
htseq-count ${bam[0]} ${gtf} \
            --mode=intersection-nonempty \
            -a 10 \
            -s yes \
            -t CDS \
            -i gene_id \
            -r pos \
            -f bam \
> ${file_id}_CDS.count

htseq-count ${bam[0]} ${gtf} \
            --mode=intersection-nonempty \
            -a 10 \
            -s yes \
            -t exon \
            -i gene_id \
            -r pos \
            -f bam \
> ${file_id}_exon.count

"""
}