RiboProf.nf & RNAseq.nf: harmonisation and generalisation of terms

bb620eb7 · elabaron · 38a453b6 · bb620eb7 · bb620eb7
Commit bb620eb7 authored Mar 27, 2020 by elabaron
--- a/src/RNAseq.nf
+++ b/src/RNAseq.nf
@@ -2,23 +2,24 @@
 *	RNAseq Analysis pipeline
 */

-params.input = "data/demultiplexed/*{_R1,_R2}.fastq.gz"
+params.fastq_raw = "data/demultiplexed/*{_R1,_R2}.fastq.gz"
+params.output = "results"

 Channel
-   .fromFilePairs(params.input)
-   .ifEmpty { error "Cannot find any file matching: ${params.input}" }
-   .set {input_channel}
+   .fromFilePairs(params.fastq_raw)
+   .ifEmpty { error "Cannot find any file matching: ${params.fastq_raw}" }
+   .set {fastq_raw_channel}

 /* Trimming by quality */

 process trimming {
  tag "$file_id"
  cpus 4
-  publishDir "results/RNAseq/01_cutadapt/", mode: 'copy'
+  publishDir "${params.output}/01_cutadapt/", mode: 'copy'
  echo true

  input:
-  set file_id, file(reads) from input_channel
+  set file_id, file(reads) from fastq_raw_channel

  output:
  set file_id, "*cut_{R1,R2}.fastq.gz" into fastq_files_cut
@@ -51,7 +52,7 @@ Channel
 process rRNA_removal {
  tag "$file_id"
  cpus 8
-  publishDir "results/RNAseq/02_rRNA_depletion/", mode: 'copy'
+  publishDir "${params.output}/02_rRNA_depletion/", mode: 'copy'

  input:
  set file_id, file(reads) from fastq_files_cut
@@ -62,8 +63,14 @@ process rRNA_removal {
  file "*.txt" into bowtie_report

  script:
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+    }
+  }
 """
-bowtie2 --sensitive -p ${task.cpus} -x human_rRNA_tRNA \
+bowtie2 --sensitive -p ${task.cpus} -x ${index_id} \
 -1 ${reads[0]} -2 ${reads[1]} --un-conc-gz ${file_id}_R%.fastq.gz 2> \
 ${file_id}_bowtie2_report.txt > /dev/null

@@ -82,7 +89,7 @@ log.info "index : ${params.index_hg38}"

 Channel
  .fromPath ( params.index_hg38 )
-  .ifEmpty { error "Cannot find any hg38 index files matching: ${params.index_hg38}" }
+  .ifEmpty { error "Cannot find any index files matching: ${params.index_hg38}" }
  .set { index_file_hg38 }

 process hisat2_human {
@@ -98,10 +105,16 @@ process hisat2_human {
    file "*.txt" into hisat_report

  script:
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
+    }
+  }
 """
-hisat2 -x genome_tran -p ${task.cpus} \
+hisat2 -x ${index_id} -p ${task.cpus} \
 -1 ${fastq_filtred[0]} -2 ${fastq_filtred[1]} \
---un-conc-gz ${file_id}_notaligned_hg38_R%.fastq.gz \
+--un-conc-gz ${file_id}_notaligned_R%.fastq.gz \
 --rna-strandness 'F' \
 2> ${file_id}_hisat2_hg38.txt | samtools view -bS -F 4 -o ${file_id}.bam

@@ -115,7 +128,7 @@ reads_aligned_hg38.into{for_mapping;for_htseq}

 process index_bam {
  tag "$file_id"
-  publishDir "results/RNAseq/03_hisat2_hg38/", mode: 'copy'
+  publishDir "${params.output}/03_hisat2/", mode: 'copy'

  input:
    set file_id, file(bam) from for_mapping
@@ -203,7 +216,7 @@ Channel

 process counting {
  tag "$file_id"
-  publishDir "results/RNAseq/04_HTseq/", mode: 'copy'
+  publishDir "${params.output}/04_HTseq/", mode: 'copy'

  input:
  set file_id, file(bam) from sorted_bam_files_2

--- a/src/RibosomeProfiling.nf
+++ b/src/RibosomeProfiling.nf
@@ -55,8 +55,14 @@ process rRNA_removal {
  file "*.txt" into bowtie_report

  script:
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+    }
+  }
 """
-zcat ${reads} | bowtie2 --sensitive -p ${task.cpus} -x human_rRNA_tRNA \
+zcat ${reads} | bowtie2 --sensitive -p ${task.cpus} -x ${index_id} \
 -U - --un-gz ${file_id}_mRNA.fastq.gz 2> \
 ${file_id}_bowtie2_report.txt > /dev/null

@@ -75,7 +81,7 @@ log.info "index : ${params.index_hg38}"

 Channel
  .fromPath ( params.index_hg38 )
-  .ifEmpty { error "Cannot find any hg38 index files matching: ${params.index_hg38}" }
+  .ifEmpty { error "Cannot find any index files matching: ${params.index_hg38}" }
  .set { index_file_hg38 }

 process hisat2_human {
@@ -92,9 +98,15 @@ process hisat2_human {
    file "*.txt" into hisat_report

  script:
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
+    }
+  }
 """
-hisat2 -x genome_tran -p ${task.cpus} \
--U ${fastq_filtred} --un-gz ${file_id}_notaligned_hg38.fastq.gz \
+hisat2 -x ${index_id} -p ${task.cpus} \
+-U ${fastq_filtred} --un-gz ${file_id}_notaligned.fastq.gz \
 --end-to-end  --rna-strandness 'F' \
 2> ${file_id}_hisat2_hg38.txt | samtools view -bS -F 4 -o ${file_id}.bam

@@ -105,7 +117,7 @@ hisat2 -x genome_tran -p ${task.cpus} \

 process index_bam {
  tag "$file_id"
-  publishDir "${params.output}/03_hisat2_hg38/", mode: 'copy'
+  publishDir "${params.output}/03_hisat2/", mode: 'copy'

  input:
    set file_id, file(bam) from reads_aligned_hg38
@@ -122,7 +134,7 @@ samtools index ${file_id}_sorted.bam

 sorted_bam_files.into{for_dedup;for_htseq}

-/*                   deduplicating reads                            */
+/*                   deduplicating reads

 params.dedup_options = ""

@@ -142,11 +154,11 @@ umi_tools dedup -I ${bam[0]} \
                ${params.dedup_options} \
                -S ${file_id}_dedup.bam > report.txt
 """
-}
-
+}*/
+/*
 process sort_bam {
  tag "$file_id"
-  publishDir "${params.output}/03_hisat2_hg38_dedup/", mode: 'copy'
+  publishDir "${params.output}/03_hisat2_dedup/", mode: 'copy'

  input:
    set file_id, file(bam) from dedup_bam
@@ -163,7 +175,7 @@ samtools index ${file_id}_sorted.bam
 cat ${dedup} > ${file_id}_dedup_report.txt
 """
 }
-
+*/
 /*                   HTseq                            */

 params.gtf = "$baseDir/data/annotation/*.gtf"