From c0c6a0f8ae505c66604a15431572dacab6391276 Mon Sep 17 00:00:00 2001
From: nservant <nservant@curie.fr>
Date: Thu, 4 Apr 2019 23:21:14 +0200
Subject: [PATCH] add --fastqSplit option

---
 conf/base.config   |   2 +-
 conf/hicpro.config |   1 +
 main.nf            | 115 +++++++++++++++++++++++++++++++++++----------
 3 files changed, 93 insertions(+), 25 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index ad6b710..b01cd5d 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -64,7 +64,7 @@ process {
     memory = { check_max( 6.GB * task.attempt, 'memory' ) }
     time = { check_max( 5.h * task.attempt, 'time' ) }
   }
-withName:run_iced {
+withName:run_ice {
     cpus = { check_max( 1, 'cpus' ) }
     memory = { check_max( 10.GB * task.attempt, 'memory' ) }
     time = { check_max( 5.h * task.attempt, 'time' ) }
diff --git a/conf/hicpro.config b/conf/hicpro.config
index 163b086..4e0835c 100644
--- a/conf/hicpro.config
+++ b/conf/hicpro.config
@@ -10,6 +10,7 @@
 params {
 
        // Alignment options
+       splitFastq = false
        bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder'
        bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder'
        min_mapq = 10       
diff --git a/main.nf b/main.nf
index f29a49c..00d6704 100644
--- a/main.nf
+++ b/main.nf
@@ -160,7 +160,13 @@ if (params.readPaths){
       .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0], a[1][0]), tuple(a[0], a[1][1])] }
 }
 
-raw_reads = raw_reads.concat( raw_reads_2 )
+if ( params.splitFastq ){
+   raw_reads_full = raw_reads.concat( raw_reads_2 )
+   raw_reads = raw_reads_full.splitFastq( by: params.splitFastq , file: true)
+ }else{
+   raw_reads = raw_reads.concat( raw_reads_2 )
+}
+
 
 // SPlit fastq files
 // https://www.nextflow.io/docs/latest/operator.html#splitfastq
@@ -192,9 +198,6 @@ else {
    exit 1, "No reference genome specified!"
 }
 
-//println (bwt2_dir)
-//println (bwt2_base)
-
 
 // Chromosome size
 
@@ -228,6 +231,9 @@ else {
 // Resolutions for contact maps
 map_res = Channel.from( params.bins_size.tokenize(',') )
 
+// Stage config files
+ch_multiqc_config = Channel.fromPath(params.multiqc_config)
+ch_output_docs = Channel.fromPath("$baseDir/docs/output.md")
 
 /**********************************************************
  * SET UP LOGS
@@ -320,8 +326,8 @@ process get_software_versions {
 if(!params.bwt2_index && params.fasta){
     process makeBowtie2Index {
         tag "$bwt2_base"
-        //publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir },
-        //           saveAs: { params.saveReference ? it : null }, mode: 'copy'
+        publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir },
+                   saveAs: { params.saveReference ? it : null }, mode: 'copy'
 
         input:
         file fasta from fasta_for_index
@@ -343,8 +349,8 @@ if(!params.bwt2_index && params.fasta){
 if(!params.chromosome_size && params.fasta){
     process makeChromSize {
         tag "$fasta"
-        //publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir },
-        //           saveAs: { params.saveReference ? it : null }, mode: 'copy'
+        publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir },
+                   saveAs: { params.saveReference ? it : null }, mode: 'copy'
 
         input:
         file fasta from fasta_for_chromsize
@@ -363,8 +369,8 @@ if(!params.chromosome_size && params.fasta){
 if(!params.restriction_fragments && params.fasta){
     process makeRestrictionFragments {
         tag "$fasta - ${params.restriction_site}"
-        //publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir },
-        //           saveAs: { params.saveReference ? it : null }, mode: 'copy'
+        publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir },
+                   saveAs: { params.saveReference ? it : null }, mode: 'copy'
 
         input:
         file fasta from fasta_for_resfrag
@@ -389,6 +395,9 @@ if(!params.restriction_fragments && params.fasta){
 
 process bowtie2_end_to_end {
    tag "$prefix"
+   publishDir path: { params.saveAlignedIntermediates ? "${params.outdir}/mapping" : params.outdir },
+   	      saveAs: { params.saveAlignedIntermediates ? it : null }, mode: 'copy'
+
    input:
         set val(sample), file(reads) from raw_reads
         file index from bwt2_index_end2end.collect()
@@ -412,6 +421,9 @@ process bowtie2_end_to_end {
 
 process trim_reads {
    tag "$prefix"
+   publishDir path: { params.saveAlignedIntermediates ? "${params.outdir}/mapping" : params.outdir },
+   	      saveAs: { params.saveAlignedIntermediates ? it : null }, mode: 'copy'
+
    input:
       set val(prefix), file(reads) from unmapped_end_to_end
 
@@ -428,6 +440,9 @@ process trim_reads {
 
 process bowtie2_on_trimmed_reads {
    tag "$prefix"
+   publishDir path: { params.saveAlignedIntermediates ? "${params.outdir}/mapping" : params.outdir },
+   	      saveAs: { params.saveAlignedIntermediates ? it : null }, mode: 'copy'
+
    input:
       set val(prefix), file(reads) from trimmed_reads
       file index from bwt2_index_trim.collect()
@@ -447,7 +462,10 @@ process bowtie2_on_trimmed_reads {
 }
 
 process merge_mapping_steps{
-   tag "$bam1 + $bam2"
+   tag "$sample = $bam1 + $bam2"
+   publishDir path: { params.saveAlignedIntermediates ? "${params.outdir}/mapping" : params.outdir },
+   	      saveAs: { params.saveAlignedIntermediates ? it : null }, mode: 'copy'
+
    input:
       set val(prefix), file(bam1), file(bam2) from end_to_end_bam.join( trimmed_bam )
 
@@ -455,7 +473,7 @@ process merge_mapping_steps{
       set val(sample), file("${prefix}_bwt2merged.bam") into bwt2_merged_bam
 
    script:
-      sample = prefix.toString() - ~/(_R1)?(_R2)?(_val_1)?(_val_2)?$/
+      sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2)/
       """
       samtools merge -@ ${task.cpus} \\
        	             -f ${prefix}_bwt2merged.bam \\
@@ -472,11 +490,15 @@ process merge_mapping_steps{
 
 process combine_mapped_files{
    tag "$sample = $r1_prefix + $r2_prefix"
+   publishDir "${params.outdir}/mapping", mode: 'copy',
+   	      saveAs: {filename -> filename.indexOf(".pairstat") > 0 ? "stats/$filename" : "$filename"}	      
+
    input:
       set val(sample), file(aligned_bam) from bwt2_merged_bam.groupTuple()
 
    output:
       set val(sample), file("${sample}_bwt2pairs.bam") into paired_bam
+      file "*.pairstat" into combine_mapping_results
 
    script:
       r1_bam = aligned_bam[0]
@@ -484,7 +506,7 @@ process combine_mapped_files{
       r2_bam = aligned_bam[1]
       r2_prefix = r2_bam.toString() - ~/_bwt2merged.bam$/
       
-      def opts = ""
+      def opts = "-t"
       opts = params.rm_singleton ? "${opts}" : "--single ${opts}"
       opts = params.rm_multi ? "${opts}" : "--multi ${opts}"
       """
@@ -500,15 +522,24 @@ process combine_mapped_files{
 
 process get_valid_interaction{
    tag "$sample"
+   publishDir "${params.outdir}/hic_results/data", mode: 'copy',
+   	      saveAs: {filename -> filename.indexOf("*stat") > 0 ? "stats/$filename" : "$filename"}	      
+
    input:
       set val(sample), file(pe_bam) from paired_bam
-      val frag_file from res_frag_file
+      file frag_file from res_frag_file.collect()
 
    output:
       set val(sample), file("*.validPairs") into valid_pairs
       set val(sample), file("*.validPairs") into valid_pairs_4cool
+      file "*stat" into valid_interaction_results
 
    script:
+	
+      if (params.splitFastq){
+      	 sample = sample.toString() - ~/(\.[0-9]+)$/
+      }
+
       def opts = ""
       if (params.min_cis_dist != "") opts="${opts} -d ${params.min_cis_dist}"
       if (params.min_insert_size != "") opts="${opts} -s ${params.min_insert_size}"
@@ -525,14 +556,39 @@ process get_valid_interaction{
  * STEP3 - BUILD MATRIX
 */
 
+if ( params.splitFastq ){
+   process merge_sample {
+      tag "$sample"
+      publishDir "${params.outdir}/hic_results/data", mode: 'copy'
+
+      input:
+	set val(sample), file(vpairs) from valid_pairs.groupTuple()
+	
+      output:
+            set val(sample), file("*.allValidPairs") into all_valid_pairs
+	    set val(sample), file("*.allValidPairs") into all_valid_pairs_4cool
+      	    
+      script:
+      """
+      cat $vpairs > test.allValidPairs
+      """
+   }
+}else{
+   all_valid_pairs = valid_pairs
+   all_valid_pairs_4cool = valid_pairs	
+}
+
 process build_contact_maps{
    tag "$sample - $mres"
+   publishDir "${params.outdir}/hic_results/maps/iced", mode: 'copy'
+
    input:
-      set val(sample), file(vpairs), val(mres) from valid_pairs.combine(map_res)
-      val chrsize from chromosome_size
+      set val(sample), file(vpairs), val(mres) from all_valid_pairs.combine(map_res)
+      file chrsize from chromosome_size.collect()
 
    output:
       file("*.matrix") into raw_maps
+      file "*.bed"
 
    script:
    """
@@ -544,10 +600,13 @@ process build_contact_maps{
  * STEP 4 - NORMALIZE MATRIX
 */
 
-process run_iced{
+process run_ice{
    tag "$rmaps"
+   publishDir "${params.outdir}/hic_results/matrix/iced", mode: 'copy'
+
    input:
       file(rmaps) from raw_maps
+      file "*.biases"
 
    output:
       file("*iced.matrix") into iced_maps
@@ -568,8 +627,10 @@ process run_iced{
 
 process generate_cool{
    tag "$sample"
+   publishDir "${params.outdir}/export/cool", mode: 'copy'
+
    input:
-      set val(sample), file(vpairs) from valid_pairs_4cool
+      set val(sample), file(vpairs) from all_valid_pairs_4cool
       val chrsize from chromosome_size
 
    output:
@@ -583,15 +644,15 @@ process generate_cool{
 */
 
 /*
- // STEP 5 - MultiQC
+ * STEP 5 - MultiQC
 
 process multiqc {
     publishDir "${params.outdir}/MultiQC", mode: 'copy'
 
     input:
     file multiqc_config from ch_multiqc_config
-    // TODO nf-core: Add in log files from your new processes for MultiQC to find!
-    file ('fastqc/*') from fastqc_results.collect().ifEmpty([])
+    file ('mapping/stats/*') from combine_mapping_results.collect()
+    file ('hic_results/data/stats/*') from valid_interaction_results.collect()
     file ('software_versions/*') from software_versions_yaml
     file workflow_summary from create_workflow_summary(summary)
 
@@ -602,14 +663,19 @@ process multiqc {
     script:
     rtitle = custom_runName ? "--title \"$custom_runName\"" : ''
     rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : ''
-    // TODO nf-core: Specify which MultiQC modules to use with -m for a faster run time
     """
     multiqc -f $rtitle $rfilename --config $multiqc_config .
     """
 }
+*/
 
+/****************************************************
+ * POST-PROCESSING
+ */
 
-// STEP 3 - Output Description HTML
+/* 
+ * Output Description HTML
+ */
 
 process output_documentation {
     publishDir "${params.outdir}/Documentation", mode: 'copy'
@@ -625,12 +691,13 @@ process output_documentation {
     markdown_to_html.r $output_docs results_description.html
     """
 }
-*/
+
 
 
 /*
  * Completion e-mail notification
  */
+
 workflow.onComplete {
 
     // Set up the e-mail variables
-- 
GitLab