From 168039301f5995afa18711b4118f6678a8e4990d Mon Sep 17 00:00:00 2001
From: nservant <nicolas.servant@curie.fr>
Date: Tue, 2 Apr 2019 10:15:23 +0200
Subject: [PATCH] running version + first docker,conda,singularity env

---
 bin/mapped_2hic_fragments.py |   4 +-
 conf/base.config             |  24 ++++++-
 conf/hicpro.config           |  27 +++++++-
 conf/test.config             |   5 +-
 environment.yml              |  10 ++-
 main.nf                      | 121 +++++++++++++++++++++++++++++------
 nextflow.config              |   4 +-
 7 files changed, 163 insertions(+), 32 deletions(-)

diff --git a/bin/mapped_2hic_fragments.py b/bin/mapped_2hic_fragments.py
index 391a58b..efa32e6 100755
--- a/bin/mapped_2hic_fragments.py
+++ b/bin/mapped_2hic_fragments.py
@@ -501,9 +501,9 @@ if __name__ == "__main__":
             minInsertSize = arg
         elif opt in ("-l", "--longestInsertSize"):
             maxInsertSize = arg
-        elif opt in ("-t", "--shortestFragmentLength"):
+        elif opt in ("-t", "--shortestFragmentSize"):
             minFragSize = arg
-        elif opt in ("-m", "--longestFragmentLength"):
+        elif opt in ("-m", "--longestFragmentSize"):
             maxFragSize = arg
         elif opt in ("-d", "--minCisDist"):
             minDist = arg
diff --git a/conf/base.config b/conf/base.config
index a8413de..a550a4a 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -33,13 +33,33 @@ process {
     time = { check_max( 5.h * task.attempt, 'time' ) }
   }
   withName:merge_mapping_steps {
-    cpus = { check_max( 1, 'cpus' ) }
+    cpus = { check_max( 4, 'cpus' ) }
     memory = { check_max( 20.GB * task.attempt, 'memory' ) }
     time = { check_max( 5.h * task.attempt, 'time' ) }
   }
   withName:trim_reads {
     cpus = { check_max (1, 'cpus')}
-    memory = { check_max( 10.GB * task.attempt, 'memory' ) }
+    memory = { check_max( 1.GB * task.attempt, 'memory' ) }
+    time = { check_max( 5.h * task.attempt, 'time' ) }
+  }
+  withName:combine_mapped_files {
+    cpus = { check_max( 1, 'cpus' ) }
+    memory = { check_max( 4.GB * task.attempt, 'memory' ) }
+    time = { check_max( 5.h * task.attempt, 'time' ) }
+  }
+  withName:get_valid_interaction {
+    cpus = { check_max( 1, 'cpus' ) }
+    memory = { check_max( 4.GB * task.attempt, 'memory' ) }
+    time = { check_max( 5.h * task.attempt, 'time' ) }
+  }
+ withName:build_matrix {
+    cpus = { check_max( 1, 'cpus' ) }
+    memory = { check_max( 6.GB * task.attempt, 'memory' ) }
+    time = { check_max( 5.h * task.attempt, 'time' ) }
+  }
+withName:run_iced {
+    cpus = { check_max( 1, 'cpus' ) }
+    memory = { check_max( 20.GB * task.attempt, 'memory' ) }
     time = { check_max( 5.h * task.attempt, 'time' ) }
   }
 }
diff --git a/conf/hicpro.config b/conf/hicpro.config
index 3eafbd1..9ef8dc6 100644
--- a/conf/hicpro.config
+++ b/conf/hicpro.config
@@ -8,10 +8,35 @@
  */
 
 params {
+
+       // Alignment options
        bwt2_index = '/data/annotations/pipelines/Human/hg19/indexes/bowtie2/hg19'
        bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder'
        bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder'
-       restriction_fragment_bed = '/data/users/nservant/Apps/HiC-Pro_annotation/HindIII_resfrag_hg19.bed'
+       min_mapq = 10       
+
+       // Genome Reference
        chromosome_size = '/data/users/nservant/Apps/HiC-Pro_annotation/chrom_hg19.sizes'
+
+       // Digestion Hi-C
+       restriction_fragment_bed = '/data/users/nservant/Apps/HiC-Pro_annotation/HindIII_resfrag_hg19.bed'
+       ligation_site = 'AAGCTAGCTT'
+       min_restriction_fragment_size = 0
+       max_restriction_fragment_size = 100
+       min_insert_size = 0
+       max_insert_size = 500
+
+       // Hi-C Processing
+       min_cis_dist = 1000
+       rm_singleton = true
+       rm_multi = true
+       rm_dup = true
+
+       bins_size = '1000000,500000'
+
+       ice_max_iter = 100
+       ice_filer_low_count_perc = 0.02
+       ice_filer_high_count_perc =  0
+       ice_eps = 0.1
 }
 
diff --git a/conf/test.config b/conf/test.config
index e8a8e0e..4a8a3ad 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -13,8 +13,7 @@ params {
   max_memory = 6.GB
   max_time = 48.h
   // Input data
-  readPaths = [
-    ['Testdata2', ['/bioinfo/users/nservant/GIT/HiCPro/test-op/hSRR400264_00_R1.fastq.gz', '/bioinfo/users/nservant/GIT/HiCPro/test-op/hSRR400264_00_R1.fastq.gz']],
-    ['Testdata1', ['/bioinfo/users/nservant/GIT/HiCPro/test-op/hSRR400264_01_R1.fastq.gz', '/bioinfo/users/nservant/GIT/HiCPro/test-op/hSRR400264_01_R1.fastq.gz']],
+    ['SRR400264_00', ['https://github.com/nf-core/test-datasets/raw/hic/SRR400264_00_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/SRR400264_00_R1.fastq.gz']],
+    ['SRR400264_01', ['https://github.com/nf-core/test-datasets/raw/hic/SRR400264_01_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/SRR400264_01_R1.fastq.gz']],
   ]
 }
diff --git a/environment.yml b/environment.yml
index 42c74b4..542fb9b 100644
--- a/environment.yml
+++ b/environment.yml
@@ -4,6 +4,12 @@ channels:
   - bioconda
   - defaults
 dependencies:
-  # TODO nf-core: Add required software dependencies here
-  - fastqc=0.11.8
+  - python=2.7.11
+  - conda-forge::scipy=0.18.1
+  - conda-forge::numpy=1.12.1
+  - bcbio::bx-python=0.5.0
+  - bioconda::pysam=0.8.3
+  - cooler=0.8.3
+  - bowtie2=2.2.9
+  - samtools=1.9
   - multiqc=1.6
diff --git a/main.nf b/main.nf
index 49b3d61..6ce7933 100644
--- a/main.nf
+++ b/main.nf
@@ -10,8 +10,16 @@
 */
 
 
+/*TOOO
+- outputs
+- env
+- multiqc
+- update version tools
+- install + compile
+*/
+
+
 def helpMessage() {
-    // TODO nf-core: Add to this help message with new command line parameters
     log.info"""
     =======================================================
                                               ,--./,-.
@@ -39,10 +47,33 @@ def helpMessage() {
                                     Available: conda, docker, singularity, awsbatch, test and more.
 
     Options:
-
-
-    References                      If not specified in the configuration file or you wish to overwrite any of the references.
-      --fasta                       Path to Fasta reference
+      --bwt2_index		   Path to bowtie2 indexes (including indexes prefix)
+      --bwt2_opts_end2end	   Option for bowtie2 end-to-end mappinf (first mapping step)
+      --bwt2_opts_trimmed	   Option for bowtie2 mapping after ligation site trimming
+      --min_mapq		   Minimum mapping quality values to consider
+
+      --chromosome_size		   Path to chromosome size file
+      --restriction_fragment_bed   Path to restriction fragment file (bed)
+      --ligation-site		   Ligation motifs to trim (comma separated)
+
+      --min_restriction_fragment_size	    Minimum size of restriction fragments to consider
+      --max_restriction_framgnet_size	    Maximum size of restriction fragmants to consider
+      --min_insert_size			    Minimum insert size of mapped reads to consider
+      --max_insert_size			    Maximum insert size of mapped reads to consider
+      --min_cis_dist			    Minimum intra-chromosomal distance to consider
+      --rm_singleton			    Remove singleton reads
+      --rm_multi			    Remove multi-mapped reads
+      --rm_dup				    Remove duplicates
+
+      --bin_size			    Bin size for contact maps (comma separated)
+      --ice_max_iter			    Maximum number of iteration for ICE normalization
+      --ice_filter_low_count_perc	    Percentage of low counts columns/rows to filter before ICE normalization
+      --ice_filter_high_count_perc	    Percentage of high counts columns/rows to filter before ICE normalization
+      --ice_eps				    Convergence criteria for ICE normalization
+
+
+    //References                      If not specified in the configuration file or you wish to overwrite any of the references.
+    //  --fasta                       Path to Fasta reference
 
     Other options:
       --outdir                      The output directory where the results will be saved
@@ -129,11 +160,10 @@ bwt2_file = file("${params.bwt2_index}.1.bt2")
 if( !bwt2_file.exists() ) exit 1, "Reference genome Bowtie 2 not found: ${params.bwt2_index}"
 bwt2_index = Channel.value( "${params.bwt2_index}" )
 
-// Restriction fragment
-res_frag_file = Channel.value( "${params.restriction_fragment_bed}" )
 
-// Chromosome size
+res_frag_file = Channel.value( "${params.restriction_fragment_bed}" )
 chr_size = Channel.value( "${params.chromosome_size}" )
+map_res = Channel.from( params.bins_size.tokenize(',') )
 
 
 
@@ -157,8 +187,7 @@ summary['Pipeline Version'] = workflow.manifest.version
 summary['Run Name']     = custom_runName ?: workflow.runName
 // TODO nf-core: Report custom parameters here
 summary['Reads']        = params.reads
-summary['Fasta Ref']    = params.fasta
-//summary['Data Type']    = params.singleEnd ? 'Single-End' : 'Paired-End'
+//summary['Fasta Ref']    = params.fasta
 summary['Max Memory']   = params.max_memory
 summary['Max CPUs']     = params.max_cpus
 summary['Max Time']     = params.max_time
@@ -264,7 +293,7 @@ process trim_reads {
    script:
       """
       cutsite_trimming --fastq $reads \\
-       		       --cutsite  params.ligation_motifs \\
+       		       --cutsite  ${params.ligation_site} \\
                        --out ${prefix}_trimmed.fastq
       """
 }
@@ -280,10 +309,9 @@ process bowtie2_on_trimmed_reads {
 
    script:
       prefix = reads.toString() - ~/(_trimmed)?(\.fq)?(\.fastq)?(\.gz)?$/
-      def bwt2_opts = params.bwt2_opts_trimmed
       """
       bowtie2 --rg-id BMG --rg SM:${prefix} \\
-      	      ${bwt2_opts} \\
+      	      ${params.bwt2_opts_trimmed} \\
               -p ${task.cpus} \\
 	      -x ${bt2_index} \\
 	      -U ${reads} | samtools view -bS - > ${prefix}_trimmed.bam
@@ -328,8 +356,12 @@ process combine_mapped_files{
       r1_prefix = r1_bam.toString() - ~/_bwt2merged.bam$/
       r2_bam = aligned_bam[1]
       r2_prefix = r2_bam.toString() - ~/_bwt2merged.bam$/
+      
+      def opts = ""
+      opts = params.rm_singleton ? "${opts}" : "--single ${opts}"
+      opts = params.rm_multi ? "${opts}" : "--multi ${opts}"
       """
-      mergeSAM.py -f ${r1_bam} -r ${r2_bam} -o ${sample}_bwt2pairs.bam
+      mergeSAM.py -f ${r1_bam} -r ${r2_bam} -o ${sample}_bwt2pairs.bam -q ${params.min_mapq} ${opts}
       """
 }
 
@@ -345,10 +377,18 @@ process get_valid_interaction{
 
    output:
       set val(sample), file("*.validPairs") into valid_pairs
+      set val(sample), file("*.validPairs") into valid_pairs_4cool
 
    script:
+      def opts = ""
+      if (params.min_cis_dist != "") opts="${opts} -d ${params.min_cis_dist}"
+      if (params.min_insert_size != "") opts="${opts} -s ${params.min_insert_size}"
+      if (params.max_insert_size != "") opts="${opts} -l ${params.max_insert_size}"
+      if (params.min_restriction_fragment_size != "") opts="${opts} -t ${params.min_restriction_fragment_size}"
+      if (params.max_restriction_fragment_size != "") opts="${opts} -m ${params.max_restriction_fragment_size}"
+
       """
-      mapped_2hic_fragments.py -f ${frag_file} -r ${pe_bam}
+      mapped_2hic_fragments.py -f ${frag_file} -r ${pe_bam} ${opts}
       """
 }
 
@@ -358,24 +398,64 @@ process get_valid_interaction{
 */
 
 process build_contact_maps{
-   tag "$sample"
+   tag "$sample - $mres"
    input:
-      set val(sample), file(vpairs) from valid_pairs
+      set val(sample), file(vpairs), val(mres) from valid_pairs.combine(map_res)
       val chrsize from chr_size
 
    output:
-      set val(sample), file("*.matrix") into matrix_file
+      file("*.matrix") into raw_maps
 
    script:
    """
-   build_matrix --matrix-format upper  --binsize 1000000 --chrsizes ${chrsize} --ifile ${vpairs} --oprefix ${sample}_1000000
+   build_matrix --matrix-format upper  --binsize ${mres} --chrsizes ${chrsize} --ifile ${vpairs} --oprefix ${sample}_${mres}
    """
+}
+
+/*
+ * STEP 4 - NORMALIZE MATRIX
+*/
+
+process run_iced{
+   tag "$rmaps"
+   input:
+      file(rmaps) from raw_maps
+
+   output:
+      file("*iced.matrix") into iced_maps
 
+   script:
+   prefix = rmaps.toString() - ~/(\.matrix)?$/
+   """
+   ice --filter_low_counts_perc ${params.ice_filer_low_count_perc} \
+   --results_filename ${prefix}_iced.matrix \
+   --filter_high_counts_perc ${params.ice_filer_high_count_perc} \
+   --max_iter ${params.ice_max_iter} --eps ${params.ice_eps} --remove-all-zeros-loci --output-bias 1 --verbose 1 ${rmaps}
+   """ 
 }
 
+/*
+ * STEP 5 - COOLER FILE
+
+
+process generate_cool{
+   tag "$sample"
+   input:
+      set val(sample), file(vpairs) from valid_pairs_4cool
+      val chrsize from chr_size
+
+   output:
+      file("*mcool") into cool_maps
+
+   script:
+   """
+   hicpro2higlass.sh -i $vpairs -r 5000 -c ${chrsize} -n
+   """ 
+}
+*/
 
 /*
- // STEP 2 - MultiQC
+ // STEP 5 - MultiQC
 
 process multiqc {
     publishDir "${params.outdir}/MultiQC", mode: 'copy'
@@ -495,5 +575,4 @@ workflow.onComplete {
     output_tf.withWriter { w -> w << email_txt }
 
     log.info "[nf-core/hic] Pipeline Complete"
-
 }
diff --git a/nextflow.config b/nextflow.config
index 0e1bb10..7bf286a 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -17,7 +17,7 @@ params {
 
   // Workflow flags
   // TODO nf-core: Specify your pipeline's command line flags
-  reads = "data/*{1,2}.fastq.gz"
+  reads = "*{1,2}.fastq.gz"
   outdir = './results'
 
   // Boilerplate options
@@ -41,8 +41,10 @@ includeConfig 'conf/base.config'
 // Load nf-core custom profiles from different Institutions
 includeConfig "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}/nfcore_custom.config"
 
+// Load hic config file
 includeConfig 'conf/hicpro.config'
 
+// Create profiles
 profiles {
   awsbatch { includeConfig 'conf/awsbatch.config' }
   conda { process.conda = "$baseDir/environment.yml" }
-- 
GitLab