From 572989bb48dfd98c488d53830372187772b14811 Mon Sep 17 00:00:00 2001
From: nservant <nservant@curie.fr>
Date: Thu, 4 Apr 2019 12:11:32 +0200
Subject: [PATCH] creates annotation files on-the-fly

---
 .travis.yml        |  1 -
 conf/base.config   | 19 +++++++----
 conf/hicpro.config |  2 +-
 main.nf            | 82 +++++++++++++++++++++++++---------------------
 nextflow.config    |  7 +++-
 5 files changed, 64 insertions(+), 47 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 43a6766..966de4b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -28,7 +28,6 @@ install:
 
 env:
   - NXF_VER='0.32.0' # Specify a minimum NF version that should be tested and work
-  - NXF_VER='' # Plus: get the latest NF version and check that it works
 
 script:
   # Lint the pipeline code
diff --git a/conf/base.config b/conf/base.config
index 11bc185..ad6b710 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -22,19 +22,26 @@ process {
   maxErrors = '-1'
 
   // Process-specific resource requirements
+  // Index building (main.nf runs makeBowtie2Index only when no Bowtie2 index is supplied but a FASTA is)
+  withName:makeBowtie2Index {
+    cpus = { check_max( 1, 'cpus' ) }
+    memory = { check_max( 10.GB * task.attempt, 'memory' ) }
+    time = { check_max( 12.h * task.attempt, 'time' ) }
+  }
+
   withName:bowtie2_end_to_end {
     cpus = { check_max( 2, 'cpus' ) }
-    memory = { check_max( 16.GB * task.attempt, 'memory' ) }
+    memory = { check_max( 4.GB * task.attempt, 'memory' ) }
     time = { check_max( 5.h * task.attempt, 'time' ) }
   }
   withName:bowtie2_on_trimmed_reads {
     cpus = { check_max( 2, 'cpus' ) }
-    memory = { check_max( 16.GB * task.attempt, 'memory' ) }
+    memory = { check_max( 4.GB * task.attempt, 'memory' ) }
     time = { check_max( 5.h * task.attempt, 'time' ) }
   }
   withName:merge_mapping_steps {
     cpus = { check_max( 4, 'cpus' ) }
-    memory = { check_max( 20.GB * task.attempt, 'memory' ) }
+    memory = { check_max( 8.GB * task.attempt, 'memory' ) }
     time = { check_max( 5.h * task.attempt, 'time' ) }
   }
   withName:trim_reads {
@@ -59,15 +66,15 @@ process {
   }
 withName:run_iced {
     cpus = { check_max( 1, 'cpus' ) }
-    memory = { check_max( 20.GB * task.attempt, 'memory' ) }
+    memory = { check_max( 10.GB * task.attempt, 'memory' ) }
     time = { check_max( 5.h * task.attempt, 'time' ) }
   }
 }
 
 params {
   // Defaults only, expecting to be overwritten
-  max_memory = 20.GB
-  max_cpus = 1
+  max_memory = 8.GB
+  max_cpus = 2
   max_time = 24.h
   igenomes_base = 's3://ngi-igenomes/igenomes/'
 }
diff --git a/conf/hicpro.config b/conf/hicpro.config
index 63b1019..163b086 100644
--- a/conf/hicpro.config
+++ b/conf/hicpro.config
@@ -18,7 +18,7 @@ params {
        restriction_site = 'A^AGGCT'
        ligation_site = 'AAGCTAGCTT'
        min_restriction_fragment_size = 0
-       max_restriction_fragment_size = 100
+       max_restriction_fragment_size = 1000
        min_insert_size = 0
        max_insert_size = 500
 
diff --git a/main.nf b/main.nf
index 1e9ed2f..ba99843 100644
--- a/main.nf
+++ b/main.nf
@@ -104,6 +104,8 @@ if (params.genomes && params.genome && !params.genomes.containsKey(params.genome
 // Define these here - after the profiles are loaded with the iGenomes paths
 params.bwt2_index = params.genome ? params.genomes[ params.genome ].bowtie2 ?: false : false 
 params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
+
+
 //params.chromosome_size = false
 //params.restriction_fragments = false
 
@@ -139,11 +141,6 @@ ch_output_docs = Channel.fromPath("$baseDir/docs/output.md")
  */
 
 if (params.readPaths){
-   Channel
-      .from( params.readPaths )
-      .map { row -> [ row[0], [file(row[1][0]), file(row[1][1])]] }
-      .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" }
-      .set { raw_reads_pairs }
 
    raw_reads = Channel.create()
    raw_reads_2 = Channel.create()
@@ -152,11 +149,8 @@ if (params.readPaths){
       .from( params.readPaths )
       .map { row -> [ row[0], [file(row[1][0]), file(row[1][1])]] }
       .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0], a[1][0]), tuple(a[0], a[1][1])] }
+      // raw_reads receives [sample, R1 file] and raw_reads_2 receives [sample, R2 file]
 }else{
-   Channel
-      .fromFilePairs( params.reads )
-      .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" }
-      .set { raw_reads_pairs }
 
    raw_reads = Channel.create()
    raw_reads_2 = Channel.create()
@@ -166,6 +160,8 @@ if (params.readPaths){
       .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0], a[1][0]), tuple(a[0], a[1][1])] }
 }
 
+raw_reads = raw_reads.concat( raw_reads_2 )
+
 // SPlit fastq files
 // https://www.nextflow.io/docs/latest/operator.html#splitfastq
 
@@ -174,21 +170,21 @@ if (params.readPaths){
  */
 
 // Reference genome
-
 if ( params.bwt2_index ){
    lastPath = params.bwt2_index.lastIndexOf(File.separator)
    bwt2_dir =  params.bwt2_index.substring(0,lastPath+1)
    bwt2_base = params.bwt2_index.substring(lastPath+1)
 
-   Channel.fromPath( bwt2_dir, checkIfExists: true )
+   Channel.fromPath( bwt2_dir , checkIfExists: true)
       .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwt2_index}" }
-      .into { bwt2_index_end2end; bwt2_index_trim } 
+      .into { bwt2_index_end2end; bwt2_index_trim }
+
 }
 else if ( params.fasta ) {
     lastPath = params.fasta.lastIndexOf(File.separator)
     bwt2_base = params.fasta.substring(lastPath+1)
 
-   Channel.fromPath( params.fasta, checkIfExists: true )
+   Channel.fromPath( params.fasta )
 	.ifEmpty { exit 1, "Genome index: Fasta file not found: ${params.fasta}" }
         .set { fasta_for_index }
 }
@@ -196,14 +192,18 @@ else {
    exit 1, "No reference genome specified!"
 }
 
+// At this point the reference is available in one of two forms:
+//  - bwt2_index_end2end / bwt2_index_trim : directory of a pre-built Bowtie2 index
+//  - fasta_for_index                      : FASTA from which makeBowtie2Index builds the index
+
 // Chromosome size
 
 if ( params.chromosome_size ){
-   Channel.FromPath( params.chromosome_size, checkIfExists: true )
-      .set {chromosome_size}
+   Channel.fromPath( params.chromosome_size, checkIfExists: true )
+      .set {chromosome_size}
 }
 else if ( params.fasta ){
-   Channel.fromPath( params.fasta, checkIfExists: true )
+   Channel.fromPath( params.fasta )
 	.ifEmpty { exit 1, "Chromosome sizes: Fasta file not found: ${params.fasta}" }
        	.set { fasta_for_chromsize }
 }
@@ -213,11 +213,11 @@ else {
 
 // Restriction fragments
 if ( params.restriction_fragments ){
-   Channel.FromPath( params.restriction_fragments, checkIfExists: true )
+   Channel.fromPath( params.restriction_fragments, checkIfExists: true )
       .set {res_frag_file}
 }
 else if ( params.fasta && params.restriction_site ){
-   Channel.fromPath(params.fasta, checkIfExists: true)
+   Channel.fromPath( params.fasta )
            .ifEmpty { exit 1, "Restriction fragments: Fasta file not found: ${params.fasta}" }
            .set { fasta_for_resfrag }
 }
@@ -247,10 +247,11 @@ def summary = [:]
 summary['Pipeline Name']  = 'nf-core/hic'
 summary['Pipeline Version'] = workflow.manifest.version
 summary['Run Name']     = custom_runName ?: workflow.runName
-// TODO nf-core: Report custom parameters here
+
 summary['Reads']        = params.reads
 summary['Fasta Ref']    = params.fasta
 
+
 summary['Max Memory']   = params.max_memory
 summary['Max CPUs']     = params.max_cpus
 summary['Max Time']     = params.max_time
@@ -302,13 +303,13 @@ process get_software_versions {
 
     script:
      """
-    echo $workflow.manifest.version > v_pipeline.txt
-    echo $workflow.nextflow.version > v_nextflow.txt
-    bowtie2 --version > v_bowtie2.txt
-    python --version > v_python.txt
-    samtools --version > v_samtools.txt
-    scrape_software_versions.py > software_versions_mqc.yaml
-    """
+     echo $workflow.manifest.version > v_pipeline.txt
+     echo $workflow.nextflow.version > v_nextflow.txt
+     bowtie2 --version > v_bowtie2.txt
+     python --version > v_python.txt
+     samtools --version > v_samtools.txt
+     scrape_software_versions.py > software_versions_mqc.yaml
+     """
 }
 
 
@@ -317,8 +318,8 @@ process get_software_versions {
  */
 
 if(!params.bwt2_index && params.fasta){
-    process makeBowtieIndex {
-        tag "$fasta"
+    process makeBowtie2Index {
+        tag "$bwt2_base"
         //publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir },
         //           saveAs: { params.saveReference ? it : null }, mode: 'copy'
 
@@ -326,12 +327,14 @@ if(!params.bwt2_index && params.fasta){
         file fasta from fasta_for_index
 
         output:
-        file "bowtie2" into bwt2_index
+        file "bowtie2_index" into bwt2_index_end2end   // consumed by bowtie2_end_to_end
+        file "bowtie2_index" into bwt2_index_trim      // consumed by bowtie2_on_trimmed_reads
 
         script:
+        bwt2_base = fasta.toString() - ~/(\.fa)?(\.fasta)?(\.fas)?$/
         """
-        mkdir bwt2_index
-	
+        mkdir bowtie2_index
+        bowtie2-build ${fasta} bowtie2_index/${bwt2_base}
 	"""
       }
  }
@@ -351,7 +354,8 @@ if(!params.chromosome_size && params.fasta){
 
         script:
         """
-	samtools faidx ${fasta} | cut -f1,2 > chrom.size
+	samtools faidx ${fasta}
+	cut -f1,2 ${fasta}.fai > chrom.size
    	"""	
       }
  }
@@ -383,13 +387,11 @@ if(!params.restriction_fragments && params.fasta){
  * STEP 1 - Two-steps Reads Mapping
 */
 
-raw_reads = raw_reads.concat( raw_reads_2 )
-
 process bowtie2_end_to_end {
    tag "$prefix"
    input:
         set val(sample), file(reads) from raw_reads
-        file index from bwt2_index_end2end
+        file index from bwt2_index_end2end.collect()
  
    output:
 	set val(prefix), file("${prefix}_unmap.fastq") into unmapped_end_to_end
@@ -428,7 +430,7 @@ process bowtie2_on_trimmed_reads {
    tag "$prefix"
    input:
       set val(prefix), file(reads) from trimmed_reads
-      file index from bwt2_index_trim
+      file index from bwt2_index_trim.collect()
 
    output:
       set val(prefix), file("${prefix}_trimmed.bam") into trimmed_bam
@@ -468,7 +470,6 @@ process merge_mapping_steps{
       """
 }
 
-
 process combine_mapped_files{
    tag "$sample = $r1_prefix + $r2_prefix"
    input:
@@ -491,10 +492,12 @@ process combine_mapped_files{
       """
 }
 
+
 /*
  * STEP2 - DETECT VALID PAIRS
 */
 
+
 process get_valid_interaction{
    tag "$sample"
    input:
@@ -523,6 +526,7 @@ process get_valid_interaction{
  * STEP3 - BUILD MATRIX
 */
 
+/*
 process build_contact_maps{
    tag "$sample - $mres"
    input:
@@ -537,11 +541,13 @@ process build_contact_maps{
    build_matrix --matrix-format upper  --binsize ${mres} --chrsizes ${chrsize} --ifile ${vpairs} --oprefix ${sample}_${mres}
    """
 }
+*/
 
 /*
  * STEP 4 - NORMALIZE MATRIX
 */
 
+/*
 process run_iced{
    tag "$rmaps"
    input:
@@ -559,11 +565,11 @@ process run_iced{
    --max_iter ${params.ice_max_iter} --eps ${params.ice_eps} --remove-all-zeros-loci --output-bias 1 --verbose 1 ${rmaps}
    """ 
 }
+*/
 
 /*
  * STEP 5 - COOLER FILE
 
-
 process generate_cool{
    tag "$sample"
    input:
diff --git a/nextflow.config b/nextflow.config
index fe584c3..0075f7f 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -20,6 +20,11 @@ params {
   // TODO nf-core: Specify your pipeline's command line flags
   reads = "*{1,2}.fastq.gz"
   outdir = './results'
+  // Optional inputs: `false` means "not supplied"; main.nf tests each of these for truthiness
+  genome = false
+  readPaths = false
+  chromosome_size = false
+  restriction_fragments = false
 
   // Boilerplate options
   name = false
@@ -27,7 +32,7 @@ params {
   email = false
   plaintext_email = false
   help = false
-  //igenomes_base = "./iGenomes"
+  igenomes_base = "./iGenomes" // NOTE(review): conf/base.config also defaults igenomes_base (s3://ngi-igenomes/igenomes/) - confirm which one should win
   tracedir = "${params.outdir}/pipeline_info"
   clusterOptions = false
   awsqueue = false
-- 
GitLab