From 88f9c053ef11a5fadf5eb1e41c9bc5d3d50be942 Mon Sep 17 00:00:00 2001
From: nservant <nicolas.servant@curie.fr>
Date: Fri, 5 Apr 2019 19:25:45 +0200
Subject: [PATCH] add mapstat

---
 bin/mapping_stat.sh | 16 ++++++++++++++++
 conf/base.config    |  2 +-
 conf/hicpro.config  | 14 +++++++-------
 main.nf             | 31 ++++++++++++++++++-------------
 4 files changed, 42 insertions(+), 21 deletions(-)
 create mode 100755 bin/mapping_stat.sh

diff --git a/bin/mapping_stat.sh b/bin/mapping_stat.sh
new file mode 100755
index 0000000..774eac0
--- /dev/null
+++ b/bin/mapping_stat.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+bam1=$1
+bam2=$2
+merged=$3
+tag=$4
+  
+tot_reads=$(samtools view -c ${merged})
+map_reads=$(samtools view -c -F 4 ${merged})
+gmap_reads=$(samtools view -c -F 4 ${bam1})
+lmap_reads=$(samtools view -c -F 4 ${bam2})
+
+echo -e "total_${tag}\t$tot_reads" 
+echo -e "mapped_${tag}\t$map_reads"
+echo -e "global_${tag}\t$gmap_reads"
+echo -e "local_${tag}\t$lmap_reads" 
diff --git a/conf/base.config b/conf/base.config
index b01cd5d..98c3861 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -74,7 +74,7 @@ withName:run_ice {
 params {
   // Defaults only, expecting to be overwritten
   max_memory = 8.GB
-  max_cpus = 2
+  max_cpus = 4
   max_time = 24.h
   igenomes_base = 's3://ngi-igenomes/igenomes/'
 }
diff --git a/conf/hicpro.config b/conf/hicpro.config
index 4e0835c..d969960 100644
--- a/conf/hicpro.config
+++ b/conf/hicpro.config
@@ -13,18 +13,18 @@ params {
        splitFastq = false
        bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder'
        bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder'
-       min_mapq = 10       
+       min_mapq = 0       
 
        // Digestion Hi-C
-       restriction_site = 'A^AGGCT'
+       restriction_site = 'A^AGCTT'
        ligation_site = 'AAGCTAGCTT'
-       min_restriction_fragment_size = 0
-       max_restriction_fragment_size = 1000
-       min_insert_size = 0
-       max_insert_size = 500
+       min_restriction_fragment_size = 100
+       max_restriction_fragment_size = 100000
+       min_insert_size = 100
+       max_insert_size = 600
 
        // Hi-C Processing
-       min_cis_dist = 1000
+       min_cis_dist = 
        rm_singleton = true
        rm_multi = true
        rm_dup = true
diff --git a/main.nf b/main.nf
index 00d6704..705eba2 100644
--- a/main.nf
+++ b/main.nf
@@ -367,8 +367,8 @@ if(!params.chromosome_size && params.fasta){
  }
 
 if(!params.restriction_fragments && params.fasta){
-    process makeRestrictionFragments {
-        tag "$fasta - ${params.restriction_site}"
+    process getRestrictionFragments {
+        tag "$fasta [${params.restriction_site}]"
         publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir },
                    saveAs: { params.saveReference ? it : null }, mode: 'copy'
 
@@ -471,6 +471,7 @@ process merge_mapping_steps{
 
    output:
       set val(sample), file("${prefix}_bwt2merged.bam") into bwt2_merged_bam
+      set val(prefix), file("${prefix}.mapstat") into all_mapstat
 
    script:
       sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2)/
@@ -485,6 +486,9 @@ process merge_mapping_steps{
 	            ${prefix}_bwt2merged.bam
             
       mv ${prefix}_bwt2merged.sorted.bam ${prefix}_bwt2merged.bam
+      
+      if [[ "${prefix}" =~ _R1|_val_1 ]]; then mapping_stat.sh ${bam1} ${bam2} ${prefix}_bwt2merged.bam "R1" > ${prefix}.mapstat; fi
+      if [[ "${prefix}" =~ _R2|_val_2 ]]; then mapping_stat.sh ${bam1} ${bam2} ${prefix}_bwt2merged.bam "R2" > ${prefix}.mapstat; fi
       """
 }
 
@@ -498,7 +502,7 @@ process combine_mapped_files{
 
    output:
       set val(sample), file("${sample}_bwt2pairs.bam") into paired_bam
-      file "*.pairstat" into combine_mapping_results
+      file "*.pairstat" into all_pairstat
 
    script:
       r1_bam = aligned_bam[0]
@@ -509,8 +513,9 @@ process combine_mapped_files{
       def opts = "-t"
       opts = params.rm_singleton ? "${opts}" : "--single ${opts}"
       opts = params.rm_multi ? "${opts}" : "--multi ${opts}"
+      if ("$params.min_mapq".isInteger()) opts="${opts} -q ${params.min_mapq}"
       """
-      mergeSAM.py -f ${r1_bam} -r ${r2_bam} -o ${sample}_bwt2pairs.bam -q ${params.min_mapq} ${opts}
+      mergeSAM.py -f ${r1_bam} -r ${r2_bam} -o ${sample}_bwt2pairs.bam ${opts}
       """
 }
 
@@ -532,7 +537,7 @@ process get_valid_interaction{
    output:
       set val(sample), file("*.validPairs") into valid_pairs
       set val(sample), file("*.validPairs") into valid_pairs_4cool
-      file "*stat" into valid_interaction_results
+      file "*RSstat" into all_rsstat
 
    script:
 	
@@ -541,11 +546,11 @@ process get_valid_interaction{
       }
 
       def opts = ""
-      if (params.min_cis_dist != "") opts="${opts} -d ${params.min_cis_dist}"
-      if (params.min_insert_size != "") opts="${opts} -s ${params.min_insert_size}"
-      if (params.max_insert_size != "") opts="${opts} -l ${params.max_insert_size}"
-      if (params.min_restriction_fragment_size != "") opts="${opts} -t ${params.min_restriction_fragment_size}"
-      if (params.max_restriction_fragment_size != "") opts="${opts} -m ${params.max_restriction_fragment_size}"
+      if ("$params.min_cis_dist".isInteger()) opts="${opts} -d ${params.min_cis_dist}"
+      if ("$params.min_insert_size".isInteger()) opts="${opts} -s ${params.min_insert_size}"
+      if ("$params.max_insert_size".isInteger()) opts="${opts} -l ${params.max_insert_size}"
+      if ("$params.min_restriction_fragment_size".isInteger()) opts="${opts} -t ${params.min_restriction_fragment_size}"
+      if ("$params.max_restriction_fragment_size".isInteger()) opts="${opts} -m ${params.max_restriction_fragment_size}"
 
       """
       mapped_2hic_fragments.py -f ${frag_file} -r ${pe_bam} ${opts}
@@ -580,7 +585,7 @@ if ( params.splitFastq ){
 
 process build_contact_maps{
    tag "$sample - $mres"
-   publishDir "${params.outdir}/hic_results/maps/iced", mode: 'copy'
+   publishDir "${params.outdir}/hic_results/matrix/raw", mode: 'copy'
 
    input:
       set val(sample), file(vpairs), val(mres) from all_valid_pairs.combine(map_res)
@@ -651,8 +656,8 @@ process multiqc {
 
     input:
     file multiqc_config from ch_multiqc_config
-    file ('mapping/stats/*') from combine_mapping_results.collect()
-    file ('hic_results/data/stats/*') from valid_interaction_results.collect()
+    //file ('mapping/stats/*') from combine_mapping_results.collect()
+    //file ('hic_results/data/stats/*') from valid_interaction_results.collect()
     file ('software_versions/*') from software_versions_yaml
     file workflow_summary from create_workflow_summary(summary)
 
-- 
GitLab