From 1d54341fcf4a305e13d906868df993d44c95a0d1 Mon Sep 17 00:00:00 2001
From: nservant <nicolas.servant@curie.fr>
Date: Fri, 16 Apr 2021 20:15:22 +0200
Subject: [PATCH] [MODIF] issue with stats file

---
 main.nf | 44 ++++++++++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/main.nf b/main.nf
index c035bb9..dc0ebe1 100644
--- a/main.nf
+++ b/main.nf
@@ -682,15 +682,15 @@ else{
 process remove_duplicates {
    tag "$sample"
    label 'process_highmem'
-   publishDir "${params.outdir}/hicpro/valid_pairs", mode: params.publish_dir_mode
-
+   publishDir "${params.outdir}/hicpro/valid_pairs", mode: params.publish_dir_mode,
+               saveAs: {filename -> filename.endsWith("stat") ? "stats/$filename" : "$filename"}
    input:
    set val(sample), file(vpairs) from valid_pairs.groupTuple()
 
    output:
    set val(sample), file("*.allValidPairs") into ch_vpairs, ch_vpairs_cool
    file("stats/") into mqc_mergestat
-   file("stats/*/*") into all_mergestat
+   file("*mergestat") into all_mergestat
 
    script:
    if ( ! params.keep_dups ){
@@ -701,26 +701,32 @@ process remove_duplicates {
    sort -S 50% -k2,2V -k3,3n -k5,5V -k6,6n -m ${vpairs} | \
    awk -F"\\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=\$2 || c2!=\$5 || s1!=\$3 || s2!=\$6){print;c1=\$2;c2=\$5;s1=\$3;s2=\$6}' > ${sample}.allValidPairs
 
-   echo -n "valid_interaction\t" > stats/${sample}/${sample}_allValidPairs.mergestat
-   cat ${vpairs} | wc -l >> stats/${sample}/${sample}_allValidPairs.mergestat
-   echo -n "valid_interaction_rmdup\t" >> stats/${sample}/${sample}_allValidPairs.mergestat
-   cat ${sample}.allValidPairs | wc -l >> stats/${sample}/${sample}_allValidPairs.mergestat
+   echo -n "valid_interaction\t" > ${sample}_allValidPairs.mergestat
+   cat ${vpairs} | wc -l >> ${sample}_allValidPairs.mergestat
+   echo -n "valid_interaction_rmdup\t" >> ${sample}_allValidPairs.mergestat
+   cat ${sample}.allValidPairs | wc -l >> ${sample}_allValidPairs.mergestat
 
    ## Count short range (<20000) vs long range contacts
-   awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${sample}.allValidPairs >> stats/${sample}/${sample}_allValidPairs.mergestat
-
+   awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${sample}.allValidPairs >> ${sample}_allValidPairs.mergestat
+ 
+   ## For MultiQC
+   mkdir -p stats/${sample} 
+   cp ${sample}_allValidPairs.mergestat stats/${sample}/
    """
    }else{
    """
-   mkdir -p stats/${sample}
    cat ${vpairs} > ${sample}.allValidPairs
-   echo -n "valid_interaction\t" > stats/${sample}/${sample}_allValidPairs.mergestat
-   cat ${vpairs} | wc -l >> stats/${sample}/${sample}_allValidPairs.mergestat
-   echo -n "valid_interaction_rmdup\t" >> stats/${sample}/${sample}_allValidPairs.mergestat
-   cat ${sample}.allValidPairs | wc -l >> stats/${sample}/${sample}_allValidPairs.mergestat
+   echo -n "valid_interaction\t" > ${sample}_allValidPairs.mergestat
+   cat ${vpairs} | wc -l >> ${sample}_allValidPairs.mergestat
+   echo -n "valid_interaction_rmdup\t" >> ${sample}_allValidPairs.mergestat
+   cat ${sample}.allValidPairs | wc -l >> ${sample}_allValidPairs.mergestat
 
    ## Count short range (<20000) vs long range contacts
-   awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${sample}.allValidPairs >> stats/${sample}/${sample}_allValidPairs.mergestat
+   awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${sample}.allValidPairs >> ${sample}_allValidPairs.mergestat
+
+   ## For MultiQC
+   mkdir -p stats/${sample}
+   cp ${sample}_allValidPairs.mergestat stats/${sample}/
    """
    }
 }
@@ -728,14 +734,15 @@ process remove_duplicates {
 process merge_stats {
    tag "$ext"
    label 'process_low'
-   publishDir "${params.outdir}/hicpro/", mode: params.publish_dir_mode
+   publishDir "${params.outdir}/hicpro/", mode: params.publish_dir_mode,
+               saveAs: {filename -> filename.endsWith("stat") ? "stats/$filename" : "$filename"}
 
    input:
    set val(prefix), file(fstat) from all_mapstat.groupTuple().concat(all_pairstat.groupTuple(), all_rsstat.groupTuple())
 
    output:
    file("stats/") into mqc_mstats
-   file("stats/*/*") into all_mstats
+   file("*stat") into all_mstats
 
   script:
   sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2|_1|_2)/
@@ -743,8 +750,9 @@ process merge_stats {
   if ( (fstat =~ /.pairstat/) ){ ext = "mpairstat" }
   if ( (fstat =~ /.RSstat/) ){ ext = "mRSstat" }
   """
+  merge_statfiles.py -f ${fstat} > ${prefix}.${ext}
   mkdir -p stats/${sample}
-  merge_statfiles.py -f ${fstat} > stats/${sample}/${prefix}.${ext}
+  cp ${prefix}.${ext} stats/${sample}/
   """
 }
 
-- 
GitLab