Merge branch 'dev' of github.com:nservant/nf-core-hic into dev

810250c6 · nservant · 6c8ecf3a · f3ba46f4 · 810250c6 · 810250c6
Commit 810250c6 authored 2 years ago by nservant
--- a/bin/hicpro_merge_validpairs.sh
+++ b/bin/hicpro_merge_validpairs.sh
 #!/bin/bash
+set -e
 ##
 ## HiC-Pro
@@ -17,11 +18,36 @@ done
 shift $(( OPTIND - 1 ))
 vpairs="$@"
+vpairs_sorted=$(echo $vpairs | sed -e 's/validPairs/sorted.validPairs/g')
+mkdir -p ./tmp/
 if [[ ${rmDup} == 1 ]]; then
-    ## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs)
+    ## Sort individual validPairs files
-    sort -S 50% -k2,2V -k3,3n -k5,5V -k6,6n -m ${vpairs} | \
+    fcounts=0
-        awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs
+    for vfile in ${vpairs}
+    do
+	echo "Sorting ${vfile} ..."
+	fcounts=$((fcounts+1))
+	ofile=$(echo ${vfile} | sed -e 's/validPairs/sorted.validPairs/')
+	#sort -k2,2V -k3,3n -k5,5V -k6,6n -T ./tmp/ -o ${ofile} ${vfile}
+	sort -k2,2 -k5,5 -k3,3n -k6,6n -T ./tmp/ -o ${ofile} ${vfile}
+    done
+    if [[ $fcounts -gt 1 ]]
+    then
+	echo "Merging and removing the duplicates ..."
+	## Merge the individually sorted valid pairs files and remove read pairs with the same starts (i.e. duplicated read pairs)
+	#sort -k2,2V -k3,3n -k5,5V -k6,6n -T ./tmp/ -m ${vpairs_sorted} | \
+	 sort -k2,2 -k5,5 -k3,3n -k6,6n -T ./tmp/ -m ${vpairs_sorted} | \
+            awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs
+    else
+	echo "Removing the duplicates ..."
+	cat ${vpairs_sorted} | awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs
+    fi
+    ## Clean up: remove the intermediate sorted validPairs files
+    /bin/rm -rf ${vpairs_sorted}
 else
    cat ${vpairs} > ${prefix}.allValidPairs
 fi
@@ -33,3 +59,6 @@ cat ${prefix}.allValidPairs | wc -l >> ${prefix}_allValidPairs.mergestat
 ## Count short range (<20000) vs long range contacts
 awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} $2 == $5{cis=cis+1; d=$6>$3?$6-$3:$3-$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} $2!=$5{trans=trans+1}END{print "trans_interaction\t"trans"\ncis_interaction\t"cis"\ncis_shortRange\t"sr"\ncis_longRange\t"lr}' ${prefix}.allValidPairs >> ${prefix}_allValidPairs.mergestat
+## Clean up: remove the temporary directory used by sort
+/bin/rm -rf ./tmp/
--- a/conf/base.config
+++ b/conf/base.config
@@ -43,7 +43,7 @@ process {
        time   = { check_max( 20.h  * task.attempt, 'time'    ) }
    }
    withLabel:process_high_memory {
-        memory = { check_max( 200.GB * task.attempt, 'memory' ) }
+        memory = { check_max( 24.GB * task.attempt, 'memory' ) }
    }
    withLabel:error_ignore {
        errorStrategy = 'ignore'

--- a/conf/modules.config
+++ b/conf/modules.config
@@ -49,7 +49,7 @@ process {
            mode: 'copy',
            enabled: params.save_aligned_intermediates
        ]
-        ext.prefix = { params.split_fastq ? "${meta.chunk}_${meta.mates}" : "${meta.id}_${meta.mates}" }
+        ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" }
        ext.args = params.bwt2_opts_end2end ?: ''
        ext.args2 = !params.dnase ? "-F 4" :""
    }
@@ -68,7 +68,7 @@ process {
            mode: 'copy',
            enabled: params.save_aligned_intermediates
        ]
-        ext.prefix = { params.split_fastq ? "${meta.chunk}_${meta.mates}_trimmed" : "${meta.id}_${meta.mates}_trimmed" }
+        ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}_trimmed" }
        ext.args = params.bwt2_opts_trimmed ?: ''
        ext.args2 = ""
    }
@@ -79,7 +79,7 @@ process {
            mode: 'copy',
            enabled: params.save_aligned_intermediates
        ]
-        ext.prefix = { params.split_fastq ? "${meta.chunk}_${meta.mates}" : "${meta.id}_${meta.mates}" }
+        ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" }
    }
    withName: 'COMBINE_MATES' {
@@ -93,7 +93,7 @@ process {
            params.keep_multi ? "--multi" : "",
            params.min_mapq ? "-q ${params.min_mapq}" : ""
        ].join(' ').trim()
-        ext.prefix = { params.split_fastq ? "${meta.chunk}" : "${meta.id}" }
+        ext.prefix = { "${meta.id}_${meta.chunk}" }
    }
    withName: 'GET_VALID_INTERACTION' {

--- a/modules/local/cooltools/eigs-cis.nf
+++ b/modules/local/cooltools/eigs-cis.nf
@@ -29,7 +29,7 @@ process CALL_COMPARTMENTS {
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
-        cooltools: \$(cooltools --version 2>&1 | sed 's/cooletools, version //')
+        cooltools: \$(cooltools --version 2>&1 | grep version | sed 's/cooltools, version //')
    END_VERSIONS
    """
 }
--- a/modules/local/hicpro/bowtie2_merge.nf
+++ b/modules/local/hicpro/bowtie2_merge.nf
@@ -24,7 +24,7 @@ process MERGE_BOWTIE2{
        ${bam1} ${bam2}
    samtools sort -@ ${task.cpus} -m 800M \\
-        -n  \\
+        -n \\
        -o ${prefix}_bwt2merged.sorted.bam \\
        ${prefix}_bwt2merged.bam

--- a/modules/local/hicpro/build_contact_maps.nf
+++ b/modules/local/hicpro/build_contact_maps.nf
 process BUILD_CONTACT_MAPS{
  tag "$meta.id - $res"
-  label 'process_highmem'
+  label 'process_high_memory'
  conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
  container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?

--- a/modules/local/hicpro/dnase_mapping_stats.nf
+++ b/modules/local/hicpro/dnase_mapping_stats.nf
@@ -16,7 +16,7 @@ process MAPPING_STATS_DNASE {
    tuple val(meta), path("${prefix}.mapstat"), emit:stats
    script:
-    prefix = meta.id + "_" + meta.mates
+    prefix = meta.id + "_" + meta.chunk + "_" + meta.mates
    tag = meta.mates
    """
    echo "## ${prefix}" > ${prefix}.mapstat

--- a/modules/local/hicpro/hicpro2pairs.nf
+++ b/modules/local/hicpro/hicpro2pairs.nf
@@ -19,13 +19,13 @@ process HICPRO2PAIRS {
    prefix = "${meta.id}"
    """
    ##columns: readID chr1 pos1 chr2 pos2 strand1 strand2
-    awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs > ${prefix}_contacts.pairs
+    awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs | bgzip -c > ${prefix}_contacts.pairs.gz
-    sort -k2,2 -k4,4 -k3,3n -k5,5n ${prefix}_contacts.pairs | bgzip -c > ${prefix}_contacts.pairs.gz
+    ##sort -k2,2 -k4,4 -k3,3n -k5,5n ${prefix}_contacts.pairs | bgzip -c > ${prefix}_contacts.pairs.gz
    pairix -f ${prefix}_contacts.pairs.gz
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
-        pairix: \$(echo \$(pairix 2>&1 | grep Version | sed -e 's/Version: //')
+        pairix: \$(echo \$(pairix 2>&1 | grep Version | sed -e 's/Version: //'))
    END_VERSIONS
    """
 }
--- a/modules/local/hicpro/merge_valid_interaction.nf
+++ b/modules/local/hicpro/merge_valid_interaction.nf
 process MERGE_VALID_INTERACTION {
    tag "$prefix"
-    label 'process_highmem'
+    label 'process_high_memory'
    conda (params.enable_conda ? "conda-forge::gawk=5.1.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?

--- a/modules/local/hicpro/run_ice.nf
+++ b/modules/local/hicpro/run_ice.nf
 process ICE_NORMALIZATION{
    tag "$rmaps"
-    label 'process_highmem'
+    label 'process_high_memory'
    conda (params.enable_conda ? "conda-forge::python=3.9  bioconda::iced=0.5.10 conda-forge::numpy=1.22.3" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?

--- a/modules/local/samplesheet_check.nf
+++ b/modules/local/samplesheet_check.nf
 process SAMPLESHEET_CHECK {
    tag "$samplesheet"

--- a/subworkflows/local/cooler.nf
+++ b/subworkflows/local/cooler.nf
@@ -63,8 +63,9 @@ workflow COOLER {
  if (!params.res_zoomify){
    ch_res_zoomify = cool_bins.min()
  }else{
-    ch_res_zoomify = params.res_zoomify
+    ch_res_zoomify = Channel.from(params.res_zoomify).splitCsv().flatten().unique().toInteger()
  }
  ch_cool
    .combine(ch_res_zoomify)
    .filter{ it[2] == it[3] }

--- a/workflows/hic.nf
+++ b/workflows/hic.nf
@@ -36,13 +36,20 @@ if (params.digestion){
 }
 //****************************************
-// Maps resolution for downstream analysis
+// Combine all map resolutions for downstream analysis
+ch_map_res = Channel.from( params.bin_size ).splitCsv().flatten().toInteger()
+if (params.res_zoomify){
+  ch_zoom_res = Channel.from( params.res_zoomify ).splitCsv().flatten().toInteger()
+  ch_map_res = ch_map_res.concat(ch_zoom_res)
+}
-ch_map_res = Channel.from( params.bin_size ).splitCsv().flatten()
 if (params.res_tads && !params.skip_tads){
  Channel.from( "${params.res_tads}" )
    .splitCsv()
    .flatten()
+    .toInteger()
    .set {ch_tads_res}
  ch_map_res = ch_map_res.concat(ch_tads_res)
 }else{
@@ -56,6 +63,7 @@ if (params.res_dist_decay && !params.skip_dist_decay){
  Channel.from( "${params.res_dist_decay}" )
    .splitCsv()
    .flatten()
+    .toInteger()
    .set {ch_ddecay_res}
   ch_map_res = ch_map_res.concat(ch_ddecay_res)
 }else{
@@ -69,6 +77,7 @@ if (params.res_compartments && !params.skip_compartments){
  Channel.from( "${params.res_compartments}" )
    .splitCsv()
    .flatten()
+    .toInteger()
    .set {ch_comp_res}
   ch_map_res = ch_map_res.concat(ch_comp_res)
 }else{
@@ -154,8 +163,6 @@ workflow HIC {
    ch_input
  )
-  INPUT_CHECK.out.reads.view()
  //
  // SUBWORKFLOW: Prepare genome annotation
  //
@@ -202,7 +209,6 @@ workflow HIC {
  if (!params.skip_dist_decay){
    COOLER.out.cool
      .combine(ch_ddecay_res)
-      .view()
      .filter{ it[0].resolution == it[2] }
      .map { it -> [it[0], it[1]]}
      .set{ ch_distdecay }
@@ -223,7 +229,7 @@ workflow HIC {
      .map { it -> [it[0], it[1], it[2]]}
      .set{ ch_cool_compartments }
-    COMPARTMENTS(
+    COMPARTMENTS (
      ch_cool_compartments,
      ch_fasta,
      PREPARE_GENOME.out.chromosome_size