diff --git a/bin/hicpro_merge_validpairs.sh b/bin/hicpro_merge_validpairs.sh index 09f97277aee9ee4e23199c2b996077db63e30464..40b758cb055c6fcd23dcb282d41a82b745cd02f3 100755 --- a/bin/hicpro_merge_validpairs.sh +++ b/bin/hicpro_merge_validpairs.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -e ## ## HiC-Pro @@ -17,11 +18,36 @@ done shift $(( OPTIND - 1 )) vpairs="$@" +vpairs_sorted=$(echo $vpairs | sed -e 's/validPairs/sorted.validPairs/g') + +mkdir -p ./tmp/ if [[ ${rmDup} == 1 ]]; then - ## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs) - sort -S 50% -k2,2V -k3,3n -k5,5V -k6,6n -m ${vpairs} | \ - awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs + ## Sort individual validPairs files + fcounts=0 + for vfile in ${vpairs} + do + echo "Sorting ${vfile} ..." + fcounts=$((fcounts+1)) + ofile=$(echo ${vfile} | sed -e 's/validPairs/sorted.validPairs/') + #sort -k2,2V -k3,3n -k5,5V -k6,6n -T ./tmp/ -o ${ofile} ${vfile} + sort -k2,2 -k5,5 -k3,3n -k6,6n -T ./tmp/ -o ${ofile} ${vfile} + done + + if [[ $fcounts -gt 1 ]] + then + echo "Merging and removing the duplicates ..." + ## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs) + #sort -k2,2V -k3,3n -k5,5V -k6,6n -T ./tmp/ -m ${vpairs_sorted} | \ + sort -k2,2 -k5,5 -k3,3n -k6,6n -T ./tmp/ -m ${vpairs_sorted} | \ + awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs + else + echo "Removing the duplicates ..." + cat ${vpairs_sorted} | awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs + fi + + ## clean + /bin/rm -rf ${vpairs_sorted} else cat ${vpairs} > ${prefix}.allValidPairs fi @@ -33,3 +59,6 @@ cat ${prefix}.allValidPairs | wc -l >> ${prefix}_allValidPairs.mergestat ## Count short range (<20000) vs long range contacts awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} $2 == $5{cis=cis+1; d=$6>$3?$6-$3:$3-$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} $2!=$5{trans=trans+1}END{print "trans_interaction\t"trans"\ncis_interaction\t"cis"\ncis_shortRange\t"sr"\ncis_longRange\t"lr}' ${prefix}.allValidPairs >> ${prefix}_allValidPairs.mergestat + +## clean +/bin/rm -rf ./tmp/ diff --git a/conf/base.config b/conf/base.config index 96e174215898ee824c5a09641e864d2659e7ab40..e1895e3f5a6e7dfcc9b68b268225d7a8dd5376a7 100644 --- a/conf/base.config +++ b/conf/base.config @@ -43,7 +43,7 @@ process { time = { check_max( 20.h * task.attempt, 'time' ) } } withLabel:process_high_memory { - memory = { check_max( 200.GB * task.attempt, 'memory' ) } + memory = { check_max( 24.GB * task.attempt, 'memory' ) } } withLabel:error_ignore { errorStrategy = 'ignore' diff --git a/conf/modules.config b/conf/modules.config index 06aaba2244691800445094279ee717851eafb94a..5b279c54cb9ec6fc7a5b127a9292a10316434e55 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -49,7 +49,7 @@ process { mode: 'copy', enabled: params.save_aligned_intermediates ] - ext.prefix = { params.split_fastq ? "${meta.chunk}_${meta.mates}" : "${meta.id}_${meta.mates}" } + ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" } ext.args = params.bwt2_opts_end2end ?: '' ext.args2 = !params.dnase ? "-F 4" :"" } @@ -68,7 +68,7 @@ process { mode: 'copy', enabled: params.save_aligned_intermediates ] - ext.prefix = { params.split_fastq ? "${meta.chunk}_${meta.mates}_trimmed" : "${meta.id}_${meta.mates}_trimmed" } + ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}_trimmed" } ext.args = params.bwt2_opts_trimmed ?: '' ext.args2 = "" } @@ -79,7 +79,7 @@ process { mode: 'copy', enabled: params.save_aligned_intermediates ] - ext.prefix = { params.split_fastq ? "${meta.chunk}_${meta.mates}" : "${meta.id}_${meta.mates}" } + ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" } } withName: 'COMBINE_MATES' { @@ -93,7 +93,7 @@ process { params.keep_multi ? "--multi" : "", params.min_mapq ? "-q ${params.min_mapq}" : "" ].join(' ').trim() - ext.prefix = { params.split_fastq ? "${meta.chunk}" : "${meta.id}" } + ext.prefix = { "${meta.id}_${meta.chunk}" } } withName: 'GET_VALID_INTERACTION' { diff --git a/modules/local/cooltools/eigs-cis.nf b/modules/local/cooltools/eigs-cis.nf index a63dee2eb39ca6a7e37f06a6d09c39a4971a47e5..55d6d1697f80bdd893c95349b827861b05f5bf39 100644 --- a/modules/local/cooltools/eigs-cis.nf +++ b/modules/local/cooltools/eigs-cis.nf @@ -29,7 +29,7 @@ process CALL_COMPARTMENTS { cat <<-END_VERSIONS > versions.yml "${task.process}": - cooltools: \$(cooltools --version 2>&1 | sed 's/cooletools, version //') + cooltools: \$(cooltools --version 2>&1 | grep version | sed 's/cooltools, version //') END_VERSIONS """ } diff --git a/modules/local/hicpro/build_contact_maps.nf b/modules/local/hicpro/build_contact_maps.nf index 1ba3550d274e6f9327a895cde99dc11a4ab0ab70..097ff59a8f51465b4867146a400211664d24067c 100644 --- a/modules/local/hicpro/build_contact_maps.nf +++ b/modules/local/hicpro/build_contact_maps.nf @@ -1,6 +1,6 @@ process BUILD_CONTACT_MAPS{ tag "$meta.id - $res" - label 'process_highmem' + label 'process_high_memory' conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/hicpro/dnase_mapping_stats.nf b/modules/local/hicpro/dnase_mapping_stats.nf index edc40dc26329dee3334b66240a85e8803352032b..950529822dcb16df817185fcfd5e31bdd79df602 100644 --- a/modules/local/hicpro/dnase_mapping_stats.nf +++ b/modules/local/hicpro/dnase_mapping_stats.nf @@ -16,7 +16,7 @@ process MAPPING_STATS_DNASE { tuple val(meta), path("${prefix}.mapstat"), emit:stats script: - prefix = meta.id + "_" + meta.mates + prefix = meta.id + "_" + meta.chunk + "_" + meta.mates tag = meta.mates """ echo "## ${prefix}" > ${prefix}.mapstat diff --git a/modules/local/hicpro/hicpro2pairs.nf b/modules/local/hicpro/hicpro2pairs.nf index 4ee35b0c1715ec35adb731738e3fb6f26e9976ab..aa475c23e150be45e0166ee21103750e5c0f66d6 100644 --- a/modules/local/hicpro/hicpro2pairs.nf +++ b/modules/local/hicpro/hicpro2pairs.nf @@ -19,13 +19,13 @@ process HICPRO2PAIRS { prefix = "${meta.id}" """ ##columns: readID chr1 pos1 chr2 pos2 strand1 strand2 - awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs > ${prefix}_contacts.pairs - sort -k2,2 -k4,4 -k3,3n -k5,5n ${prefix}_contacts.pairs | bgzip -c > ${prefix}_contacts.pairs.gz + awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs | bgzip -c > ${prefix}_contacts.pairs.gz + ##sort -k2,2 -k4,4 -k3,3n -k5,5n ${prefix}_contacts.pairs | bgzip -c > ${prefix}_contacts.pairs.gz pairix -f ${prefix}_contacts.pairs.gz cat <<-END_VERSIONS > versions.yml "${task.process}": - pairix: \$(echo \$(pairix 2>&1 | grep Version | sed -e 's/Version: //') + pairix: \$(echo \$(pairix 2>&1 | grep Version | sed -e 's/Version: //')) END_VERSIONS """ } diff --git a/modules/local/hicpro/merge_valid_interaction.nf b/modules/local/hicpro/merge_valid_interaction.nf index 6da435621a1835e667f8111935c74cd8f6e0c008..c5b716d4493aad83d6339dffd85e287cfd317104 100644 --- a/modules/local/hicpro/merge_valid_interaction.nf +++ b/modules/local/hicpro/merge_valid_interaction.nf @@ -1,6 +1,6 @@ process MERGE_VALID_INTERACTION { tag "$prefix" - label 'process_highmem' + label 'process_high_memory' conda (params.enable_conda ? "conda-forge::gawk=5.1.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/hicpro/run_ice.nf b/modules/local/hicpro/run_ice.nf index e77b5c240934dac3f034939fba2f58c1303e7af2..996baf1e117dffa39ddc4a3c9769075771dab359 100644 --- a/modules/local/hicpro/run_ice.nf +++ b/modules/local/hicpro/run_ice.nf @@ -1,6 +1,6 @@ process ICE_NORMALIZATION{ tag "$rmaps" - label 'process_highmem' + label 'process_high_memory' conda (params.enable_conda ? "conda-forge::python=3.9 bioconda::iced=0.5.10 conda-forge::numpy=1.22.3" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/subworkflows/local/cooler.nf b/subworkflows/local/cooler.nf index 8780b821d05b059f439cb17ea703a64ef581daf5..7ce15de2065780d12154f1b9fa31044f33948c91 100644 --- a/subworkflows/local/cooler.nf +++ b/subworkflows/local/cooler.nf @@ -63,8 +63,9 @@ workflow COOLER { if (!params.res_zoomify){ ch_res_zoomify = cool_bins.min() }else{ - ch_res_zoomify = params.res_zoomify + ch_res_zoomify = Channel.from(params.res_zoomify).splitCsv().flatten().unique().toInteger() } + ch_cool .combine(ch_res_zoomify) .filter{ it[2] == it[3] } diff --git a/workflows/hic.nf b/workflows/hic.nf index 60e54a04d34dd5c7244db643a0faddfa33217544..a58b89c57a3b19399c95a22a93a82eb330cecf72 100644 --- a/workflows/hic.nf +++ b/workflows/hic.nf @@ -36,13 +36,20 @@ if (params.digestion){ } //**************************************** -// Maps resolution for downstream analysis +// Combine all maps resolution for downstream analysis + +ch_map_res = Channel.from( params.bin_size ).splitCsv().flatten().toInteger() + +if (params.res_zoomify){ + ch_zoom_res = Channel.from( params.res_zoomify ).splitCsv().flatten().toInteger() + ch_map_res = ch_map_res.concat(ch_zoom_res) +} -ch_map_res = Channel.from( params.bin_size ).splitCsv().flatten() if (params.res_tads && !params.skip_tads){ Channel.from( "${params.res_tads}" ) .splitCsv() .flatten() + .toInteger() .set {ch_tads_res} ch_map_res = ch_map_res.concat(ch_tads_res) }else{ @@ -56,6 +63,7 @@ if (params.res_dist_decay && !params.skip_dist_decay){ Channel.from( "${params.res_dist_decay}" ) .splitCsv() .flatten() + .toInteger() .set {ch_ddecay_res} ch_map_res = ch_map_res.concat(ch_ddecay_res) }else{ @@ -69,6 +77,7 @@ if (params.res_compartments && !params.skip_compartments){ Channel.from( "${params.res_compartments}" ) .splitCsv() .flatten() + .toInteger() .set {ch_comp_res} ch_map_res = ch_map_res.concat(ch_comp_res) }else{ @@ -154,8 +163,6 @@ workflow HIC { ch_input ) - INPUT_CHECK.out.reads.view() - // // SUBWORKFLOW: Prepare genome annotation // @@ -202,7 +209,6 @@ workflow HIC { if (!params.skip_dist_decay){ COOLER.out.cool .combine(ch_ddecay_res) - .view() .filter{ it[0].resolution == it[2] } .map { it -> [it[0], it[1]]} .set{ ch_distdecay }