From f3ba46f4f324a068d58d39c5f53a48ab7af408f1 Mon Sep 17 00:00:00 2001 From: nservant <nicolas.servant@curie.fr> Date: Mon, 18 Jul 2022 16:18:19 +0200 Subject: [PATCH] [MODIF] test on real dataset --- bin/hicpro_merge_validpairs.sh | 35 +++++++++++++++++-- conf/base.config | 2 +- conf/modules.config | 8 ++--- modules/local/cooltools/eigs-cis.nf | 2 +- modules/local/hicpro/build_contact_maps.nf | 2 +- modules/local/hicpro/dnase_mapping_stats.nf | 2 +- modules/local/hicpro/hicpro2pairs.nf | 6 ++-- .../local/hicpro/merge_valid_interaction.nf | 2 +- modules/local/hicpro/run_ice.nf | 2 +- subworkflows/local/cooler.nf | 3 +- workflows/hic.nf | 16 ++++++--- 11 files changed, 58 insertions(+), 22 deletions(-) diff --git a/bin/hicpro_merge_validpairs.sh b/bin/hicpro_merge_validpairs.sh index 09f9727..40b758c 100755 --- a/bin/hicpro_merge_validpairs.sh +++ b/bin/hicpro_merge_validpairs.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -e ## ## HiC-Pro @@ -17,11 +18,36 @@ done shift $(( OPTIND - 1 )) vpairs="$@" +vpairs_sorted=$(echo $vpairs | sed -e 's/validPairs/sorted.validPairs/g') + +mkdir -p ./tmp/ if [[ ${rmDup} == 1 ]]; then - ## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs) - sort -S 50% -k2,2V -k3,3n -k5,5V -k6,6n -m ${vpairs} | \ - awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs + ## Sort individual validPairs files + fcounts=0 + for vfile in ${vpairs} + do + echo "Sorting ${vfile} ..." + fcounts=$((fcounts+1)) + ofile=$(echo ${vfile} | sed -e 's/validPairs/sorted.validPairs/') + #sort -k2,2V -k3,3n -k5,5V -k6,6n -T ./tmp/ -o ${ofile} ${vfile} + sort -k2,2 -k5,5 -k3,3n -k6,6n -T ./tmp/ -o ${ofile} ${vfile} + done + + if [[ $fcounts -gt 1 ]] + then + echo "Merging and removing the duplicates ..." + ## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs) + #sort -k2,2V -k3,3n -k5,5V -k6,6n -T ./tmp/ -m ${vpairs_sorted} | \ + sort -k2,2 -k5,5 -k3,3n -k6,6n -T ./tmp/ -m ${vpairs_sorted} | \ + awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs + else + echo "Removing the duplicates ..." + cat ${vpairs_sorted} | awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs + fi + + ## clean + /bin/rm -rf ${vpairs_sorted} else cat ${vpairs} > ${prefix}.allValidPairs fi @@ -33,3 +59,6 @@ cat ${prefix}.allValidPairs | wc -l >> ${prefix}_allValidPairs.mergestat ## Count short range (<20000) vs long range contacts awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} $2 == $5{cis=cis+1; d=$6>$3?$6-$3:$3-$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} $2!=$5{trans=trans+1}END{print "trans_interaction\t"trans"\ncis_interaction\t"cis"\ncis_shortRange\t"sr"\ncis_longRange\t"lr}' ${prefix}.allValidPairs >> ${prefix}_allValidPairs.mergestat + +## clean +/bin/rm -rf ./tmp/ diff --git a/conf/base.config b/conf/base.config index 96e1742..e1895e3 100644 --- a/conf/base.config +++ b/conf/base.config @@ -43,7 +43,7 @@ process { time = { check_max( 20.h * task.attempt, 'time' ) } } withLabel:process_high_memory { - memory = { check_max( 200.GB * task.attempt, 'memory' ) } + memory = { check_max( 24.GB * task.attempt, 'memory' ) } } withLabel:error_ignore { errorStrategy = 'ignore' diff --git a/conf/modules.config b/conf/modules.config index 06aaba2..5b279c5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -49,7 +49,7 @@ process { mode: 'copy', enabled: params.save_aligned_intermediates ] - ext.prefix = { params.split_fastq ? "${meta.chunk}_${meta.mates}" : "${meta.id}_${meta.mates}" } + ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" } ext.args = params.bwt2_opts_end2end ?: '' ext.args2 = !params.dnase ? "-F 4" :"" } @@ -68,7 +68,7 @@ process { mode: 'copy', enabled: params.save_aligned_intermediates ] - ext.prefix = { params.split_fastq ? "${meta.chunk}_${meta.mates}_trimmed" : "${meta.id}_${meta.mates}_trimmed" } + ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}_trimmed" } ext.args = params.bwt2_opts_trimmed ?: '' ext.args2 = "" } @@ -79,7 +79,7 @@ process { mode: 'copy', enabled: params.save_aligned_intermediates ] - ext.prefix = { params.split_fastq ? "${meta.chunk}_${meta.mates}" : "${meta.id}_${meta.mates}" } + ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" } } withName: 'COMBINE_MATES' { @@ -93,7 +93,7 @@ process { params.keep_multi ? "--multi" : "", params.min_mapq ? "-q ${params.min_mapq}" : "" ].join(' ').trim() - ext.prefix = { params.split_fastq ? "${meta.chunk}" : "${meta.id}" } + ext.prefix = { "${meta.id}_${meta.chunk}" } } withName: 'GET_VALID_INTERACTION' { diff --git a/modules/local/cooltools/eigs-cis.nf b/modules/local/cooltools/eigs-cis.nf index a63dee2..55d6d16 100644 --- a/modules/local/cooltools/eigs-cis.nf +++ b/modules/local/cooltools/eigs-cis.nf @@ -29,7 +29,7 @@ process CALL_COMPARTMENTS { cat <<-END_VERSIONS > versions.yml "${task.process}": - cooltools: \$(cooltools --version 2>&1 | sed 's/cooletools, version //') + cooltools: \$(cooltools --version 2>&1 | grep version | sed 's/cooltools, version //') END_VERSIONS """ } diff --git a/modules/local/hicpro/build_contact_maps.nf b/modules/local/hicpro/build_contact_maps.nf index 1ba3550..097ff59 100644 --- a/modules/local/hicpro/build_contact_maps.nf +++ b/modules/local/hicpro/build_contact_maps.nf @@ -1,6 +1,6 @@ process BUILD_CONTACT_MAPS{ tag "$meta.id - $res" - label 'process_highmem' + label 'process_high_memory' conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/hicpro/dnase_mapping_stats.nf b/modules/local/hicpro/dnase_mapping_stats.nf index edc40dc..9505298 100644 --- a/modules/local/hicpro/dnase_mapping_stats.nf +++ b/modules/local/hicpro/dnase_mapping_stats.nf @@ -16,7 +16,7 @@ process MAPPING_STATS_DNASE { tuple val(meta), path("${prefix}.mapstat"), emit:stats script: - prefix = meta.id + "_" + meta.mates + prefix = meta.id + "_" + meta.chunk + "_" + meta.mates tag = meta.mates """ echo "## ${prefix}" > ${prefix}.mapstat diff --git a/modules/local/hicpro/hicpro2pairs.nf b/modules/local/hicpro/hicpro2pairs.nf index 4ee35b0..aa475c2 100644 --- a/modules/local/hicpro/hicpro2pairs.nf +++ b/modules/local/hicpro/hicpro2pairs.nf @@ -19,13 +19,13 @@ process HICPRO2PAIRS { prefix = "${meta.id}" """ ##columns: readID chr1 pos1 chr2 pos2 strand1 strand2 - awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs > ${prefix}_contacts.pairs - sort -k2,2 -k4,4 -k3,3n -k5,5n ${prefix}_contacts.pairs | bgzip -c > ${prefix}_contacts.pairs.gz + awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs | bgzip -c > ${prefix}_contacts.pairs.gz + ##sort -k2,2 -k4,4 -k3,3n -k5,5n ${prefix}_contacts.pairs | bgzip -c > ${prefix}_contacts.pairs.gz pairix -f ${prefix}_contacts.pairs.gz cat <<-END_VERSIONS > versions.yml "${task.process}": - pairix: \$(echo \$(pairix 2>&1 | grep Version | sed -e 's/Version: //') + pairix: \$(echo \$(pairix 2>&1 | grep Version | sed -e 's/Version: //')) END_VERSIONS """ } diff --git a/modules/local/hicpro/merge_valid_interaction.nf b/modules/local/hicpro/merge_valid_interaction.nf index 6da4356..c5b716d 100644 --- a/modules/local/hicpro/merge_valid_interaction.nf +++ b/modules/local/hicpro/merge_valid_interaction.nf @@ -1,6 +1,6 @@ process MERGE_VALID_INTERACTION { tag "$prefix" - label 'process_highmem' + label 'process_high_memory' conda (params.enable_conda ? "conda-forge::gawk=5.1.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/hicpro/run_ice.nf b/modules/local/hicpro/run_ice.nf index e77b5c2..996baf1 100644 --- a/modules/local/hicpro/run_ice.nf +++ b/modules/local/hicpro/run_ice.nf @@ -1,6 +1,6 @@ process ICE_NORMALIZATION{ tag "$rmaps" - label 'process_highmem' + label 'process_high_memory' conda (params.enable_conda ? "conda-forge::python=3.9 bioconda::iced=0.5.10 conda-forge::numpy=1.22.3" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/subworkflows/local/cooler.nf b/subworkflows/local/cooler.nf index 8780b82..7ce15de 100644 --- a/subworkflows/local/cooler.nf +++ b/subworkflows/local/cooler.nf @@ -63,8 +63,9 @@ workflow COOLER { if (!params.res_zoomify){ ch_res_zoomify = cool_bins.min() }else{ - ch_res_zoomify = params.res_zoomify + ch_res_zoomify = Channel.from(params.res_zoomify).splitCsv().flatten().unique().toInteger() } + ch_cool .combine(ch_res_zoomify) .filter{ it[2] == it[3] } diff --git a/workflows/hic.nf b/workflows/hic.nf index 60e54a0..a58b89c 100644 --- a/workflows/hic.nf +++ b/workflows/hic.nf @@ -36,13 +36,20 @@ if (params.digestion){ } //**************************************** -// Maps resolution for downstream analysis +// Combine all maps resolution for downstream analysis + +ch_map_res = Channel.from( params.bin_size ).splitCsv().flatten().toInteger() + +if (params.res_zoomify){ + ch_zoom_res = Channel.from( params.res_zoomify ).splitCsv().flatten().toInteger() + ch_map_res = ch_map_res.concat(ch_zoom_res) +} -ch_map_res = Channel.from( params.bin_size ).splitCsv().flatten() if (params.res_tads && !params.skip_tads){ Channel.from( "${params.res_tads}" ) .splitCsv() .flatten() + .toInteger() .set {ch_tads_res} ch_map_res = ch_map_res.concat(ch_tads_res) }else{ @@ -56,6 +63,7 @@ if (params.res_dist_decay && !params.skip_dist_decay){ Channel.from( "${params.res_dist_decay}" ) .splitCsv() .flatten() + .toInteger() .set {ch_ddecay_res} ch_map_res = ch_map_res.concat(ch_ddecay_res) }else{ @@ -69,6 +77,7 @@ if (params.res_compartments && !params.skip_compartments){ Channel.from( "${params.res_compartments}" ) .splitCsv() .flatten() + .toInteger() .set {ch_comp_res} ch_map_res = ch_map_res.concat(ch_comp_res) }else{ @@ -154,8 +163,6 @@ workflow HIC { ch_input ) - INPUT_CHECK.out.reads.view() - // // SUBWORKFLOW: Prepare genome annotation // @@ -202,7 +209,6 @@ workflow HIC { if (!params.skip_dist_decay){ COOLER.out.cool .combine(ch_ddecay_res) - .view() .filter{ it[0].resolution == it[2] } .map { it -> [it[0], it[1]]} .set{ ch_distdecay } -- GitLab