Skip to content
Snippets Groups Projects
Commit 810250c6 authored by nservant's avatar nservant
Browse files

Merge branch 'dev' of github.com:nservant/nf-core-hic into dev

parents 6c8ecf3a f3ba46f4
No related branches found
No related tags found
No related merge requests found
#!/bin/bash #!/bin/bash
set -e
## ##
## HiC-Pro ## HiC-Pro
...@@ -17,11 +18,36 @@ done ...@@ -17,11 +18,36 @@ done
shift $(( OPTIND - 1 )) shift $(( OPTIND - 1 ))
vpairs="$@" vpairs="$@"
vpairs_sorted=$(echo $vpairs | sed -e 's/validPairs/sorted.validPairs/g')
mkdir -p ./tmp/
if [[ ${rmDup} == 1 ]]; then if [[ ${rmDup} == 1 ]]; then
## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs) ## Sort individual validPairs files
sort -S 50% -k2,2V -k3,3n -k5,5V -k6,6n -m ${vpairs} | \ fcounts=0
awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs for vfile in ${vpairs}
do
echo "Sorting ${vfile} ..."
fcounts=$((fcounts+1))
ofile=$(echo ${vfile} | sed -e 's/validPairs/sorted.validPairs/')
#sort -k2,2V -k3,3n -k5,5V -k6,6n -T ./tmp/ -o ${ofile} ${vfile}
sort -k2,2 -k5,5 -k3,3n -k6,6n -T ./tmp/ -o ${ofile} ${vfile}
done
if [[ $fcounts -gt 1 ]]
then
echo "Merging and removing the duplicates ..."
## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs)
#sort -k2,2V -k3,3n -k5,5V -k6,6n -T ./tmp/ -m ${vpairs_sorted} | \
sort -k2,2 -k5,5 -k3,3n -k6,6n -T ./tmp/ -m ${vpairs_sorted} | \
awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs
else
echo "Removing the duplicates ..."
cat ${vpairs_sorted} | awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs
fi
## clean
/bin/rm -rf ${vpairs_sorted}
else else
cat ${vpairs} > ${prefix}.allValidPairs cat ${vpairs} > ${prefix}.allValidPairs
fi fi
...@@ -33,3 +59,6 @@ cat ${prefix}.allValidPairs | wc -l >> ${prefix}_allValidPairs.mergestat ...@@ -33,3 +59,6 @@ cat ${prefix}.allValidPairs | wc -l >> ${prefix}_allValidPairs.mergestat
## Count short range (<20000) vs long range contacts ## Count short range (<20000) vs long range contacts
awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} $2 == $5{cis=cis+1; d=$6>$3?$6-$3:$3-$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} $2!=$5{trans=trans+1}END{print "trans_interaction\t"trans"\ncis_interaction\t"cis"\ncis_shortRange\t"sr"\ncis_longRange\t"lr}' ${prefix}.allValidPairs >> ${prefix}_allValidPairs.mergestat awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} $2 == $5{cis=cis+1; d=$6>$3?$6-$3:$3-$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} $2!=$5{trans=trans+1}END{print "trans_interaction\t"trans"\ncis_interaction\t"cis"\ncis_shortRange\t"sr"\ncis_longRange\t"lr}' ${prefix}.allValidPairs >> ${prefix}_allValidPairs.mergestat
## clean
/bin/rm -rf ./tmp/
...@@ -43,7 +43,7 @@ process { ...@@ -43,7 +43,7 @@ process {
time = { check_max( 20.h * task.attempt, 'time' ) } time = { check_max( 20.h * task.attempt, 'time' ) }
} }
withLabel:process_high_memory { withLabel:process_high_memory {
memory = { check_max( 200.GB * task.attempt, 'memory' ) } memory = { check_max( 24.GB * task.attempt, 'memory' ) }
} }
withLabel:error_ignore { withLabel:error_ignore {
errorStrategy = 'ignore' errorStrategy = 'ignore'
......
...@@ -49,7 +49,7 @@ process { ...@@ -49,7 +49,7 @@ process {
mode: 'copy', mode: 'copy',
enabled: params.save_aligned_intermediates enabled: params.save_aligned_intermediates
] ]
ext.prefix = { params.split_fastq ? "${meta.chunk}_${meta.mates}" : "${meta.id}_${meta.mates}" } ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" }
ext.args = params.bwt2_opts_end2end ?: '' ext.args = params.bwt2_opts_end2end ?: ''
ext.args2 = !params.dnase ? "-F 4" :"" ext.args2 = !params.dnase ? "-F 4" :""
} }
...@@ -68,7 +68,7 @@ process { ...@@ -68,7 +68,7 @@ process {
mode: 'copy', mode: 'copy',
enabled: params.save_aligned_intermediates enabled: params.save_aligned_intermediates
] ]
ext.prefix = { params.split_fastq ? "${meta.chunk}_${meta.mates}_trimmed" : "${meta.id}_${meta.mates}_trimmed" } ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}_trimmed" }
ext.args = params.bwt2_opts_trimmed ?: '' ext.args = params.bwt2_opts_trimmed ?: ''
ext.args2 = "" ext.args2 = ""
} }
...@@ -79,7 +79,7 @@ process { ...@@ -79,7 +79,7 @@ process {
mode: 'copy', mode: 'copy',
enabled: params.save_aligned_intermediates enabled: params.save_aligned_intermediates
] ]
ext.prefix = { params.split_fastq ? "${meta.chunk}_${meta.mates}" : "${meta.id}_${meta.mates}" } ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" }
} }
withName: 'COMBINE_MATES' { withName: 'COMBINE_MATES' {
...@@ -93,7 +93,7 @@ process { ...@@ -93,7 +93,7 @@ process {
params.keep_multi ? "--multi" : "", params.keep_multi ? "--multi" : "",
params.min_mapq ? "-q ${params.min_mapq}" : "" params.min_mapq ? "-q ${params.min_mapq}" : ""
].join(' ').trim() ].join(' ').trim()
ext.prefix = { params.split_fastq ? "${meta.chunk}" : "${meta.id}" } ext.prefix = { "${meta.id}_${meta.chunk}" }
} }
withName: 'GET_VALID_INTERACTION' { withName: 'GET_VALID_INTERACTION' {
......
...@@ -29,7 +29,7 @@ process CALL_COMPARTMENTS { ...@@ -29,7 +29,7 @@ process CALL_COMPARTMENTS {
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":
cooltools: \$(cooltools --version 2>&1 | sed 's/cooletools, version //') cooltools: \$(cooltools --version 2>&1 | grep version | sed 's/cooltools, version //')
END_VERSIONS END_VERSIONS
""" """
} }
...@@ -24,7 +24,7 @@ process MERGE_BOWTIE2{ ...@@ -24,7 +24,7 @@ process MERGE_BOWTIE2{
${bam1} ${bam2} ${bam1} ${bam2}
samtools sort -@ ${task.cpus} -m 800M \\ samtools sort -@ ${task.cpus} -m 800M \\
-n \\ -n \\
-o ${prefix}_bwt2merged.sorted.bam \\ -o ${prefix}_bwt2merged.sorted.bam \\
${prefix}_bwt2merged.bam ${prefix}_bwt2merged.bam
......
process BUILD_CONTACT_MAPS{ process BUILD_CONTACT_MAPS{
tag "$meta.id - $res" tag "$meta.id - $res"
label 'process_highmem' label 'process_high_memory'
conda (params.enable_conda ? "conda-forge::sed=4.7" : null) conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
......
...@@ -16,7 +16,7 @@ process MAPPING_STATS_DNASE { ...@@ -16,7 +16,7 @@ process MAPPING_STATS_DNASE {
tuple val(meta), path("${prefix}.mapstat"), emit:stats tuple val(meta), path("${prefix}.mapstat"), emit:stats
script: script:
prefix = meta.id + "_" + meta.mates prefix = meta.id + "_" + meta.chunk + "_" + meta.mates
tag = meta.mates tag = meta.mates
""" """
echo "## ${prefix}" > ${prefix}.mapstat echo "## ${prefix}" > ${prefix}.mapstat
......
...@@ -19,13 +19,13 @@ process HICPRO2PAIRS { ...@@ -19,13 +19,13 @@ process HICPRO2PAIRS {
prefix = "${meta.id}" prefix = "${meta.id}"
""" """
##columns: readID chr1 pos1 chr2 pos2 strand1 strand2 ##columns: readID chr1 pos1 chr2 pos2 strand1 strand2
awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs > ${prefix}_contacts.pairs awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs | bgzip -c > ${prefix}_contacts.pairs.gz
sort -k2,2 -k4,4 -k3,3n -k5,5n ${prefix}_contacts.pairs | bgzip -c > ${prefix}_contacts.pairs.gz ##sort -k2,2 -k4,4 -k3,3n -k5,5n ${prefix}_contacts.pairs | bgzip -c > ${prefix}_contacts.pairs.gz
pairix -f ${prefix}_contacts.pairs.gz pairix -f ${prefix}_contacts.pairs.gz
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":
pairix: \$(echo \$(pairix 2>&1 | grep Version | sed -e 's/Version: //') pairix: \$(echo \$(pairix 2>&1 | grep Version | sed -e 's/Version: //'))
END_VERSIONS END_VERSIONS
""" """
} }
process MERGE_VALID_INTERACTION { process MERGE_VALID_INTERACTION {
tag "$prefix" tag "$prefix"
label 'process_highmem' label 'process_high_memory'
conda (params.enable_conda ? "conda-forge::gawk=5.1.0" : null) conda (params.enable_conda ? "conda-forge::gawk=5.1.0" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
......
process ICE_NORMALIZATION{ process ICE_NORMALIZATION{
tag "$rmaps" tag "$rmaps"
label 'process_highmem' label 'process_high_memory'
conda (params.enable_conda ? "conda-forge::python=3.9 bioconda::iced=0.5.10 conda-forge::numpy=1.22.3" : null) conda (params.enable_conda ? "conda-forge::python=3.9 bioconda::iced=0.5.10 conda-forge::numpy=1.22.3" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
......
process SAMPLESHEET_CHECK { process SAMPLESHEET_CHECK {
tag "$samplesheet" tag "$samplesheet"
......
...@@ -63,8 +63,9 @@ workflow COOLER { ...@@ -63,8 +63,9 @@ workflow COOLER {
if (!params.res_zoomify){ if (!params.res_zoomify){
ch_res_zoomify = cool_bins.min() ch_res_zoomify = cool_bins.min()
}else{ }else{
ch_res_zoomify = params.res_zoomify ch_res_zoomify = Channel.from(params.res_zoomify).splitCsv().flatten().unique().toInteger()
} }
ch_cool ch_cool
.combine(ch_res_zoomify) .combine(ch_res_zoomify)
.filter{ it[2] == it[3] } .filter{ it[2] == it[3] }
......
...@@ -36,13 +36,20 @@ if (params.digestion){ ...@@ -36,13 +36,20 @@ if (params.digestion){
} }
//**************************************** //****************************************
// Maps resolution for downstream analysis // Combine all maps resolution for downstream analysis
ch_map_res = Channel.from( params.bin_size ).splitCsv().flatten().toInteger()
if (params.res_zoomify){
ch_zoom_res = Channel.from( params.res_zoomify ).splitCsv().flatten().toInteger()
ch_map_res = ch_map_res.concat(ch_zoom_res)
}
ch_map_res = Channel.from( params.bin_size ).splitCsv().flatten()
if (params.res_tads && !params.skip_tads){ if (params.res_tads && !params.skip_tads){
Channel.from( "${params.res_tads}" ) Channel.from( "${params.res_tads}" )
.splitCsv() .splitCsv()
.flatten() .flatten()
.toInteger()
.set {ch_tads_res} .set {ch_tads_res}
ch_map_res = ch_map_res.concat(ch_tads_res) ch_map_res = ch_map_res.concat(ch_tads_res)
}else{ }else{
...@@ -56,6 +63,7 @@ if (params.res_dist_decay && !params.skip_dist_decay){ ...@@ -56,6 +63,7 @@ if (params.res_dist_decay && !params.skip_dist_decay){
Channel.from( "${params.res_dist_decay}" ) Channel.from( "${params.res_dist_decay}" )
.splitCsv() .splitCsv()
.flatten() .flatten()
.toInteger()
.set {ch_ddecay_res} .set {ch_ddecay_res}
ch_map_res = ch_map_res.concat(ch_ddecay_res) ch_map_res = ch_map_res.concat(ch_ddecay_res)
}else{ }else{
...@@ -69,6 +77,7 @@ if (params.res_compartments && !params.skip_compartments){ ...@@ -69,6 +77,7 @@ if (params.res_compartments && !params.skip_compartments){
Channel.from( "${params.res_compartments}" ) Channel.from( "${params.res_compartments}" )
.splitCsv() .splitCsv()
.flatten() .flatten()
.toInteger()
.set {ch_comp_res} .set {ch_comp_res}
ch_map_res = ch_map_res.concat(ch_comp_res) ch_map_res = ch_map_res.concat(ch_comp_res)
}else{ }else{
...@@ -154,8 +163,6 @@ workflow HIC { ...@@ -154,8 +163,6 @@ workflow HIC {
ch_input ch_input
) )
INPUT_CHECK.out.reads.view()
// //
// SUBWORKFLOW: Prepare genome annotation // SUBWORKFLOW: Prepare genome annotation
// //
...@@ -202,7 +209,6 @@ workflow HIC { ...@@ -202,7 +209,6 @@ workflow HIC {
if (!params.skip_dist_decay){ if (!params.skip_dist_decay){
COOLER.out.cool COOLER.out.cool
.combine(ch_ddecay_res) .combine(ch_ddecay_res)
.view()
.filter{ it[0].resolution == it[2] } .filter{ it[0].resolution == it[2] }
.map { it -> [it[0], it[1]]} .map { it -> [it[0], it[1]]}
.set{ ch_distdecay } .set{ ch_distdecay }
...@@ -223,7 +229,7 @@ workflow HIC { ...@@ -223,7 +229,7 @@ workflow HIC {
.map { it -> [it[0], it[1], it[2]]} .map { it -> [it[0], it[1], it[2]]}
.set{ ch_cool_compartments } .set{ ch_cool_compartments }
COMPARTMENTS( COMPARTMENTS (
ch_cool_compartments, ch_cool_compartments,
ch_fasta, ch_fasta,
PREPARE_GENOME.out.chromosome_size PREPARE_GENOME.out.chromosome_size
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment