Skip to content
Snippets Groups Projects
Commit f3ba46f4 authored by nservant's avatar nservant
Browse files

[MODIF] test on real dataset

parent 9312146d
No related branches found
No related tags found
No related merge requests found
#!/bin/bash
set -e
##
## HiC-Pro
......@@ -17,11 +18,36 @@ done
shift $(( OPTIND - 1 ))
vpairs="$@"
vpairs_sorted=$(echo $vpairs | sed -e 's/validPairs/sorted.validPairs/g')
mkdir -p ./tmp/
if [[ ${rmDup} == 1 ]]; then
## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs)
sort -S 50% -k2,2V -k3,3n -k5,5V -k6,6n -m ${vpairs} | \
awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs
## Sort individual validPairs files
fcounts=0
for vfile in ${vpairs}
do
echo "Sorting ${vfile} ..."
fcounts=$((fcounts+1))
ofile=$(echo ${vfile} | sed -e 's/validPairs/sorted.validPairs/')
#sort -k2,2V -k3,3n -k5,5V -k6,6n -T ./tmp/ -o ${ofile} ${vfile}
sort -k2,2 -k5,5 -k3,3n -k6,6n -T ./tmp/ -o ${ofile} ${vfile}
done
if [[ $fcounts -gt 1 ]]
then
echo "Merging and removing the duplicates ..."
## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs)
#sort -k2,2V -k3,3n -k5,5V -k6,6n -T ./tmp/ -m ${vpairs_sorted} | \
sort -k2,2 -k5,5 -k3,3n -k6,6n -T ./tmp/ -m ${vpairs_sorted} | \
awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs
else
echo "Removing the duplicates ..."
cat ${vpairs_sorted} | awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs
fi
## clean
/bin/rm -rf ${vpairs_sorted}
else
cat ${vpairs} > ${prefix}.allValidPairs
fi
......@@ -33,3 +59,6 @@ cat ${prefix}.allValidPairs | wc -l >> ${prefix}_allValidPairs.mergestat
## Count short range (<20000) vs long range contacts
awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} $2 == $5{cis=cis+1; d=$6>$3?$6-$3:$3-$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} $2!=$5{trans=trans+1}END{print "trans_interaction\t"trans"\ncis_interaction\t"cis"\ncis_shortRange\t"sr"\ncis_longRange\t"lr}' ${prefix}.allValidPairs >> ${prefix}_allValidPairs.mergestat
## clean
/bin/rm -rf ./tmp/
......@@ -43,7 +43,7 @@ process {
time = { check_max( 20.h * task.attempt, 'time' ) }
}
withLabel:process_high_memory {
memory = { check_max( 200.GB * task.attempt, 'memory' ) }
memory = { check_max( 24.GB * task.attempt, 'memory' ) }
}
withLabel:error_ignore {
errorStrategy = 'ignore'
......
......@@ -49,7 +49,7 @@ process {
mode: 'copy',
enabled: params.save_aligned_intermediates
]
ext.prefix = { params.split_fastq ? "${meta.chunk}_${meta.mates}" : "${meta.id}_${meta.mates}" }
ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" }
ext.args = params.bwt2_opts_end2end ?: ''
ext.args2 = !params.dnase ? "-F 4" :""
}
......@@ -68,7 +68,7 @@ process {
mode: 'copy',
enabled: params.save_aligned_intermediates
]
ext.prefix = { params.split_fastq ? "${meta.chunk}_${meta.mates}_trimmed" : "${meta.id}_${meta.mates}_trimmed" }
ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}_trimmed" }
ext.args = params.bwt2_opts_trimmed ?: ''
ext.args2 = ""
}
......@@ -79,7 +79,7 @@ process {
mode: 'copy',
enabled: params.save_aligned_intermediates
]
ext.prefix = { params.split_fastq ? "${meta.chunk}_${meta.mates}" : "${meta.id}_${meta.mates}" }
ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" }
}
withName: 'COMBINE_MATES' {
......@@ -93,7 +93,7 @@ process {
params.keep_multi ? "--multi" : "",
params.min_mapq ? "-q ${params.min_mapq}" : ""
].join(' ').trim()
ext.prefix = { params.split_fastq ? "${meta.chunk}" : "${meta.id}" }
ext.prefix = { "${meta.id}_${meta.chunk}" }
}
withName: 'GET_VALID_INTERACTION' {
......
......@@ -29,7 +29,7 @@ process CALL_COMPARTMENTS {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
cooltools: \$(cooltools --version 2>&1 | sed 's/cooletools, version //')
cooltools: \$(cooltools --version 2>&1 | grep version | sed 's/cooltools, version //')
END_VERSIONS
"""
}
process BUILD_CONTACT_MAPS{
tag "$meta.id - $res"
label 'process_highmem'
label 'process_high_memory'
conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
......
......@@ -16,7 +16,7 @@ process MAPPING_STATS_DNASE {
tuple val(meta), path("${prefix}.mapstat"), emit:stats
script:
prefix = meta.id + "_" + meta.mates
prefix = meta.id + "_" + meta.chunk + "_" + meta.mates
tag = meta.mates
"""
echo "## ${prefix}" > ${prefix}.mapstat
......
......@@ -19,13 +19,13 @@ process HICPRO2PAIRS {
prefix = "${meta.id}"
"""
##columns: readID chr1 pos1 chr2 pos2 strand1 strand2
awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs > ${prefix}_contacts.pairs
sort -k2,2 -k4,4 -k3,3n -k5,5n ${prefix}_contacts.pairs | bgzip -c > ${prefix}_contacts.pairs.gz
awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs | bgzip -c > ${prefix}_contacts.pairs.gz
##sort -k2,2 -k4,4 -k3,3n -k5,5n ${prefix}_contacts.pairs | bgzip -c > ${prefix}_contacts.pairs.gz
pairix -f ${prefix}_contacts.pairs.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
pairix: \$(echo \$(pairix 2>&1 | grep Version | sed -e 's/Version: //')
pairix: \$(echo \$(pairix 2>&1 | grep Version | sed -e 's/Version: //'))
END_VERSIONS
"""
}
process MERGE_VALID_INTERACTION {
tag "$prefix"
label 'process_highmem'
label 'process_high_memory'
conda (params.enable_conda ? "conda-forge::gawk=5.1.0" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
......
process ICE_NORMALIZATION{
tag "$rmaps"
label 'process_highmem'
label 'process_high_memory'
conda (params.enable_conda ? "conda-forge::python=3.9 bioconda::iced=0.5.10 conda-forge::numpy=1.22.3" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
......
......@@ -63,8 +63,9 @@ workflow COOLER {
if (!params.res_zoomify){
ch_res_zoomify = cool_bins.min()
}else{
ch_res_zoomify = params.res_zoomify
ch_res_zoomify = Channel.from(params.res_zoomify).splitCsv().flatten().unique().toInteger()
}
ch_cool
.combine(ch_res_zoomify)
.filter{ it[2] == it[3] }
......
......@@ -36,13 +36,20 @@ if (params.digestion){
}
//****************************************
// Maps resolution for downstream analysis
// Combine all maps resolution for downstream analysis
ch_map_res = Channel.from( params.bin_size ).splitCsv().flatten().toInteger()
if (params.res_zoomify){
ch_zoom_res = Channel.from( params.res_zoomify ).splitCsv().flatten().toInteger()
ch_map_res = ch_map_res.concat(ch_zoom_res)
}
ch_map_res = Channel.from( params.bin_size ).splitCsv().flatten()
if (params.res_tads && !params.skip_tads){
Channel.from( "${params.res_tads}" )
.splitCsv()
.flatten()
.toInteger()
.set {ch_tads_res}
ch_map_res = ch_map_res.concat(ch_tads_res)
}else{
......@@ -56,6 +63,7 @@ if (params.res_dist_decay && !params.skip_dist_decay){
Channel.from( "${params.res_dist_decay}" )
.splitCsv()
.flatten()
.toInteger()
.set {ch_ddecay_res}
ch_map_res = ch_map_res.concat(ch_ddecay_res)
}else{
......@@ -69,6 +77,7 @@ if (params.res_compartments && !params.skip_compartments){
Channel.from( "${params.res_compartments}" )
.splitCsv()
.flatten()
.toInteger()
.set {ch_comp_res}
ch_map_res = ch_map_res.concat(ch_comp_res)
}else{
......@@ -154,8 +163,6 @@ workflow HIC {
ch_input
)
INPUT_CHECK.out.reads.view()
//
// SUBWORKFLOW: Prepare genome annotation
//
......@@ -202,7 +209,6 @@ workflow HIC {
if (!params.skip_dist_decay){
COOLER.out.cool
.combine(ch_ddecay_res)
.view()
.filter{ it[0].resolution == it[2] }
.map { it -> [it[0], it[1]]}
.set{ ch_distdecay }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment