diff --git a/bin/hicpro2higlass.sh b/bin/hicpro2higlass.sh deleted file mode 100755 index ff11aeeb538bbfb06acead2d22646c93d8a567bf..0000000000000000000000000000000000000000 --- a/bin/hicpro2higlass.sh +++ /dev/null @@ -1,268 +0,0 @@ -#!/bin/bash - -## HiC-Pro -## Copyleft 2017 Institut Curie -## Author(s): Nicolas Servant -## Contact: nicolas.servant@curie.fr -## This software is distributed without any guarantee under the terms of the BSD licence - -## -## First version of converter between HiCPro and higlass. -## The cooler python package should be properly installed, as well as the higlass software -## - -## -## A few notes about higlass -## -## docker run will install the docker image and start it -## sudo docker run --detach --publish 8888:80 --volume ~/hg-data:/data --volume ~/hg-tmp:/tmp --name higlass-container gehlenborglab/higlass -## sudo docker start higlass-container -## sudo docker ps -all -## -## Once higlass is installed, you can just run it using -## sudo docker start higlass-container -## higlass will then be available at http://localhost:8888 -## - -########################### -## trap handler -########################### -function trap_error() -{ - echo "Error: $1 - line $2 - exit status of last command: $?. Exit" >&2 - exit 1 -} - -function trap_exit() -{ - ##Since bash-4.0 $LINENO is reset to 1 when the trap is triggered - if [ "$?" != "0" ]; then - echo "Error: exit status detected. Exit." >&2 - fi - - if [[ ! -z ${tmp_dir} && -e ${tmp_dir} ]]; then - echo -e "Cleaning temporary folders ..." >&2 - /bin/rm -rf ${tmp_dir} - fi -} - -trap 'trap_error "$0" "$LINENO"' ERR -trap 'trap_exit' 0 1 2 3 - -set -E ## export trap to functions -set -o pipefail ## trace ERR through pipes - -## 0 = -## 1 > -## 2 < -vercomp () { - if [[ $1 == $2 ]] - then - return 0 - fi - local IFS=. - local i ver1=($1) ver2=($2) - # fill empty fields in ver1 with zeros - for ((i=${#ver1[@]}; i<${#ver2[@]}; i++)) - do - ver1[i]=0 - done - - for ((i=0; i<${#ver1[@]}; i++)) - do - if [[ -z ${ver2[i]} ]] - then - # fill empty fields in ver2 with zeros - ver2[i]=0 - fi - if ((10#${ver1[i]} > 10#${ver2[i]})) - then - echo 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - echo 2 - fi - done - echo 0 -} - -function usage { - echo -e "usage : hicpro2higlass -i INPUT -r RESOLUTION -c CHROMSIZE [-n] [-o ODIR] [-t TEMP] [-h]" - echo -e "Use option -h|--help for more information" -} - -function help { - usage; - echo - echo "Generate Higlass input file from HiC-Pro results" - echo "See https://github.com/hms-dbmi/higlass-website for details about Higlass" - echo "---------------" - echo "OPTIONS" - echo - echo " -i|--input INPUT : allValidPairs or matrix file generated by HiC-Pro" - echo " -r|--res RESOLUTION : .matrix file resolution or maximum resolution to reach from the .allValidPairs input file" - echo " -c|--chrom CHROMSIZE : chromosome size file" - echo " -p|--proc NB_CPU : number of CPUs for cooler" - echo " [-n|--norm] : run cooler matrix balancing algorithm" - echo " [-o|--out] : output path. Default is current path" - echo " [-t|--temp] TEMP : path to tmp folder. Default is current path" - echo " [-h|--help]: help" - exit; -} - - -if [ $# -lt 1 ] -then - usage - exit -fi - -# Transform long options to short ones -for arg in "$@"; do - shift - case "$arg" in - "--input") set -- "$@" "-i" ;; - "--bed") set -- "$@" "-b" ;; - "--res") set -- "$@" "-r" ;; - "--chrom") set -- "$@" "-c" ;; - "--proc") set -- "$@" "-p" ;; - "--out") set -- "$@" "-o" ;; - "--temp") set -- "$@" "-t" ;; - "--norm") set -- "$@" "-n" ;; - "--help") set -- "$@" "-h" ;; - *) set -- "$@" "$arg" - esac -done - -INPUT_HICPRO="" -INPUT_BED="" -NORMALIZE=0 -NPROC=1 -CHROMSIZES_FILE="" -RES=10000 -OUT="./" -TEMP="./" - -while getopts ":i:b:c:p:r:o:t:nh" OPT -do - case $OPT in - i) INPUT_HICPRO=$OPTARG;; - b) INPUT_BED=$OPTARG;; - n) NORMALIZE=1;; - c) CHROMSIZES_FILE=$OPTARG;; - p) NPROC=$OPTARG;; - r) RES=$OPTARG;; - o) OUT=$OPTARG;; - t) TEMP=$OPTARG;; - h) help ;; - \?) - echo "Invalid option: -$OPTARG" >&2 - usage - exit 1 - ;; - :) - echo "Option -$OPTARG requires an argument." >&2 - usage - exit 1 - ;; - esac -done - -if [[ -z $INPUT_HICPRO ]]; -then - usage - exit -fi - -if [[ ! -e $CHROMSIZES_FILE ]]; then - echo -e "$CHROMSIZES_FILE file not found. Exit" - exit 1 -fi - -## Detect input data type -DATATYPE="" -if [[ $INPUT_HICPRO == *.mat* ]]; then - DATATYPE="MATRIX" -elif [[ $INPUT_HICPRO == *allValidPairs* || $INPUT_HICPRO == *validPairs* ]]; then - DATATYPE="VALID" -else - echo -e "Unknown input data type. Expect .matrix or _allValidPairs input files." - exit 1 -fi -echo -e "$DATATYPE input file detected ..." - -## Check cooler version -which cooler > /dev/null; -if [ $? != "0" ]; then - echo -e "Cooler is not installed or is not in your $PATH. See https://github.com/mirnylab/cooler for details." - exit 1; -fi - -COOLER_VERSION=$(cooler --version 2>&1 | awk '{print $NF}') -echo "Cooler version $COOLER_VERSION detected ..." -cres=$(vercomp ${COOLER_VERSION} "0.7.6") -if [[ $cres == "2" ]]; then - echo "Cooler version must be >= 0.7.6 ! Stop." - exit 1 -fi - -if [[ $DATATYPE == "VALID" ]]; then - which pairix > /dev/null; - if [ $? != "0" ]; then - echo -e "Pairix is not installed or is not in your PATH. See https://github.com/4dn-dcic/pairix." - exit 1; - fi -fi - -echo -e "\nGenerating .cool files ..." -tmp_dir=${TEMP}/_tmp$$ -mkdir -p $tmp_dir - -if [[ $DATATYPE == "MATRIX" ]]; then - out=$(basename $INPUT_HICPRO | sed -e 's/.mat.*/.cool/') - - cooler makebins $CHROMSIZES_FILE $RES > $tmp_dir/bins.bed - cooler load -f coo --one-based $tmp_dir/bins.bed $INPUT_HICPRO $tmp_dir/$out - - echo -e "\nZoomify .cool file ..." - if [[ $NORMALIZE == 1 ]]; then - cooler zoomify --nproc ${NPROC} --balance $tmp_dir/$out - else - cooler zoomify --nproc ${NPROC} $tmp_dir/$out - fi - out=$(basename $INPUT_HICPRO | sed -e 's/.mat.*/.mcool/') - -elif [[ $DATATYPE == "VALID" ]]; then - out=$(basename $INPUT_HICPRO | sed -e 's/.allValidPairs.*/.cool/') - - awk '{OFS="\t";print $2,$3,$4,$5,$6,$7,1}' $INPUT_HICPRO | sed -e 's/+/1/g' -e 's/-/16/g' > $tmp_dir/contacts.txt - cooler csort --nproc ${NPROC} -c1 1 -p1 2 -s1 3 -c2 4 -p2 5 -s2 6 \ - -o $tmp_dir/contacts.sorted.txt.gz \ - $tmp_dir/contacts.txt \ - $CHROMSIZES_FILE - - cooler makebins $CHROMSIZES_FILE $RES > $tmp_dir/bins.bed - cooler cload pairix --nproc ${NPROC} $tmp_dir/bins.bed $tmp_dir/contacts.sorted.txt.gz $tmp_dir/$out - - echo -e "\nZoomify .cool file ..." - if [[ $NORMALIZE == 1 ]]; then - cooler zoomify --nproc ${NPROC} --balance $tmp_dir/$out - else - cooler zoomify --nproc ${NPROC} $tmp_dir/$out - fi - out=$(basename $INPUT_HICPRO | sed -e 's/.allValidPairs.*/.mcool/') -fi - -## mv to out -mv $tmp_dir/*cool ${OUT}/ - -## clean -/bin/rm -rf $tmp_dir - -echo -e "\nCooler file generated with success ..." -echo "Please copy the file $out in your Higlass input directory and run :" -echo "sudo docker exec higlass-container python higlass-server/manage.py ingest_tileset --filename /tmp/$out --datatype matrix --filetype cooler" - - - diff --git a/bin/hicpro_merge_validpairs.sh b/bin/hicpro_merge_validpairs.sh new file mode 100755 index 0000000000000000000000000000000000000000..6d455d008ccd4c197a095ded24f956dcefa1c5bc --- /dev/null +++ b/bin/hicpro_merge_validpairs.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +## +## HiC-Pro +## Internal function +## Merge valid interactions files and remove duplicates +## + +rmDup=0 +prefix="" +while getopts ":dp:" opt; do + case "$opt" in + d) rmDup=1 ;; + p) prefix=$OPTARG ;; + esac +done +shift $(( OPTIND - 1 )) + +vpairs="$@" + +if [[ ${rmDup} == 1 ]]; then + ## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs) + sort -S 50% -k2,2V -k3,3n -k5,5V -k6,6n -m ${vpairs} | \ + awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs +else + cat ${vpairs} > ${prefix}.allValidPairs +fi + +echo -e -n "valid_interaction\t" > ${prefix}_allValidPairs.mergestat +cat ${vpairs} | wc -l >> ${prefix}_allValidPairs.mergestat +echo -e -n "valid_interaction_rmdup\t" >> ${prefix}_allValidPairs.mergestat +cat ${prefix}.allValidPairs | wc -l >> ${prefix}_allValidPairs.mergestat + +## Count short range (<20000) vs long range contacts +awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} $2 == $5{cis=cis+1; d=$6>$3?$6-$3:$3-$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} $2!=$5{trans=trans+1}END{print "trans_interaction\t"trans"\ncis_interaction\t"cis"\ncis_shortRange\t"sr"\ncis_longRange\t"lr}' ${prefix}.allValidPairs >> ${prefix}_allValidPairs.mergestat diff --git a/conf/modules.config b/conf/modules.config index 0e2268c54e7909641ca6de15910e777fa9eb3c4d..44ec6706090b6bcb18a54105e247a896db3a190d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -137,6 +137,7 @@ process { pattern: "*Pairs" ] ] + ext.args = { params.keep_dups ? '' : '-d' } } withName: 'MERGE_STATS' { diff --git a/environment.yml b/environment.yml index e3cd7576274730a4a3e7c76e8431993726bcc47d..dabfb9dfbe686aed1d8ab6e30c6ca39b77e68ce9 100644 --- a/environment.yml +++ b/environment.yml @@ -1,31 +1,32 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: nf-core-hic-1.3.0 +name: nf-core-hic-1.4.0 channels: - conda-forge - bioconda - defaults dependencies: - - conda-forge::python=3.7.6 - - pip=20.0.1 + - conda-forge::python=3.9.12=h9a8a25e_1_cpython + - pip=22.0.4=pyhd8ed1ab_0 - conda-forge::tbb=2020.2=hc9558a2_0 - - conda-forge::scipy=1.4.1 - - conda-forge::numpy=1.18.1 - - bioconda::iced=0.5.6 - - bioconda::bx-python=0.8.8 - - bioconda::pysam=0.15.4 - - conda-forge::pymdown-extensions=7.1 - - bioconda::cooler=0.8.6 - - bioconda::cooltools=0.4.0 - - bioconda::bowtie2=2.3.5 - - bioconda::samtools=1.9 - - bioconda::multiqc=1.8 + - conda-forge::scipy=1.8.0=py39hee8e79c_1 + - conda-forge::numpy=1.22.3=py39hc58783e_2 + - bioconda::iced=0.5.10=py39h919a90d_1 + - bioconda::bx-python=0.8.13=py39h6471ffd_1 + - bioconda::pysam=0.19.0=py39h5030a8b_0 + - conda-forge::pymdown-extensions=7.1=pyh9f0ad1d_0 + - bioconda::cooler=0.8.11=pyh5e36f6f_1 + - bioconda::cooltools=0.5.1=py39h5371cbf_1 + - bioconda::bowtie2=2.4.5=py39hd2f7db1_2 + - bioconda::samtools=1.15.1=h1170115_0 + - bioconda::multiqc=1.12=pyhdfd78af_0 + - bioconda::fastqc=0.11.9=hdfd78af_1 ## Dev tools - - bioconda::hicexplorer=3.4.3 - - bioconda::bioconductor-hitc=1.32.0 - - conda-forge::r-optparse=1.6.6 - - bioconda::ucsc-bedgraphtobigwig=357 - - conda-forge::cython=0.29.19 + - bioconda::hicexplorer=3.7.2=pyhdfd78af_1 + - bioconda::bioconductor-hitc=1.38.0=r41hdfd78af_0 + - conda-forge::r-optparse=1.7.1=r41hc72bb7e_0 + - bioconda::ucsc-bedgraphtobigwig=377=ha8a8165_3 + - conda-forge::cython=0.29.28=py39h5a03fae_2 - pip: - - fanc==0.8.30 \ No newline at end of file + - fanc==0.9.23 \ No newline at end of file diff --git a/modules/local/hicpro/merge_valid_interaction.nf b/modules/local/hicpro/merge_valid_interaction.nf index 69163057aa2c29d3c3428275f99b80a85bc71362..2ea31823cdb906e178ef9cb79f3959b0848ba8a1 100644 --- a/modules/local/hicpro/merge_valid_interaction.nf +++ b/modules/local/hicpro/merge_valid_interaction.nf @@ -2,6 +2,11 @@ process MERGE_VALID_INTERACTION { tag "$prefix" label 'process_highmem' + conda (params.enable_conda ? "conda-forge::gawk=5.1.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + input: tuple val(meta), path(vpairs) @@ -13,41 +18,9 @@ process MERGE_VALID_INTERACTION { script: prefix = meta.id - if ( ! params.keep_dups ){ + def args = task.ext.args ?: '' """ - mkdir -p ${prefix} - - ## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs) - sort -S 50% -k2,2V -k3,3n -k5,5V -k6,6n -m ${vpairs} | \\ - awk -F"\\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=\$2 || c2!=\$5 || s1!=\$3 || s2!=\$6){print;c1=\$2;c2=\$5;s1=\$3;s2=\$6}' > ${prefix}.allValidPairs - - echo -n "valid_interaction\t" > ${prefix}_allValidPairs.mergestat - cat ${vpairs} | wc -l >> ${prefix}_allValidPairs.mergestat - echo -n "valid_interaction_rmdup\t" >> ${prefix}_allValidPairs.mergestat - cat ${prefix}.allValidPairs | wc -l >> ${prefix}_allValidPairs.mergestat - - ## Count short range (<20000) vs long range contacts - awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${prefix}.allValidPairs >> ${prefix}_allValidPairs.mergestat - - ## For MultiQC - mkdir -p ${prefix} - cp ${prefix}_allValidPairs.mergestat ${prefix}/ - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sort: \$(echo \$(sort --version 2>&1 | head -1 | awk '{print \$NF}' 2>&1)) - END_VERSIONS - """ - }else{ - """ - cat ${vpairs} > ${prefix}.allValidPairs - echo -n "valid_interaction\t" > ${prefix}_allValidPairs.mergestat - cat ${vpairs} | wc -l >> ${prefix}_allValidPairs.mergestat - echo -n "valid_interaction_rmdup\t" >> ${prefix}_allValidPairs.mergestat - cat ${prefix}.allValidPairs | wc -l >> ${prefix}_allValidPairs.mergestat - - ## Count short range (<20000) vs long range contacts - awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${prefix}.allValidPairs >> ${prefix}_allValidPairs.mergestat + hicpro_merge_validpairs.sh ${args} -p ${prefix} ${vpairs} ## For MultiQC mkdir -p ${prefix} @@ -58,5 +31,4 @@ process MERGE_VALID_INTERACTION { sort: \$(echo \$(sort --version 2>&1 | head -1 | awk '{print \$NF}' 2>&1)) END_VERSIONS """ - } -} +}