Skip to content
Snippets Groups Projects
Commit adea9365 authored by nservant's avatar nservant
Browse files

add ICE and cooler files

parent ad8881f0
No related branches found
No related tags found
No related merge requests found
#!/bin/bash
## HiC-Pro
## Copyleft 2017 Institut Curie
## Author(s): Nicolas Servant
## Contact: nicolas.servant@curie.fr
## This software is distributed without any guarantee under the terms of the BSD licence
##
## First version of converter between HiCPro and higlass.
## The cooler python package should be properly installed, as well as the higlass software
##
##
## A few notes about higlass
##
## docker run will install the docker image and start it
## sudo docker run --detach --publish 8888:80 --volume ~/hg-data:/data --volume ~/hg-tmp:/tmp --name higlass-container gehlenborglab/higlass
## sudo docker start higlass-container
## sudo docker ps --all
##
## Once higlass is installed, you can just run it using
## sudo docker start higlass-container
## higlass will then be available at http://localhost:8888
##
###########################
## trap handler
###########################
## ERR trap handler: report the failing command on stderr and abort.
##   $1 - name of the script
##   $2 - line number reported by the trap
## The status of the command that failed is taken from $? as it is on entry.
function trap_error()
{
    local last_status=$?
    printf '%s\n' "Error: $1 - line $2 - exit status of last command: ${last_status}. Exit" >&2
    exit 1
}
## Exit handler: report a non-zero exit status and remove the temporary folder.
## Globals: tmp_dir (read) - temporary working directory, may be unset/empty
## Outputs: diagnostics on stderr only
function trap_exit()
{
    ## Must stay the first statement: any other command would overwrite $?
    local exit_status=$?
    ##Since bash-4.0 $LINENO is reset to 1 when the trap is triggered
    if [[ "${exit_status}" != "0" ]]; then
        echo "Error: exit status detected. Exit." >&2
    fi
    if [[ -n "${tmp_dir}" && -e "${tmp_dir}" ]]; then
        echo -e "Cleaning temporary folders ..." >&2
        ## Quoted and preceded by '--' so that a path containing blanks or
        ## starting with '-' is removed as a single operand
        /bin/rm -rf -- "${tmp_dir}"
    fi
}
## Any failing command is reported by trap_error, which aborts the script
trap 'trap_error "$0" "$LINENO"' ERR
## Cleanup is done once, when the shell exits
trap 'trap_exit' EXIT
## A trapped signal does not terminate bash by itself: without an explicit
## exit the script would resume after the handler. Exit with 128 + signal
## number, which in turn runs the EXIT trap above.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 131' QUIT
set -E ## export trap to functions
set -o pipefail ## trace ERR through pipes
## Compare two dot-separated version strings.
##   $1 - first version, $2 - second version
## Prints exactly one line on stdout:
##   0 if $1 = $2
##   1 if $1 > $2
##   2 if $1 < $2
## Always returns 0, so that it can be used in a command substitution while
## an ERR trap is active.
vercomp () {
    if [[ "$1" == "$2" ]]
    then
        echo 0
        return 0
    fi
    local IFS=.
    local i a b
    local -a ver1 ver2
    ver1=($1)
    ver2=($2)
    # fill empty fields in ver1 with zeros
    for ((i=${#ver1[@]}; i<${#ver2[@]}; i++))
    do
        ver1[i]=0
    done
    for ((i=0; i<${#ver1[@]}; i++))
    do
        # keep the leading digits only (e.g. "0rc1" -> "0"); an empty or
        # missing field counts as zero
        a=${ver1[i]%%[!0-9]*}
        b=${ver2[i]%%[!0-9]*}
        a=${a:-0}
        b=${b:-0}
        # the first differing field decides: stop right there so that a single
        # verdict is printed
        if ((10#$a > 10#$b))
        then
            echo 1
            return 0
        fi
        if ((10#$a < 10#$b))
        then
            echo 2
            return 0
        fi
    done
    echo 0
}
## Print the command synopsis and a pointer to the detailed help on stdout.
function usage {
    printf '%s\n' \
        "usage : hicpro2higlass -i INPUT -r RESOLUTION -c CHROMSIZE [-n] [-o ODIR] [-t TEMP] [-h]" \
        "Use option -h|--help for more information"
}
## Print the synopsis followed by the description of every option, then
## leave the script.
function help {
    usage
    printf '%s\n' \
        "" \
        "Generate Higlass input file from HiC-Pro results" \
        "See https://github.com/hms-dbmi/higlass-website for details about Higlass" \
        "---------------" \
        "OPTIONS" \
        "" \
        " -i|--input INPUT : allValidPairs or matrix file generated by HiC-Pro" \
        " -r|--res RESOLUTION : .matrix file resolution or maximum resolution to reach from the .allValidPairs input file" \
        " -c|--chrom CHROMSIZE : chromosome size file" \
        " [-n|--norm] : run cooler matrix balancing algorithm" \
        " [-o|--out] : output path. Default is current path" \
        " [-t|--temp] TEMP : path to tmp folder. Default is current path" \
        " [-h|--help]: help"
    exit
}
## Called without any argument: show the synopsis and stop here
if (( $# == 0 )); then
    usage
    exit
fi
# Transform long options to short ones
## The positional parameters are rebuilt in their original order, every known
## long option being replaced by its one-letter equivalent so that getopts can
## parse them. Anything else is passed through untouched.
short_args=()
for arg in "$@"; do
    case "$arg" in
        "--input") short_args+=("-i") ;;
        "--bed")   short_args+=("-b") ;;
        "--res")   short_args+=("-r") ;;
        "--chrom") short_args+=("-c") ;;
        "--out")   short_args+=("-o") ;;
        "--temp")  short_args+=("-t") ;;
        "--norm")  short_args+=("-n") ;;
        "--help")  short_args+=("-h") ;;
        *)         short_args+=("$arg") ;;
    esac
done
set -- "${short_args[@]}"
## Default values, overridden by the command line options below
INPUT_HICPRO=""
INPUT_BED=""
CHROMSIZES_FILE=""
NORMALIZE=0
RES=10000
OUT="./"
TEMP="./"

## The leading ':' of the option string selects getopts silent mode: unknown
## options and missing arguments are reported by the '\?' and ':' branches
while getopts ":i:b:c:r:o:t:nh" OPT; do
    case "$OPT" in
        i) INPUT_HICPRO="$OPTARG" ;;
        b) INPUT_BED="$OPTARG" ;;
        c) CHROMSIZES_FILE="$OPTARG" ;;
        r) RES="$OPTARG" ;;
        o) OUT="$OPTARG" ;;
        t) TEMP="$OPTARG" ;;
        n) NORMALIZE=1 ;;
        h) help ;;
        \?)
            echo "Invalid option: -$OPTARG" >&2
            usage
            exit 1
            ;;
        :)
            echo "Option -$OPTARG requires an argument." >&2
            usage
            exit 1
            ;;
    esac
done
## The input file is mandatory: leaving it out is an error, so the script
## exits with a non-zero status after printing the synopsis
if [[ -z "$INPUT_HICPRO" ]]; then
    usage
    exit 1
fi
## Error messages go to stderr, like the ones of the option parser
if [[ ! -e "$CHROMSIZES_FILE" ]]; then
    echo -e "$CHROMSIZES_FILE file not found. Exit" >&2
    exit 1
fi
## Detect input data type
## The type is guessed from the file name only
DATATYPE=""
if [[ "$INPUT_HICPRO" == *.mat* ]]; then
    DATATYPE="MATRIX"
elif [[ "$INPUT_HICPRO" == *allValidPairs* || "$INPUT_HICPRO" == *validPairs* ]]; then
    DATATYPE="VALID"
else
    echo -e "Unknown input data type. Expect .matrix or _allValidPairs input files." >&2
    exit 1
fi
echo -e "$DATATYPE input file detected ..."
## Check cooler version
## The lookup is done inside the 'if' condition: run as a plain command, its
## failure would fire the ERR trap, which exits before the message below can
## be printed
if ! command -v cooler > /dev/null 2>&1; then
    ## \$PATH is escaped so that the message names the variable instead of
    ## dumping its content
    echo -e "Cooler is not installed or is not in your \$PATH. See https://github.com/mirnylab/cooler for details." >&2
    exit 1
fi
COOLER_VERSION=$(cooler --version 2>&1 | awk '{print $NF}')
echo "Cooler version $COOLER_VERSION detected ..."
cres=$(vercomp "${COOLER_VERSION}" "0.7.6")
## vercomp may print more than one line; the first one is the verdict for the
## most significant differing field
cres=${cres%%$'\n'*}
if [[ "$cres" == "2" ]]; then
    echo "Cooler version must be >= 0.7.6 ! Stop." >&2
    exit 1
fi
## pairix is only needed to load valid pairs
if [[ "$DATATYPE" == "VALID" ]]; then
    if ! command -v pairix > /dev/null 2>&1; then
        echo -e "Pairix is not installed or is not in your PATH. See https://github.com/4dn-dcic/pairix." >&2
        exit 1
    fi
fi
echo -e "\nGenerating .cool files ..."
## Working directory, removed by the exit trap (which reads tmp_dir) if
## anything fails
tmp_dir=${TEMP}/_tmp$$
mkdir -p "$tmp_dir"
## Make sure the destination exists before running the conversion
mkdir -p "$OUT"

input_name=$(basename "$INPUT_HICPRO")

if [[ "$DATATYPE" == "MATRIX" ]]; then
    ## Prefix = file name up to the first literal '.mat'
    prefix=$(sed -e 's/\.mat.*//' <<< "$input_name")
    cooler makebins "$CHROMSIZES_FILE" "$RES" > "$tmp_dir/bins.bed"
    cooler load -f coo --one-based "$tmp_dir/bins.bed" "$INPUT_HICPRO" "$tmp_dir/${prefix}.cool"
elif [[ "$DATATYPE" == "VALID" ]]; then
    ## Both '<name>.allValidPairs' and '<name>.validPairs' are accepted by the
    ## input type detection, so both suffixes are stripped here
    prefix=$(sed -e 's/.allValidPairs.*//' -e 's/.validPairs.*//' <<< "$input_name")
    ## Keep columns 2-7 and append a count of 1. The +/- -> 1/16 conversion is
    ## applied to the two strand columns only, so that chromosome names
    ## containing '+' or '-' are left untouched
    awk 'function flag(s) { if (s == "+") return 1; if (s == "-") return 16; return s }
         BEGIN { OFS = "\t" }
         { print $2, $3, flag($4), $5, $6, flag($7), 1 }' "$INPUT_HICPRO" > "$tmp_dir/contacts.txt"
    cooler csort --nproc 2 -c1 1 -p1 2 -s1 3 -c2 4 -p2 5 -s2 6 \
        -o "$tmp_dir/contacts.sorted.txt.gz" \
        "$tmp_dir/contacts.txt" \
        "$CHROMSIZES_FILE"
    cooler makebins "$CHROMSIZES_FILE" "$RES" > "$tmp_dir/bins.bed"
    cooler cload pairix "$tmp_dir/bins.bed" "$tmp_dir/contacts.sorted.txt.gz" "$tmp_dir/${prefix}.cool"
fi

echo -e "\nZoomify .cool file ..."
if [[ "$NORMALIZE" == 1 ]]; then
    cooler zoomify --balance "$tmp_dir/${prefix}.cool"
else
    cooler zoomify --no-balance "$tmp_dir/${prefix}.cool"
fi
## Name reported to the user in the final message
out="${prefix}.mcool"

## mv to out
mv "$tmp_dir"/*cool "${OUT}/"
## clean
/bin/rm -rf -- "$tmp_dir"
echo -e "\nCooler file generated with success ..."
echo "Please copy the file $out in your Higlass input directory and run :"
echo "sudo docker exec higlass-container python higlass-server/manage.py ingest_tileset --filename /tmp/$out --datatype matrix --filetype cooler"
bin/ice 0 → 100755
#! /usr/bin/env python
import sys
import argparse
import numpy as np
from scipy import sparse
import iced
from iced.io import loadtxt, savetxt
# Command-line interface of the ICE normalization script.
parser = argparse.ArgumentParser(prog="ICE normalization")

# Input / output files
parser.add_argument(
    "filename",
    type=str,
    metavar="File to load",
    help="Path to file of contact counts to load")
parser.add_argument(
    "--results_filename", "-r",
    type=str,
    default=None,
    help="results_filename")

# Filtering options
parser.add_argument(
    "--filtering_perc", "-f",
    type=float,
    default=None,
    help="Percentage of reads to filter out")
parser.add_argument(
    "--filter_low_counts_perc",
    type=float,
    default=0.02,
    help="Percentage of reads to filter out")
parser.add_argument(
    "--filter_high_counts_perc",
    type=float,
    default=0,
    help="Percentage of reads to filter out")
parser.add_argument(
    "--remove-all-zeros-loci",
    action="store_true",
    default=False,
    help=("If provided, all non-interacting loci will be "
          "removed prior to the filtering strategy chosen."))

# Normalization options
parser.add_argument(
    "--max_iter", "-m",
    type=int,
    default=100,
    help="Maximum number of iterations")
parser.add_argument(
    "--eps", "-e",
    type=float,
    default=0.1,
    help="Precision")
parser.add_argument(
    "--dense", "-d",
    action="store_true",
    default=False)

# NOTE: the two options below have no 'store_true' action, hence each of them
# expects a value on the command line.
parser.add_argument(
    "--output-bias", "-b",
    default=False,
    help="Output the bias vector")
parser.add_argument(
    "--verbose", "-v",
    default=False)

args = parser.parse_args()
import warnings

filename = args.filename

# Percentage used by the low-count filter. It comes from
# --filter_low_counts_perc (argparse default: 0.02) unless the deprecated
# --filtering_perc / -f option is given, in which case the deprecated option
# provides the value.
filter_low_counts = None
filter_low_counts_perc = args.filter_low_counts_perc

# Deprecating filtering_perc option
# The parsed value is tested rather than sys.argv so that the short form
# '-f' is detected as well.
if args.filtering_perc is not None:
    warnings.warn(
        "Option '--filtering_perc' is deprecated. Please use "
        "'--filter_low_counts_perc' instead.'",
        DeprecationWarning)
    # And print it again because deprecation warnings are not displayed for
    # recent versions of python
    print("--filtering_perc is deprecated. Please use filter_low_counts_perc")
    print("instead. This option will be removed in ice 0.3")
    filter_low_counts = args.filtering_perc

    # Both options set the same parameter: refuse to guess which one wins.
    new_option_given = any(
        argument == "--filter_low_counts_perc" or
        argument.startswith("--filter_low_counts_perc=")
        for argument in sys.argv[1:])
    if new_option_given:
        raise Warning("This two options are incompatible")

    filter_low_counts_perc = args.filtering_perc
# print() is called with a single argument everywhere, a form accepted by both
# Python 2 and Python 3.
if args.verbose:
    print("Using iced version %s" % iced.__version__)
    print("Loading files...")

# Loads file as i, j, counts
i, j, data = loadtxt(filename).T

# Detecting whether the file is 0 or 1 based.
# index_base is kept so that the results can be written with the same
# convention as the input.
if min(i.min(), j.min()) == 0:
    index_base = 0
    N = max(i.max(), j.max()) + 1
    counts = sparse.coo_matrix((data, (i, j)), shape=(N, N), dtype=float)
else:
    index_base = 1
    N = max(i.max(), j.max())
    counts = sparse.coo_matrix((data, (i - 1, j - 1)), shape=(N, N), dtype=float)

# Work on a dense array when --dense is given, on a CSR matrix otherwise.
if args.dense:
    counts = np.array(counts.todense())
else:
    counts = sparse.csr_matrix(counts)
import os.path

# print() is called with a single argument, a form accepted by both Python 2
# and Python 3.
if args.verbose:
    print("Normalizing...")

# A percentage of 0 disables the corresponding filter.
if filter_low_counts_perc != 0:
    counts = iced.filter.filter_low_counts(
        counts,
        percentage=filter_low_counts_perc,
        remove_all_zeros_loci=args.remove_all_zeros_loci,
        copy=False, sparsity=False, verbose=args.verbose)

if args.filter_high_counts_perc != 0:
    counts = iced.filter.filter_high_counts(
        counts,
        percentage=args.filter_high_counts_perc,
        copy=False)

# output_bias=True: the bias vector is returned along with the normalized
# counts.
counts, bias = iced.normalization.ICE_normalization(
    counts, max_iter=args.max_iter, copy=False,
    verbose=args.verbose, eps=args.eps, output_bias=True)

if args.results_filename is None:
    # Default name: "_normalized" inserted before the extension of the input
    # file. os.path.splitext only looks at the last path component, so dots in
    # directory names and files without extension are handled properly.
    root, extension = os.path.splitext(filename)
    results_filename = root + "_normalized" + extension
else:
    results_filename = args.results_filename

counts = sparse.coo_matrix(counts)

if args.verbose:
    print("Writing results...")

# Indices are shifted back to the base (0 or 1) detected in the input file.
# NOTE(review): indices are passed as (col, row), not (row, col) - presumably
# intentional, confirm against iced.io.savetxt.
savetxt(
    results_filename,
    counts.col + index_base, counts.row + index_base, counts.data)

# --output-bias takes a value: any non-empty value enables the bias output.
if args.output_bias:
    np.savetxt(results_filename + ".biases", bias)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment