Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
H
hic
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
LBMC
Hub
hic
Commits
adea9365
Commit
adea9365
authored
5 years ago
by
nservant
Browse files
Options
Downloads
Patches
Plain Diff
add ICE and cooler files
parent
ad8881f0
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
bin/hicpro2higlass.sh
+264
-0
264 additions, 0 deletions
bin/hicpro2higlass.sh
bin/ice
+124
-0
124 additions, 0 deletions
bin/ice
with
388 additions
and
0 deletions
bin/hicpro2higlass.sh
0 → 100755
+
264
−
0
View file @
adea9365
#!/bin/bash
## HiC-Pro
## Copyleft 2017 Institut Curie
## Author(s): Nicolas Servant
## Contact: nicolas.servant@curie.fr
## This software is distributed without any guarantee under the terms of the BSD licence
##
## First version of converter between HiCPro and higlass.
## The cooler python package should be properly installed, as well as the higlass software
##
##
## A few notes about higlass
##
## docker run will install the docker image and start it
## sudo docker run --detach --publish 8888:80 --volume ~/hg-data:/data --volume ~/hg-tmp:/tmp --name higlass-container gehlenborglab/higlass
## sudo docker start higlass-container
## sudo docker ps -all
##
## Once higlass is installed, you can just run it using
## sudo docker start higlass-container
## higlass will then be available at http://localhost:8888
##
###########################
## trap handler
###########################
#######################################
# ERR trap handler: report where a command failed, then abort the script.
# Arguments:
#   $1 - name of the script (the trap passes "$0")
#   $2 - line number of the failing command (the trap passes "$LINENO")
# Outputs:
#   One diagnostic line on stderr.
# Returns:
#   Never returns; exits the shell with status 1.
#######################################
trap_error() {
  # $? still holds the status of the command that fired the trap;
  # save it before anything else can overwrite it.
  local last_status=$?
  printf 'Error: %s - line %s - exit status of last command: %s. Exit\n' \
    "$1" "$2" "${last_status}" >&2
  exit 1
}
#######################################
# EXIT/signal trap handler: report a non-zero exit status and remove the
# temporary working directory, if one was created.
# Globals:
#   tmp_dir (read) - path of the temporary directory; may be unset or empty
# Outputs:
#   Diagnostics on stderr.
#######################################
function trap_exit()
{
    ## Save the status immediately: every later command overwrites $?
    local last_status=$?

    ##Since bash-4.0 $LINENO is reset to 1 when the trap is triggered
    if [[ "${last_status}" != "0" ]]; then
        echo "Error: exit status detected. Exit." >&2
    fi

    ## Quote the path and stop option parsing with '--': an unquoted
    ## expansion is word-split, so a temp path containing a space would make
    ## rm delete unrelated directories instead of the temporary one.
    if [[ -n "${tmp_dir:-}" && -e "${tmp_dir}" ]]; then
        echo -e "Cleaning temporary folders ..." >&2
        /bin/rm -rf -- "${tmp_dir}"
    fi
}
## Error handling setup.
## - any failing command is reported through trap_error (script name + line)
## - trap_exit runs on normal exit (EXIT = 0) and on HUP (1), INT (2), QUIT (3)
trap 'trap_error "$0" "$LINENO"' ERR
trap trap_exit EXIT HUP INT QUIT

## -E          : functions and subshells inherit the ERR trap
## -o pipefail : a pipeline fails as soon as one of its stages fails
set -Eo pipefail
#######################################
# Compare two dot-separated numeric version strings.
# Arguments:
#   $1 - first version  (e.g. 0.8.11)
#   $2 - second version (e.g. 0.7.6)
# Outputs:
#   Exactly one line on stdout:
#     0 if $1 == $2
#     1 if $1 >  $2
#     2 if $1 <  $2
# Returns:
#   0 (the verdict is carried by stdout, not by the exit status).
#######################################
vercomp () {
    local -a ver1 ver2
    local i

    if [[ "$1" == "$2" ]]; then
        echo 0
        return 0
    fi

    ## Split both versions on '.' without touching the caller's IFS
    IFS=. read -r -a ver1 <<< "$1"
    IFS=. read -r -a ver2 <<< "$2"

    # fill empty fields in ver1 with zeros
    for ((i = ${#ver1[@]}; i < ${#ver2[@]}; i++)); do
        ver1[i]=0
    done

    for ((i = 0; i < ${#ver1[@]}; i++)); do
        # fill empty fields in ver2 with zeros
        if [[ -z "${ver2[i]:-}" ]]; then
            ver2[i]=0
        fi
        ## 10# forces base 10 so that a field such as "08" is not read as octal.
        ## The first differing field decides: print the verdict and stop, so
        ## later fields cannot append extra lines to the output.
        if ((10#${ver1[i]:-0} > 10#${ver2[i]})); then
            echo 1
            return 0
        fi
        if ((10#${ver1[i]:-0} < 10#${ver2[i]})); then
            echo 2
            return 0
        fi
    done
    echo 0
}
#######################################
# Print the one-line synopsis and a pointer to the full help.
# Outputs:
#   Two lines on stdout.
#######################################
usage() {
  printf '%s\n' \
    "usage : hicpro2higlass -i INPUT -r RESOLUTION -c CHROMSIZE [-n] [-o ODIR] [-t TEMP] [-h]" \
    "Use option -h|--help for more information"
}
#######################################
# Print the synopsis followed by the detailed option list, then leave the
# script.
# Outputs:
#   Help text on stdout.
# Returns:
#   Never returns; exits with the status of the last print (0 on success).
#######################################
help() {
  usage
  echo
  printf '%s\n' \
    "Generate Higlass input file from HiC-Pro results" \
    "See https://github.com/hms-dbmi/higlass-website for details about Higlass" \
    "---------------" \
    "OPTIONS" \
    "" \
    " -i|--input INPUT : allValidPairs or matrix file generated by HiC-Pro" \
    " -r|--res RESOLUTION : .matrix file resolution or maximum resolution to reach from the .allValidPairs input file" \
    " -c|--chrom CHROMSIZE : chromosome size file" \
    " [-n|--norm] : run cooler matrix balancing algorithm" \
    " [-o|--out] : output path. Default is current path" \
    " [-t|--temp] TEMP : path to tmp folder. Default is current path" \
    " [-h|--help]: help"
  exit
}
## Without any argument there is nothing to do: show the synopsis and leave
if (( $# < 1 )); then
  usage
  exit
fi

# Transform long options to short ones
# (getopts only understands single-letter options, so every known long
# option is replaced by its short twin; everything else is kept as is)
short_args=()
for arg in "$@"; do
  case "$arg" in
    --input) short_args+=("-i") ;;
    --bed)   short_args+=("-b") ;;
    --res)   short_args+=("-r") ;;
    --chrom) short_args+=("-c") ;;
    --out)   short_args+=("-o") ;;
    --temp)  short_args+=("-t") ;;
    --norm)  short_args+=("-n") ;;
    --help)  short_args+=("-h") ;;
    *)       short_args+=("$arg") ;;
  esac
done
set -- "${short_args[@]}"

## Default values
INPUT_HICPRO=""
INPUT_BED=""
NORMALIZE=0
CHROMSIZES_FILE=""
RES=10000
OUT="./"
TEMP="./"

## The leading ':' selects silent error reporting: getopts then yields '?'
## for an unknown option and ':' for a missing argument, both handled below
while getopts ":i:b:c:r:o:t:nh" OPT; do
  case "$OPT" in
    i) INPUT_HICPRO=$OPTARG ;;
    b) INPUT_BED=$OPTARG ;;
    c) CHROMSIZES_FILE=$OPTARG ;;
    r) RES=$OPTARG ;;
    o) OUT=$OPTARG ;;
    t) TEMP=$OPTARG ;;
    n) NORMALIZE=1 ;;
    h) help ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      usage
      exit 1
      ;;
    :)
      echo "Option -$OPTARG requires an argument." >&2
      usage
      exit 1
      ;;
  esac
done
## Validate the mandatory arguments.
## Every failure exits with a non-zero status and reports on stderr, so that
## a calling pipeline can detect the error.
if [[ -z "$INPUT_HICPRO" ]]; then
  usage
  exit 1
fi

if [[ ! -e "$INPUT_HICPRO" ]]; then
  echo -e "$INPUT_HICPRO file not found. Exit" >&2
  exit 1
fi

if [[ -z "$CHROMSIZES_FILE" ]]; then
  echo -e "Chromosome size file is missing (-c|--chrom). Exit" >&2
  usage
  exit 1
fi

if [[ ! -e "$CHROMSIZES_FILE" ]]; then
  echo -e "$CHROMSIZES_FILE file not found. Exit" >&2
  exit 1
fi

## Detect input data type
## Only the file name is inspected: matching on the full path would let a
## directory name (e.g. /data/run.matrices/x.allValidPairs) decide the type.
DATATYPE=""
input_name=${INPUT_HICPRO##*/}
if [[ "$input_name" == *.mat* ]]; then
  DATATYPE="MATRIX"
elif [[ "$input_name" == *allValidPairs* || "$input_name" == *validPairs* ]]; then
  DATATYPE="VALID"
else
  echo -e "Unknown input data type. Expect .matrix or _allValidPairs input files." >&2
  exit 1
fi
echo -e "$DATATYPE input file detected ..."
## Check cooler version
## The presence test is the 'if' condition itself: a plain failing command
## followed by a check of $? would fire the ERR trap first and abort before
## the explanatory message is printed.
if ! command -v cooler > /dev/null; then
  echo -e "Cooler is not installed or is not in your PATH. See https://github.com/mirnylab/cooler for details." >&2
  exit 1
fi

## The version is taken as the last word printed by 'cooler --version'
COOLER_VERSION=$(cooler --version 2>&1 | awk '{print $NF}')
echo "Cooler version $COOLER_VERSION detected ..."

## vercomp prints 0 (=), 1 (>) or 2 (<). Only the first line is kept so the
## check stays valid even if the comparison emits more than one verdict.
cres=$(vercomp "${COOLER_VERSION}" "0.7.6")
cres=${cres%%$'\n'*}
if [[ "$cres" == "2" ]]; then
  echo "Cooler version must be >= 0.7.6 ! Stop." >&2
  exit 1
fi

## pairix is only needed to index the contacts of a valid-pairs input
if [[ "$DATATYPE" == "VALID" ]]; then
  if ! command -v pairix > /dev/null; then
    echo -e "Pairix is not installed or is not in your PATH. See https://github.com/4dn-dcic/pairix." >&2
    exit 1
  fi
fi
echo -e "\nGenerating .cool files ..."

## Private working directory with an unpredictable name.
## The variable must stay named tmp_dir: trap_exit removes it on exit.
mkdir -p -- "${TEMP}"
tmp_dir=$(mktemp -d "${TEMP%/}/_tmp.XXXXXX")

## Output prefix = input file name without its HiC-Pro suffix
input_name=$(basename -- "$INPUT_HICPRO")

if [[ "$DATATYPE" == "MATRIX" ]]; then
  ## Literal '.mat': a regex '.' would also cut names such as 'sample_format'
  prefix=${input_name%%.mat*}
  out="${prefix}.cool"

  cooler makebins "$CHROMSIZES_FILE" "$RES" > "$tmp_dir/bins.bed"
  cooler load -f coo --one-based "$tmp_dir/bins.bed" "$INPUT_HICPRO" "$tmp_dir/$out"

elif [[ "$DATATYPE" == "VALID" ]]; then
  ## Accept both '<sample>?allValidPairs*' and '<sample>?validPairs*'
  ## ('?' stands for the separator character, usually '.' or '_')
  prefix=${input_name%%?allValidPairs*}
  if [[ "$prefix" == "$input_name" ]]; then
    prefix=${input_name%%?validPairs*}
  fi
  out="${prefix}.cool"

  ## Keep chrom1 pos1 strand1 chrom2 pos2 strand2 and add a count of 1.
  ## Strands are recoded (+ -> 1, - -> 16) on the strand columns only, so
  ## chromosome names containing '+' or '-' are left untouched.
  awk 'BEGIN { OFS = "\t" }
       function flag(s) { return (s == "+") ? 1 : ((s == "-") ? 16 : s) }
       { print $2, $3, flag($4), $5, $6, flag($7), 1 }' \
    "$INPUT_HICPRO" > "$tmp_dir/contacts.txt"

  cooler csort --nproc 2 -c1 1 -p1 2 -s1 3 -c2 4 -p2 5 -s2 6 \
    -o "$tmp_dir/contacts.sorted.txt.gz" \
    "$tmp_dir/contacts.txt" \
    "$CHROMSIZES_FILE"

  cooler makebins "$CHROMSIZES_FILE" "$RES" > "$tmp_dir/bins.bed"
  cooler cload pairix "$tmp_dir/bins.bed" "$tmp_dir/contacts.sorted.txt.gz" "$tmp_dir/$out"
fi

echo -e "\nZoomify .cool file ..."
if [[ "$NORMALIZE" == 1 ]]; then
  cooler zoomify --balance "$tmp_dir/$out"
else
  cooler zoomify --no-balance "$tmp_dir/$out"
fi
## NOTE(review): zoomify is expected to write <prefix>.mcool next to its
## input, as the messages below assume - confirm against the cooler docs
out="${prefix}.mcool"

## mv to out
mkdir -p -- "${OUT}"
mv -- "$tmp_dir"/*cool "${OUT}/"

## clean
/bin/rm -rf -- "${tmp_dir:?}"

echo -e "\nCooler file generated with success ..."
echo "Please copy the file $out in your Higlass input directory and run :"
echo "sudo docker exec higlass-container python higlass-server/manage.py ingest_tileset --filename /tmp/$out --datatype matrix --filetype cooler"
This diff is collapsed.
Click to expand it.
bin/ice
0 → 100755
+
124
−
0
View file @
adea9365
#! /usr/bin/env python
import
sys
import
argparse
import
numpy
as
np
from
scipy
import
sparse
import
iced
from
iced.io
import
loadtxt
,
savetxt
# Command-line interface of the ICE normalization script.
# The order of the add_argument calls is kept: it drives the --help output.
parser = argparse.ArgumentParser("ICE normalization")

# Positional input: the contact-count file.
parser.add_argument(
    "filename",
    type=str,
    metavar="File to load",
    help="Path to file of contact counts to load")

# Output file; when omitted the name is derived from the input name later on.
parser.add_argument(
    "--results_filename", "-r",
    type=str,
    default=None,
    help="results_filename")

# Deprecated spelling of --filter_low_counts_perc.
parser.add_argument(
    "--filtering_perc", "-f",
    type=float,
    default=None,
    help="Percentage of reads to filter out")

parser.add_argument(
    "--filter_low_counts_perc",
    type=float,
    default=0.02,
    help="Percentage of reads to filter out")

parser.add_argument(
    "--filter_high_counts_perc",
    type=float,
    default=0,
    help="Percentage of reads to filter out")

parser.add_argument(
    "--remove-all-zeros-loci",
    action="store_true",
    default=False,
    help="If provided, all non-interacting loci will be "
         "removed prior to the filtering strategy chosen.")

parser.add_argument(
    "--max_iter", "-m",
    type=int,
    default=100,
    help="Maximum number of iterations")

parser.add_argument(
    "--eps", "-e",
    type=float,
    default=0.1,
    help="Precision")

# Work on a dense array instead of a sparse matrix.
parser.add_argument(
    "--dense", "-d",
    action="store_true",
    default=False)

# No action is given for the two options below, so each one expects a value
# on the command line; that value is stored as a string and any non-empty
# string is truthy when the option is tested later.
parser.add_argument(
    "--output-bias", "-b",
    default=False,
    help="Output the bias vector")

parser.add_argument(
    "--verbose", "-v",
    default=False)

args = parser.parse_args()
filename = args.filename

# Resolve the percentage used for the low-count filter.
#
# --filtering_perc (-f) is the deprecated spelling of
# --filter_low_counts_perc. When it is given, its value is used and a
# deprecation notice is issued; giving both spellings is an error.
import warnings

# Kept as a module-level name for backward compatibility: it holds the value
# of the deprecated option, or None when that option was not used.
filter_low_counts = None

# Ask argparse rather than scanning sys.argv for the deprecated option: this
# also covers the short form "-f" and the "--filtering_perc=VALUE" spelling.
deprecated_option_used = args.filtering_perc is not None

# --filter_low_counts_perc has a non-None default, so only the command line
# tells whether the user set it explicitly.
new_option_used = any(
    argument == "--filter_low_counts_perc" or
    argument.startswith("--filter_low_counts_perc=")
    for argument in sys.argv[1:])

if deprecated_option_used and new_option_used:
    raise Warning("This two options are incompatible")

if deprecated_option_used:
    warnings.warn(
        "Option '--filtering_perc' is deprecated. Please use "
        "'--filter_low_counts_perc' instead.",
        DeprecationWarning)
    # And print it again because deprecation warnings are not displayed for
    # recent versions of python
    print("--filtering_perc is deprecated. Please use filter_low_counts_perc")
    print("instead. This option will be removed in ice 0.3")
    filter_low_counts = args.filtering_perc
    filter_low_counts_perc = args.filtering_perc
else:
    # Explicit value, or the argparse default (0.02) when nothing was given.
    filter_low_counts_perc = args.filter_low_counts_perc
# Load the contact counts and build the (N, N) count matrix.
# print() is called as a function everywhere so that the script parses under
# both Python 2 and Python 3.
if args.verbose:
    print("Using iced version %s" % iced.__version__)
    print("Loading files...")

# Loads file as i, j, counts
i, j, data = loadtxt(filename).T

# Detecting whether the file is 0 or 1 based.
# NOTE(review): the test relies on the smallest index present in the file, so
# a 0-based file whose first bin has no contact is treated as 1-based.
if min(i.min(), j.min()) == 0:
    index_base = 0
    N = max(i.max(), j.max()) + 1
    counts = sparse.coo_matrix((data, (i, j)), shape=(N, N), dtype=float)
else:
    index_base = 1
    N = max(i.max(), j.max())
    # Shift to 0-based indices; index_base is added back when writing.
    counts = sparse.coo_matrix((data, (i - 1, j - 1)),
                               shape=(N, N), dtype=float)

if args.dense:
    counts = np.array(counts.todense())
else:
    counts = sparse.csr_matrix(counts)
# Filter the count matrix, then run the ICE normalization.
# print() is called as a function so that the script parses under both
# Python 2 and Python 3.
if args.verbose:
    print("Normalizing...")

# A percentage of 0 disables the corresponding filter.
if filter_low_counts_perc != 0:
    counts = iced.filter.filter_low_counts(
        counts,
        percentage=filter_low_counts_perc,
        remove_all_zeros_loci=args.remove_all_zeros_loci,
        copy=False, sparsity=False, verbose=args.verbose)

if args.filter_high_counts_perc != 0:
    counts = iced.filter.filter_high_counts(
        counts,
        percentage=args.filter_high_counts_perc,
        copy=False)

# The bias vector is always requested here; whether it is written to disk is
# decided later from args.output_bias.
counts, bias = iced.normalization.ICE_normalization(
    counts, max_iter=args.max_iter, copy=False,
    verbose=args.verbose, eps=args.eps,
    output_bias=True)
# Write the normalized counts and, on request, the bias vector.
import os

if args.results_filename is None:
    # Default name: "<input stem>_normalized<input extension>".
    # os.path.splitext only splits on a dot of the last path component, so a
    # dot in a directory name ("./dir/matrix") no longer mangles the path.
    stem, extension = os.path.splitext(filename)
    results_filename = stem + "_normalized" + extension
else:
    results_filename = args.results_filename

# Back to coordinate format, which exposes the row / col / data arrays.
counts = sparse.coo_matrix(counts)

if args.verbose:
    print("Writing results...")

# index_base restores the 0- or 1-based convention of the input file.
savetxt(
    results_filename, counts.col + index_base, counts.row + index_base,
    counts.data)

if args.output_bias:
    np.savetxt(results_filename + ".biases", bias)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment