Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
H
hic
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
LBMC
Hub
hic
Commits
e9207c34
Commit
e9207c34
authored
4 years ago
by
nservant
Browse files
Options
Downloads
Patches
Plain Diff
[MODIF] normalize with cooler
parent
56fe1c1d
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
main.nf
+92
-41
92 additions, 41 deletions
main.nf
with
92 additions
and
41 deletions
main.nf
+
92
−
41
View file @
e9207c34
...
@@ -219,7 +219,7 @@ else {
...
@@ -219,7 +219,7 @@ else {
// Chromosome size
// Chromosome size
if ( params.chromosome_size ){
if ( params.chromosome_size ){
Channel.fromPath( params.chromosome_size , checkIfExists: true)
Channel.fromPath( params.chromosome_size , checkIfExists: true)
.into {chr
omosome_size; chromosome_size_cool
}
.into {chr
size; chrsize_build; chrsize_raw; chrsize_balance
}
}
}
else if ( params.fasta ){
else if ( params.fasta ){
Channel.fromPath( params.fasta )
Channel.fromPath( params.fasta )
...
@@ -276,7 +276,7 @@ if (params.res_dist_decay){
...
@@ -276,7 +276,7 @@ if (params.res_dist_decay){
map_res = Channel.from( params.bin_size ).splitCsv().flatten()
map_res = Channel.from( params.bin_size ).splitCsv().flatten()
map_res.concat(tads_bin, ddecay_bin)
map_res.concat(tads_bin, ddecay_bin)
.unique()
.unique()
.
set
{ map_res }
.
into
{ map_res
; map_res_cool
}
/**********************************************************
/**********************************************************
...
@@ -438,7 +438,7 @@ if(!params.chromosome_size && params.fasta){
...
@@ -438,7 +438,7 @@ if(!params.chromosome_size && params.fasta){
file fasta from fasta_for_chromsize
file fasta from fasta_for_chromsize
output:
output:
file "*.size" into ch
romosome_
size, chr
omosome_
size_cool
file "*.size" into chsize, chrsize_cool
script:
script:
"""
"""
...
@@ -473,8 +473,8 @@ if(!params.restriction_fragments && params.fasta && !params.dnase){
...
@@ -473,8 +473,8 @@ if(!params.restriction_fragments && params.fasta && !params.dnase){
*/
*/
/*
/*
*
STEP 1
- Two-steps Reads Mapping
*
HiC-pro
- Two-steps Reads Mapping
*/
*/
process bowtie2_end_to_end {
process bowtie2_end_to_end {
tag "$sample"
tag "$sample"
...
@@ -674,7 +674,7 @@ process combine_mates{
...
@@ -674,7 +674,7 @@ process combine_mates{
/*
/*
*
STEP2 - DETECT VALID PAIRS
*
HiC-Pro - detect valid interaction from aligned data
*/
*/
if (!params.dnase){
if (!params.dnase){
...
@@ -682,7 +682,7 @@ if (!params.dnase){
...
@@ -682,7 +682,7 @@ if (!params.dnase){
tag "$sample"
tag "$sample"
label 'process_low'
label 'process_low'
publishDir "${params.outdir}/hic_results/data", mode: params.publish_dir_mode,
publishDir "${params.outdir}/hic_results/data", mode: params.publish_dir_mode,
saveAs: {filename -> filename.indexOf("
*
stat") > 0 ? "stats/$filename" : "$filename"}
saveAs: {filename -> filename.indexOf("
.
stat") > 0 ? "stats/$filename" : "$filename"}
input:
input:
set val(sample), file(pe_bam) from paired_bam
set val(sample), file(pe_bam) from paired_bam
...
@@ -721,7 +721,7 @@ else{
...
@@ -721,7 +721,7 @@ else{
tag "$sample"
tag "$sample"
label 'process_low'
label 'process_low'
publishDir "${params.outdir}/hic_results/data", mode: params.publish_dir_mode,
publishDir "${params.outdir}/hic_results/data", mode: params.publish_dir_mode,
saveAs: {filename -> filename.indexOf("
*
stat") > 0 ? "stats/$filename" : "$filename"}
saveAs: {filename -> filename.indexOf("
.
stat") > 0 ? "stats/$filename" : "$filename"}
input:
input:
set val(sample), file(pe_bam) from paired_bam
set val(sample), file(pe_bam) from paired_bam
...
@@ -747,20 +747,20 @@ else{
...
@@ -747,20 +747,20 @@ else{
/*
/*
*
STEP3 - BUILD MATRIX
*
Remove duplicates
*/
*/
process remove_duplicates {
process remove_duplicates {
tag "$sample"
tag "$sample"
label 'process_highmem'
label 'process_highmem'
publishDir "${params.outdir}/hic_results/data", mode: params.publish_dir_mode,
publishDir "${params.outdir}/hic_results/data", mode: params.publish_dir_mode,
saveAs: {filename -> filename.indexOf("
*
stat") > 0 ? "stats/$sample/$filename" : "$filename"}
saveAs: {filename -> filename.indexOf("
.
stat") > 0 ? "stats/$sample/$filename" : "$filename"}
input:
input:
set val(sample), file(vpairs) from valid_pairs.groupTuple().dump(tag:'final')
set val(sample), file(vpairs) from valid_pairs.groupTuple().dump(tag:'final')
output:
output:
set val(sample), file("*.allValidPairs") into ch_vpairs, ch_vpairs_
4
cool
set val(sample), file("*.allValidPairs") into ch_vpairs, ch_vpairs_cool
file("stats/") into all_mergestat
file("stats/") into all_mergestat
script:
script:
...
@@ -796,7 +796,7 @@ process remove_duplicates {
...
@@ -796,7 +796,7 @@ process remove_duplicates {
}
}
}
}
process merge_s
ample
{
process merge_s
tats
{
tag "$ext"
tag "$ext"
label 'process_low'
label 'process_low'
publishDir "${params.outdir}/hic_results/stats/${sample}", mode: params.publish_dir_mode
publishDir "${params.outdir}/hic_results/stats/${sample}", mode: params.publish_dir_mode
...
@@ -818,6 +818,12 @@ process merge_sample {
...
@@ -818,6 +818,12 @@ process merge_sample {
"""
"""
}
}
/*
* HiC-Pro build matrix processes
* TODO - TO REPLACED BY COOLER ?
*/
process build_contact_maps{
process build_contact_maps{
tag "$sample - $mres"
tag "$sample - $mres"
label 'process_highmem'
label 'process_highmem'
...
@@ -828,7 +834,7 @@ process build_contact_maps{
...
@@ -828,7 +834,7 @@ process build_contact_maps{
input:
input:
set val(sample), file(vpairs), val(mres) from ch_vpairs.combine(map_res)
set val(sample), file(vpairs), val(mres) from ch_vpairs.combine(map_res)
file chrsize from chr
omosome_
size.collect()
file chrsize from chrsize.collect()
output:
output:
set val(sample), val(mres), file("*.matrix"), file("*.bed") into raw_maps, raw_maps_4cool
set val(sample), val(mres), file("*.matrix"), file("*.bed") into raw_maps, raw_maps_4cool
...
@@ -839,10 +845,6 @@ process build_contact_maps{
...
@@ -839,10 +845,6 @@ process build_contact_maps{
"""
"""
}
}
/*
* STEP 4 - NORMALIZE MATRIX
*/
process run_ice{
process run_ice{
tag "$rmaps"
tag "$rmaps"
label 'process_highmem'
label 'process_highmem'
...
@@ -870,32 +872,84 @@ process run_ice{
...
@@ -870,32 +872,84 @@ process run_ice{
/*
/*
* C
reate cool file
* C
ooler
*/
*/
process co
nvert_to_cool
{
process co
oler_build
{
tag "$sample"
tag "$sample"
label 'process_medium'
label 'process_medium'
publishDir "${params.outdir}/contact_maps/cool", mode: 'copy'
when:
!params.skip_cool
input:
input:
set val(sample),
val(res), file(mat), file(bed) from iced_map
s_
4
cool
set val(sample),
file(vpairs) from ch_vpair
s_cool
file chrsize from chr
omosome_size_cool
.collect()
file chrsize from chr
size_build
.collect()
output:
output:
set val(sample), val(res), file("*.cool") into cool_maps
set val(sample), file("contacts.sorted.txt.gz"), file("contacts.sorted.txt.gz.px2") into cool_build
file("*.mcool") into mcools_maps
script:
script:
"""
"""
cooler load --count-as-float -f coo --one-based ${bed} ${mat} ${sample}_${res}.cool
awk '{OFS="\t";print \$2,\$3,\$4,\$5,\$6,\$7,1}' $vpairs | sed -e 's/+/1/g' -e 's/-/16/g' > contacts.txt
cooler zoomify --nproc ${task.cpus} ${sample}_${res}.cool
cooler csort --nproc ${task.cpus} -c1 1 -p1 2 -s1 3 -c2 4 -p2 5 -s2 6 \
contacts.txt \
-o contacts.sorted.txt.gz \
${chrsize}
"""
"""
}
}
process cooler_raw {
tag "$sample - ${res}"
label 'process_medium'
publishDir "${params.outdir}/contact_maps/", mode: 'copy',
saveAs: {filename -> filename.indexOf(".cool") > 0 ? "raw/cool/$filename" : "raw/txt/$filename"}
input:
set val(sample), file(contacts), file(index), val(res) from cool_build.combine(map_res_cool)
file chrsize from chrsize_raw.collect()
output:
set val(sample), val(res), file("*cool") into raw_cool_maps
set file("*.bed"), file("${sample}_${res}.txt") into raw_txt_maps
script:
"""
cooler makebins ${chrsize} ${res} > ${sample}_${res}.bed
cooler cload pairix --nproc ${task.cpus} ${sample}_${res}.bed ${contacts} ${sample}_${res}.cool
cooler dump ${sample}_${res}.cool --one-based-ids | awk '{OFS="\t"; print \$1+1,\$2+1,\$3}' > ${sample}_${res}.txt
"""
}
process cooler_balance {
tag "$sample - ${res}"
label 'process_medium'
publishDir "${params.outdir}/contact_maps/", mode: 'copy',
saveAs: {filename -> filename.indexOf(".cool") > 0 ? "norm/cool/$filename" : "norm/txt/$filename"}
input:
set val(sample), val(res), file(cool) from raw_cool_maps
file chrsize from chrsize_balance.collect()
output:
set val(sample), val(res), file("${sample}_${res}_norm.cool") into norm_cool_maps, norm_cool_maps_h5
file("${sample}_${res}_norm.txt") into norm_txt_maps
script:
"""
cp ${cool} ${sample}_${res}_norm.cool
cooler balance ${sample}_${res}_norm.cool -p ${task.cpus} --force
cooler dump ${sample}_${res}_norm.cool --one-based-ids --balanced --na-rep 0 | awk '{OFS="\t"; print \$1+1,\$2+1,\$4}' > ${sample}_${res}_norm.txt
"""
}
/*
-- TODO--
process cooler_zoomify {
}
*/
/*
/*
* Create h5 file
* Create h5 file
...
@@ -904,23 +958,21 @@ process convert_to_cool {
...
@@ -904,23 +958,21 @@ process convert_to_cool {
process convert_to_h5 {
process convert_to_h5 {
tag "$sample"
tag "$sample"
label 'process_medium'
label 'process_medium'
publishDir "${params.outdir}/contact_maps/h5", mode: 'copy'
publishDir "${params.outdir}/contact_maps/
norm/
h5", mode: 'copy'
input:
input:
set val(sample), val(res), file(maps)
, file(bed) from iced
_maps_
4
h5
set val(sample), val(res), file(maps)
from norm_cool
_maps_h5
output:
output:
set val(sample), val(res), file("*.h5") into h5maps_ddecay, h5maps_ccomp, h5maps_tads
set val(sample), val(res), file("*.h5") into h5maps_ddecay, h5maps_ccomp, h5maps_tads
script:
script:
prefix = maps.toString() - ~/(\.matrix)?$/
"""
"""
hicConvertFormat --matrices ${maps} \
hicConvertFormat --matrices ${maps} \
--outFileName ${
prefix
}.h5 \
--outFileName ${
sample
}.h5 \
--resolution ${res} \
--resolution ${res} \
--inputFormat
hicpro
\
--inputFormat
cool
\
--outputFormat h5 \
--outputFormat h5 \
-bf ${bed}
"""
"""
}
}
...
@@ -990,9 +1042,8 @@ process tads_hicexplorer {
...
@@ -990,9 +1042,8 @@ process tads_hicexplorer {
"""
"""
}
}
chIS = cool_maps.combine(tads_res_insulation).filter{ it[1] == it[3] }.dump(tag : "ins")
chIS =
norm_
cool_maps.combine(tads_res_insulation).filter{ it[1] == it[3] }.dump(tag : "ins")
/*
process tads_insulation {
process tads_insulation {
tag "$sample - $res"
tag "$sample - $res"
label 'process_medium'
label 'process_medium'
...
@@ -1012,11 +1063,11 @@ process tads_insulation {
...
@@ -1012,11 +1063,11 @@ process tads_insulation {
cooltools diamond-insulation --window-pixels ${cool} 15 25 50 > ${sample}_insulation.tsv
cooltools diamond-insulation --window-pixels ${cool} 15 25 50 > ${sample}_insulation.tsv
"""
"""
}
}
*/
/*
/*
*
STEP 6 -
MultiQC
* MultiQC
*/
*/
process multiqc {
process multiqc {
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment