Skip to content
Snippets Groups Projects
Commit ac7ab363 authored by nservant's avatar nservant
Browse files

add multiQC report

parent 31af5d39
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python
## HiC-Pro
## Copyright (c) 2015 Institut Curie
## Author(s): Nicolas Servant, Eric Viara
## Contact: nicolas.servant@curie.fr
## This software is distributed without any guarantee under the terms of the BSD-3 licence.
## See the LICENCE file for details
"""
Script to merge any files with the same template
"""
import argparse
import sys
import glob
import os
from collections import OrderedDict
def num(s):
    """Convert a text field into a number.

    The value is returned as an ``int`` when it parses as one, otherwise as a
    ``float``. The merge loop in this script relies on that distinction:
    integers are treated as counts, floats as percentages.

    Raises ValueError if ``s`` is neither an integer nor a float literal.
    """
    try:
        return int(s)
    except ValueError:
        ## Not an integer literal: fall back to float (may itself raise ValueError)
        return float(s)
if __name__ == "__main__":
    ## Merge several tab-separated stat files that share the same layout.
    ## The first file is used as the template (row names and column count);
    ## every other file is folded into it and the result is written on stdout.

    ## Read command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--files", help="List of input file(s)", type=str, nargs='+')
    parser.add_argument("-v", "--verbose", help="verbose mode", action='store_true')
    args = parser.parse_args()

    ## args.files is None when -f is not given at all
    infiles = args.files or []
    li = len(infiles)

    if li == 0:
        ## stdout carries the merged table, so fatal messages go to stderr
        sys.stderr.write("No files to merge - stop\n")
        sys.exit(1)

    if args.verbose:
        ## Verbose lines start with '#', i.e. they look like comment lines
        print("## merge_statfiles.py")
        print("## Merging " + str(li) + " files")
        print("## Use " + infiles[0] + " as template")

    ## Reading first file to get the template
    template = OrderedDict()
    with open(infiles[0]) as f:
        for line in f:
            if line.startswith("#"):
                continue
            lsp = line.strip().split("\t")
            ## A real list is required: values are updated in place below
            template[lsp[0]] = [num(v) for v in lsp[1:]]

    if not template:
        sys.stderr.write("Cannot find template files !\n")
        sys.exit(1)

    ## Int are counts / Float are percentage
    for infile in infiles[1:]:
        with open(infile) as f:
            for line in f:
                if line.startswith("#"):
                    continue
                lsp = line.strip().split("\t")
                key = lsp[0]
                if key not in template:
                    sys.stderr.write("Warning : '" + key + "' not found in template [" + infile + "]\n")
                    continue
                values = template[key]
                for i, field in enumerate(lsp[1:]):
                    val = num(field)
                    if isinstance(val, int):
                        ## Counts are summed
                        values[i] += val
                    else:
                        ## NOTE: percentages are averaged with the running value,
                        ## so with more than two files this is a successive
                        ## halving and not the arithmetic mean of all files.
                        values[i] = round((values[i] + val) / 2, 3)

    ## Print template
    for key, values in template.items():
        sys.stdout.write("\t".join([key] + [str(v) for v in values]) + "\n")
...@@ -471,11 +471,12 @@ process merge_mapping_steps{ ...@@ -471,11 +471,12 @@ process merge_mapping_steps{
output: output:
set val(sample), file("${prefix}_bwt2merged.bam") into bwt2_merged_bam set val(sample), file("${prefix}_bwt2merged.bam") into bwt2_merged_bam
set val(prefix), file("${prefix}.mapstat") into all_mapstat set val(oname), file("${prefix}.mapstat") into all_mapstat
script: script:
sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2)/ sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2)/
tag = prefix.toString() =~/_R1|_val_1/ ? "R1" : "R2" tag = prefix.toString() =~/_R1|_val_1/ ? "R1" : "R2"
oname = prefix.toString() - ~/(\.[0-9]+)$/
""" """
samtools merge -@ ${task.cpus} \\ samtools merge -@ ${task.cpus} \\
...@@ -505,20 +506,21 @@ process combine_mapped_files{ ...@@ -505,20 +506,21 @@ process combine_mapped_files{
tag "$sample = $r1_prefix + $r2_prefix" tag "$sample = $r1_prefix + $r2_prefix"
publishDir "${params.outdir}/mapping", mode: 'copy', publishDir "${params.outdir}/mapping", mode: 'copy',
saveAs: {filename -> filename.indexOf(".pairstat") > 0 ? "stats/$filename" : "$filename"} saveAs: {filename -> filename.indexOf(".pairstat") > 0 ? "stats/$filename" : "$filename"}
input: input:
set val(sample), file(aligned_bam) from bwt2_merged_bam.groupTuple() set val(sample), file(aligned_bam) from bwt2_merged_bam.groupTuple()
output: output:
set val(sample), file("${sample}_bwt2pairs.bam") into paired_bam set val(sample), file("${sample}_bwt2pairs.bam") into paired_bam
file "*.pairstat" into all_pairstat set val(oname), file("*.pairstat") into all_pairstat
script: script:
r1_bam = aligned_bam[0] r1_bam = aligned_bam[0]
r1_prefix = r1_bam.toString() - ~/_bwt2merged.bam$/ r1_prefix = r1_bam.toString() - ~/_bwt2merged.bam$/
r2_bam = aligned_bam[1] r2_bam = aligned_bam[1]
r2_prefix = r2_bam.toString() - ~/_bwt2merged.bam$/ r2_prefix = r2_bam.toString() - ~/_bwt2merged.bam$/
oname = sample.toString() - ~/(\.[0-9]+)$/
def opts = "-t" def opts = "-t"
opts = params.rm_singleton ? "${opts}" : "--single ${opts}" opts = params.rm_singleton ? "${opts}" : "--single ${opts}"
opts = params.rm_multi ? "${opts}" : "--multi ${opts}" opts = params.rm_multi ? "${opts}" : "--multi ${opts}"
...@@ -546,7 +548,7 @@ process get_valid_interaction{ ...@@ -546,7 +548,7 @@ process get_valid_interaction{
output: output:
set val(sample), file("*.validPairs") into valid_pairs set val(sample), file("*.validPairs") into valid_pairs
set val(sample), file("*.validPairs") into valid_pairs_4cool set val(sample), file("*.validPairs") into valid_pairs_4cool
file "*RSstat" into all_rsstat set val(sample), file("*RSstat") into all_rsstat
script: script:
...@@ -570,28 +572,68 @@ process get_valid_interaction{ ...@@ -570,28 +572,68 @@ process get_valid_interaction{
* STEP3 - BUILD MATRIX * STEP3 - BUILD MATRIX
*/ */
if ( params.splitFastq ){ process remove_duplicates {
process merge_sample { tag "$sample"
tag "$sample" publishDir "${params.outdir}/hic_results/data", mode: 'copy',
publishDir "${params.outdir}/hic_results/data", mode: 'copy' saveAs: {filename -> filename.indexOf("*stat") > 0 ? "stats/$sample/$filename" : "$filename"}
input: input:
set val(sample), file(vpairs) from valid_pairs.groupTuple() set val(sample), file(vpairs) from valid_pairs.groupTuple()
output: output:
set val(sample), file("*.allValidPairs") into all_valid_pairs set val(sample), file("*.allValidPairs") into all_valid_pairs
set val(sample), file("*.allValidPairs") into all_valid_pairs_4cool set val(sample), file("*.allValidPairs") into all_valid_pairs_4cool
file("stats/") into all_mergestat
script:
""" script:
cat $vpairs > test.allValidPairs if ( params.rm_dup ){
""" """
mkdir -p stats/${sample}
sort -T /tmp/ -S 50% -k2,2V -k3,3n -k5,5V -k6,6n -m ${vpairs} | \
awk -F"\\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=\$2 || c2!=\$5 || s1!=\$3 || s2!=\$6){print;c1=\$2;c2=\$5;s1=\$3;s2=\$6}' > ${sample}.allValidPairs
echo -n "valid_interaction\t" > stats/${sample}/${sample}_allValidPairs.mergestat
cat ${vpairs} | wc -l >> stats/${sample}/${sample}_allValidPairs.mergestat
echo -n "valid_interaction_rmdup\t" >> stats/${sample}/${sample}_allValidPairs.mergestat
cat ${sample}.allValidPairs | wc -l >> stats/${sample}/${sample}_allValidPairs.mergestat
awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${sample}.allValidPairs >> stats/${sample}/${sample}_allValidPairs.mergestat
"""
}else{
"""
mkdir -p stats/${sample}
cat ${vpairs} > ${sample}.allValidPairs
echo -n "valid_interaction\t" > stats/${sample}/${sample}_allValidPairs.mergestat
cat ${vpairs} | wc -l >> stats/${sample}/${sample}_allValidPairs.mergestat
echo -n "valid_interaction_rmdup\t" >> stats/${sample}/${sample}_allValidPairs.mergestat
cat ${sample}.allValidPairs | wc -l >> stats/${sample}/${sample}_allValidPairs.mergestat
awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${sample}.allValidPairs >> stats/${sample}/${sample}_allValidPairs.mergestat
"""
} }
}else{
all_valid_pairs = valid_pairs
all_valid_pairs_4cool = valid_pairs
} }
// Merge the statistics files received under one key into a single file by
// calling merge_statfiles.py; the result is written below mstats/<sample>/.
process merge_sample {
// The task tag is the merged-file extension chosen in the script section
tag "$ext"
publishDir "${params.outdir}/hic_results/stats/${sample}", mode: 'copy'
input:
// One item per key from each of the three stat channels (mapstat, pairstat,
// RSstat), each grouped with groupTuple() and then concatenated
set val(prefix), file(fstat) from all_mapstat.groupTuple().concat(all_pairstat.groupTuple(), all_rsstat.groupTuple())
output:
file("mstats/") into all_mstats
script:
// Sample name = prefix without the read-pair suffix (_R1/_R2/_val_1/_val_2)
sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2)/
// Pick the output extension from the kind of stat file received.
// NOTE(review): the '.' in these patterns is unescaped, so it matches any
// character, not only a literal dot.
// NOTE(review): there is no fallback branch; if none of the patterns match,
// ext is not assigned here - confirm this cannot happen.
if ( (fstat =~ /.mapstat/) ){ ext = "mmapstat" }
if ( (fstat =~ /.pairstat/) ){ ext = "mpairstat" }
if ( (fstat =~ /.RSstat/) ){ ext = "mRSstat" }
"""
mkdir -p mstats/${sample}
merge_statfiles.py -f ${fstat} > mstats/${sample}/${prefix}.${ext}
"""
}
process build_contact_maps{ process build_contact_maps{
tag "$sample - $mres" tag "$sample - $mres"
publishDir "${params.outdir}/hic_results/matrix/raw", mode: 'copy' publishDir "${params.outdir}/hic_results/matrix/raw", mode: 'copy'
...@@ -659,14 +701,13 @@ process generate_cool{ ...@@ -659,14 +701,13 @@ process generate_cool{
/* /*
* STEP 5 - MultiQC * STEP 5 - MultiQC
*/
process multiqc { process multiqc {
publishDir "${params.outdir}/MultiQC", mode: 'copy' publishDir "${params.outdir}/MultiQC", mode: 'copy'
input: input:
file multiqc_config from ch_multiqc_config file multiqc_config from ch_multiqc_config
//file ('mapping/stats/*') from combine_mapping_results.collect() file ('input_*/*') from all_mstats.concat(all_mergestat).collect()
//file ('hic_results/data/stats/*') from valid_interaction_results.collect()
file ('software_versions/*') from software_versions_yaml file ('software_versions/*') from software_versions_yaml
file workflow_summary from create_workflow_summary(summary) file workflow_summary from create_workflow_summary(summary)
...@@ -677,11 +718,12 @@ process multiqc { ...@@ -677,11 +718,12 @@ process multiqc {
script: script:
rtitle = custom_runName ? "--title \"$custom_runName\"" : '' rtitle = custom_runName ? "--title \"$custom_runName\"" : ''
rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : ''
""" """
multiqc -f $rtitle $rfilename --config $multiqc_config . multiqc -f $rtitle $rfilename --config $multiqc_config .
""" """
} }
*/
/**************************************************** /****************************************************
* POST-PROCESSING * POST-PROCESSING
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment