diff --git a/bin/ice b/bin/ice deleted file mode 100755 index 10f5f224a6064961a04ac2c09bc5b29286bf5484..0000000000000000000000000000000000000000 --- a/bin/ice +++ /dev/null @@ -1,124 +0,0 @@ -#! /usr/bin/env python - -import sys -import argparse -import numpy as np -from scipy import sparse - -import iced -from iced.io import loadtxt, savetxt - - -parser = argparse.ArgumentParser("ICE normalization") -parser.add_argument('filename', - metavar='File to load', - type=str, - help='Path to file of contact counts to load') -parser.add_argument("--results_filename", - "-r", - type=str, - default=None, - help="results_filename") -parser.add_argument("--filtering_perc", "-f", - type=float, - default=None, - help="Percentage of reads to filter out") -parser.add_argument("--filter_low_counts_perc", - type=float, - default=0.02, - help="Percentage of reads to filter out") -parser.add_argument("--filter_high_counts_perc", - type=float, - default=0, - help="Percentage of reads to filter out") -parser.add_argument("--remove-all-zeros-loci", default=False, - action="store_true", - help="If provided, all non-interacting loci will be " - "removed prior to the filtering strategy chosen.") -parser.add_argument("--max_iter", "-m", default=100, type=int, - help="Maximum number of iterations") -parser.add_argument("--eps", "-e", default=0.1, type=float, - help="Precision") -parser.add_argument("--dense", "-d", default=False, action="store_true") -parser.add_argument("--output-bias", "-b", default=False, help="Output the bias vector") -parser.add_argument("--verbose", "-v", default=False) - - -args = parser.parse_args() -filename = args.filename - -# Deprecating filtering_perc option -filter_low_counts = None -if "--filtering_perc" in sys.argv: - DeprecationWarning( - "Option '--filtering_perc' is deprecated. Please use " - "'--filter_low_counts_perc' instead.'") - # And print it again because deprecation warnings are not displayed for - # recent versions of python - print "--filtering_perc is deprecated. Please use filter_low_counts_perc" - print "instead. This option will be removed in ice 0.3" - filter_low_counts = args.filtering_perc -if "--filter_low_counts_perc" in sys.argv and "--filtering_perc" in sys.argv: - raise Warning("This two options are incompatible") -if "--filtering_perc" is None and "--filter_low_counts_perc" not in sys.argv: - filter_low_counts_perc = 0.02 -elif args.filter_low_counts_perc is not None: - filter_low_counts_perc = args.filter_low_counts_perc - -if args.verbose: - print("Using iced version %s" % iced.__version__) - print "Loading files..." - -# Loads file as i, j, counts -i, j, data = loadtxt(filename).T - -# Detecting whether the file is 0 or 1 based. -if min(i.min(), j.min()) == 0: - index_base = 0 - N = max(i.max(), j.max()) + 1 - counts = sparse.coo_matrix((data, (i, j)), shape=(N, N), dtype=float) -else: - index_base = 1 - N = max(i.max(), j.max()) - counts = sparse.coo_matrix((data, (i - 1, j - 1)), shape=(N, N), dtype=float) - -if args.dense: - counts = np.array(counts.todense()) -else: - counts = sparse.csr_matrix(counts) - -if args.verbose: - print "Normalizing..." - -if filter_low_counts_perc != 0: - counts = iced.filter.filter_low_counts(counts, - percentage=filter_low_counts_perc, - remove_all_zeros_loci=args.remove_all_zeros_loci, - copy=False, sparsity=False, verbose=args.verbose) -if args.filter_high_counts_perc != 0: - counts = iced.filter.filter_high_counts( - counts, - percentage=args.filter_high_counts_perc, - copy=False) - -counts, bias = iced.normalization.ICE_normalization( - counts, max_iter=args.max_iter, copy=False, - verbose=args.verbose, eps=args.eps, output_bias=True) - -if args.results_filename is None: - results_filename = ".".join( - filename.split(".")[:-1]) + "_normalized." + filename.split(".")[-1] -else: - results_filename = args.results_filename - -counts = sparse.coo_matrix(counts) - -if args.verbose: - print "Writing results..." - -savetxt( - results_filename, counts.col + index_base, counts.row + index_base, counts.data) - - -if args.output_bias: - np.savetxt(results_filename + ".biases", bias) diff --git a/bin/merge_statfiles.py b/bin/merge_statfiles.py index ab3d078c657e632471a47b4bc990aa16998cc781..469cacd81b597e296eb3eb5b4acdc500028be927 100755 --- a/bin/merge_statfiles.py +++ b/bin/merge_statfiles.py @@ -1,8 +1,8 @@ #!/usr/bin/env python -## HiC-Pro -## Copyright (c) 2015 Institut Curie -## Author(s): Nicolas Servant, Eric Viara +## nf-core-hic +## Copyright (c) 2020 Institut Curie +## Author(s): Nicolas Servant ## Contact: nicolas.servant@curie.fr ## This software is distributed without any guarantee under the terms of the BSD-3 licence. ## See the LICENCE file for details @@ -36,13 +36,13 @@ if __name__ == "__main__": if li > 0: if args.verbose: - print "## merge_statfiles.py" - print "## Merging "+ str(li)+" files" + print("## merge_statfiles.py") + print("## Merging "+ str(li)+" files") ## Reading first file to get the template template = OrderedDict() if args.verbose: - print "## Use "+infiles[0]+" as template" + print("## Use "+infiles[0]+" as template") with open(infiles[0]) as f: for line in f: if not line.startswith("#"): @@ -51,17 +51,17 @@ if __name__ == "__main__": template[str(lsp[0])] = data if len(template) == 0: - print "Cannot find template files !" + print("Cannot find template files !") sys.exit(1) ## Int are counts / Float are percentage - for fidx in xrange(1, li): + for fidx in list(range(1, li)): with open(infiles[fidx]) as f: for line in f: if not line.startswith("#"): lsp = line.strip().split("\t") if lsp[0] in template: - for i in xrange(1, len(lsp)): + for i in list(range(1, len(lsp))): if isinstance(num(lsp[i]), int): template[lsp[0]][i-1] += num(lsp[i]) else: @@ -77,6 +77,6 @@ if __name__ == "__main__": sys.stdout.write("\n") else: - print "No files to merge - stop" + print("No files to merge - stop") sys.exit(1) diff --git a/environment.yml b/environment.yml index 9d0d609c544c67257c4359e58690093dd6a30aab..2680ede654e17edd79b58ce6efb2d3106602cdd6 100644 --- a/environment.yml +++ b/environment.yml @@ -9,9 +9,10 @@ dependencies: - conda-forge::python=3.7.6 - conda-forge::scipy=1.4.1 - conda-forge::numpy=1.18.1 - - bioconda::iced=0.5.4 + - bioconda::iced=0.5.6 - bioconda::bx-python=0.8.8 - bioconda::pysam=0.15.4 + - conda-forge::pymdown-extensions=7.1 - bioconda::cooler=0.8.6 - bioconda::bowtie2=2.3.5 - bioconda::samtools=1.9 diff --git a/main.nf b/main.nf index 85b4154a846e7a5c06a5ade07d54797c98e735d9..7b6b37452fd4584e5798ee8fd12d4c39dc25e49c 100644 --- a/main.nf +++ b/main.nf @@ -245,7 +245,7 @@ summary['DNase Mode'] = params.dnase summary['Remove Dup'] = params.rm_dup summary['Min MAPQ'] = params.min_mapq summary['Min Fragment Size']= params.min_restriction_fragment_size -summary['Max Fragment Size']= params.max_restriction_framgnet_size +summary['Max Fragment Size']= params.max_restriction_fragment_size summary['Min Insert Size'] = params.min_insert_size summary['Max Insert Size'] = params.max_insert_size summary['Min CIS dist'] = params.min_cis_dist @@ -302,6 +302,7 @@ Channel.from(summary.collect{ [it.key, it.value] }) /* * Parse software version numbers */ + process get_software_versions { publishDir "${params.outdir}/pipeline_info", mode: 'copy', saveAs: {filename -> @@ -325,6 +326,25 @@ process get_software_versions { """ } +def create_workflow_summary(summary) { + + def yaml_file = workDir.resolve('workflow_summary_mqc.yaml') + yaml_file.text = """ + id: 'nf-core-chipseq-summary' + description: " - this information is collected when the pipeline is started." + section_name: 'nf-core/chipseq Workflow Summary' + section_href: 'https://github.com/nf-core/chipseq' + plot_type: 'html' + data: | + <dl class=\"dl-horizontal\"> +${summary.collect { k,v -> " <dt>$k</dt><dd><samp>${v ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>" }.join("\n")} + </dl> + """.stripIndent() + + return yaml_file +} + + /**************************************************** * PRE-PROCESSING @@ -357,7 +377,7 @@ if(!params.bwt2_index && params.fasta){ if(!params.chromosome_size && params.fasta){ process makeChromSize { tag "$fasta" - label 'process_low'' + label 'process_low' publishDir path: { params.save_reference ? "${params.outdir}/reference_genome" : params.outdir }, saveAs: { params.save_reference ? it : null }, mode: 'copy' @@ -822,7 +842,7 @@ process multiqc { publishDir "${params.outdir}/MultiQC", mode: 'copy' when: - !params.skip_multiQC + !params.skip_multiqc input: file multiqc_config from ch_multiqc_config @@ -856,7 +876,7 @@ process output_documentation { script: """ - markdown_to_html.r $output_docs results_description.html + markdown_to_html.py $output_docs -o results_description.html """ } diff --git a/nextflow.config b/nextflow.config index ce5c79413343f20f7bc19b455f323e3fa88df5ee..b02a4f55efb1644ec3c803c1738fc587a3de0fcc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -18,10 +18,14 @@ params { readPaths = false chromosome_size = false restriction_fragments = false - skipMaps = false - skipIce = false - skipCool = false - skipMultiQC = false + skip_maps = false + skip_ice = false + skip_cool = false + skip_multiqc = false + save_reference = false + save_interaction_bam = false + save_aligned_intermediates = false + dnase = false // Boilerplate options @@ -45,8 +49,8 @@ params { config_profile_url = false // Defaults only, expecting to be overwritten - max_memory = 128.GB - max_cpus = 16 + max_memory = 24.GB + max_cpus = 8 max_time = 240.h }