diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index fda99de7282aba62eefbddccdcfef67fa67bb1a9..b88d0f43654c3a170c0cf930e212fa316dda28e2 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -6,7 +6,9 @@ We try to manage the required tasks for nf-core/hic using GitHub issues, you pro However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -> If you need help using or modifying nf-core/hic then the best place to go is the Gitter chatroom where you can ask us questions directly: https://gitter.im/nf-core/Lobby +> If you need help using or modifying nf-core/hic then the best place to ask is on the pipeline channel on [Slack](https://nf-core-invite.herokuapp.com/). + + ## Contribution workflow If you'd like to write some code for nf-core/hic, the standard workflow @@ -42,4 +44,4 @@ If there are any failures then the automated tests fail. These tests are run both with the latest available version of Nextflow and also the minimum required version that is stated in the pipeline code. ## Getting help -For further information/help, please consult the [nf-core/hic documentation](https://github.com/nf-core/hic#documentation) and don't hesitate to get in touch on [Gitter](https://gitter.im/nf-core/Lobby) +For further information/help, please consult the [nf-core/hic documentation](https://github.com/nf-core/hic#documentation) and don't hesitate to get in touch on the pipeline channel on [Slack](https://nf-core-invite.herokuapp.com/). diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml new file mode 100644 index 0000000000000000000000000000000000000000..e052a635aa7c2787e741207a069d9a400358ca6c --- /dev/null +++ b/.github/markdownlint.yml @@ -0,0 +1,9 @@ +# Markdownlint configuration file +default: true, +line-length: false +no-multiple-blanks: 0 +blanks-around-headers: false +blanks-around-lists: false +header-increment: false +no-duplicate-header: + siblings_only: true diff --git a/.gitignore b/.gitignore index 46f69e414ba5d72f679f5140fee33188d84422f8..5b54e3e6c257de1e963395161372e1a2ca110fe7 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ data/ results/ .DS_Store tests/test_data +*.pyc diff --git a/.travis.yml b/.travis.yml index 2eaea150d6ecf198773030802425b4a34edd7219..b3e7a99f81be8e2ce96c6be8f54801a093e76cc4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,8 @@ before_install: # Pull the docker image first so the test doesn't wait for this - docker pull nfcore/hic:dev # Fake the tag locally so that the pipeline runs properly - - docker tag nfcore/hic:dev nfcore/hic:dev + # Looks weird when this is :dev to :dev, but makes sense when testing code for a release (:dev to :1.0.1) + - docker tag nfcore/hic:dev nfcore/hic:1.0.0 install: # Install Nextflow @@ -25,12 +26,17 @@ install: - pip install nf-core # Reset - mkdir ${TRAVIS_BUILD_DIR}/tests && cd ${TRAVIS_BUILD_DIR}/tests + # Install markdownlint-cli + - sudo apt-get install npm && npm install -g markdownlint-cli env: - NXF_VER='0.32.0' # Specify a minimum NF version that should be tested and work + - NXF_VER='' # Plus: get the latest NF version and check that it works script: # Lint the pipeline code - nf-core lint ${TRAVIS_BUILD_DIR} + # Lint the documentation + - markdownlint ${TRAVIS_BUILD_DIR} -c ${TRAVIS_BUILD_DIR}/.github/markdownlint.yml # Run the pipeline with the test profile - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker diff --git a/CHANGELOG.md b/CHANGELOG.md index 
25397928ef27bbee77b3d2af2c30edb9aa55233a..b982a5782b737510d5eccd2fe6331c73fd192480 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,14 +2,15 @@ ## v1.0dev - 2019-04-09 - First version of nf-core-hic pipeline which is a Nextflow implementation of the HiC-Pro pipeline [https://github.com/nservant/HiC-Pro]. - Note that all HiC-Pro functionalities are not yet all implemented. The current version is designed for protocols based on restriction enzyme digestion. - - In summary, this version allows : - * Automatic detection and generation of annotation files based on igenomes if not provided. - * Two-steps alignment of raw sequencing reads - * Reads filtering and detection of valid interaction products - * Generation of raw contact matrices for a set of resolutions - * Normalization of the contact maps using the ICE algorithm - * Generation of cooler file for visualization on higlass [https://higlass.io/] - * Quality report based on HiC-Pro MultiQC module +First version of nf-core-hic pipeline which is a Nextflow implementation of the [HiC-Pro pipeline](https://github.com/nservant/HiC-Pro/). +Note that all HiC-Pro functionalities are not yet all implemented. The current version is designed for protocols based on restriction enzyme digestion. + +In summary, this version allows : + +* Automatic detection and generation of annotation files based on igenomes if not provided. +* Two-steps alignment of raw sequencing reads +* Reads filtering and detection of valid interaction products +* Generation of raw contact matrices for a set of resolutions +* Normalization of the contact maps using the ICE algorithm +* Generation of cooler file for visualization on [higlass](https://higlass.io/) +* Quality report based on HiC-Pro MultiQC module diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 21096193adc83331ff86beea517b1d0e37e35c09..09226d0d8d896bbc3bdb632476430d6cad4b0aa7 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -34,7 +34,7 @@ This Code of Conduct applies both within project spaces and in public spaces whe ## Enforcement -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on the [Gitter channel](https://gitter.im/nf-core/Lobby). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-core-invite.herokuapp.com/). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
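For reviewers who want to reproduce the new documentation lint locally before pushing, the two commands below mirror the install and lint steps added to `.travis.yml` above (a minimal sketch; it assumes npm is available and is run from the repository root):

```bash
# Install markdownlint-cli globally, as the new Travis install step does
npm install -g markdownlint-cli

# Lint the repository's Markdown using the new .github/markdownlint.yml configuration
markdownlint . -c .github/markdownlint.yml
```
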
diff --git a/Dockerfile b/Dockerfile index 490ee3637819dd1ba8028a9d57e978fb794b1f2a..06374cf95db6f1a3a68e8c45ab48d2d3ac1d2c2f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,4 +7,4 @@ RUN apt-get update && apt-get install -y gcc g++ && apt-get clean -y COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/nf-core-hic-1.0dev/bin:$PATH +ENV PATH /opt/conda/envs/nf-core-hic-1.0.0/bin:$PATH diff --git a/README.md b/README.md index cf702bf337b100b48ab98405e67cb3b1e7c855d9..5f2c07459b0e8da3644198705f6ecb91b57bca01 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ -#  +# nf-core/hic -**Analysis of Chromosome Conformation Capture data (Hi-C)** +**Analysis of Chromosome Conformation Capture data (Hi-C)**. -[](https://travis-ci.org/nf-core/hic) +[](https://travis-ci.com/nf-core/hic) [](https://www.nextflow.io/) [](http://bioconda.github.io/) diff --git a/assets/email_template.txt b/assets/email_template.txt index 59d7b549d375db84dae99a2e186b2a9df109f2d1..6c85add607a47589da20df83c6892bcfe5e04f1d 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -17,23 +17,6 @@ ${errorReport} } %> -<% if (!success){ - out << """#################################################### -## nf-core/hic execution completed unsuccessfully! ## -#################################################### -The exit status of the task that caused the workflow execution to fail was: $exitStatus. -The full error message was: - -${errorReport} -""" -} else { - out << "## nf-core/hic execution completed successfully! ##" -} -%> - - - - The workflow was completed at $dateComplete (duration: $duration) The command used to launch the workflow was as follows: diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d425b46caa3f6d032a2e5ed340788e583214d447 --- /dev/null +++ b/assets/multiqc_config.yaml @@ -0,0 +1,9 @@ +report_comment: > + This report has been generated by the <a href="https://github.com/nf-core/hic" target="_blank">nf-core/hic</a> + analysis pipeline. For information about how to interpret these results, please see the + <a href="https://github.com/nf-core/hic" target="_blank">documentation</a>. +report_section_order: + nf-core/hic-software-versions: + order: -1000 + +export_plots: true diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index fd1cd7396e07c870807d05843d5bd5a74e49c2d8..2d6712200607cb62f31be950cfe4c54e5ca1838a 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -1,11 +1,36 @@ To: $email Subject: $subject Mime-Version: 1.0 -Content-Type: multipart/related;boundary="nfmimeboundary" +Content-Type: multipart/related;boundary="nfcoremimeboundary" ---nfmimeboundary +--nfcoremimeboundary Content-Type: text/html; charset=utf-8 $email_html ---nfmimeboundary-- +<% +if (mqcFile){ +def mqcFileObj = new File("$mqcFile") +if (mqcFileObj.length() < mqcMaxSize){ +out << """ +--nfcoremimeboundary +Content-Type: text/html; name=\"multiqc_report\" +Content-Transfer-Encoding: base64 +Content-ID: <mqcreport> +Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" + +${mqcFileObj. + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). 
+ join( '\n' )} +""" +}} +%> + +--nfcoremimeboundary-- diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 4a1747d86e627006574c326807fcb8ff7637c242..7a38feec0135d37268ff82fa0c92dab46c84ac6a 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -3,14 +3,22 @@ from __future__ import print_function from collections import OrderedDict import re -# TODO nf-core: Add additional regexes for new tools in process get_software_versions +# Add additional regexes for new tools in process get_software_versions regexes = { 'nf-core/hic': ['v_pipeline.txt', r"(\S+)"], 'Nextflow': ['v_nextflow.txt', r"(\S+)"], + 'Bowtie2': ['v_bowtie2.txt', r"Bowtie2 v(\S+)"], + 'Python': ['v_python.txt', r"Python v(\S+)"], + 'Samtools': ['v_samtools.txt', r"Samtools v(\S+)"], + 'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"], } results = OrderedDict() results['nf-core/hic'] = '<span style="color:#999999;\">N/A</span>' results['Nextflow'] = '<span style="color:#999999;\">N/A</span>' +results['Bowtie2'] = '<span style="color:#999999;\">N/A</span>' +results['Python'] = '<span style="color:#999999;\">N/A</span>' +results['Samtools'] = '<span style="color:#999999;\">N/A</span>' +results['MultiQC'] = '<span style="color:#999999;\">N/A</span>' # Search each file using its regex for k, v in regexes.items(): @@ -20,9 +28,14 @@ for k, v in regexes.items(): if match: results[k] = "v{}".format(match.group(1)) +# Remove software set to false in results +for k in results: + if not results[k]: + del(results[k]) + # Dump to YAML print (''' -id: 'nf-core/hic-software-versions' +id: 'software_versions' section_name: 'nf-core/hic Software Versions' section_href: 'https://github.com/nf-core/hic' plot_type: 'html' @@ -31,5 +44,10 @@ data: | <dl class="dl-horizontal"> ''') for k,v in results.items(): - print(" <dt>{}</dt><dd>{}</dd>".format(k,v)) + print(" <dt>{}</dt><dd><samp>{}</samp></dd>".format(k,v)) print (" </dl>") + +# Write out regexes as csv file: +with open('software_versions.csv', 'w') as f: + for k,v in results.items(): + f.write("{}\t{}\n".format(k,v)) diff --git a/conf/awsbatch.config b/conf/awsbatch.config index 79078c7bd03ef1c4131ebfb5d46bf621150d74c1..14af5866f5c6c18db7e8d6b93b40da8ea8311721 100644 --- a/conf/awsbatch.config +++ b/conf/awsbatch.config @@ -1,10 +1,15 @@ /* * ------------------------------------------------- - * Nextflow config file for AWS Batch + * Nextflow config file for running on AWS batch * ------------------------------------------------- - * Imported under the 'awsbatch' Nextflow profile in nextflow.config - * Uses docker for software depedencies automagically, so not specified here. + * Base config needed for running with -profile awsbatch */ +params { + config_profile_name = 'AWSBATCH' + config_profile_description = 'AWSBATCH Cloud Profile' + config_profile_contact = 'Alexander Peltzer (@apeltzer)' + config_profile_url = 'https://aws.amazon.com/de/batch/' +} aws.region = params.awsregion process.executor = 'awsbatch' diff --git a/conf/base.config b/conf/base.config index 7f99f28907f6c88829f0cc094de6672a4bacd873..28b467901007da4efaf10945d3c2000644f69d90 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,6 +1,6 @@ /* * ------------------------------------------------- - * Nextflow base config file + * nf-core/hic Nextflow base config file * ------------------------------------------------- * A 'blank slate' config file, appropriate for general * use on most high performace compute environments. 
@@ -11,13 +11,12 @@ process { - container = process.container - - cpus = { check_max( 2, 'cpus' ) } + // Check the defaults for all processes + cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 8.GB * task.attempt, 'memory' ) } time = { check_max( 2.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'terminate' } + errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' @@ -25,7 +24,7 @@ process { withName:makeBowtie2Index { cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 10.GB * task.attempt, 'memory' ) } - time = { check_max( 12.h * task.attempt, 'time' ) } + time = { check_max( 12.h * task.attempt, 'time' ) } } withName:bowtie2_end_to_end { cpus = { check_max( 4, 'cpus' ) } diff --git a/conf/igenomes.config b/conf/igenomes.config index 26950cf2932485d39cc3ed04705392efdacf6b71..92ad32389c6646cae0feea95e5e0a3bceeba909e 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -60,7 +60,7 @@ params { } 'Gm01' { fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/genome" } 'Mmul_1' { fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" @@ -96,7 +96,7 @@ params { } 'AGPv3' { fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/genome" } } } diff --git a/conf/test.config b/conf/test.config index b4fd1845c65349aa6a58a82dc033b38d6bf76815..592e3a40d8bce4cf22b5fe1ad9014ded48d439ce 100644 --- a/conf/test.config +++ b/conf/test.config @@ -16,7 +16,7 @@ params { max_cpus = 2 max_memory = 4.GB max_time = 1.h - + // Input data readPaths = [ ['SRR4292758_00', ['https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R2.fastq.gz']] @@ -31,4 +31,3 @@ params { // Options skip_cool = true } - diff --git a/docs/configuration/local.md b/docs/configuration/local.md index 9cd485e2cd60b7670e242f4a399fde8e15bdaf92..d4530fa9007866b32cf2dda77ed780c4fe19f1e8 100644 --- a/docs/configuration/local.md +++ b/docs/configuration/local.md @@ -10,6 +10,7 @@ Nextflow has [excellent integration](https://www.nextflow.io/docs/latest/docker. First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/) Then, simply run the analysis pipeline: + ```bash nextflow run nf-core/hic -profile docker --genome '<genome ID>' ``` diff --git a/docs/configuration/reference_genomes.md b/docs/configuration/reference_genomes.md index 1fafa8feeaaef766fa294b9d1de1bd32ba1f7dc2..c52faf821ad7e676ed56dc04c92aae165ea573c7 100644 --- a/docs/configuration/reference_genomes.md +++ b/docs/configuration/reference_genomes.md @@ -39,11 +39,12 @@ Multiple reference index types are held together with consistent structure for m We have put a copy of iGenomes up onto AWS S3 hosting and this pipeline is configured to use this by default. The hosting fees for AWS iGenomes are currently kindly funded by a grant from Amazon. 
The pipeline will automatically download the required reference files when you run the pipeline. -For more information about the AWS iGenomes, see https://ewels.github.io/AWS-iGenomes/ +For more information about the AWS iGenomes, see [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/) Downloading the files takes time and bandwidth, so we recommend making a local copy of the iGenomes resource. Once downloaded, you can customise the variable `params.igenomes_base` in your custom configuration file to point to the reference location. For example: + ```nextflow params.igenomes_base = '/path/to/data/igenomes/' ``` diff --git a/docs/installation.md b/docs/installation.md index 70c4a6d472f51c28925ab2bedf4f7ec4b468ad7e..9ac66d585871d374c90df1f14b2c192f2d24b7a8 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -74,7 +74,7 @@ Be warned of two important points about this default configuration: #### 3.1) Software deps: Docker First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/) -Then, running the pipeline with the option `-profile docker` tells Nextflow to enable Docker for this run. An image containing all of the software requirements will be automatically fetched and used from dockerhub (https://hub.docker.com/r/nfcore/hic). +Then, running the pipeline with the option `-profile docker` tells Nextflow to enable Docker for this run. An image containing all of the software requirements will be automatically fetched and used from [dockerhub](https://hub.docker.com/r/nfcore/hic). #### 3.1) Software deps: Singularity If you're not able to use Docker then [Singularity](http://singularity.lbl.gov/) is a great alternative. diff --git a/docs/output.md b/docs/output.md index 518ac60f545d4f87051dfec5ace6f972cd93d65b..53c9c0c7c20b11e85acd758e4f7b157116ef2378 100644 --- a/docs/output.md +++ b/docs/output.md @@ -26,7 +26,7 @@ Singletons are discarded, and multi-hits are filtered according to the configura Note that if the `--dnase` mode is activated, HiC-Pro will skip the second mapping step. **Output directory: `results/mapping`** - + * `*bwt2pairs.bam` - final BAM file with aligned paired data * `*.pairstat` - mapping statistics @@ -50,7 +50,7 @@ Invalid pairs are classified as follow: * Dangling end, i.e. unligated fragments (both reads mapped on the same restriction fragment) * Self circles, i.e. fragments ligated on themselves (both reads mapped on the same restriction fragment in inverted orientation) * Religation, i.e. ligation of juxtaposed fragments -* Filtered pairs, i.e. any pairs that do not match the filtering criteria on inserts size, restriction fragments size +* Filtered pairs, i.e. any pairs that do not match the filtering criteria on inserts size, restriction fragments size * Dumped pairs, i.e. any pairs for which we were not able to reconstruct the ligation product. Only valid pairs involving two different restriction fragments are used to build the contact maps. @@ -59,12 +59,12 @@ Duplicated valid pairs associated to PCR artefacts are discarded (see `--rm_dup` In case of Hi-C protocols that do not require a restriction enzyme such as DNase Hi-C or micro Hi-C, the assignment to a restriction is not possible (see `--dnase`). Short range interactions that are likely to be spurious ligation products can thus be discarded using the `--min_cis_dist` parameter. 
-* `*.validPairs` - List of valid ligation products +* `*.validPairs` - List of valid ligation products * `*RSstat` - Statitics of number of read pairs falling in each category The validPairs are stored using a simple tab-delimited text format ; -``` +```bash read name / chr_reads1 / pos_reads1 / strand_reads1 / chr_reads2 / pos_reads2 / strand_reads2 / fragment_size / res frag name R1 / res frag R2 / mapping qual R1 / mapping qual R2 [/ allele_specific_tag] ``` @@ -102,7 +102,7 @@ A contact map is defined by : Based on the observation that a contact map is symmetric and usually sparse, only non-zero values are stored for half of the matrix. The user can specified if the 'upper', 'lower' or 'complete' matrix has to be stored. The 'asis' option allows to store the contacts as they are observed from the valid pairs files. -``` +```bash A B 10 A C 23 B C 24 @@ -124,4 +124,4 @@ The pipeline has special steps which allow the software versions used to be repo * `Project_multiqc_data/` * Directory containing parsed statistics from the different tools used in the pipeline -For more information about how to use MultiQC reports, see http://multiqc.info +For more information about how to use MultiQC reports, see [http://multiqc.info](http://multiqc.info) diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index e6772eb34bd66f12b8477547a1c3cc250d34f33d..e0f2d0774afa327390d3e3cb33c7c3b1e6c829fb 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -7,11 +7,11 @@ If only no file, only one input file , or only read one and not read two is pick 1. The path must be enclosed in quotes (`'` or `"`) 2. The path must have at least one `*` wildcard character. This is even if you are only running one paired end sample. 3. When using the pipeline with paired end data, the path must use `{1,2}` or `{R1,R2}` notation to specify read pairs. -4. If you are running Single end data make sure to specify `--singleEnd` +4. 
If you are running Single end data make sure to specify `--singleEnd` If the pipeline can't find your files then you will get the following error -``` +```bash ERROR ~ Cannot find any reads matching: *{1,2}.fastq.gz ``` diff --git a/docs/usage.md b/docs/usage.md index 853c38414b6e53090c3d9b0f19e849a0972b1243..9b2bb6a5b160dbb19632188ee91c66e58ce58a21 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -7,69 +7,69 @@ * [Updating the pipeline](#updating-the-pipeline) * [Reproducibility](#reproducibility) * [Main arguments](#main-arguments) - * [`-profile`](#-profile-single-dash) - * [`awsbatch`](#awsbatch) - * [`conda`](#conda) - * [`docker`](#docker) - * [`singularity`](#singularity) - * [`test`](#test) - * [`--reads`](#--reads) - * [`--singleEnd`](#--singleend) + * [`-profile`](#-profile-single-dash) + * [`awsbatch`](#awsbatch) + * [`conda`](#conda) + * [`docker`](#docker) + * [`singularity`](#singularity) + * [`test`](#test) + * [`--reads`](#--reads) + * [`--singleEnd`](#--singleend) * [Reference genomes](#reference-genomes) - * [`--genome`](#--genome) - * [`--fasta`](#--fasta) - * [`--igenomesIgnore`](#--igenomesignore) - * [`--bwt2_index`](#--bwt2_index) - * [`--chromosome_size`](#--chromosome_size) - * [`--restriction_fragments`](#--restriction_fragments) + * [`--genome`](#--genome) + * [`--fasta`](#--fasta) + * [`--igenomesIgnore`](#--igenomesignore) + * [`--bwt2_index`](#--bwt2_index) + * [`--chromosome_size`](#--chromosome_size) + * [`--restriction_fragments`](#--restriction_fragments) * [Hi-C specific options](#hi-c-specific-options) - * [Reads mapping](#reads-mapping) - * [`--bwt2_opts_end2end`](#--bwt2_opts_end2end) - * [`--bwt2_opts_trimmed`](#--bwt2_opts_trimmed) - * [`--min_mapq`](#--min_mapq) - * [Digestion Hi-C](#digestion-hi-c) - * [`--restriction_site`](#--restriction_site) - * [`--ligation_site`](#--ligation_site) - * [`--min_restriction_fragment_size`](#--min_restriction_fragment_size) - * [`--max_restriction_fragment_size`](#--max_restriction_fragment_size) - * [`--min_insert_size`](#--min_insert_size) - * [`--max_insert_size`](#--max_insert_size) - * [DNase Hi-C](#dnase-hi-c) - * [`--dnase`](#--dnase) - * [Hi-C Processing](#hi-c-processing) - * [`--min_cis_dist`](#--min_cis_dist) - * [`--rm_singleton`](#--rm_singleton) - * [`--rm_dup`](#--rm_dup) - * [`--rm_multi`](#--rm_multi) - * [Genome-wide contact maps](#genome-wide-contact-maps) - * [`--bins_size`](#--bins_size) - * [`--ice_max_iter`](#--ice_max_iter) - * [`--ice_filer_low_count_perc`](#--ice_filer_low_count_perc) - * [`--ice_filer_high_count_perc`](#--ice_filer_high_count_perc) - * [`--ice_eps`](#--ice_eps) - * [Inputs/Outputs](#inputs-outputs) - * [`--splitFastq`](#--splitFastq) - * [`--saveReference`](#--saveReference) - * [`--saveAlignedIntermediates`](#--saveAlignedIntermediates) + * [Reads mapping](#reads-mapping) + * [`--bwt2_opts_end2end`](#--bwt2_opts_end2end) + * [`--bwt2_opts_trimmed`](#--bwt2_opts_trimmed) + * [`--min_mapq`](#--min_mapq) + * [Digestion Hi-C](#digestion-hi-c) + * [`--restriction_site`](#--restriction_site) + * [`--ligation_site`](#--ligation_site) + * [`--min_restriction_fragment_size`](#--min_restriction_fragment_size) + * [`--max_restriction_fragment_size`](#--max_restriction_fragment_size) + * [`--min_insert_size`](#--min_insert_size) + * [`--max_insert_size`](#--max_insert_size) + * [DNase Hi-C](#dnase-hi-c) + * [`--dnase`](#--dnase) + * [Hi-C Processing](#hi-c-processing) + * [`--min_cis_dist`](#--min_cis_dist) + * [`--rm_singleton`](#--rm_singleton) + * [`--rm_dup`](#--rm_dup) + * 
[`--rm_multi`](#--rm_multi) + * [Genome-wide contact maps](#genome-wide-contact-maps) + * [`--bins_size`](#--bins_size) + * [`--ice_max_iter`](#--ice_max_iter) + * [`--ice_filer_low_count_perc`](#--ice_filer_low_count_perc) + * [`--ice_filer_high_count_perc`](#--ice_filer_high_count_perc) + * [`--ice_eps`](#--ice_eps) + * [Inputs/Outputs](#inputs-outputs) + * [`--splitFastq`](#--splitFastq) + * [`--saveReference`](#--saveReference) + * [`--saveAlignedIntermediates`](#--saveAlignedIntermediates) * [Job resources](#job-resources) * [Automatic resubmission](#automatic-resubmission) * [Custom resource requests](#custom-resource-requests) * [AWS batch specific parameters](#aws-batch-specific-parameters) - * [`-awsbatch`](#-awsbatch) - * [`--awsqueue`](#--awsqueue) - * [`--awsregion`](#--awsregion) + * [`-awsbatch`](#-awsbatch) + * [`--awsqueue`](#--awsqueue) + * [`--awsregion`](#--awsregion) * [Other command line parameters](#other-command-line-parameters) - * [`--outdir`](#--outdir) - * [`--email`](#--email) - * [`-name`](#-name-single-dash) - * [`-resume`](#-resume-single-dash) - * [`-c`](#-c-single-dash) - * [`--custom_config_version`](#--custom_config_version) - * [`--max_memory`](#--max_memory) - * [`--max_time`](#--max_time) - * [`--max_cpus`](#--max_cpus) - * [`--plaintext_email`](#--plaintext_email) - * [`--multiqc_config`](#--multiqc_config) + * [`--outdir`](#--outdir) + * [`--email`](#--email) + * [`-name`](#-name-single-dash) + * [`-resume`](#-resume-single-dash) + * [`-c`](#-c-single-dash) + * [`--custom_config_version`](#--custom_config_version) + * [`--max_memory`](#--max_memory) + * [`--max_time`](#--max_time) + * [`--max_cpus`](#--max_cpus) + * [`--plaintext_email`](#--plaintext_email) + * [`--multiqc_config`](#--multiqc_config) ## General Nextflow info @@ -83,6 +83,7 @@ NXF_OPTS='-Xms1g -Xmx4g' ## Running the pipeline The typical command for running the pipeline is as follows: + ```bash nextflow run nf-core/hic --reads '*_R{1,2}.fastq.gz' -genome GRCh37 -profile docker ``` @@ -121,21 +122,20 @@ Use this parameter to choose a configuration profile. Profiles can give configur If `-profile` is not specified at all the pipeline will be run locally and expects all software to be installed and available on the `PATH`. * `awsbatch` - * A generic configuration profile to be used with AWS Batch. + * A generic configuration profile to be used with AWS Batch. 
* `conda` - * A generic configuration profile to be used with [conda](https://conda.io/docs/) - * Pulls most software from [Bioconda](https://bioconda.github.io/) + * A generic configuration profile to be used with [conda](https://conda.io/docs/) + * Pulls most software from [Bioconda](https://bioconda.github.io/) * `docker` - * A generic configuration profile to be used with [Docker](http://docker.com/) - * Pulls software from dockerhub: [`nfcore/hic`](http://hub.docker.com/r/nfcore/hic/) + * A generic configuration profile to be used with [Docker](http://docker.com/) + * Pulls software from dockerhub: [`nfcore/hic`](http://hub.docker.com/r/nfcore/hic/) * `singularity` - * A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) - * Pulls software from singularity-hub + * A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) + * Pulls software from DockerHub: [`nfcore/hic`](http://hub.docker.com/r/nfcore/hic/) * `test` - * A profile with a complete configuration for automated testing - * Includes links to test data so needs no other parameters + * A profile with a complete configuration for automated testing + * Includes links to test data so needs no other parameters -<!-- TODO nf-core: Document required command line parameters --> ### `--reads` Use this to specify the location of your input FastQ files. For example: @@ -210,7 +210,8 @@ The bowtie2 indexes are required to run the Hi-C pipeline. If the `--bwt2_index` The Hi-C pipeline will also requires a two-columns text file with the chromosome name and its size (tab separated). If not specified, this file will be automatically created by the pipeline. In the latter case, the `--fasta` reference genome has to be specified. -``` + +```bash chr1 249250621 chr2 243199373 chr3 198022430 @@ -232,7 +233,7 @@ If not specified, this file will be automatically created by the pipeline. In th Finally, Hi-C experiments based on restriction enzyme digestion requires a BED file with coordinates of restriction fragments. -``` +```bash chr1 0 16007 HIC_chr1_1 0 + chr1 16007 24571 HIC_chr1_2 0 + chr1 24571 27981 HIC_chr1_3 0 + @@ -444,7 +445,7 @@ The `--splitFastq` option allows to automatically split input read pairs into ch If specified, annotation files automatically generated from the `--fasta` file are exported in the results folder. Default: false -``` +```bash --saveReference ``` @@ -452,7 +453,7 @@ If specified, annotation files automatically generated from the `--fasta` file a If specified, all intermediate mapping files are saved and exported in the results folder. 
Default: false -``` +```bash --saveReference ``` diff --git a/environment.yml b/environment.yml index ae15924597b0494e1e9f6005aa675cac8a64adb6..34958b7d3505d0ad73b33fb325ab1f87a0d6f8a3 100644 --- a/environment.yml +++ b/environment.yml @@ -1,19 +1,21 @@ -name: nf-core-hic-1.0dev +# You can use this file to create a conda environment for this pipeline: +# conda env create -f environment.yml +name: nf-core-hic-1.0.0 channels: - conda-forge - bioconda - defaults dependencies: - - python=2.7.13 - - pip=18.1 - - conda-forge::scipy=1.0.1 - - conda-forge::numpy=1.9.3 - - conda-forge::r-markdown=0.8 - - bcbio::bx-python=0.7.3 - - bioconda::pysam=0.14.1 - - cooler=0.8.3 + - python=2.7.15 + - pip=19.1 + - scipy=1.2.1 + - numpy=1.16.3 + - r-markdown=0.9 + - bx-python=0.8.2 + - pysam=0.15.2 + - cooler=0.8.5 - bowtie2=2.3.5 - - samtools=1.7 - - multiqc=1.6 + - samtools=1.9 + - multiqc=1.7 - pip: - - iced==0.4.2 \ No newline at end of file + - iced==0.5.1 diff --git a/main.nf b/main.nf index 5550fb1d156943fb8cdf46f6ae09a64b347a463a..eeb692312a671226acc27244bd86c7f8953e6ccb 100644 --- a/main.nf +++ b/main.nf @@ -11,19 +11,9 @@ def helpMessage() { + // Add to this help message with new command line parameters + log.info nfcoreHeader() log.info""" - ======================================================= - ,--./,-. - ___ __ __ __ ___ /,-._.--~\' - |\\ | |__ __ / ` / \\ |__) |__ } { - | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, - `._,._,\' - - nf-core/hic v${workflow.manifest.version} - ======================================================= - - This pipeline is a Nextflow version of the HiC-Pro pipeline for Hi-C data processing. - See https://github.com/nservant/HiC-Pro for details. Usage: @@ -75,6 +65,8 @@ def helpMessage() { -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. Step options: + --skip_maps Skip generation of contact maps. Useful for capture-C + --skip_ice Skip ICE normalization --skip_cool Skip generation of cool files --skip_multiQC Skip MultiQC @@ -105,7 +97,7 @@ if (!params.dnase && !params.ligation_site) { } // Reference index path configuration -params.bwt2_index = params.genome ? params.genomes[ params.genome ].bowtie2 ?: false : false +params.bwt2_index = params.genome ? params.genomes[ params.genome ].bowtie2 ?: false : false params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false @@ -116,22 +108,21 @@ if( !(workflow.runName ==~ /[a-z]+_[a-z]+/) ){ custom_runName = workflow.runName } + if( workflow.profile == 'awsbatch') { // AWSBatch sanity checking if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - if (!workflow.workDir.startsWith('s3') || !params.outdir.startsWith('s3')) exit 1, "Specify S3 URLs for workDir and outdir parameters on AWSBatch!" - // Check workDir/outdir paths to be S3 buckets if running on AWSBatch + // Check outdir paths to be S3 buckets if running on AWSBatch // related: https://github.com/nextflow-io/nextflow/issues/813 - if (!workflow.workDir.startsWith('s3:') || !params.outdir.startsWith('s3:')) exit 1, "Workdir or Outdir not on S3 - specify S3 Buckets for each to run on AWSBatch!" + if (!params.outdir.startsWith('s3:')) exit 1, "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" + // Prevent trace files to be stored on S3 since S3 does not support rolling files. + if (workflow.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." 
} // Stage config files ch_multiqc_config = Channel.fromPath(params.multiqc_config) ch_output_docs = Channel.fromPath("$baseDir/docs/output.md") - - - /********************************************************** * SET UP CHANNELS */ @@ -183,7 +174,7 @@ if ( params.bwt2_index ){ Channel.fromPath( bwt2_dir , checkIfExists: true) .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwt2_index}" } .into { bwt2_index_end2end; bwt2_index_trim } - + } else if ( params.fasta ) { lastPath = params.fasta.lastIndexOf(File.separator) @@ -197,7 +188,6 @@ else { exit 1, "No reference genome specified!" } - // Chromosome size if ( params.chromosome_size ){ @@ -239,23 +229,14 @@ ch_output_docs = Channel.fromPath("$baseDir/docs/output.md") */ // Header log info -log.info """======================================================= - ,--./,-. - ___ __ __ __ ___ /,-._.--~\' - |\\ | |__ __ / ` / \\ |__) |__ } { - | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, - `._,._,\' - -nf-core/hic v${workflow.manifest.version}" -=======================================================""" +log.info nfcoreHeader() def summary = [:] -summary['Pipeline Name'] = 'nf-core/hic' -summary['Pipeline Version'] = workflow.manifest.version +if(workflow.revision) summary['Pipeline Release'] = workflow.revision summary['Run Name'] = custom_runName ?: workflow.runName - summary['Reads'] = params.reads summary['splitFastq'] = params.splitFastq summary['Fasta Ref'] = params.fasta +summary['Restriction Motif']= params.restriction_site summary['Ligation Motif'] = params.ligation_site summary['DNase Mode'] = params.dnase summary['Remove Dup'] = params.rm_dup @@ -267,7 +248,7 @@ summary['Max Time'] = params.max_time summary['Output dir'] = params.outdir summary['Working dir'] = workflow.workDir summary['Container Engine'] = workflow.containerEngine -if(workflow.containerEngine) +if(workflow.containerEngine) summary['Container'] = workflow.container summary['Current home'] = "$HOME" summary['Current user'] = "$USER" @@ -280,10 +261,19 @@ if(workflow.profile == 'awsbatch'){ summary['AWS Region'] = params.awsregion summary['AWS Queue'] = params.awsqueue } -if(params.email) summary['E-mail Address'] = params.email -log.info summary.collect { k,v -> "${k.padRight(15)}: $v" }.join("\n") -log.info "=========================================" +summary['Config Profile'] = workflow.profile +if(params.config_profile_description) summary['Config Description'] = params.config_profile_description +if(params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact +if(params.config_profile_url) summary['Config URL'] = params.config_profile_url +if(params.email) { + summary['E-mail Address'] = params.email + summary['MultiQC maxsize'] = params.maxMultiqcEmailFileSize +} +log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n") +log.info "\033[2m----------------------------------------------------\033[0m" +// Check the hostnames against configured profiles +checkHostname() def create_workflow_summary(summary) { def yaml_file = workDir.resolve('workflow_summary_mqc.yaml') @@ -307,19 +297,26 @@ ${summary.collect { k,v -> " <dt>$k</dt><dd><samp>${v ?: '<span style * Parse software version numbers */ process get_software_versions { + publishDir "${params.outdir}/pipeline_info", mode: 'copy', + saveAs: {filename -> + if (filename.indexOf(".csv") > 0) filename + else null + } - output: - file 'software_versions_mqc.yaml' into software_versions_yaml + output: + file 'software_versions_mqc.yaml' into software_versions_yaml 
+ file "software_versions.csv" - script: - """ - echo $workflow.manifest.version > v_pipeline.txt - echo $workflow.nextflow.version > v_nextflow.txt - bowtie2 --version > v_bowtie2.txt - python --version > v_python.txt - samtools --version > v_samtools.txt - scrape_software_versions.py > software_versions_mqc.yaml - """ + script: + """ + echo $workflow.manifest.version > v_pipeline.txt + echo $workflow.nextflow.version > v_nextflow.txt + bowtie2 --version > v_bowtie2.txt + python --version > v_python.txt 2>&1 + samtools --version > v_samtools.txt + multiqc --version > v_multiqc.txt + scrape_software_versions.py &> software_versions_mqc.yaml + """ } @@ -360,13 +357,13 @@ if(!params.chromosome_size && params.fasta){ file fasta from fasta_for_chromsize output: - file "*.size" into chromosome_size, chromosome_size_cool + file "*.size" into chromosome_size, chromosome_size_cool script: """ samtools faidx ${fasta} cut -f1,2 ${fasta}.fai > chrom.size - """ + """ } } @@ -405,7 +402,7 @@ process bowtie2_end_to_end { input: set val(sample), file(reads) from raw_reads file index from bwt2_index_end2end.collect() - + output: set val(prefix), file("${prefix}_unmap.fastq") into unmapped_end_to_end set val(prefix), file("${prefix}.bam") into end_to_end_bam @@ -413,7 +410,7 @@ process bowtie2_end_to_end { script: prefix = reads.toString() - ~/(\.fq)?(\.fastq)?(\.gz)?$/ def bwt2_opts = params.bwt2_opts_end2end - + if (!params.dnase){ """ bowtie2 --rg-id BMG --rg SM:${prefix} \\ @@ -504,13 +501,13 @@ if (!params.dnase){ """ samtools merge -@ ${task.cpus} \\ -f ${prefix}_bwt2merged.bam \\ - ${bam1} ${bam2} + ${bam1} ${bam2} samtools sort -@ ${task.cpus} -m 800M \\ -n -T /tmp/ \\ -o ${prefix}_bwt2merged.sorted.bam \\ ${prefix}_bwt2merged.bam - + mv ${prefix}_bwt2merged.sorted.bam ${prefix}_bwt2merged.bam echo "## ${prefix}" > ${prefix}.mapstat @@ -555,13 +552,11 @@ if (!params.dnase){ } } - - process combine_mapped_files{ tag "$sample = $r1_prefix + $r2_prefix" publishDir "${params.outdir}/mapping", mode: 'copy', - saveAs: {filename -> filename.indexOf(".pairstat") > 0 ? "stats/$filename" : "$filename"} - + saveAs: {filename -> filename.indexOf(".pairstat") > 0 ? "stats/$filename" : "$filename"} + input: set val(sample), file(aligned_bam) from bwt2_merged_bam.groupTuple() @@ -575,7 +570,7 @@ process combine_mapped_files{ r2_bam = aligned_bam[1] r2_prefix = r2_bam.toString() - ~/_bwt2merged.bam$/ oname = sample.toString() - ~/(\.[0-9]+)$/ - + def opts = "-t" opts = params.rm_singleton ? "${opts}" : "--single ${opts}" opts = params.rm_multi ? "${opts}" : "--multi ${opts}" @@ -594,7 +589,7 @@ if (!params.dnase){ process get_valid_interaction{ tag "$sample" publishDir "${params.outdir}/hic_results/data", mode: 'copy', - saveAs: {filename -> filename.indexOf("*stat") > 0 ? "stats/$filename" : "$filename"} + saveAs: {filename -> filename.indexOf("*stat") > 0 ? "stats/$filename" : "$filename"} input: set val(sample), file(pe_bam) from paired_bam @@ -605,7 +600,7 @@ if (!params.dnase){ set val(sample), file("*.validPairs") into valid_pairs_4cool set val(sample), file("*RSstat") into all_rsstat - script: + script: if (params.splitFastq){ sample = sample.toString() - ~/(\.[0-9]+)$/ } @@ -626,17 +621,17 @@ else{ process get_valid_interaction_dnase{ tag "$sample" publishDir "${params.outdir}/hic_results/data", mode: 'copy', - saveAs: {filename -> filename.indexOf("*stat") > 0 ? "stats/$filename" : "$filename"} + saveAs: {filename -> filename.indexOf("*stat") > 0 ? 
"stats/$filename" : "$filename"} input: set val(sample), file(pe_bam) from paired_bam - + output: set val(sample), file("*.validPairs") into valid_pairs set val(sample), file("*.validPairs") into valid_pairs_4cool set val(sample), file("*RSstat") into all_rsstat - script: + script: if (params.splitFastq){ sample = sample.toString() - ~/(\.[0-9]+)$/ } @@ -657,7 +652,7 @@ else{ process remove_duplicates { tag "$sample" publishDir "${params.outdir}/hic_results/data", mode: 'copy', - saveAs: {filename -> filename.indexOf("*stat") > 0 ? "stats/$sample/$filename" : "$filename"} + saveAs: {filename -> filename.indexOf("*stat") > 0 ? "stats/$sample/$filename" : "$filename"} input: set val(sample), file(vpairs) from valid_pairs.groupTuple() @@ -672,7 +667,7 @@ process remove_duplicates { """ mkdir -p stats/${sample} sort -T /tmp/ -S 50% -k2,2V -k3,3n -k5,5V -k6,6n -m ${vpairs} | \ - awk -F"\\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=\$2 || c2!=\$5 || s1!=\$3 || s2!=\$6){print;c1=\$2;c2=\$5;s1=\$3;s2=\$6}' > ${sample}.allValidPairs + awk -F"\\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=\$2 || c2!=\$5 || s1!=\$3 || s2!=\$6){print;c1=\$2;c2=\$5;s1=\$3;s2=\$6}' > ${sample}.allValidPairs echo -n "valid_interaction\t" > stats/${sample}/${sample}_allValidPairs.mergestat cat ${vpairs} | wc -l >> stats/${sample}/${sample}_allValidPairs.mergestat echo -n "valid_interaction_rmdup\t" >> stats/${sample}/${sample}_allValidPairs.mergestat @@ -720,6 +715,9 @@ process build_contact_maps{ tag "$sample - $mres" publishDir "${params.outdir}/hic_results/matrix/raw", mode: 'copy' + when: + !params.skip_maps + input: set val(sample), file(vpairs), val(mres) from all_valid_pairs.combine(map_res) file chrsize from chromosome_size.collect() @@ -742,6 +740,9 @@ process run_ice{ tag "$rmaps" publishDir "${params.outdir}/hic_results/matrix/iced", mode: 'copy' + when: + !params.skip_maps && !params.skip_ice + input: file(rmaps) from raw_maps file "*.biases" @@ -756,7 +757,7 @@ process run_ice{ --results_filename ${prefix}_iced.matrix \ --filter_high_counts_perc ${params.ice_filer_high_count_perc} \ --max_iter ${params.ice_max_iter} --eps ${params.ice_eps} --remove-all-zeros-loci --output-bias 1 --verbose 1 ${rmaps} - """ + """ } @@ -780,13 +781,13 @@ process generate_cool{ script: """ hicpro2higlass.sh -i $vpairs -r 5000 -c ${chrsize} -n - """ + """ } /* * STEP 5 - MultiQC - */ + */ process multiqc { publishDir "${params.outdir}/MultiQC", mode: 'copy' @@ -806,23 +807,19 @@ process multiqc { script: rtitle = custom_runName ? "--title \"$custom_runName\"" : '' rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' - + """ multiqc -f $rtitle $rfilename --config $multiqc_config . 
""" } -/**************************************************** - * POST-PROCESSING - */ -/* - * Output Description HTML +/* + * STEP 3 - Output Description HTML */ - process output_documentation { - publishDir "${params.outdir}/Documentation", mode: 'copy' + publishDir "${params.outdir}/pipeline_info", mode: 'copy' input: file output_docs from ch_output_docs @@ -868,10 +865,26 @@ workflow.onComplete { if(workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository if(workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId if(workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision + if(workflow.container) email_fields['summary']['Docker image'] = workflow.container email_fields['summary']['Nextflow Version'] = workflow.nextflow.version email_fields['summary']['Nextflow Build'] = workflow.nextflow.build email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + // If not using MultiQC, strip out this code (including params.maxMultiqcEmailFileSize) + // On success try attach the multiqc report + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList){ + log.warn "[nf-core/hic] Found multiple reports from process 'multiqc', will use only one" + mqc_report = mqc_report[0] + } + } + } catch (all) { + log.warn "[nf-core/hic] Could not attach MultiQC report to summary email" + } + // Render the TXT template def engine = new groovy.text.GStringTemplateEngine() def tf = new File("$baseDir/assets/email_template.txt") @@ -884,7 +897,7 @@ workflow.onComplete { def email_html = html_template.toString() // Render the sendmail template - def smail_fields = [ email: params.email, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir" ] + def smail_fields = [ email: params.email, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.maxMultiqcEmailFileSize.toBytes() ] def sf = new File("$baseDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) def sendmail_html = sendmail_template.toString() @@ -904,7 +917,7 @@ workflow.onComplete { } // Write summary e-mail HTML to a file - def output_d = new File( "${params.outdir}/Documentation/" ) + def output_d = new File( "${params.outdir}/pipeline_info/" ) if( !output_d.exists() ) { output_d.mkdirs() } @@ -913,5 +926,67 @@ workflow.onComplete { def output_tf = new File( output_d, "pipeline_report.txt" ) output_tf.withWriter { w -> w << email_txt } - log.info "[nf-core/hic] Pipeline Complete" + c_reset = params.monochrome_logs ? '' : "\033[0m"; + c_purple = params.monochrome_logs ? '' : "\033[0;35m"; + c_green = params.monochrome_logs ? '' : "\033[0;32m"; + c_red = params.monochrome_logs ? 
'' : "\033[0;31m"; + + if (workflow.stats.ignoredCountFmt > 0 && workflow.success) { + log.info "${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}" + log.info "${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCountFmt} ${c_reset}" + log.info "${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCountFmt} ${c_reset}" + } + + if(workflow.success){ + log.info "${c_purple}[nf-core/hic]${c_green} Pipeline completed successfully${c_reset}" + } else { + checkHostname() + log.info "${c_purple}[nf-core/hic]${c_red} Pipeline completed with errors${c_reset}" + } + +} + + +def nfcoreHeader(){ + // Log colors ANSI codes + c_reset = params.monochrome_logs ? '' : "\033[0m"; + c_dim = params.monochrome_logs ? '' : "\033[2m"; + c_black = params.monochrome_logs ? '' : "\033[0;30m"; + c_green = params.monochrome_logs ? '' : "\033[0;32m"; + c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; + c_blue = params.monochrome_logs ? '' : "\033[0;34m"; + c_purple = params.monochrome_logs ? '' : "\033[0;35m"; + c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; + c_white = params.monochrome_logs ? '' : "\033[0;37m"; + + return """ ${c_dim}----------------------------------------------------${c_reset} + ${c_green},--.${c_black}/${c_green},-.${c_reset} + ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} + ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} + ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} + ${c_green}`._,._,\'${c_reset} + ${c_purple} nf-core/hic v${workflow.manifest.version}${c_reset} + ${c_dim}----------------------------------------------------${c_reset} + """.stripIndent() +} + +def checkHostname(){ + def c_reset = params.monochrome_logs ? '' : "\033[0m" + def c_white = params.monochrome_logs ? '' : "\033[0;37m" + def c_red = params.monochrome_logs ? '' : "\033[1;91m" + def c_yellow_bold = params.monochrome_logs ? '' : "\033[1;93m" + if(params.hostnames){ + def hostname = "hostname".execute().text.trim() + params.hostnames.each { prof, hnames -> + hnames.each { hname -> + if(hostname.contains(hname) && !workflow.profile.contains(prof)){ + log.error "====================================================\n" + + " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + + " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + + " ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" + + "============================================================" + } + } + } + } } diff --git a/nextflow.config b/nextflow.config index 32486aab168ccff7647220cd75a61f72f695fbc5..356f20058f0aa048851d0fd965078417564701b3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -3,48 +3,59 @@ * nf-core/hic Nextflow config file * ------------------------------------------------- * Default config options for all environments. - * Cluster-specific config options should be saved - * in the conf folder and imported under a profile - * name here. 
*/ // Global default params, used in configs params { // Workflow flags - // TODO nf-core: Specify your pipeline's command line flags + // Specify your pipeline's command line flags reads = "*{1,2}.fastq.gz" outdir = './results' genome = false readPaths = false chromosome_size = false restriction_fragments = false + skip_maps = false + skip_ice = false skip_cool = false skip_multiqc = false dnase = false // Boilerplate options name = false - multiqc_config = "$baseDir/conf/multiqc_config.yaml" + multiqc_config = "$baseDir/assets/multiqc_config.yaml" email = false + maxMultiqcEmailFileSize = 25.MB plaintext_email = false + monochrome_logs = false help = false igenomes_base = "./iGenomes" tracedir = "${params.outdir}/pipeline_info" - clusterOptions = false awsqueue = false awsregion = 'eu-west-1' igenomesIgnore = false custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + hostnames = false + config_profile_description = false + config_profile_contact = false + config_profile_url = false } -process.container = 'nfcore/hic:dev' +// Container slug. Stable releases should specify release tag! +// Developmental code should specify :dev +process.container = 'nfcore/hic:1.0.0' // Load base.config by default for all pipelines includeConfig 'conf/base.config' // Load nf-core custom profiles from different Institutions -includeConfig "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}/nfcore_custom.config" +try { + includeConfig "${params.custom_config_base}/nfcore_custom.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") +} // Load hic config file includeConfig 'conf/hicpro.config' @@ -69,19 +80,19 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] timeline { enabled = true - file = "${params.tracedir}/nf-core/hic_timeline.html" + file = "${params.tracedir}/execution_timeline.html" } report { enabled = true - file = "${params.tracedir}/nf-core/hic_report.html" + file = "${params.tracedir}/execution_report.html" } trace { enabled = true - file = "${params.tracedir}/nf-core/hic_trace.txt" + file = "${params.tracedir}/execution_trace.txt" } dag { enabled = true - file = "${params.tracedir}/nf-core/hic_dag.svg" + file = "${params.tracedir}/pipeline_dag.svg" } manifest { @@ -91,7 +102,7 @@ manifest { description = 'Analysis of Chromosome Conformation Capture data (Hi-C)' mainScript = 'main.nf' nextflowVersion = '>=0.32.0' - version = '1.0dev' + version = '1.0.0' } // Function to ensure that resource requirements don't go beyond
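
The section ends just before the resource-capping helper that this final comment introduces. For context, `conf/base.config` above calls this helper (e.g. `memory = { check_max( 8.GB * task.attempt, 'memory' ) }`). The sketch below shows the standard nf-core template implementation of `check_max`, which caps requests at `params.max_memory`, `params.max_time` and `params.max_cpus`; the exact text in this release may differ slightly.

```groovy
// Sketch of the standard nf-core check_max helper (not necessarily this file's exact text):
// cap a requested resource at the corresponding params.max_* limit, falling back to the
// requested value (with a warning) if the configured limit cannot be parsed.
def check_max(obj, type) {
    if (type == 'memory') {
        try {
            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
                return params.max_memory as nextflow.util.MemoryUnit
            else
                return obj
        } catch (all) {
            println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'time') {
        try {
            if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
                return params.max_time as nextflow.util.Duration
            else
                return obj
        } catch (all) {
            println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'cpus') {
        try {
            return Math.min(obj, params.max_cpus as int)
        } catch (all) {
            println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }
}
```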