diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index fda99de7282aba62eefbddccdcfef67fa67bb1a9..b88d0f43654c3a170c0cf930e212fa316dda28e2 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -6,7 +6,9 @@ We try to manage the required tasks for nf-core/hic using GitHub issues, you pro
 However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;)
 
-> If you need help using or modifying nf-core/hic then the best place to go is the Gitter chatroom where you can ask us questions directly: https://gitter.im/nf-core/Lobby
+> If you need help using or modifying nf-core/hic then the best place to ask is on the pipeline channel on [Slack](https://nf-core-invite.herokuapp.com/).
+
+
 ## Contribution workflow
 If you'd like to write some code for nf-core/hic, the standard workflow
@@ -42,4 +44,4 @@ If there are any failures then the automated tests fail.
 These tests are run both with the latest available version of Nextflow and also the minimum required version that is stated in the pipeline code.
 
 ## Getting help
-For further information/help, please consult the [nf-core/hic documentation](https://github.com/nf-core/hic#documentation) and don't hesitate to get in touch on [Gitter](https://gitter.im/nf-core/Lobby)
+For further information/help, please consult the [nf-core/hic documentation](https://github.com/nf-core/hic#documentation) and don't hesitate to get in touch on the pipeline channel on [Slack](https://nf-core-invite.herokuapp.com/).
diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e052a635aa7c2787e741207a069d9a400358ca6c
--- /dev/null
+++ b/.github/markdownlint.yml
@@ -0,0 +1,9 @@
+# Markdownlint configuration file
+default: true
+line-length: false
+no-multiple-blanks: 0
+blanks-around-headers: false
+blanks-around-lists: false
+header-increment: false
+no-duplicate-header:
+    siblings_only: true
diff --git a/.gitignore b/.gitignore
index 46f69e414ba5d72f679f5140fee33188d84422f8..5b54e3e6c257de1e963395161372e1a2ca110fe7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ data/
 results/
 .DS_Store
 tests/test_data
+*.pyc
diff --git a/.travis.yml b/.travis.yml
index 2eaea150d6ecf198773030802425b4a34edd7219..bc8037e185a6231eed8da45d830148cdce17f087 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -13,6 +13,7 @@ before_install:
   # Pull the docker image first so the test doesn't wait for this
   - docker pull nfcore/hic:dev
   # Fake the tag locally so that the pipeline runs properly
+  # Looks weird when this is :dev to :dev, but makes sense when testing code for a release (:dev to :1.0.1)
   - docker tag nfcore/hic:dev nfcore/hic:dev
 
 install:
@@ -25,12 +26,17 @@ install:
   - pip install nf-core
   # Reset
   - mkdir ${TRAVIS_BUILD_DIR}/tests && cd ${TRAVIS_BUILD_DIR}/tests
+  # Install markdownlint-cli
+  - sudo apt-get install npm && npm install -g markdownlint-cli
 
 env:
   - NXF_VER='0.32.0' # Specify a minimum NF version that should be tested and work
+  - NXF_VER='' # Plus: get the latest NF version and check that it works
 
 script:
   # Lint the pipeline code
   - nf-core lint ${TRAVIS_BUILD_DIR}
+  # Lint the documentation
+  - markdownlint ${TRAVIS_BUILD_DIR} -c ${TRAVIS_BUILD_DIR}/.github/markdownlint.yml
   # Run the pipeline with the test profile
   - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker
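One YAML subtlety in the new `.github/markdownlint.yml`: `default: true` must be written without a trailing comma, since `default: true,` parses as the string `true,` rather than a boolean, and the rule set would then not behave as intended. A quick sanity check, assuming PyYAML is available (illustration only, not part of the pipeline):

```python
# Hypothetical sanity check for the markdownlint config; assumes PyYAML is installed.
import yaml

good = yaml.safe_load("default: true")   # value is the boolean True
bad = yaml.safe_load("default: true,")   # trailing comma makes it the string "true,"

print(type(good["default"]).__name__, good["default"])  # bool True
print(type(bad["default"]).__name__, bad["default"])    # str true,
```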
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index 21096193adc83331ff86beea517b1d0e37e35c09..09226d0d8d896bbc3bdb632476430d6cad4b0aa7 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -34,7 +34,7 @@ This Code of Conduct applies both within project spaces and in public spaces whe
 
 ## Enforcement
 
-Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on the [Gitter channel](https://gitter.im/nf-core/Lobby). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
+Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-core-invite.herokuapp.com/). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
 
 Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
diff --git a/README.md b/README.md
index cf702bf337b100b48ab98405e67cb3b1e7c855d9..5f2c07459b0e8da3644198705f6ecb91b57bca01 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,8 @@
-# ![nf-core/hic](docs/images/nf-core-hic_logo.png)
+# nf-core/hic
 
-**Analysis of Chromosome Conformation Capture data (Hi-C)**
+**Analysis of Chromosome Conformation Capture data (Hi-C)**.
 
-[![Build Status](https://travis-ci.org/nf-core/hic.svg?branch=master)](https://travis-ci.org/nf-core/hic)
+[![Build Status](https://travis-ci.com/nf-core/hic.svg?branch=master)](https://travis-ci.com/nf-core/hic)
 [![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.32.0-brightgreen.svg)](https://www.nextflow.io/)
 [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/)
diff --git a/assets/email_template.txt b/assets/email_template.txt
index 59d7b549d375db84dae99a2e186b2a9df109f2d1..6c85add607a47589da20df83c6892bcfe5e04f1d 100644
--- a/assets/email_template.txt
+++ b/assets/email_template.txt
@@ -17,23 +17,6 @@ ${errorReport}
 }
 %>
 
-<% if (!success){
-    out << """####################################################
-## nf-core/hic execution completed unsuccessfully! ##
-####################################################
-The exit status of the task that caused the workflow execution to fail was: $exitStatus.
-The full error message was:
-
-${errorReport}
-"""
-} else {
-    out << "## nf-core/hic execution completed successfully! ##"
-}
-%>
-
-
-
-
 The workflow was completed at $dateComplete (duration: $duration)
 
 The command used to launch the workflow was as follows:
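The block removed from `assets/email_template.txt` duplicated a success/failure banner that already appears earlier in the template; what remains is filled in by simple `$var` placeholders such as `$dateComplete` and `$duration`. The real rendering uses Groovy's `GStringTemplateEngine` (see `main.nf` further down); as a rough analogue only, Python's `string.Template` behaves the same way for such placeholders (the values below are invented):

```python
# Rough analogue of how the e-mail template's $placeholders are filled; the
# pipeline itself uses Groovy's GStringTemplateEngine. Values are made up.
from string import Template

line = Template("The workflow was completed at $dateComplete (duration: $duration)")
print(line.substitute(dateComplete="2019-05-09T12:34:56", duration="1h 23m"))
```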
diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d425b46caa3f6d032a2e5ed340788e583214d447
--- /dev/null
+++ b/assets/multiqc_config.yaml
@@ -0,0 +1,9 @@
+report_comment: >
+    This report has been generated by the <a href="https://github.com/nf-core/hic" target="_blank">nf-core/hic</a>
+    analysis pipeline. For information about how to interpret these results, please see the
+    <a href="https://github.com/nf-core/hic" target="_blank">documentation</a>.
+report_section_order:
+    nf-core/hic-software-versions:
+        order: -1000
+
+export_plots: true
diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt
index fd1cd7396e07c870807d05843d5bd5a74e49c2d8..2d6712200607cb62f31be950cfe4c54e5ca1838a 100644
--- a/assets/sendmail_template.txt
+++ b/assets/sendmail_template.txt
@@ -1,11 +1,36 @@
 To: $email
 Subject: $subject
 Mime-Version: 1.0
-Content-Type: multipart/related;boundary="nfmimeboundary"
+Content-Type: multipart/related;boundary="nfcoremimeboundary"
 
---nfmimeboundary
+--nfcoremimeboundary
 Content-Type: text/html; charset=utf-8
 
 $email_html
 
---nfmimeboundary--
+<%
+if (mqcFile){
+def mqcFileObj = new File("$mqcFile")
+if (mqcFileObj.length() < mqcMaxSize){
+out << """
+--nfcoremimeboundary
+Content-Type: text/html; name=\"multiqc_report\"
+Content-Transfer-Encoding: base64
+Content-ID: <mqcreport>
+Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\"
+
+${mqcFileObj.
+    bytes.
+    encodeBase64().
+    toString().
+    tokenize( '\n' )*.
+    toList()*.
+    collate( 76 )*.
+    collect { it.join() }.
+    flatten().
+    join( '\n' )}
+"""
+}}
+%>
+
+--nfcoremimeboundary--
diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py
index 4a1747d86e627006574c326807fcb8ff7637c242..8cf977c143a81ee4acc6a958a492f269739bf2b1 100755
--- a/bin/scrape_software_versions.py
+++ b/bin/scrape_software_versions.py
@@ -7,10 +7,14 @@ import re
 regexes = {
     'nf-core/hic': ['v_pipeline.txt', r"(\S+)"],
     'Nextflow': ['v_nextflow.txt', r"(\S+)"],
+    'FastQC': ['v_fastqc.txt', r"FastQC v(\S+)"],
+    'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"],
 }
 results = OrderedDict()
 results['nf-core/hic'] = '<span style="color:#999999;\">N/A</span>'
 results['Nextflow'] = '<span style="color:#999999;\">N/A</span>'
+results['FastQC'] = '<span style="color:#999999;\">N/A</span>'
+results['MultiQC'] = '<span style="color:#999999;\">N/A</span>'
 
 # Search each file using its regex
 for k, v in regexes.items():
@@ -20,9 +24,14 @@ for k, v in regexes.items():
         if match:
             results[k] = "v{}".format(match.group(1))
 
+# Remove software set to false in results (iterate over a copy so we can delete)
+for k in list(results):
+    if not results[k]:
+        del(results[k])
+
 # Dump to YAML
 print ('''
-id: 'nf-core/hic-software-versions'
+id: 'software_versions'
section_name: 'nf-core/hic Software Versions'
 section_href: 'https://github.com/nf-core/hic'
 plot_type: 'html'
@@ -31,5 +40,10 @@ data: |
     <dl class="dl-horizontal">
 ''')
 for k,v in results.items():
-    print("        <dt>{}</dt><dd>{}</dd>".format(k,v))
+    print("        <dt>{}</dt><dd><samp>{}</samp></dd>".format(k,v))
 print ("    </dl>")
+
+# Write out the software versions as a tab-separated csv file:
+with open('software_versions.csv', 'w') as f:
+    for k,v in results.items():
+        f.write("{}\t{}\n".format(k,v))
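The two new entries in `bin/scrape_software_versions.py` capture the version token from each tool's `--version` output. A minimal check of the regexes, using sample strings that mimic the usual output format (the version numbers here are illustrative):

```python
# Minimal check of the new version-capture regexes; the sample strings mimic
# typical `fastqc --version` / `multiqc --version` output and are illustrative.
import re

assert re.search(r"FastQC v(\S+)", "FastQC v0.11.8").group(1) == "0.11.8"
assert re.search(r"multiqc, version (\S+)", "multiqc, version 1.6").group(1) == "1.6"
print("version regexes OK")
```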
diff --git a/conf/awsbatch.config b/conf/awsbatch.config
index 79078c7bd03ef1c4131ebfb5d46bf621150d74c1..14af5866f5c6c18db7e8d6b93b40da8ea8311721 100644
--- a/conf/awsbatch.config
+++ b/conf/awsbatch.config
@@ -1,10 +1,15 @@
 /*
  * -------------------------------------------------
- *  Nextflow config file for AWS Batch
+ *  Nextflow config file for running on AWS batch
  * -------------------------------------------------
- * Imported under the 'awsbatch' Nextflow profile in nextflow.config
- * Uses docker for software depedencies automagically, so not specified here.
+ * Base config needed for running with -profile awsbatch
  */
+params {
+  config_profile_name = 'AWSBATCH'
+  config_profile_description = 'AWSBATCH Cloud Profile'
+  config_profile_contact = 'Alexander Peltzer (@apeltzer)'
+  config_profile_url = 'https://aws.amazon.com/de/batch/'
+}
 
 aws.region = params.awsregion
 process.executor = 'awsbatch'
diff --git a/conf/base.config b/conf/base.config
index 7f99f28907f6c88829f0cc094de6672a4bacd873..156fa28b432d8742d4252580f30329a12dfd6819 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -1,6 +1,6 @@
 /*
  * -------------------------------------------------
- *  Nextflow base config file
+ *  nf-core/hic Nextflow base config file
  * -------------------------------------------------
 * A 'blank slate' config file, appropriate for general
 * use on most high performance compute environments.
@@ -11,13 +11,12 @@
 
 process {
 
-  container = process.container
-
-  cpus = { check_max( 2, 'cpus' ) }
+  // TODO nf-core: Check the defaults for all processes
+  cpus = { check_max( 1 * task.attempt, 'cpus' ) }
   memory = { check_max( 8.GB * task.attempt, 'memory' ) }
   time = { check_max( 2.h * task.attempt, 'time' ) }
 
-  errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'terminate' }
+  errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
   maxRetries = 1
   maxErrors = '-1'
@@ -25,7 +24,7 @@ process {
   withName:makeBowtie2Index {
     cpus = { check_max( 1, 'cpus' ) }
     memory = { check_max( 10.GB * task.attempt, 'memory' ) }
-    time = { check_max( 12.h * task.attempt, 'time' ) }
+    time = { check_max( 12.h * task.attempt, 'time' ) }
   }
   withName:bowtie2_end_to_end {
     cpus = { check_max( 4, 'cpus' ) }
diff --git a/conf/igenomes.config b/conf/igenomes.config
index 26950cf2932485d39cc3ed04705392efdacf6b71..92ad32389c6646cae0feea95e5e0a3bceeba909e 100644
--- a/conf/igenomes.config
+++ b/conf/igenomes.config
@@ -60,7 +60,7 @@ params {
     }
     'Gm01' {
       fasta   = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa"
-      bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/genome"
+      bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/genome"
     }
     'Mmul_1' {
       fasta   = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa"
@@ -96,7 +96,7 @@ params {
     }
     'AGPv3' {
       fasta   = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa"
-      bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/genome"
+      bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/genome"
     }
   }
 }
diff --git a/conf/test.config b/conf/test.config
index b4fd1845c65349aa6a58a82dc033b38d6bf76815..592e3a40d8bce4cf22b5fe1ad9014ded48d439ce 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -16,7 +16,7 @@ params {
   max_cpus = 2
   max_memory = 4.GB
   max_time = 1.h
-  
+
   // Input data
   readPaths = [
     ['SRR4292758_00', ['https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R2.fastq.gz']]
@@ -31,4 +31,3 @@ params {
   // Options
   skip_cool = true
 }
-
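The new `cpus = { check_max( 1 * task.attempt, 'cpus' ) }` defaults in `conf/base.config` mean resource requests grow with each retry but stay capped by the `--max_*` limits (e.g. `max_memory = 4.GB` in the test profile above). A rough Python sketch of the clamping behaviour, under the assumption that the template's `check_max()` helper (defined in `nextflow.config`, not shown in this diff) simply caps an escalating request at the configured maximum:

```python
# Sketch of the check_max() clamping idea; assumes it caps an escalating
# request at the configured maximum. Values below are examples only.
def check_max(requested_gb, max_gb):
    """Return the requested memory, capped at the profile's maximum."""
    return min(requested_gb, max_gb)

for attempt in (1, 2):  # memory = 8.GB * task.attempt, test profile max 4.GB
    print(f"attempt {attempt}: request {8 * attempt} GB -> {check_max(8 * attempt, 4)} GB")
```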
diff --git a/docs/output.md b/docs/output.md
index 518ac60f545d4f87051dfec5ace6f972cd93d65b..f395dcd111f5c8c09371e84f0581297f326bed05 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -26,7 +26,7 @@ Singletons are discarded, and multi-hits are filtered according to the configura
 Note that if the `--dnase` mode is activated, HiC-Pro will skip the second mapping step.
 
 **Output directory: `results/mapping`**
- 
+
 * `*bwt2pairs.bam` - final BAM file with aligned paired data
 * `*.pairstat` - mapping statistics
@@ -50,7 +50,7 @@ Invalid pairs are classified as follows:
 * Dangling end, i.e. unligated fragments (both reads mapped on the same restriction fragment)
 * Self circles, i.e. fragments ligated on themselves (both reads mapped on the same restriction fragment in inverted orientation)
 * Religation, i.e. ligation of juxtaposed fragments
-* Filtered pairs, i.e. any pairs that do not match the filtering criteria on inserts size, restriction fragments size 
+* Filtered pairs, i.e. any pairs that do not match the filtering criteria on insert size, restriction fragment size
 * Dumped pairs, i.e. any pairs for which we were not able to reconstruct the ligation product.
 
 Only valid pairs involving two different restriction fragments are used to build the contact maps.
@@ -59,7 +59,7 @@ Duplicated valid pairs associated with PCR artefacts are discarded (see `--rm_dup`
 In case of Hi-C protocols that do not require a restriction enzyme such as DNase Hi-C or micro Hi-C, the assignment to a restriction is not possible (see `--dnase`). Short range interactions that are likely to be spurious ligation products can thus be discarded using the `--min_cis_dist` parameter.
 
-* `*.validPairs` - List of valid ligation products 
+* `*.validPairs` - List of valid ligation products
 * `*RSstat` - Statistics on the number of read pairs falling in each category
 
 The validPairs are stored using a simple tab-delimited text format;
@@ -124,4 +124,4 @@ The pipeline has special steps which allow the software versions used to be repo
 * `Project_multiqc_data/`
   * Directory containing parsed statistics from the different tools used in the pipeline
 
-For more information about how to use MultiQC reports, see http://multiqc.info
+For more information about how to use MultiQC reports, see [http://multiqc.info](http://multiqc.info)
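As `docs/output.md` notes, the `.validPairs` files are plain tab-delimited text, and the deduplication step later in `main.nf` keys on the chromosome/position columns (fields 2, 3, 5 and 6 in its `awk` command). A small reader sketch; the exact field list follows the HiC-Pro documentation and should be treated as an assumption here, since it is not defined in this diff:

```python
# Sketch of parsing a HiC-Pro .validPairs file; the field names follow the
# HiC-Pro docs and are an assumption, not something defined in this diff.
import csv

FIELDS = ["read_name", "chr1", "pos1", "strand1", "chr2", "pos2", "strand2",
          "insert_size", "frag1", "frag2", "mapq1", "mapq2"]

def read_valid_pairs(path):
    with open(path) as handle:
        for row in csv.reader(handle, delimiter="\t"):
            yield dict(zip(FIELDS, row))
```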
diff --git a/docs/usage.md b/docs/usage.md
index 853c38414b6e53090c3d9b0f19e849a0972b1243..4f6825eb506b05db2e73afc415863c6adcaef056 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -121,21 +121,22 @@ Use this parameter to choose a configuration profile. Profiles can give configur
 If `-profile` is not specified at all the pipeline will be run locally and expects all software to be installed and available on the `PATH`.
 
 * `awsbatch`
-  * A generic configuration profile to be used with AWS Batch.
+    * A generic configuration profile to be used with AWS Batch.
 * `conda`
-  * A generic configuration profile to be used with [conda](https://conda.io/docs/)
-  * Pulls most software from [Bioconda](https://bioconda.github.io/)
+    * A generic configuration profile to be used with [conda](https://conda.io/docs/)
+    * Pulls most software from [Bioconda](https://bioconda.github.io/)
 * `docker`
-  * A generic configuration profile to be used with [Docker](http://docker.com/)
-  * Pulls software from dockerhub: [`nfcore/hic`](http://hub.docker.com/r/nfcore/hic/)
+    * A generic configuration profile to be used with [Docker](http://docker.com/)
+    * Pulls software from dockerhub: [`nfcore/hic`](http://hub.docker.com/r/nfcore/hic/)
 * `singularity`
-  * A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/)
-  * Pulls software from singularity-hub
+    * A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/)
+    * Pulls software from DockerHub: [`nfcore/hic`](http://hub.docker.com/r/nfcore/hic/)
 * `test`
-  * A profile with a complete configuration for automated testing
-  * Includes links to test data so needs no other parameters
+    * A profile with a complete configuration for automated testing
+    * Includes links to test data so needs no other parameters
 
 <!-- TODO nf-core: Document required command line parameters -->
+
 ### `--reads`
 
 Use this to specify the location of your input FastQ files. For example:
diff --git a/environment.yml b/environment.yml
index ae15924597b0494e1e9f6005aa675cac8a64adb6..ed47c8b2057698e0d704be2bda4feba2179d0c62 100644
--- a/environment.yml
+++ b/environment.yml
@@ -1,3 +1,5 @@
+# You can use this file to create a conda environment for this pipeline:
+# conda env create -f environment.yml
 name: nf-core-hic-1.0dev
 channels:
   - conda-forge
@@ -16,4 +18,4 @@ dependencies:
   - samtools=1.7
   - multiqc=1.6
   - pip:
-    - iced==0.4.2
\ No newline at end of file
+    - iced==0.4.2
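The comment added to `environment.yml` documents the one-liner for building the environment (`conda env create -f environment.yml`). If you ever need to audit the pinned versions programmatically, a short sketch (assumes PyYAML; the nested `pip:` entry appears as a dict inside the dependency list):

```python
# Convenience sketch: list pinned dependencies from environment.yml.
# Assumes PyYAML is available; not part of the pipeline itself.
import yaml

with open("environment.yml") as handle:
    env = yaml.safe_load(handle)

for dep in env["dependencies"]:
    if isinstance(dep, dict):          # the '- pip:' entry
        for pip_dep in dep["pip"]:
            print("pip  :", pip_dep)   # e.g. iced==0.4.2
    else:
        print("conda:", dep)           # e.g. multiqc=1.6
```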
diff --git a/main.nf b/main.nf
index 4b975d002949e63655eab5b051e5c52c10de30a2..bccbb0d121382893c819ecbad7e54c2136a902d1 100644
--- a/main.nf
+++ b/main.nf
@@ -11,19 +11,9 @@
 
 def helpMessage() {
+    // TODO nf-core: Add to this help message with new command line parameters
+    log.info nfcoreHeader()
     log.info"""
-    =======================================================
-                                              ,--./,-.
-              ___     __   __   __   ___     /,-._.--~\'
-        |\\ | |__  __ /  ` /  \\ |__) |__         }  {
-        | \\| |       \\__, \\__/ |  \\ |___     \\`-._,-`-,
-                                              `._,._,\'
-
-     nf-core/hic v${workflow.manifest.version}
-    =======================================================
-
-    This pipeline is a Nextflow version of the HiC-Pro pipeline for Hi-C data processing.
-    See https://github.com/nservant/HiC-Pro for details.
 
     Usage:
@@ -107,7 +97,7 @@ if (!params.dnase && !params.ligation_site) {
 }
 
 // Reference index path configuration
-params.bwt2_index = params.genome ? params.genomes[ params.genome ].bowtie2 ?: false : false
+params.bwt2_index = params.genome ? params.genomes[ params.genome ].bowtie2 ?: false : false
 params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
 
@@ -118,22 +108,21 @@ if( !(workflow.runName ==~ /[a-z]+_[a-z]+/) ){
   custom_runName = workflow.runName
 }
 
+
 if( workflow.profile == 'awsbatch') {
     // AWSBatch sanity checking
     if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!"
-    if (!workflow.workDir.startsWith('s3') || !params.outdir.startsWith('s3')) exit 1, "Specify S3 URLs for workDir and outdir parameters on AWSBatch!"
-    // Check workDir/outdir paths to be S3 buckets if running on AWSBatch
+    // Check outdir paths to be S3 buckets if running on AWSBatch
     // related: https://github.com/nextflow-io/nextflow/issues/813
-    if (!workflow.workDir.startsWith('s3:') || !params.outdir.startsWith('s3:')) exit 1, "Workdir or Outdir not on S3 - specify S3 Buckets for each to run on AWSBatch!"
+    if (!params.outdir.startsWith('s3:')) exit 1, "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!"
+    // Prevent trace files from being stored on S3 since S3 does not support rolling files.
+    if (workflow.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles."
 }
 
 // Stage config files
 ch_multiqc_config = Channel.fromPath(params.multiqc_config)
 ch_output_docs = Channel.fromPath("$baseDir/docs/output.md")
-
-
-
 /**********************************************************
  * SET UP CHANNELS
  */
@@ -185,7 +174,7 @@ if ( params.bwt2_index ){
    Channel.fromPath( bwt2_dir , checkIfExists: true)
       .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwt2_index}" }
       .into { bwt2_index_end2end; bwt2_index_trim }
-
+
 }
 else if ( params.fasta ) {
     lastPath = params.fasta.lastIndexOf(File.separator)
@@ -199,7 +188,6 @@
 else {
    exit 1, "No reference genome specified!"
 }
-
 // Chromosome size
 
 if ( params.chromosome_size ){
@@ -241,20 +229,10 @@ ch_output_docs = Channel.fromPath("$baseDir/docs/output.md")
 */
 
 // Header log info
-log.info """=======================================================
-                                          ,--./,-.
-         ___     __   __   __   ___     /,-._.--~\'
-   |\\ | |__  __ /  ` /  \\ |__) |__         }  {
-   | \\| |       \\__, \\__/ |  \\ |___     \\`-._,-`-,
-                                          `._,._,\'
-
-nf-core/hic v${workflow.manifest.version}"
-======================================================="""
+log.info nfcoreHeader()
 def summary = [:]
-summary['Pipeline Name']  = 'nf-core/hic'
-summary['Pipeline Version'] = workflow.manifest.version
+if(workflow.revision) summary['Pipeline Release'] = workflow.revision
 summary['Run Name']     = custom_runName ?: workflow.runName
-
 summary['Reads']        = params.reads
 summary['splitFastq']   = params.splitFastq
 summary['Fasta Ref']    = params.fasta
@@ -269,7 +247,7 @@ summary['Max Time']   = params.max_time
 summary['Output dir']   = params.outdir
 summary['Working dir']  = workflow.workDir
 summary['Container Engine'] = workflow.containerEngine
-if(workflow.containerEngine)
+if(workflow.containerEngine)
   summary['Container'] = workflow.container
 summary['Current home']   = "$HOME"
 summary['Current user']   = "$USER"
@@ -282,10 +260,19 @@ if(workflow.profile == 'awsbatch'){
    summary['AWS Region']   = params.awsregion
    summary['AWS Queue']    = params.awsqueue
 }
-if(params.email) summary['E-mail Address'] = params.email
-log.info summary.collect { k,v -> "${k.padRight(15)}: $v" }.join("\n")
-log.info "========================================="
+summary['Config Profile'] = workflow.profile
+if(params.config_profile_description) summary['Config Description'] = params.config_profile_description
+if(params.config_profile_contact)     summary['Config Contact']     = params.config_profile_contact
+if(params.config_profile_url)         summary['Config URL']         = params.config_profile_url
+if(params.email) {
+    summary['E-mail Address']  = params.email
+    summary['MultiQC maxsize'] = params.maxMultiqcEmailFileSize
+}
+log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n")
+log.info "\033[2m----------------------------------------------------\033[0m"
+
+// Check the hostnames against configured profiles
+checkHostname()
 
 def create_workflow_summary(summary) {
     def yaml_file = workDir.resolve('workflow_summary_mqc.yaml')
@@ -309,19 +296,25 @@ ${summary.collect { k,v -> "            <dt>$k</dt><dd><samp>${v ?: '<span style
  * Parse software version numbers
  */
 process get_software_versions {
+    publishDir "${params.outdir}/pipeline_info", mode: 'copy',
+        saveAs: {filename ->
+            if (filename.indexOf(".csv") > 0) filename
+            else null
+        }
 
-   output:
-   file 'software_versions_mqc.yaml' into software_versions_yaml
+    output:
+    file 'software_versions_mqc.yaml' into software_versions_yaml
+    file "software_versions.csv"
 
-   script:
-   """
-   echo $workflow.manifest.version > v_pipeline.txt
-   echo $workflow.nextflow.version > v_nextflow.txt
-   bowtie2 --version > v_bowtie2.txt
-   python --version > v_python.txt
-   samtools --version > v_samtools.txt
-   scrape_software_versions.py > software_versions_mqc.yaml
-   """
+    script:
+    """
+    echo $workflow.manifest.version > v_pipeline.txt
+    echo $workflow.nextflow.version > v_nextflow.txt
+    bowtie2 --version > v_bowtie2.txt
+    python --version > v_python.txt
+    samtools --version > v_samtools.txt
+    scrape_software_versions.py &> software_versions_mqc.yaml
+    """
 }
 
@@ -362,13 +355,13 @@ if(!params.chromosome_size && params.fasta){
       file fasta from fasta_for_chromsize
 
       output:
-      file "*.size" into chromosome_size, chromosome_size_cool
+      file "*.size" into chromosome_size, chromosome_size_cool
 
      script:
      """
      samtools faidx ${fasta}
      cut -f1,2 ${fasta}.fai > chrom.size
-     """
+     """
    }
 }
@@ -407,7 +400,7 @@ process bowtie2_end_to_end {
    input:
      set val(sample), file(reads) from raw_reads
      file index from bwt2_index_end2end.collect()
-
+
    output:
      set val(prefix), file("${prefix}_unmap.fastq") into unmapped_end_to_end
      set val(prefix), file("${prefix}.bam") into end_to_end_bam
@@ -415,7 +408,7 @@ process bowtie2_end_to_end {
    script:
      prefix = reads.toString() - ~/(\.fq)?(\.fastq)?(\.gz)?$/
      def bwt2_opts = params.bwt2_opts_end2end
-
+
      if (!params.dnase){
      """
      bowtie2 --rg-id BMG --rg SM:${prefix} \\
@@ -506,13 +499,13 @@ if (!params.dnase){
      """
      samtools merge -@ ${task.cpus} \\
                     -f ${prefix}_bwt2merged.bam \\
-                    ${bam1} ${bam2}
+                    ${bam1} ${bam2}
 
      samtools sort -@ ${task.cpus} -m 800M \\
                    -n -T /tmp/ \\
                    -o ${prefix}_bwt2merged.sorted.bam \\
                    ${prefix}_bwt2merged.bam
-
+
      mv ${prefix}_bwt2merged.sorted.bam ${prefix}_bwt2merged.bam
 
      echo "## ${prefix}" > ${prefix}.mapstat
@@ -557,13 +550,11 @@ if (!params.dnase){
    }
 }
-
-
 process combine_mapped_files{
    tag "$sample = $r1_prefix + $r2_prefix"
    publishDir "${params.outdir}/mapping", mode: 'copy',
-              saveAs: {filename -> filename.indexOf(".pairstat") > 0 ? "stats/$filename" : "$filename"}
-
+              saveAs: {filename -> filename.indexOf(".pairstat") > 0 ? "stats/$filename" : "$filename"}
+
    input:
      set val(sample), file(aligned_bam) from bwt2_merged_bam.groupTuple()
 
@@ -577,7 +568,7 @@ process combine_mapped_files{
    r2_bam = aligned_bam[1]
    r2_prefix = r2_bam.toString() - ~/_bwt2merged.bam$/
    oname = sample.toString() - ~/(\.[0-9]+)$/
-
+
    def opts = "-t"
    opts = params.rm_singleton ? "${opts}" : "--single ${opts}"
    opts = params.rm_multi ? "${opts}" : "--multi ${opts}"
@@ -596,7 +587,7 @@ if (!params.dnase){
    process get_valid_interaction{
      tag "$sample"
      publishDir "${params.outdir}/hic_results/data", mode: 'copy',
-              saveAs: {filename -> filename.indexOf("*stat") > 0 ? "stats/$filename" : "$filename"}
+              saveAs: {filename -> filename.indexOf("*stat") > 0 ? "stats/$filename" : "$filename"}
"stats/$filename" : "$filename"} input: set val(sample), file(pe_bam) from paired_bam @@ -607,7 +598,7 @@ if (!params.dnase){ set val(sample), file("*.validPairs") into valid_pairs_4cool set val(sample), file("*RSstat") into all_rsstat - script: + script: if (params.splitFastq){ sample = sample.toString() - ~/(\.[0-9]+)$/ } @@ -628,17 +619,17 @@ else{ process get_valid_interaction_dnase{ tag "$sample" publishDir "${params.outdir}/hic_results/data", mode: 'copy', - saveAs: {filename -> filename.indexOf("*stat") > 0 ? "stats/$filename" : "$filename"} + saveAs: {filename -> filename.indexOf("*stat") > 0 ? "stats/$filename" : "$filename"} input: set val(sample), file(pe_bam) from paired_bam - + output: set val(sample), file("*.validPairs") into valid_pairs set val(sample), file("*.validPairs") into valid_pairs_4cool set val(sample), file("*RSstat") into all_rsstat - script: + script: if (params.splitFastq){ sample = sample.toString() - ~/(\.[0-9]+)$/ } @@ -659,7 +650,7 @@ else{ process remove_duplicates { tag "$sample" publishDir "${params.outdir}/hic_results/data", mode: 'copy', - saveAs: {filename -> filename.indexOf("*stat") > 0 ? "stats/$sample/$filename" : "$filename"} + saveAs: {filename -> filename.indexOf("*stat") > 0 ? "stats/$sample/$filename" : "$filename"} input: set val(sample), file(vpairs) from valid_pairs.groupTuple() @@ -674,7 +665,7 @@ process remove_duplicates { """ mkdir -p stats/${sample} sort -T /tmp/ -S 50% -k2,2V -k3,3n -k5,5V -k6,6n -m ${vpairs} | \ - awk -F"\\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=\$2 || c2!=\$5 || s1!=\$3 || s2!=\$6){print;c1=\$2;c2=\$5;s1=\$3;s2=\$6}' > ${sample}.allValidPairs + awk -F"\\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=\$2 || c2!=\$5 || s1!=\$3 || s2!=\$6){print;c1=\$2;c2=\$5;s1=\$3;s2=\$6}' > ${sample}.allValidPairs echo -n "valid_interaction\t" > stats/${sample}/${sample}_allValidPairs.mergestat cat ${vpairs} | wc -l >> stats/${sample}/${sample}_allValidPairs.mergestat echo -n "valid_interaction_rmdup\t" >> stats/${sample}/${sample}_allValidPairs.mergestat @@ -764,7 +755,7 @@ process run_ice{ --results_filename ${prefix}_iced.matrix \ --filter_high_counts_perc ${params.ice_filer_high_count_perc} \ --max_iter ${params.ice_max_iter} --eps ${params.ice_eps} --remove-all-zeros-loci --output-bias 1 --verbose 1 ${rmaps} - """ + """ } @@ -788,13 +779,13 @@ process generate_cool{ script: """ hicpro2higlass.sh -i $vpairs -r 5000 -c ${chrsize} -n - """ + """ } /* * STEP 5 - MultiQC - */ + */ process multiqc { publishDir "${params.outdir}/MultiQC", mode: 'copy' @@ -814,23 +805,19 @@ process multiqc { script: rtitle = custom_runName ? "--title \"$custom_runName\"" : '' rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' - + """ multiqc -f $rtitle $rfilename --config $multiqc_config . 
""" } -/**************************************************** - * POST-PROCESSING - */ -/* - * Output Description HTML +/* + * STEP 3 - Output Description HTML */ - process output_documentation { - publishDir "${params.outdir}/Documentation", mode: 'copy' + publishDir "${params.outdir}/pipeline_info", mode: 'copy' input: file output_docs from ch_output_docs @@ -876,10 +863,26 @@ workflow.onComplete { if(workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository if(workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId if(workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision + if(workflow.container) email_fields['summary']['Docker image'] = workflow.container email_fields['summary']['Nextflow Version'] = workflow.nextflow.version email_fields['summary']['Nextflow Build'] = workflow.nextflow.build email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + // TODO nf-core: If not using MultiQC, strip out this code (including params.maxMultiqcEmailFileSize) + // On success try attach the multiqc report + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList){ + log.warn "[nf-core/hic] Found multiple reports from process 'multiqc', will use only one" + mqc_report = mqc_report[0] + } + } + } catch (all) { + log.warn "[nf-core/hic] Could not attach MultiQC report to summary email" + } + // Render the TXT template def engine = new groovy.text.GStringTemplateEngine() def tf = new File("$baseDir/assets/email_template.txt") @@ -892,7 +895,7 @@ workflow.onComplete { def email_html = html_template.toString() // Render the sendmail template - def smail_fields = [ email: params.email, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir" ] + def smail_fields = [ email: params.email, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.maxMultiqcEmailFileSize.toBytes() ] def sf = new File("$baseDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) def sendmail_html = sendmail_template.toString() @@ -912,7 +915,7 @@ workflow.onComplete { } // Write summary e-mail HTML to a file - def output_d = new File( "${params.outdir}/Documentation/" ) + def output_d = new File( "${params.outdir}/pipeline_info/" ) if( !output_d.exists() ) { output_d.mkdirs() } @@ -921,5 +924,67 @@ workflow.onComplete { def output_tf = new File( output_d, "pipeline_report.txt" ) output_tf.withWriter { w -> w << email_txt } - log.info "[nf-core/hic] Pipeline Complete" + c_reset = params.monochrome_logs ? '' : "\033[0m"; + c_purple = params.monochrome_logs ? '' : "\033[0;35m"; + c_green = params.monochrome_logs ? '' : "\033[0;32m"; + c_red = params.monochrome_logs ? 
'' : "\033[0;31m"; + + if (workflow.stats.ignoredCountFmt > 0 && workflow.success) { + log.info "${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}" + log.info "${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCountFmt} ${c_reset}" + log.info "${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCountFmt} ${c_reset}" + } + + if(workflow.success){ + log.info "${c_purple}[nf-core/hic]${c_green} Pipeline completed successfully${c_reset}" + } else { + checkHostname() + log.info "${c_purple}[nf-core/hic]${c_red} Pipeline completed with errors${c_reset}" + } + +} + + +def nfcoreHeader(){ + // Log colors ANSI codes + c_reset = params.monochrome_logs ? '' : "\033[0m"; + c_dim = params.monochrome_logs ? '' : "\033[2m"; + c_black = params.monochrome_logs ? '' : "\033[0;30m"; + c_green = params.monochrome_logs ? '' : "\033[0;32m"; + c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; + c_blue = params.monochrome_logs ? '' : "\033[0;34m"; + c_purple = params.monochrome_logs ? '' : "\033[0;35m"; + c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; + c_white = params.monochrome_logs ? '' : "\033[0;37m"; + + return """ ${c_dim}----------------------------------------------------${c_reset} + ${c_green},--.${c_black}/${c_green},-.${c_reset} + ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} + ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} + ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} + ${c_green}`._,._,\'${c_reset} + ${c_purple} nf-core/hic v${workflow.manifest.version}${c_reset} + ${c_dim}----------------------------------------------------${c_reset} + """.stripIndent() +} + +def checkHostname(){ + def c_reset = params.monochrome_logs ? '' : "\033[0m" + def c_white = params.monochrome_logs ? '' : "\033[0;37m" + def c_red = params.monochrome_logs ? '' : "\033[1;91m" + def c_yellow_bold = params.monochrome_logs ? '' : "\033[1;93m" + if(params.hostnames){ + def hostname = "hostname".execute().text.trim() + params.hostnames.each { prof, hnames -> + hnames.each { hname -> + if(hostname.contains(hname) && !workflow.profile.contains(prof)){ + log.error "====================================================\n" + + " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + + " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + + " ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" + + "============================================================" + } + } + } + } } diff --git a/nextflow.config b/nextflow.config index e80bebe0fabe93b37f783ff04451a50256ffe8c2..a526e9eb077b2a1b40be644d5b1da4b694481a27 100644 --- a/nextflow.config +++ b/nextflow.config @@ -3,9 +3,6 @@ * nf-core/hic Nextflow config file * ------------------------------------------------- * Default config options for all environments. - * Cluster-specific config options should be saved - * in the conf folder and imported under a profile - * name here. 
diff --git a/nextflow.config b/nextflow.config
index e80bebe0fabe93b37f783ff04451a50256ffe8c2..a526e9eb077b2a1b40be644d5b1da4b694481a27 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -3,9 +3,6 @@
  * nf-core/hic Nextflow config file
  * -------------------------------------------------
  * Default config options for all environments.
- * Cluster-specific config options should be saved
- * in the conf folder and imported under a profile
- * name here.
  */
 
 // Global default params, used in configs
@@ -27,26 +24,38 @@ params {
 
   // Boilerplate options
   name = false
-  multiqc_config = "$baseDir/conf/multiqc_config.yaml"
+  multiqc_config = "$baseDir/assets/multiqc_config.yaml"
   email = false
+  maxMultiqcEmailFileSize = 25.MB
   plaintext_email = false
+  monochrome_logs = false
   help = false
   igenomes_base = "./iGenomes"
   tracedir = "${params.outdir}/pipeline_info"
-  clusterOptions = false
   awsqueue = false
   awsregion = 'eu-west-1'
   igenomesIgnore = false
   custom_config_version = 'master'
+  custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
+  hostnames = false
+  config_profile_description = false
+  config_profile_contact = false
+  config_profile_url = false
 }
 
+// Container slug. Stable releases should specify release tag!
+// Developmental code should specify :dev
 process.container = 'nfcore/hic:dev'
 
 // Load base.config by default for all pipelines
 includeConfig 'conf/base.config'
 
 // Load nf-core custom profiles from different Institutions
-includeConfig "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}/nfcore_custom.config"
+try {
+  includeConfig "${params.custom_config_base}/nfcore_custom.config"
+} catch (Exception e) {
+  System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config")
+}
 
 // Load hic config file
 includeConfig 'conf/hicpro.config'
@@ -71,19 +80,19 @@ process.shell = ['/bin/bash', '-euo', 'pipefail']
 
 timeline {
   enabled = true
-  file = "${params.tracedir}/nf-core/hic_timeline.html"
+  file = "${params.tracedir}/execution_timeline.html"
 }
 report {
   enabled = true
-  file = "${params.tracedir}/nf-core/hic_report.html"
+  file = "${params.tracedir}/execution_report.html"
 }
 trace {
   enabled = true
-  file = "${params.tracedir}/nf-core/hic_trace.txt"
+  file = "${params.tracedir}/execution_trace.txt"
 }
 dag {
   enabled = true
-  file = "${params.tracedir}/nf-core/hic_dag.svg"
+  file = "${params.tracedir}/pipeline_dag.svg"
 }
 
 manifest {