diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000000000000000000000000000000000000..ea27a5843a0ff5f97ef49908689f595c42e17a1f --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,27 @@ +{ + "name": "nfcore", + "image": "nfcore/gitpod:latest", + "remoteUser": "gitpod", + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": true, + "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", + "python.formatting.yapfPath": "/opt/conda/bin/yapf", + "python.linting.flake8Path": "/opt/conda/bin/flake8", + "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", + "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", + "python.linting.pylintPath": "/opt/conda/bin/pylint" + }, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } +} diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000000000000000000000000000000000000..75c2fe61f4886762396871469bba8684f282add2 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,32 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +indent_size = 4 +indent_style = space + +[*.{md,yml,yaml,html,css,scss,js,cff}] +indent_size = 2 + +# These files are edited and tested upstream in nf-core/modules +[/modules/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset +indent_size = unset + +[/assets/email*] +indent_size = unset + +# C++ compiles code +[/bin/cutsite_trimming] +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset +indent_size = unset diff --git a/.gitattributes b/.gitattributes index 7fe55006f87bb1a423e2cdf70258a55543c2486d..7a2dabc29354bc42709be9241603cff642ce5c27 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,4 @@ *.config linguist-language=nextflow +*.nf.test linguist-language=nextflow +modules/nf-core/** linguist-generated +subworkflows/nf-core/** linguist-generated diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 284970f00a1e6d4fa739c1faa0805642fdc0668f..3b558e400aaa1d5349ba9a3ca945664302ca189f 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -15,11 +15,10 @@ Contributions to the code are even more welcome ;) If you'd like to write some code for nf-core/hic, the standard workflow is as follows: -1. Check that there isn't already an issue about your idea in the [nf-core/hic issues](https://github.com/nf-core/hic/issues) to avoid duplicating work - * If there isn't one already, please create one so that others know you're working on this +1. Check that there isn't already an issue about your idea in the [nf-core/hic issues](https://github.com/nf-core/hic/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this 2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/hic repository](https://github.com/nf-core/hic) to your GitHub account 3. 
Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) -4. Use `nf-core schema build .` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). +4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). 5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/). @@ -49,9 +48,9 @@ These tests are run both with the latest available version of `Nextflow` and als :warning: Only in the unlikely and regretful event of a release happening with a bug. -* On your own fork, make a new branch `patch` based on `upstream/master`. -* Fix the bug, and bump version (X.Y.Z+1). -* A PR should be made on `master` from patch to directly this particular bug. +- On your own fork, make a new branch `patch` based on `upstream/master`. +- Fix the bug, and bump version (X.Y.Z+1). +- A PR should be made on `master` from patch to directly this particular bug. ## Getting help @@ -68,22 +67,19 @@ If you wish to contribute a new step, please use the following coding standards: 1. Define the corresponding input channel into your new process from the expected previous process channel 2. Write the process block (see below). 3. Define the output channel if needed (see below). -4. Add any new flags/options to `nextflow.config` with a default (see below). -5. Add any new flags/options to `nextflow_schema.json` with help text (with `nf-core schema build .`). -6. Add any new flags/options to the help message (for integer/text parameters, print to help the corresponding `nextflow.config` parameter). -7. Add sanity checks for all relevant parameters. -8. Add any new software to the `scrape_software_versions.py` script in `bin/` and the version command to the `scrape_software_versions` process in `main.nf`. -9. Do local tests that the new code works properly and as expected. -10. Add a new test command in `.github/workflow/ci.yaml`. -11. If applicable add a [MultiQC](https://https://multiqc.info/) module. -12. Update MultiQC config `assets/multiqc_config.yaml` so relevant suffixes, name clean up, General Statistics Table column order, and module figures are in the right order. -13. Optional: Add any descriptions of MultiQC report sections and output files to `docs/output.md`. +4. Add any new parameters to `nextflow.config` with a default (see below). +5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core schema build` tool). +6. Add sanity checks and validation for all relevant parameters. +7. Perform local tests to validate that the new code works as expected. +8. If applicable, add a new test command in `.github/workflow/ci.yml`. +9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://https://multiqc.info/) module. +10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. 
### Default values Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope. -Once there, use `nf-core schema build .` to add to `nextflow_schema.json`. +Once there, use `nf-core schema build` to add to `nextflow_schema.json`. ### Default processes resource requirements @@ -95,34 +91,29 @@ The process resources can be passed on to the tool dynamically within the proces Please use the following naming schemes, to make it easy to understand what is going where. -* initial process channel: `ch_output_from_<process>` -* intermediate and terminal channels: `ch_<previousprocess>_for_<nextprocess>` +- initial process channel: `ch_output_from_<process>` +- intermediate and terminal channels: `ch_<previousprocess>_for_<nextprocess>` ### Nextflow version bumping If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core bump-version --nextflow . [min-nf-version]` -### Software version reporting - -If you add a new tool to the pipeline, please ensure you add the information of the tool to the `get_software_version` process. - -Add to the script block of the process, something like the following: +### Images and figures -```bash -<YOUR_TOOL> --version &> v_<YOUR_TOOL>.txt 2>&1 || true -``` +For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). -or +## GitHub Codespaces -```bash -<YOUR_TOOL> --help | head -n 1 &> v_<YOUR_TOOL>.txt 2>&1 || true -``` +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. -You then need to edit the script `bin/scrape_software_versions.py` to: +To get started: -1. Add a Python regex for your tool's `--version` output (as in stored in the `v_<YOUR_TOOL>.txt` file), to ensure the version is reported as a `v` and the version number e.g. `v2.1.1` -2. Add a HTML entry to the `OrderedDict` for formatting in MultiQC. +- Open the repo in [Codespaces](https://github.com/nf-core/hic/codespaces) +- Tools installed + - nf-core + - Nextflow -### Images and figures +Devcontainer specs: -For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). +- [DevContainer config](.devcontainer/devcontainer.json) +- [Dockerfile](.devcontainer/Dockerfile) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 81c1e337d22cadb133d32afc4fd097ca49d55075..0000000000000000000000000000000000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,64 +0,0 @@ ---- -name: Bug report -about: Report something that is broken or incorrect -labels: bug ---- - -<!-- -# nf-core/hic bug report - -Hi there! - -Thanks for telling us about a problem with the pipeline. -Please delete this text and anything that's not relevant from the template below: ---> - -## Check Documentation - -I have checked the following places for your error: - -- [ ] [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) -- [ ] [nf-core/hic pipeline documentation](https://nf-co.re/hic/usage) - -## Description of the bug - -<!-- A clear and concise description of what the bug is. --> - -## Steps to reproduce - -Steps to reproduce the behaviour: - -1. Command line: <!-- [e.g. 
`nextflow run ...`] --> -2. See error: <!-- [Please provide your error message] --> - -## Expected behaviour - -<!-- A clear and concise description of what you expected to happen. --> - -## Log files - -Have you provided the following extra information/files: - -- [ ] The command used to run the pipeline -- [ ] The `.nextflow.log` file <!-- this is a hidden file in the directory where you launched the pipeline --> - -## System - -- Hardware: <!-- [e.g. HPC, Desktop, Cloud...] --> -- Executor: <!-- [e.g. slurm, local, awsbatch...] --> -- OS: <!-- [e.g. CentOS Linux, macOS, Linux Mint...] --> -- Version <!-- [e.g. 7, 10.13.6, 18.3...] --> - -## Nextflow Installation - -- Version: <!-- [e.g. 19.10.0] --> - -## Container engine - -- Engine: <!-- [e.g. Conda, Docker, Singularity, Podman, Shifter or Charliecloud] --> -- version: <!-- [e.g. 1.0.0] --> -- Image tag: <!-- [e.g. nfcore/hic:1.0.0] --> - -## Additional context - -<!-- Add any other context about the problem here. --> diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000000000000000000000000000000000000..e405327c328cc2a94ebadf33e9ac4cad766f7461 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,50 @@ +name: Bug report +description: Report something that is broken or incorrect +labels: bug +body: + - type: markdown + attributes: + value: | + Before you post this issue, please check the documentation: + + - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) + - [nf-core/hic pipeline documentation](https://nf-co.re/hic/usage) + + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. + validations: + required: true + + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal. + render: console + placeholder: | + $ nextflow run ... + + Some output where something broke + + - type: textarea + id: files + attributes: + label: Relevant files + description: | + Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed. + Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files. + + - type: textarea + id: system + attributes: + label: System information + description: | + * Nextflow version _(eg. 22.10.1)_ + * Hardware _(eg. HPC, Desktop, Cloud)_ + * Executor _(eg. slurm, local, awsbatch)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ + * OS _(eg. CentOS Linux, macOS, Linux Mint)_ + * Version of nf-core/hic _(eg. 
1.1, 1.5, 1.8.2)_ diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 887f04598ba6bb758d35d2ca79012ccc8129ff34..379c60ae3294f480310d7e5ebeb2f6641dac2374 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,4 +1,3 @@ -blank_issues_enabled: false contact_links: - name: Join nf-core url: https://nf-co.re/join diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index 2cec9b3b778f87d420a0d124094557fe5b8efadf..0000000000000000000000000000000000000000 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -name: Feature request -about: Suggest an idea for the nf-core/hic pipeline -labels: enhancement ---- - -<!-- -# nf-core/hic feature request - -Hi there! - -Thanks for suggesting a new feature for the pipeline! -Please delete this text and anything that's not relevant from the template below: ---> - -## Is your feature request related to a problem? Please describe - -<!-- A clear and concise description of what the problem is. --> - -<!-- e.g. [I'm always frustrated when ...] --> - -## Describe the solution you'd like - -<!-- A clear and concise description of what you want to happen. --> - -## Describe alternatives you've considered - -<!-- A clear and concise description of any alternative solutions or features you've considered. --> - -## Additional context - -<!-- Add any other context about the feature request here. --> diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000000000000000000000000000000000000..d411b185d23b589888a16fc63b564928ba95afca --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,11 @@ +name: Feature request +description: Suggest an idea for the nf-core/hic pipeline +labels: enhancement +body: + - type: textarea + id: description + attributes: + label: Description of feature + description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. + validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index ab821a6a0f04cb49069cbd36f97f843a12405cd1..c67458d182e840707cbccc6914ff1e3d95331b0e 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -10,17 +10,14 @@ Remember that PRs should be made against the dev branch, unless you're preparing Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/hic/tree/master/.github/CONTRIBUTING.md) --> -<!-- markdownlint-disable ul-indent --> ## PR checklist - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! - - [ ] If you've added a new tool - add to the software_versions process and a regex to `scrape_software_versions.py` - - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/hic/tree/master/.github/CONTRIBUTING.md) - - [ ] If necessary, also make a PR on the nf-core/hic _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. -- [ ] Make sure your code lints (`nf-core lint .`). -- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker`). 
+- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/hic/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/hic _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] Make sure your code lints (`nf-core lint`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml deleted file mode 100644 index 8d7eb53b07463c24bd981a479a7d0591fabf7463..0000000000000000000000000000000000000000 --- a/.github/markdownlint.yml +++ /dev/null @@ -1,12 +0,0 @@ -# Markdownlint configuration file -default: true -line-length: false -no-duplicate-header: - siblings_only: true -no-inline-html: - allowed_elements: - - img - - p - - kbd - - details - - summary diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index cefb14a0ab730b7b0247a8efeb72a84da964bd92..ad7e2ddfdff0e494d19ce8baa49db8123d6728a6 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -1,45 +1,31 @@ name: nf-core AWS full size tests # This workflow is triggered on published releases. -# It can be additionally triggered manually with GitHub actions workflow dispatch. +# It can be additionally triggered manually with GitHub actions workflow dispatch button. # It runs the -profile 'test_full' on AWS batch on: - workflow_run: - workflows: ["nf-core Docker push (release)"] - types: [completed] + release: + types: [published] workflow_dispatch: - - -env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - TOWER_ACCESS_TOKEN: ${{ secrets.AWS_TOWER_TOKEN }} - AWS_JOB_DEFINITION: ${{ secrets.AWS_JOB_DEFINITION }} - AWS_JOB_QUEUE: ${{ secrets.AWS_JOB_QUEUE }} - AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }} - - jobs: - run-awstest: + run-tower: name: Run AWS full tests if: github.repository == 'nf-core/hic' runs-on: ubuntu-latest steps: - - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@v2 + - name: Launch workflow via tower + uses: nf-core/tower-action@v3 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/hic/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/hic/results-${{ github.sha }}" + } + profiles: test_full,aws_tower + - uses: actions/upload-artifact@v3 with: - auto-update-conda: true - python-version: 3.7 - - name: Install awscli - run: conda install -c conda-forge awscli - - name: Start AWS batch job - # Add full size test data (but still relatively small datasets for few samples) - # on the `test_full.config` test runs with only one set of parameters - # Then specify `-profile test_full` instead of `-profile test` on the AWS batch command - run: | - aws batch submit-job \ - --region eu-west-1 \ - --job-name nf-core-hic \ - --job-queue $AWS_JOB_QUEUE \ - --job-definition $AWS_JOB_DEFINITION \ - --container-overrides '{"command": ["nf-core/hic", "-r '"${GITHUB_SHA}"' -profile test_full --outdir s3://'"${AWS_S3_BUCKET}"'/hic/results-'"${GITHUB_SHA}"' -w s3://'"${AWS_S3_BUCKET}"'/hic/work-'"${GITHUB_SHA}"' 
-with-tower"], "environment": [{"name": "TOWER_ACCESS_TOKEN", "value": "'"$TOWER_ACCESS_TOKEN"'"}]}' + name: Tower debug log file + path: tower_action_*.log diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index c9eafe60d1f9e232718fe7cd392ba09b984a06ce..3f9b365ffe5729722f94316715fe5e654860c152 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -1,41 +1,29 @@ name: nf-core AWS test -# This workflow is triggered on push to the master branch. -# It can be additionally triggered manually with GitHub actions workflow dispatch. -# It runs the -profile 'test' on AWS batch. +# This workflow can be triggered manually with the GitHub actions workflow dispatch button. +# It runs the -profile 'test' on AWS batch on: workflow_dispatch: - - -env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - TOWER_ACCESS_TOKEN: ${{ secrets.AWS_TOWER_TOKEN }} - AWS_JOB_DEFINITION: ${{ secrets.AWS_JOB_DEFINITION }} - AWS_JOB_QUEUE: ${{ secrets.AWS_JOB_QUEUE }} - AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }} - - jobs: - run-awstest: + run-tower: name: Run AWS tests if: github.repository == 'nf-core/hic' runs-on: ubuntu-latest steps: - - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@v2 + # Launch workflow using Tower CLI tool action + - name: Launch workflow via tower + uses: nf-core/tower-action@v3 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/hic/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/hic/results-test-${{ github.sha }}" + } + profiles: test,aws_tower + - uses: actions/upload-artifact@v3 with: - auto-update-conda: true - python-version: 3.7 - - name: Install awscli - run: conda install -c conda-forge awscli - - name: Start AWS batch job - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using strategy.matrix - run: | - aws batch submit-job \ - --region eu-west-1 \ - --job-name nf-core-hic \ - --job-queue $AWS_JOB_QUEUE \ - --job-definition $AWS_JOB_DEFINITION \ - --container-overrides '{"command": ["nf-core/hic", "-r '"${GITHUB_SHA}"' -profile test --outdir s3://'"${AWS_S3_BUCKET}"'/hic/results-'"${GITHUB_SHA}"' -w s3://'"${AWS_S3_BUCKET}"'/hic/work-'"${GITHUB_SHA}"' -with-tower"], "environment": [{"name": "TOWER_ACCESS_TOKEN", "value": "'"$TOWER_ACCESS_TOKEN"'"}]}' + name: Tower debug log file + path: tower_action_*.log diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 3521022c4d8fd8f3ea7171bef94d9c3c96e2514d..b92e3d24195d72e7a4c39605fd29de3cf657bd06 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -15,7 +15,6 @@ jobs: run: | { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/hic ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] - # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment @@ -43,4 +42,3 @@ jobs: Thanks again for your contribution! 
repo-token: ${{ secrets.GITHUB_TOKEN }} allow-repeats: false - diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0f0db1a211d8626b14bb0d38fa65e12c819d5030..5531e307166fc8f4d5717ba3efc646693560c67b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,50 +8,33 @@ on: release: types: [published] -# Uncomment if we need an edge release of Nextflow again -# env: NXF_EDGE: 1 +env: + NXF_ANSI_LOG: false + +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true jobs: test: - name: Run workflow tests + name: Run pipeline with test data # Only run on push if this is the nf-core dev branch (merged PRs) - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/hic') }} + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/hic') }}" runs-on: ubuntu-latest - env: - NXF_VER: ${{ matrix.nxf_ver }} - NXF_ANSI_LOG: false strategy: matrix: - # Nextflow versions: check pipeline minimum and current latest - nxf_ver: ['20.04.0', ''] + NXF_VER: + - "22.10.1" + - "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Check if Dockerfile or Conda environment changed - uses: technote-space/get-diff-action@v4 - with: - FILES: | - Dockerfile - environment.yml - - - name: Build new docker image - if: env.MATCHED_FILES - run: docker build --no-cache . -t nfcore/hic:1.3.0 - - - name: Pull docker image - if: ${{ !env.MATCHED_FILES }} - run: | - docker pull nfcore/hic:dev - docker tag nfcore/hic:dev nfcore/hic:1.3.0 + uses: actions/checkout@v3 - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" - name: Run pipeline with test data run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker \ No newline at end of file + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml new file mode 100644 index 0000000000000000000000000000000000000000..66550055794ba4b8fa3458edcf8a3e33de57dc30 --- /dev/null +++ b/.github/workflows/fix-linting.yml @@ -0,0 +1,55 @@ +name: Fix linting from a comment +on: + issue_comment: + types: [created] + +jobs: + deploy: + # Only run if comment is on a PR with the main repo, and if it contains the magic keywords + if: > + contains(github.event.comment.html_url, '/pull/') && + contains(github.event.comment.body, '@nf-core-bot fix linting') && + github.repository == 'nf-core/hic' + runs-on: ubuntu-latest + steps: + # Use the @nf-core-bot token to check out so we can push later + - uses: actions/checkout@v3 + with: + token: ${{ secrets.nf_core_bot_auth_token }} + + # Action runs on the issue comment, so we don't get the PR by default + # Use the gh cli to check out the PR + - name: Checkout Pull Request + run: gh pr checkout ${{ github.event.issue.number }} + env: + GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} + + - uses: actions/setup-node@v3 + + - name: Install Prettier + run: npm install -g prettier @prettier/plugin-php + + # Check that we actually need to fix something + - name: Run 'prettier --check' + id: prettier_status + run: | + if prettier --check ${GITHUB_WORKSPACE}; then + echo "result=pass" >> $GITHUB_OUTPUT + else + echo "result=fail" >> $GITHUB_OUTPUT + fi + + - name: Run 
'prettier --write' + if: steps.prettier_status.outputs.result == 'fail' + run: prettier --write ${GITHUB_WORKSPACE} + + - name: Commit & push changes + if: steps.prettier_status.outputs.result == 'fail' + run: | + git config user.email "core@nf-co.re" + git config user.name "nf-core-bot" + git config push.default upstream + git add . + git status + git commit -m "[automated] Fix linting with Prettier" + git push diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index fcde400cedbc1566f84e8a811e0b45a1c113df60..858d622efc884fba438eb48c80d4122443dfa3a0 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -1,63 +1,49 @@ name: nf-core linting # This workflow is triggered on pushes and PRs to the repository. -# It runs the `nf-core lint` and markdown lint tests to ensure that the code meets the nf-core guidelines +# It runs the `nf-core lint` and markdown lint tests to ensure +# that the code meets the nf-core guidelines. on: push: + branches: + - dev pull_request: release: types: [published] jobs: - Markdown: + EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-node@v1 - with: - node-version: '10' - - name: Install markdownlint - run: npm install -g markdownlint-cli - - name: Run Markdownlint - run: markdownlint ${GITHUB_WORKSPACE} -c ${GITHUB_WORKSPACE}/.github/markdownlint.yml + - uses: actions/checkout@v3 - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 - with: - message: | - ## Markdown linting is failing + - uses: actions/setup-node@v3 - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: + - name: Install editorconfig-checker + run: npm install -g editorconfig-checker - * Install `markdownlint-cli` - * On Mac: `brew install markdownlint-cli` - * Everything else: [Install `npm`](https://www.npmjs.com/get-npm) then [install `markdownlint-cli`](https://www.npmjs.com/package/markdownlint-cli) (`npm install -g markdownlint-cli`) - * Fix the markdown errors - * Automatically: `markdownlint . --config .github/markdownlint.yml --fix` - * Manually resolve anything left from `markdownlint . --config .github/markdownlint.yml` + - name: Run ECLint check + run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - Once you push these changes the test should pass, and you can hide this comment :+1: + Prettier: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 - We highly recommend setting up markdownlint in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + - uses: actions/setup-node@v3 - Thanks again for your contribution! 
- repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Install Prettier + run: npm install -g prettier + - name: Run Prettier --check + run: prettier --check ${GITHUB_WORKSPACE} - YAML: + PythonBlack: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 - - uses: actions/setup-node@v1 - with: - node-version: '10' - - name: Install yaml-lint - run: npm install -g yaml-lint - - name: Run yaml-lint - run: yamllint $(find ${GITHUB_WORKSPACE} -type f -name "*.yml" -o -name "*.yaml") + - uses: actions/checkout@v3 + + - name: Check code lints with Black + uses: psf/black@stable # If the above check failed, post a comment on the PR explaining the failure - name: Post PR comment @@ -65,44 +51,35 @@ jobs: uses: mshick/add-pr-comment@v1 with: message: | - ## YAML linting is failing + ## Python linting (`black`) is failing To keep the code consistent with lots of contributors, we run automated code consistency checks. To fix this CI test, please run: - * Install `yaml-lint` - * [Install `npm`](https://www.npmjs.com/get-npm) then [install `yaml-lint`](https://www.npmjs.com/package/yaml-lint) (`npm install -g yaml-lint`) - * Fix the markdown errors - * Run the test locally: `yamllint $(find . -type f -name "*.yml" -o -name "*.yaml")` - * Fix any reported errors in your YAML files + * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` + * Fix formatting errors in your pipeline: `black .` Once you push these changes the test should pass, and you can hide this comment :+1: - We highly recommend setting up yaml-lint in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! Thanks again for your contribution! 
repo-token: ${{ secrets.GITHUB_TOKEN }} allow-repeats: false - nf-core: runs-on: ubuntu-latest steps: - - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@v1 + - uses: actions/setup-python@v4 with: - python-version: '3.6' - architecture: 'x64' + python-version: "3.7" + architecture: "x64" - name: Install dependencies run: | @@ -114,7 +91,7 @@ jobs: GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} - run: nf-core -l lint_log.txt lint ${GITHUB_WORKSPACE} --markdown lint_results.md + run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - name: Save PR number if: ${{ always() }} @@ -122,11 +99,10 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: linting-logs path: | lint_log.txt lint_results.md PR_number.txt - diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 90f03c6f91ba0c05d12daff7e15840145a63937f..0bbcd30f23effefe9ac5a7a49cc16f43140c20a7 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -1,4 +1,3 @@ - name: nf-core linting comment # This workflow is triggered after the linting action is complete # It posts an automated comment to the PR, even if the PR is coming from a fork @@ -15,10 +14,11 @@ jobs: uses: dawidd6/action-download-artifact@v2 with: workflow: linting.yml + workflow_conclusion: completed - name: Get PR number id: pr_number - run: echo "::set-output name=pr_number::$(cat linting-logs/PR_number.txt)" + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment uses: marocchino/sticky-pull-request-comment@v2 @@ -26,4 +26,3 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} path: linting-logs/lint_results.md - diff --git a/.github/workflows/push_dockerhub_dev.yml b/.github/workflows/push_dockerhub_dev.yml deleted file mode 100644 index d6fc716fb947d262e0613ccab07655d387a98d1f..0000000000000000000000000000000000000000 --- a/.github/workflows/push_dockerhub_dev.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: nf-core Docker push (dev) -# This builds the docker image and pushes it to DockerHub -# Runs on nf-core repo releases and push event to 'dev' branch (PR merges) -on: - push: - branches: - - dev - -jobs: - push_dockerhub: - name: Push new Docker image to Docker Hub (dev) - runs-on: ubuntu-latest - # Only run for the nf-core repo, for releases and merged PRs - if: ${{ github.repository == 'nf-core/hic' }} - env: - DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }} - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Build new docker image - run: docker build --no-cache . 
-t nfcore/hic:dev - - - name: Push Docker image to DockerHub (dev) - run: | - echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin - docker push nfcore/hic:dev diff --git a/.github/workflows/push_dockerhub_release.yml b/.github/workflows/push_dockerhub_release.yml deleted file mode 100644 index eda09ccfb6fcdd5791c56b6875343dc2bbf9c8c0..0000000000000000000000000000000000000000 --- a/.github/workflows/push_dockerhub_release.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: nf-core Docker push (release) -# This builds the docker image and pushes it to DockerHub -# Runs on nf-core repo releases and push event to 'dev' branch (PR merges) -on: - release: - types: [published] - -jobs: - push_dockerhub: - name: Push new Docker image to Docker Hub (release) - runs-on: ubuntu-latest - # Only run for the nf-core repo, for releases and merged PRs - if: ${{ github.repository == 'nf-core/hic' }} - env: - DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }} - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Build new docker image - run: docker build --no-cache . -t nfcore/hic:latest - - - name: Push Docker image to DockerHub (release) - run: | - echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin - docker push nfcore/hic:latest - docker tag nfcore/hic:latest nfcore/hic:${{ github.event.release.tag_name }} - docker push nfcore/hic:${{ github.event.release.tag_name }} diff --git a/.gitignore b/.gitignore index aa4bb5b375a9021f754dbd91d2321d16d1c0afc7..5124c9ac77e036998a69ae8e8e89f9ff6f9bcdb3 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,6 @@ work/ data/ results/ .DS_Store -tests/ testing/ testing* *.pyc diff --git a/.gitpod.yml b/.gitpod.yml new file mode 100644 index 0000000000000000000000000000000000000000..85d95ecc8eca8c1d7110ba1dd58d649e19bdc008 --- /dev/null +++ b/.gitpod.yml @@ -0,0 +1,14 @@ +image: nfcore/gitpod:latest + +vscode: + extensions: # based on nf-core.nf-core-extensionpack + - codezombiech.gitignore # Language support for .gitignore files + # - cssho.vscode-svgviewer # SVG viewer + - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code + - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed + - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files + - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar + - mechatroner.rainbow-csv # Highlight columns in csv files in different colors + # - nextflow.nextflow # Nextflow syntax highlighting + - oderwat.indent-rainbow # Highlight indentation level + - streetsidesoftware.code-spell-checker # Spelling checker for source code diff --git a/.nf-core-lint.yml b/.nf-core-lint.yml deleted file mode 100644 index a24fddf163b17b87225a0073de318213abb59c16..0000000000000000000000000000000000000000 --- a/.nf-core-lint.yml +++ /dev/null @@ -1,3 +0,0 @@ -files_unchanged: - - .github/ISSUE_TEMPLATE/bug_report.md - - .github/PULL_REQUEST_TEMPLATE.md diff --git a/.nf-core.yml b/.nf-core.yml new file mode 100644 index 0000000000000000000000000000000000000000..3805dc81c144cd8f7bf7e49106934a313cb7667a --- /dev/null +++ b/.nf-core.yml @@ -0,0 +1 @@ +repository_type: pipeline diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 0000000000000000000000000000000000000000..437d763d0c2c8fdeb5f8f7e1e04d54771d9b46d6 --- /dev/null +++ b/.prettierignore @@ -0,0 
+1,12 @@ +email_template.html +adaptivecard.json +slackreport.json +.nextflow* +work/ +data/ +results/ +.DS_Store +testing/ +testing* +*.pyc +bin/ diff --git a/.prettierrc.yml b/.prettierrc.yml new file mode 100644 index 0000000000000000000000000000000000000000..c81f9a7660b0c0b8df99c5c3f87ce16ef0783917 --- /dev/null +++ b/.prettierrc.yml @@ -0,0 +1 @@ +printWidth: 120 diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a2d45fe9c8d83de81d5859fd4836d3746170d73..c111c7fb1930ea9ce19d66cfdd137b4f4b8417c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,114 +3,132 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.3.0 - 2021-22-05 - -* Change the `/tmp/` folder to `./tmp/` folder so that all tmp files are now in the work directory (#24) -* Add `--hicpro_maps` options to generate the raw and normalized HiC-Pro maps. The default is now to use cooler -* Add chromosome compartments calling with cooltools (#53) -* Add HiCExplorer distance decay quality control (#54) -* Add HiCExplorer TADs calling (#55) -* Add insulation score TADs calling (#55) -* Generate cooler/txt contact maps -* Normalize Hi-C data with cooler instead of iced -* New `--digestion` parameter to automatically set the restriction_site and ligation_site motifs -* New `--keep_multi` and `keep_dup` options. Default: false -* Template update for nf-core/tools -* Minor fix to summary log messages in pipeline header +## v2.0.0 - 2023-01-12 + +### `Added` + +- DSL2 version of nf-core-hic pipeline +- Add full test dataset (#80) +- Replace local modules by the cooler nf-core module + +### `Fixed` + +- Fix error in the Arima preset (#127) + +## v1.3.1 - 2021-09-25 + +### `Fixed` + +- Fix bug in conda environment for cooltools (#109) + +## v1.3.0 - 2021-05-22 + +- Change the `/tmp/` folder to `./tmp/` folder so that all tmp files are now in the work directory (#24) +- Add `--hicpro_maps` options to generate the raw and normalized HiC-Pro maps. The default is now to use cooler +- Add chromosome compartments calling with cooltools (#53) +- Add HiCExplorer distance decay quality control (#54) +- Add HiCExplorer TADs calling (#55) +- Add insulation score TADs calling (#55) +- Generate cooler/txt contact maps +- Normalize Hi-C data with cooler instead of iced +- New `--digestion` parameter to automatically set the restriction_site and ligation_site motifs +- New `--keep_multi` and `keep_dup` options. 
Default: false +- Template update for nf-core/tools +- Minor fix to summary log messages in pipeline header ### `Fixed` -* Fix bug in stats report which were not all correcly exported in the results folder -* Fix recurrent bug in input file extension (#86) -* Fix bug in `--bin_size` parameter (#85) -* `--min_mapq` is ignored if `--keep_multi` is used +- Fix bug in stats report which were not all correcly exported in the results folder +- Fix recurrent bug in input file extension (#86) +- Fix bug in `--bin_size` parameter (#85) +- `--min_mapq` is ignored if `--keep_multi` is used ### `Deprecated` -* `--rm_dup` and `--rm_multi` are replaced by `--keep_dups` and `--keep_multi` +- `--rm_dup` and `--rm_multi` are replaced by `--keep_dups` and `--keep_multi` ## v1.2.2 - 2020-09-02 ### `Added` -* Template update for nf-core/tools v1.10.2 -* Add the `--fastq_chunks_size` to specify the number of reads per chunks if split_fastq is true +- Template update for nf-core/tools v1.10.2 +- Add the `--fastq_chunks_size` to specify the number of reads per chunks if split_fastq is true ### `Fixed` -* Bug in `--split_fastq` option not recognized +- Bug in `--split_fastq` option not recognized ## v1.2.1 - 2020-07-06 ### `Fixed` -* Fix issue with `--fasta` option and `.fa` extension (#66) +- Fix issue with `--fasta` option and `.fa` extension (#66) ## v1.2.0 - 2020-06-18 ### `Added` -* Bump v1.2.0 -* Merge template nf-core 1.9 -* Move some options to camel_case -* Update python scripts for python3 -* Update conda environment file - * python base `2.7.15` > `3.7.6` - * pip `19.1` > `20.0.1` - * scipy `1.2.1` > `1.4.1` - * numpy `1.16.3` > `1.18.1` - * bx-python `0.8.2` > `0.8.8` - * pysam `0.15.2` > `0.15.4` - * cooler `0.8.5` > `0.8.6` - * multiqc `1.7` > `1.8` - * iced `0.5.1` > `0.5.6` - * *_New_* pymdown-extensions `7.1` - * *_New_* hicexplorer `3.4.3` - * *_New_* bioconductor-hitc `1.32.0` - * *_New_* r-optparse `1.6.6` - * *_New_* ucsc-bedgraphtobigwig `377` - * *_New_* cython `0.29.19` - * *_New_* cooltools `0.3.2` - * *_New_* fanc `0.8.30` - * *_Removed_* r-markdown +- Bump v1.2.0 +- Merge template nf-core 1.9 +- Move some options to camel_case +- Update python scripts for python3 +- Update conda environment file + - python base `2.7.15` > `3.7.6` + - pip `19.1` > `20.0.1` + - scipy `1.2.1` > `1.4.1` + - numpy `1.16.3` > `1.18.1` + - bx-python `0.8.2` > `0.8.8` + - pysam `0.15.2` > `0.15.4` + - cooler `0.8.5` > `0.8.6` + - multiqc `1.7` > `1.8` + - iced `0.5.1` > `0.5.6` + - _*New*_ pymdown-extensions `7.1` + - _*New*_ hicexplorer `3.4.3` + - _*New*_ bioconductor-hitc `1.32.0` + - _*New*_ r-optparse `1.6.6` + - _*New*_ ucsc-bedgraphtobigwig `377` + - _*New*_ cython `0.29.19` + - _*New*_ cooltools `0.3.2` + - _*New*_ fanc `0.8.30` + - _*Removed*_ r-markdown ### `Fixed` -* Fix error in doc for Arima kit usage -* Sort output of `get_valid_interaction` process as the input files of `remove_duplicates` -are expected to be sorted (sort -m) +- Fix error in doc for Arima kit usage +- Sort output of `get_valid_interaction` process as the input files of `remove_duplicates` + are expected to be sorted (sort -m) ### `Deprecated` -* Command line options converted to `camel_case`: - * `--skipMaps` > `--skip_maps` - * `--skipIce` > `--skip_ice` - * `--skipCool` > `--skip_cool` - * `--skipMultiQC` > `--skip_multiqc` - * `--saveReference` > `--save_reference` - * `--saveAlignedIntermediates` > `--save_aligned_intermediates` - * `--saveInteractionBAM` > `--save_interaction_bam` +- Command line options converted to 
`camel_case`: + - `--skipMaps` > `--skip_maps` + - `--skipIce` > `--skip_ice` + - `--skipCool` > `--skip_cool` + - `--skipMultiQC` > `--skip_multiqc` + - `--saveReference` > `--save_reference` + - `--saveAlignedIntermediates` > `--save_aligned_intermediates` + - `--saveInteractionBAM` > `--save_interaction_bam` ## v1.1.1 - 2020-04-02 ### `Fixed` -* Fix bug in tag. Remove '[' +- Fix bug in tag. Remove '[' ## v1.1.0 - 2019-10-15 ### `Added` -* Update hicpro2higlass with `-p` parameter -* Support 'N' base motif in restriction/ligation sites -* Support multiple restriction enzymes/ligattion sites (comma separated) ([#31](https://github.com/nf-core/hic/issues/31)) -* Add --saveInteractionBAM option -* Add DOI ([#29](https://github.com/nf-core/hic/issues/29)) -* Update manual ([#28](https://github.com/nf-core/hic/issues/28)) +- Update hicpro2higlass with `-p` parameter +- Support 'N' base motif in restriction/ligation sites +- Support multiple restriction enzymes/ligattion sites (comma separated) ([#31](https://github.com/nf-core/hic/issues/31)) +- Add --saveInteractionBAM option +- Add DOI ([#29](https://github.com/nf-core/hic/issues/29)) +- Update manual ([#28](https://github.com/nf-core/hic/issues/28)) ### `Fixed` -* Fix bug for reads extension `_1`/`_2` ([#30](https://github.com/nf-core/hic/issues/30)) +- Fix bug for reads extension `_1`/`_2` ([#30](https://github.com/nf-core/hic/issues/30)) ## v1.0 - [2019-05-06] @@ -126,11 +144,11 @@ DNase Hi-C, Micro-C, capture-C or HiChip data. In summary, this version allows : -* Automatic detection and generation of annotation files based on igenomes -if not provided. -* Two-steps alignment of raw sequencing reads -* Reads filtering and detection of valid interaction products -* Generation of raw contact matrices for a set of resolutions -* Normalization of the contact maps using the ICE algorithm -* Generation of cooler file for visualization on [higlass](https://higlass.io/) -* Quality report based on HiC-Pro MultiQC module +- Automatic detection and generation of annotation files based on igenomes + if not provided. +- Two-steps alignment of raw sequencing reads +- Reads filtering and detection of valid interaction products +- Generation of raw contact matrices for a set of resolutions +- Normalization of the contact maps using the ICE algorithm +- Generation of cooler file for visualization on [higlass](https://higlass.io/) +- Quality report based on HiC-Pro MultiQC module diff --git a/CITATIONS.md b/CITATIONS.md new file mode 100644 index 0000000000000000000000000000000000000000..0313a1a90267497fb6e5ccc8788838163da43100 --- /dev/null +++ b/CITATIONS.md @@ -0,0 +1,39 @@ +# nf-core/hic: Citations + +## [HiC-Pro](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-015-0831-x) + +> Servant N, Varoquaux N, Lajoie BR, Viara E, Chen C, Vert JP, Dekker J, Heard E, Barillot E. Genome Biology 2015, 16:259 doi: [10.1186/s13059-015-0831-x](https://dx.doi.org/10.1186/s13059-015-0831-x) + +## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) + +> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. + +## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/) + +> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. 
doi: 10.1038/nbt.3820. PubMed PMID: 28398311. + +## Pipeline tools + +- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + +- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + +## Software packaging/containerisation tools + +- [Anaconda](https://anaconda.com) + + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + +- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) + + > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + +- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) + + > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + +- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + +- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 05547b6ad9405aef4f6e7bdf999ac011d085d187..0000000000000000000000000000000000000000 --- a/Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -FROM nfcore/base:1.14 -LABEL authors="Nicolas Servant" \ - description="Docker image containing all software requirements for the nf-core/hic pipeline" - -## Install gcc for pip iced install -RUN apt-get update && apt-get install -y gcc g++ && apt-get clean -y - -# Install the conda environment -COPY environment.yml / -RUN conda env create --quiet -f /environment.yml && conda clean -a - -# Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-hic-1.3.0/bin:$PATH - -# Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-hic-1.3.0 > nf-core-hic-1.3.0.yml - -# Instruct R processes to use these empty files instead of clashing with a local version -RUN touch .Rprofile -RUN touch .Renviron diff --git a/README.md b/README.md index cb88454ceec117d40a3f43251d85a67078085b99..973e1321c8c96f11bf6c704ca4372aea401cb8ac 100644 --- a/README.md +++ b/README.md @@ -1,84 +1,69 @@ -#  +#   -**Analysis of Chromosome Conformation Capture data (Hi-C)**. 
+[](https://nf-co.re/hic/results)[](https://doi.org/10.5281/zenodo.2669512) -[](https://github.com/nf-core/hic/actions) -[](https://github.com/nf-core/hic/actions) -[](https://www.nextflow.io/) +[](https://www.nextflow.io/) +[](https://docs.conda.io/en/latest/) +[](https://www.docker.com/) +[](https://sylabs.io/docs/) +[](https://tower.nf/launch?pipeline=https://github.com/nf-core/hic) -[](https://bioconda.github.io/) -[](https://hub.docker.com/r/nfcore/hic) - -[](https://doi.org/10.5281/zenodo.2669513) -[](https://nfcore.slack.com/channels/hic) +[](https://nfcore.slack.com/channels/hic)[](https://twitter.com/nf_core)[](https://www.youtube.com/c/nf-core) ## Introduction -This pipeline was originally set up from the -[HiC-Pro workflow](https://github.com/nservant/HiC-Pro). -It was designed to process Hi-C data from raw FastQ files (paired-end Illumina -data) to normalized contact maps. -The current version supports most protocols, including digestion protocols as -well as protocols that do not require restriction enzymes such as DNase Hi-C. -In practice, this workflow was successfully applied to many data-sets including -dilution Hi-C, in situ Hi-C, DNase Hi-C, Micro-C, capture-C, capture Hi-C or -HiChip data. - -Contact maps are generated in standard formats including HiC-Pro, and cooler for -downstream analysis and visualization. -Addition analysis steps such as compartments and TADs calling are also available. - -The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool -to run tasks across multiple compute infrastructures in a very portable manner. -It comes with docker / singularity containers making installation trivial and -results highly reproducible. +**nf-core/hic** is a bioinformatics best-practice analysis pipeline for Analysis of Chromosome Conformation Capture data (Hi-C). + +The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! + +On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources.The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/hic/results). ## Pipeline summary -1. HiC-Pro data processing ([`HiC-Pro`](https://github.com/nservant/HiC-Pro)) - 1. Mapping using a two steps strategy to rescue reads spanning the ligation - sites ([`bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) - 2. Detection of valid interaction products - 3. Duplicates removal - 4. Generate raw and normalized contact maps ([`iced`](https://github.com/hiclib/iced)) -2. Create genome-wide contact maps at various resolutions ([`cooler`](https://github.com/open2c/cooler)) -3. 
Contact maps normalization using balancing algorithm ([`cooler`](https://github.com/open2c/cooler)) -4. Export to various contact maps formats ([`HiC-Pro`](https://github.com/nservant/HiC-Pro), [`cooler`](https://github.com/open2c/cooler)) -5. Quality controls ([`HiC-Pro`](https://github.com/nservant/HiC-Pro), [`HiCExplorer`](https://github.com/deeptools/HiCExplorer)) -6. Compartments calling ([`cooltools`](https://cooltools.readthedocs.io/en/latest/)) -7. TADs calling ([`HiCExplorer`](https://github.com/deeptools/HiCExplorer), [`cooltools`](https://cooltools.readthedocs.io/en/latest/)) -8. Quality control report ([`MultiQC`](https://multiqc.info/)) +1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +2. Hi-C data processing + 1. [`HiC-Pro`](https://github.com/nservant/HiC-Pro) + 1. Mapping using a two steps strategy to rescue reads spanning the ligation + sites ([`bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) + 2. Detection of valid interaction products + 3. Duplicates removal + 4. Generate raw and normalized contact maps ([`iced`](https://github.com/hiclib/iced)) +3. Create genome-wide contact maps at various resolutions ([`cooler`](https://github.com/open2c/cooler)) +4. Contact maps normalization using balancing algorithm ([`cooler`](https://github.com/open2c/cooler)) +5. Export to various contact maps formats ([`HiC-Pro`](https://github.com/nservant/HiC-Pro), [`cooler`](https://github.com/open2c/cooler)) +6. Quality controls ([`HiC-Pro`](https://github.com/nservant/HiC-Pro), [`HiCExplorer`](https://github.com/deeptools/HiCExplorer)) +7. Compartments calling ([`cooltools`](https://cooltools.readthedocs.io/en/latest/)) +8. TADs calling ([`HiCExplorer`](https://github.com/deeptools/HiCExplorer), [`cooltools`](https://cooltools.readthedocs.io/en/latest/)) +9. Quality control report ([`MultiQC`](https://multiqc.info/)) ## Quick Start -1. Install [`nextflow`](https://nf-co.re/usage/installation) (`>=20.04.0`) +1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.10.1`) -2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_ +2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_. -3. Download the pipeline and test it on a minimal dataset with a single command +3. 
Download the pipeline and test it on a minimal dataset with a single command: - ```bash - nextflow run nf-core/hic -profile test,<docker/singularity/podman/shifter/charliecloud/conda/institute> - ``` + ```bash + nextflow run nf-core/hic -profile test,YOURPROFILE --outdir <OUTDIR> + ``` - > Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) - to see if a custom config file to run nf-core pipelines already exists for your Institute. - If so, you can simply use `-profile <institute>` in your command. - This will enable either `docker` or `singularity` and set the appropriate execution - settings for your local compute environment. + Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string. + + > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`. + > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. + > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs. + > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs. 4. Start running your own analysis! - ```bash - nextflow run nf-core/hic -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> --input '*_R{1,2}.fastq.gz' --genome GRCh37 - ``` + ```bash + nextflow run nf-core/hic --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> + ``` ## Documentation -The nf-core/hic pipeline comes with documentation about the pipeline: [usage](https://nf-co.re/hic/usage) and [output](https://nf-co.re/hic/output). - -For further information or help, don't hesitate to get in touch on [Slack](https://nfcore.slack.com/channels/hic). -You can join with [this invite](https://nf-co.re/join/slack). +The nf-core/hic pipeline comes with documentation about the pipeline [usage](https://nf-co.re/hic/usage), [parameters](https://nf-co.re/hic/parameters) and [output](https://nf-co.re/hic/output). ## Credits @@ -90,10 +75,11 @@ If you would like to contribute to this pipeline, please see the [contributing g For further information or help, don't hesitate to get in touch on the [Slack `#hic` channel](https://nfcore.slack.com/channels/hic) (you can join with [this invite](https://nf-co.re/join/slack)). 
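+A minimal end-to-end launch that combines the Quick Start steps above could look like the sketch below. It is illustrative only: the sample name, FASTQ paths, output directory, reference genome and `docker` profile are placeholders to adapt to your own data and compute setup, and the samplesheet layout simply follows the bundled `assets/samplesheet.csv` template (`sample,fastq_1,fastq_2`).
+
+```bash
+# Write a minimal samplesheet: one row per paired-end sequencing run.
+# Columns follow assets/samplesheet.csv: sample,fastq_1,fastq_2
+cat <<EOF > samplesheet.csv
+sample,fastq_1,fastq_2
+HIC_LIB1,/path/to/HIC_LIB1_R1.fastq.gz,/path/to/HIC_LIB1_R2.fastq.gz
+EOF
+
+# Launch the pipeline as in Quick Start step 4, here with the Docker profile.
+nextflow run nf-core/hic \
+    --input samplesheet.csv \
+    --outdir ./results \
+    --genome GRCh37 \
+    -profile docker
+```
+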
-## Citation +## Citations -If you use nf-core/hic for your analysis, please cite it using the following -doi: [10.5281/zenodo.2669513](https://doi.org/10.5281/zenodo.2669513) +If you use nf-core/hic for your analysis, please cite it using the following doi: doi: [10.5281/zenodo.2669512](https://doi.org/10.5281/zenodo.2669512) + +An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. You can cite the `nf-core` publication as follows: @@ -102,11 +88,3 @@ You can cite the `nf-core` publication as follows: > Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen. > > _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x). - -In addition, references of tools and data used in this pipeline are as follows: - -> **HiC-Pro: An optimized and flexible pipeline for Hi-C processing.** -> -> Nicolas Servant, Nelle Varoquaux, Bryan R. Lajoie, Eric Viara, Chongjian Chen, Jean-Philippe Vert, Job Dekker, Edith Heard, Emmanuel Barillot. -> -> Genome Biology 2015, 16:259 doi: [10.1186/s13059-015-0831-x](https://dx.doi.org/10.1186/s13059-015-0831-x) diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 0000000000000000000000000000000000000000..79f9dbe9924bf336c47a8ad509ae5bf68e430640 --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/hic v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. 
The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/email_template.txt b/assets/email_template.txt index a951c5e7f965fa5829707fc84f4351495995190f..6905d6fc70619b99a53a9c51670b026095e2965f 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -6,7 +6,6 @@ `._,._,' nf-core/hic v${version} ---------------------------------------------------- - Run Name: $runName <% if (success){ diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 0000000000000000000000000000000000000000..2f0a30855b2ab842a1f5b16cb9f5ce8437f96dbd --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,26 @@ +id: "nf-core-hic-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "nf-core/hic Methods Description" +section_href: "https://github.com/nf-core/hic" +plot_type: "html" +## nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline +## You inject any metadata in the Nextflow '${workflow}' object +data: | + <h4>Methods</h4> + <p>Data was processed using nf-core/hic v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (<a href="https://doi.org/10.1038/s41587-020-0439-x">Ewels <em>et al.</em>, 2020</a>).</p> + <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (<a href="https://doi.org/10.1038/nbt.3820">Di Tommaso <em>et al.</em>, 2017</a>) with the following command:</p> + <pre><code>${workflow.commandLine}</code></pre> + <h4>References</h4> + <ul> + <li>Servant, N., Ewels, P. A., Peltzer, A., Garcia, M. U. (2021) nf-core/hic. Zenodo. https://doi.org/10.5281/zenodo.2669512</li> + <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. <a href="https://doi.org/10.1038/nbt.3820">https://doi.org/10.1038/nbt.3820</a></li> + <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. <a href="https://doi.org/10.1038/s41587-020-0439-x">https://doi.org/10.1038/s41587-020-0439-x</a></li> + </ul> + <div class="alert alert-info"> + <h5>Notes:</h5> + <ul> + ${nodoi_text} + <li>The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!</li> + <li>You should also cite all software used within this run. 
Check the "Software Versions" of this report to get version information.</li> + </ul> + </div> diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml deleted file mode 100644 index 41468cab303a5894aa01e0823790b22cb44c95cd..0000000000000000000000000000000000000000 --- a/assets/multiqc_config.yaml +++ /dev/null @@ -1,11 +0,0 @@ -report_comment: > - This report has been generated by the <a href="https://github.com/nf-core/hic" target="_blank">nf-core/hic</a> - analysis pipeline. For information about how to interpret these results, please see the - <a href="https://github.com/nf-core/hic" target="_blank">documentation</a>. -report_section_order: - software_versions: - order: -1000 - nf-core-hic-summary: - order: -1001 - -export_plots: true diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml new file mode 100644 index 0000000000000000000000000000000000000000..b2cf07d846fa86aa4100cd1ac05fd72b3827dde4 --- /dev/null +++ b/assets/multiqc_config.yml @@ -0,0 +1,13 @@ +report_comment: > + This report has been generated by the <a href="https://github.com/nf-core/hic" target="_blank">nf-core/hic</a> + analysis pipeline. For information about how to interpret these results, please see the + <a href="https://nf-co.re/hic" target="_blank">documentation</a>. +report_section_order: + "nf-core-hic-methods-description": + order: -1000 + software_versions: + order: -1001 + "nf-core-hic-summary": + order: -1002 + +export_plots: true diff --git a/assets/nf-core-hic_logo.png b/assets/nf-core-hic_logo.png deleted file mode 100644 index 37461d9a32ae1f73d9090a3a2387cf8997c9a0ed..0000000000000000000000000000000000000000 Binary files a/assets/nf-core-hic_logo.png and /dev/null differ diff --git a/assets/nf-core-hic_logo_light.png b/assets/nf-core-hic_logo_light.png new file mode 100644 index 0000000000000000000000000000000000000000..553c19d982fcf9e1e0c9a5f325a8ff290fc087f9 Binary files /dev/null and b/assets/nf-core-hic_logo_light.png differ diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv new file mode 100644 index 0000000000000000000000000000000000000000..e699919c0e9610e4082734cdd164b3629cb8c4a2 --- /dev/null +++ b/assets/samplesheet.csv @@ -0,0 +1,2 @@ +sample,fastq_1,fastq_2 +SRR4292758,https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R1.fastq.gz,https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R2.fastq.gz diff --git a/assets/schema_input.json b/assets/schema_input.json new file mode 100644 index 0000000000000000000000000000000000000000..fae1a32c7c094ab593690ee24f0544cf725b5b13 --- /dev/null +++ b/assets/schema_input.json @@ -0,0 +1,36 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/hic/master/assets/schema_input.json", + "title": "nf-core/hic pipeline - params.input schema", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "sample": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Sample name must be provided and cannot contain spaces" + }, + "fastq_1": { + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "fastq_2": { + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$" + }, + { + "type": 
"string", + "maxLength": 0 + } + ] + } + }, + "required": ["sample", "fastq_1"] + } +} diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index bdf905878111122e2d6b6983f72e6a04c78e97b5..3c7f7236d7a173bc0d6c1dc6c5cd1078d5a83786 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -12,18 +12,18 @@ $email_html Content-Type: image/png;name="nf-core-hic_logo.png" Content-Transfer-Encoding: base64 Content-ID: <nfcorepipelinelogo> -Content-Disposition: inline; filename="nf-core-hic_logo.png" +Content-Disposition: inline; filename="nf-core-hic_logo_light.png" -<% out << new File("$projectDir/assets/nf-core-hic_logo.png"). - bytes. - encodeBase64(). - toString(). - tokenize( '\n' )*. - toList()*. - collate( 76 )*. - collect { it.join() }. - flatten(). - join( '\n' ) %> +<% out << new File("$projectDir/assets/nf-core-hic_logo_light.png"). + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' ) %> <% if (mqcFile){ @@ -37,15 +37,15 @@ Content-ID: <mqcreport> Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" ${mqcFileObj. - bytes. - encodeBase64(). - toString(). - tokenize( '\n' )*. - toList()*. - collate( 76 )*. - collect { it.join() }. - flatten(). - join( '\n' )} + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' )} """ }} %> diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 0000000000000000000000000000000000000000..043d02f27570da8e53dd7d3dd6d0a640cfa4636d --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "sanger-tol/readmapping v${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? 
("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/bin/build_matrix b/bin/build_matrix index c61c6176c46edf71a6be8dcd3d090c0c1a0b9c4a..15aa38e07e09efee5ed6c688f90ae0af21365393 100755 Binary files a/bin/build_matrix and b/bin/build_matrix differ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py new file mode 100755 index 0000000000000000000000000000000000000000..c498ef45ee96e33a8d4f81b0be948028c712db61 --- /dev/null +++ b/bin/check_samplesheet.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python + +"""Provide a command line tool to validate and transform tabular samplesheets.""" + + +import argparse +import csv +import logging +import sys +from collections import Counter +from pathlib import Path + +logger = logging.getLogger() + + +class RowChecker: + """ + Define a service that can validate and transform each given row. + + Attributes: + modified (list): A list of dicts, where each dict corresponds to a previously + validated and transformed row. The order of rows is maintained. + + """ + + VALID_FORMATS = ( + ".fq.gz", + ".fastq.gz", + ) + + def __init__( + self, + sample_col="sample", + first_col="fastq_1", + second_col="fastq_2", + single_col="single_end", + **kwargs, + ): + """ + Initialize the row checker with the expected column names. + + Args: + sample_col (str): The name of the column that contains the sample name + (default "sample"). + first_col (str): The name of the column that contains the first (or only) + FASTQ file path (default "fastq_1"). + second_col (str): The name of the column that contains the second (if any) + FASTQ file path (default "fastq_2"). + single_col (str): The name of the new column that will be inserted and + records whether the sample contains single- or paired-end sequencing + reads (default "single_end"). + + """ + super().__init__(**kwargs) + self._sample_col = sample_col + self._first_col = first_col + self._second_col = second_col + self._single_col = single_col + self._seen = set() + self.modified = [] + + def validate_and_transform(self, row): + """ + Perform all validations on the given row and insert the read pairing status. + + Args: + row (dict): A mapping from column headers (keys) to elements of that row + (values). + + """ + self._validate_sample(row) + self._validate_first(row) + self._validate_second(row) + self._validate_pair(row) + self._seen.add((row[self._sample_col], row[self._first_col])) + self.modified.append(row) + + def _validate_sample(self, row): + """Assert that the sample name exists and convert spaces to underscores.""" + if len(row[self._sample_col]) <= 0: + raise AssertionError("Sample input is required.") + # Sanitize samples slightly. 
+ row[self._sample_col] = row[self._sample_col].replace(" ", "_") + + def _validate_first(self, row): + """Assert that the first FASTQ entry is non-empty and has the right format.""" + if len(row[self._first_col]) <= 0: + raise AssertionError("At least the first FASTQ file is required.") + self._validate_fastq_format(row[self._first_col]) + + def _validate_second(self, row): + """Assert that the second FASTQ entry has the right format if it exists.""" + if len(row[self._second_col]) > 0: + self._validate_fastq_format(row[self._second_col]) + + def _validate_pair(self, row): + """Assert that read pairs have the same file extension. Report pair status.""" + if row[self._first_col] and row[self._second_col]: + row[self._single_col] = False + first_col_suffix = Path(row[self._first_col]).suffixes[-2:] + second_col_suffix = Path(row[self._second_col]).suffixes[-2:] + if first_col_suffix != second_col_suffix: + raise AssertionError("FASTQ pairs must have the same file extensions.") + else: + row[self._single_col] = True + + def _validate_fastq_format(self, filename): + """Assert that a given filename has one of the expected FASTQ extensions.""" + if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): + raise AssertionError( + f"The FASTQ file has an unrecognized extension: {filename}\n" + f"It should be one of: {', '.join(self.VALID_FORMATS)}" + ) + + def validate_unique_samples(self): + """ + Assert that the combination of sample name and FASTQ filename is unique. + + In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the + number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. + + """ + if len(self._seen) != len(self.modified): + raise AssertionError("The pair of sample name and FASTQ must be unique.") + seen = Counter() + for row in self.modified: + sample = row[self._sample_col] + seen[sample] += 1 + ##row[self._sample_col] = f"{sample}_T{seen[sample]}" + + +def read_head(handle, num_lines=10): + """Read the specified number of lines from the current position in the file.""" + lines = [] + for idx, line in enumerate(handle): + if idx == num_lines: + break + lines.append(line) + return "".join(lines) + + +def sniff_format(handle): + """ + Detect the tabular format. + + Args: + handle (text file): A handle to a `text file`_ object. The read position is + expected to be at the beginning (index 0). + + Returns: + csv.Dialect: The detected tabular format. + + .. _text file: + https://docs.python.org/3/glossary.html#term-text-file + + """ + peek = read_head(handle) + handle.seek(0) + sniffer = csv.Sniffer() + if not sniffer.has_header(peek): + logger.critical("The given sample sheet does not appear to contain a header.") + sys.exit(1) + dialect = sniffer.sniff(peek) + return dialect + + +def check_samplesheet(file_in, file_out): + """ + Check that the tabular samplesheet has the structure expected by nf-core pipelines. + + Validate the general shape of the table, expected columns, and each row. Also add + an additional column which records whether one or two FASTQ reads were found. + + Args: + file_in (pathlib.Path): The given tabular samplesheet. The format can be either + CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. + file_out (pathlib.Path): Where the validated and transformed samplesheet should + be created; always in CSV format. 
+ + Example: + This function checks that the samplesheet follows the following structure, + see also the `viral recon samplesheet`_:: + + sample,fastq_1,fastq_2 + SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz + SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz + SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, + + .. _viral recon samplesheet: + https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv + + """ + required_columns = {"sample", "fastq_1", "fastq_2"} + # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. + with file_in.open(newline="") as in_handle: + reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) + # Validate the existence of the expected header columns. + if not required_columns.issubset(reader.fieldnames): + req_cols = ", ".join(required_columns) + logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") + sys.exit(1) + # Validate each row. + checker = RowChecker() + for i, row in enumerate(reader): + try: + checker.validate_and_transform(row) + except AssertionError as error: + logger.critical(f"{str(error)} On line {i + 2}.") + sys.exit(1) + checker.validate_unique_samples() + header = list(reader.fieldnames) + header.insert(1, "single_end") + # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. + with file_out.open(mode="w", newline="") as out_handle: + writer = csv.DictWriter(out_handle, header, delimiter=",") + writer.writeheader() + for row in checker.modified: + writer.writerow(row) + + +def parse_args(argv=None): + """Define and immediately parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Validate and transform a tabular samplesheet.", + epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", + ) + parser.add_argument( + "file_in", + metavar="FILE_IN", + type=Path, + help="Tabular input samplesheet in CSV or TSV format.", + ) + parser.add_argument( + "file_out", + metavar="FILE_OUT", + type=Path, + help="Transformed output samplesheet in CSV format.", + ) + parser.add_argument( + "-l", + "--log-level", + help="The desired log level (default WARNING).", + choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), + default="WARNING", + ) + return parser.parse_args(argv) + + +def main(argv=None): + """Coordinate argument parsing and program execution.""" + args = parse_args(argv) + logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") + if not args.file_in.is_file(): + logger.error(f"The given input file {args.file_in} was not found!") + sys.exit(2) + args.file_out.parent.mkdir(parents=True, exist_ok=True) + check_samplesheet(args.file_in, args.file_out) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bin/cutsite_trimming b/bin/cutsite_trimming index aef62c5802acbb650dc7f60040cfde09f1e9d57f..0edd84b91b5bdaa96e3cd6c33bb16e13601ce6d9 100755 Binary files a/bin/cutsite_trimming and b/bin/cutsite_trimming differ diff --git a/bin/digest_genome.py b/bin/digest_genome.py index 2c29a49e1cf174f12142f78627fd799b83da2788..9f05b45b828e81abe35c146cc4b598334cf43916 100755 --- a/bin/digest_genome.py +++ b/bin/digest_genome.py @@ -18,15 +18,11 @@ import os import sys import numpy as np -RE_cutsite = { - "mboi": ["^GATC"], - "dpnii": ["^GATC"], - "bglii": ["A^GATCT"], - "hindiii": ["A^AGCTT"]} +RE_cutsite = {"mboi": ["^GATC"], "dpnii": ["^GATC"], "bglii": ["A^GATCT"], "hindiii": ["A^AGCTT"]} def 
find_re_sites(filename, sequences, offset): - with open(filename, 'r') as infile: + with open(filename, "r") as infile: chr_id = None big_str = "" indices = [] @@ -40,13 +36,12 @@ def find_re_sites(filename, sequences, offset): # If this is not the first chromosome, find the indices and append # them to the list if chr_id is not None: - for rs in range(len(sequences)): - pattern = "(?={})".format(sequences[rs].lower()) - indices += [m.start() + offset[rs]\ - for m in re.finditer(pattern, big_str)] - indices.sort() - all_indices.append(indices) - indices = [] + for rs in range(len(sequences)): + pattern = "(?={})".format(sequences[rs].lower()) + indices += [m.start() + offset[rs] for m in re.finditer(pattern, big_str)] + indices.sort() + all_indices.append(indices) + indices = [] # This is a new chromosome. Empty the sequence string, and add the # correct chrom id @@ -63,11 +58,10 @@ def find_re_sites(filename, sequences, offset): # Add the indices for the last chromosome for rs in range(len(sequences)): pattern = "(?={})".format(sequences[rs].lower()) - indices += [m.start() + offset[rs] - for m in re.finditer(pattern, big_str)] + indices += [m.start() + offset[rs] for m in re.finditer(pattern, big_str)] indices.sort() all_indices.append(indices) - + return contig_names, all_indices @@ -75,7 +69,7 @@ def find_chromsomose_lengths(reference_filename): chromosome_lengths = [] chromosome_names = [] length = None - with open(reference_filename, 'r') as infile: + with open(reference_filename, "r") as infile: for line in infile: if line.startswith(">"): chromosome_names.append(line[1:].strip()) @@ -89,11 +83,11 @@ def find_chromsomose_lengths(reference_filename): def replaceN(cs): - npos = int(cs.find('N')) + npos = int(cs.find("N")) cseql = [] if npos != -1: - for nuc in ["A","C","G","T"]: - tmp = cs.replace('N', nuc, 1) + for nuc in ["A", "C", "G", "T"]: + tmp = cs.replace("N", nuc, 1) tmpl = replaceN(tmp) if type(tmpl) == list: cseql = cseql + tmpl @@ -106,50 +100,59 @@ def replaceN(cs): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('fastafile') - parser.add_argument('-r', '--restriction_sites', - dest='res_sites', - nargs='+', - help=("The cutting position has to be specified using " - "'^'. For instance, -r A^AGCTT for HindIII " - "digestion. Several restriction enzyme can be " - "specified.")) - parser.add_argument('-o', '--out', default=None) + parser.add_argument("fastafile") + parser.add_argument( + "-r", + "--restriction_sites", + dest="res_sites", + nargs="+", + help=( + "The cutting position has to be specified using " + "'^'. For instance, -r A^AGCTT for HindIII " + "digestion. Several restriction enzyme can be " + "specified." + ), + ) + parser.add_argument("-o", "--out", default=None) args = parser.parse_args() filename = args.fastafile out = args.out - + # Split restriction sites if comma-separated - cutsites=[] + cutsites = [] for s in args.res_sites: - for m in s.split(','): + for m in s.split(","): cutsites.append(m) - + # process args and get restriction enzyme sequences sequences = [] offset = [] for cs in cutsites: if cs.lower() in RE_cutsite: - cseq = ''.join(RE_cutsite[cs.lower()]) + cseq = "".join(RE_cutsite[cs.lower()]) else: cseq = cs - offpos = int(cseq.find('^')) + offpos = int(cseq.find("^")) if offpos == -1: - print("Unable to detect offset for {}. Please, use '^' to specify the cutting position,\ - i.e A^GATCT for HindIII digestion.".format(cseq)) + print( + "Unable to detect offset for {}. 
Please, use '^' to specify the cutting position,\ + i.e A^GATCT for HindIII digestion.".format( + cseq + ) + ) sys.exit(-1) for nuc in list(set(cs)): - if nuc not in ['A','T','G','C','N','^']: + if nuc not in ["A", "T", "G", "C", "N", "^"]: print("Find unexpected character ['{}']in restriction motif".format(nuc)) print("Note that multiple motifs should be separated by a space (not a comma !)") sys.exit(-1) offset.append(offpos) - sequences.append(re.sub('\^', '', cseq)) + sequences.append(re.sub("\^", "", cseq)) # replace all N in restriction motif sequences_without_N = [] @@ -158,32 +161,32 @@ if __name__ == "__main__": nrs = replaceN(sequences[rs]) sequences_without_N = sequences_without_N + nrs offset_without_N = offset_without_N + [offset[rs]] * len(nrs) - + sequences = sequences_without_N offset = offset_without_N - + if out is None: out = os.path.splitext(filename)[0] + "_fragments.bed" print("Analyzing", filename) print("Restriction site(s)", ",".join(sequences)) - print("Offset(s)", ','.join(str(x) for x in offset)) + print("Offset(s)", ",".join(str(x) for x in offset)) # Read fasta file and look for rs per chromosome - contig_names, all_indices = find_re_sites(filename, sequences, offset=offset) + contig_names, all_indices = find_re_sites(filename, sequences, offset=offset) _, lengths = find_chromsomose_lengths(filename) valid_fragments = [] for i, indices in enumerate(all_indices): valid_fragments_chr = np.concatenate( - [np.concatenate([[0], indices])[:, np.newaxis], - np.concatenate([indices, [lengths[i]]])[:, np.newaxis]], - axis=1) + [np.concatenate([[0], indices])[:, np.newaxis], np.concatenate([indices, [lengths[i]]])[:, np.newaxis]], + axis=1, + ) valid_fragments.append(valid_fragments_chr) # Write results print("Writing to {} ...".format(out)) - with open(out, 'w') as outfile: + with open(out, "w") as outfile: for chrom_name, indices in zip(contig_names, valid_fragments): frag_id = 0 for begin, end in indices: @@ -192,4 +195,6 @@ if __name__ == "__main__": if end > begin: frag_id += 1 frag_name = "HIC_{}_{}".format(str(chrom_name), int(frag_id)) - outfile.write("{}\t{}\t{}\t{}\t0\t+\n".format(str(chrom_name), int(begin), int(end), str(frag_name))) + outfile.write( + "{}\t{}\t{}\t{}\t0\t+\n".format(str(chrom_name), int(begin), int(end), str(frag_name)) + ) diff --git a/bin/hicpro2higlass.sh b/bin/hicpro2higlass.sh deleted file mode 100755 index ff11aeeb538bbfb06acead2d22646c93d8a567bf..0000000000000000000000000000000000000000 --- a/bin/hicpro2higlass.sh +++ /dev/null @@ -1,268 +0,0 @@ -#!/bin/bash - -## HiC-Pro -## Copyleft 2017 Institut Curie -## Author(s): Nicolas Servant -## Contact: nicolas.servant@curie.fr -## This software is distributed without any guarantee under the terms of the BSD licence - -## -## First version of converter between HiCPro and higlass. 
-## The cooler python package should be properly installed, as well as the higlass software -## - -## -## A few notes about higlass -## -## docker run will install the docker image and start it -## sudo docker run --detach --publish 8888:80 --volume ~/hg-data:/data --volume ~/hg-tmp:/tmp --name higlass-container gehlenborglab/higlass -## sudo docker start higlass-container -## sudo docker ps -all -## -## Once higlass is installed, you can just run it using -## sudo docker start higlass-container -## higlass will then be available at http://localhost:8888 -## - -########################### -## trap handler -########################### -function trap_error() -{ - echo "Error: $1 - line $2 - exit status of last command: $?. Exit" >&2 - exit 1 -} - -function trap_exit() -{ - ##Since bash-4.0 $LINENO is reset to 1 when the trap is triggered - if [ "$?" != "0" ]; then - echo "Error: exit status detected. Exit." >&2 - fi - - if [[ ! -z ${tmp_dir} && -e ${tmp_dir} ]]; then - echo -e "Cleaning temporary folders ..." >&2 - /bin/rm -rf ${tmp_dir} - fi -} - -trap 'trap_error "$0" "$LINENO"' ERR -trap 'trap_exit' 0 1 2 3 - -set -E ## export trap to functions -set -o pipefail ## trace ERR through pipes - -## 0 = -## 1 > -## 2 < -vercomp () { - if [[ $1 == $2 ]] - then - return 0 - fi - local IFS=. - local i ver1=($1) ver2=($2) - # fill empty fields in ver1 with zeros - for ((i=${#ver1[@]}; i<${#ver2[@]}; i++)) - do - ver1[i]=0 - done - - for ((i=0; i<${#ver1[@]}; i++)) - do - if [[ -z ${ver2[i]} ]] - then - # fill empty fields in ver2 with zeros - ver2[i]=0 - fi - if ((10#${ver1[i]} > 10#${ver2[i]})) - then - echo 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - echo 2 - fi - done - echo 0 -} - -function usage { - echo -e "usage : hicpro2higlass -i INPUT -r RESOLUTION -c CHROMSIZE [-n] [-o ODIR] [-t TEMP] [-h]" - echo -e "Use option -h|--help for more information" -} - -function help { - usage; - echo - echo "Generate Higlass input file from HiC-Pro results" - echo "See https://github.com/hms-dbmi/higlass-website for details about Higlass" - echo "---------------" - echo "OPTIONS" - echo - echo " -i|--input INPUT : allValidPairs or matrix file generated by HiC-Pro" - echo " -r|--res RESOLUTION : .matrix file resolution or maximum resolution to reach from the .allValidPairs input file" - echo " -c|--chrom CHROMSIZE : chromosome size file" - echo " -p|--proc NB_CPU : number of CPUs for cooler" - echo " [-n|--norm] : run cooler matrix balancing algorithm" - echo " [-o|--out] : output path. Default is current path" - echo " [-t|--temp] TEMP : path to tmp folder. Default is current path" - echo " [-h|--help]: help" - exit; -} - - -if [ $# -lt 1 ] -then - usage - exit -fi - -# Transform long options to short ones -for arg in "$@"; do - shift - case "$arg" in - "--input") set -- "$@" "-i" ;; - "--bed") set -- "$@" "-b" ;; - "--res") set -- "$@" "-r" ;; - "--chrom") set -- "$@" "-c" ;; - "--proc") set -- "$@" "-p" ;; - "--out") set -- "$@" "-o" ;; - "--temp") set -- "$@" "-t" ;; - "--norm") set -- "$@" "-n" ;; - "--help") set -- "$@" "-h" ;; - *) set -- "$@" "$arg" - esac -done - -INPUT_HICPRO="" -INPUT_BED="" -NORMALIZE=0 -NPROC=1 -CHROMSIZES_FILE="" -RES=10000 -OUT="./" -TEMP="./" - -while getopts ":i:b:c:p:r:o:t:nh" OPT -do - case $OPT in - i) INPUT_HICPRO=$OPTARG;; - b) INPUT_BED=$OPTARG;; - n) NORMALIZE=1;; - c) CHROMSIZES_FILE=$OPTARG;; - p) NPROC=$OPTARG;; - r) RES=$OPTARG;; - o) OUT=$OPTARG;; - t) TEMP=$OPTARG;; - h) help ;; - \?) 
- echo "Invalid option: -$OPTARG" >&2 - usage - exit 1 - ;; - :) - echo "Option -$OPTARG requires an argument." >&2 - usage - exit 1 - ;; - esac -done - -if [[ -z $INPUT_HICPRO ]]; -then - usage - exit -fi - -if [[ ! -e $CHROMSIZES_FILE ]]; then - echo -e "$CHROMSIZES_FILE file not found. Exit" - exit 1 -fi - -## Detect input data type -DATATYPE="" -if [[ $INPUT_HICPRO == *.mat* ]]; then - DATATYPE="MATRIX" -elif [[ $INPUT_HICPRO == *allValidPairs* || $INPUT_HICPRO == *validPairs* ]]; then - DATATYPE="VALID" -else - echo -e "Unknown input data type. Expect .matrix or _allValidPairs input files." - exit 1 -fi -echo -e "$DATATYPE input file detected ..." - -## Check cooler version -which cooler > /dev/null; -if [ $? != "0" ]; then - echo -e "Cooler is not installed or is not in your $PATH. See https://github.com/mirnylab/cooler for details." - exit 1; -fi - -COOLER_VERSION=$(cooler --version 2>&1 | awk '{print $NF}') -echo "Cooler version $COOLER_VERSION detected ..." -cres=$(vercomp ${COOLER_VERSION} "0.7.6") -if [[ $cres == "2" ]]; then - echo "Cooler version must be >= 0.7.6 ! Stop." - exit 1 -fi - -if [[ $DATATYPE == "VALID" ]]; then - which pairix > /dev/null; - if [ $? != "0" ]; then - echo -e "Pairix is not installed or is not in your PATH. See https://github.com/4dn-dcic/pairix." - exit 1; - fi -fi - -echo -e "\nGenerating .cool files ..." -tmp_dir=${TEMP}/_tmp$$ -mkdir -p $tmp_dir - -if [[ $DATATYPE == "MATRIX" ]]; then - out=$(basename $INPUT_HICPRO | sed -e 's/.mat.*/.cool/') - - cooler makebins $CHROMSIZES_FILE $RES > $tmp_dir/bins.bed - cooler load -f coo --one-based $tmp_dir/bins.bed $INPUT_HICPRO $tmp_dir/$out - - echo -e "\nZoomify .cool file ..." - if [[ $NORMALIZE == 1 ]]; then - cooler zoomify --nproc ${NPROC} --balance $tmp_dir/$out - else - cooler zoomify --nproc ${NPROC} $tmp_dir/$out - fi - out=$(basename $INPUT_HICPRO | sed -e 's/.mat.*/.mcool/') - -elif [[ $DATATYPE == "VALID" ]]; then - out=$(basename $INPUT_HICPRO | sed -e 's/.allValidPairs.*/.cool/') - - awk '{OFS="\t";print $2,$3,$4,$5,$6,$7,1}' $INPUT_HICPRO | sed -e 's/+/1/g' -e 's/-/16/g' > $tmp_dir/contacts.txt - cooler csort --nproc ${NPROC} -c1 1 -p1 2 -s1 3 -c2 4 -p2 5 -s2 6 \ - -o $tmp_dir/contacts.sorted.txt.gz \ - $tmp_dir/contacts.txt \ - $CHROMSIZES_FILE - - cooler makebins $CHROMSIZES_FILE $RES > $tmp_dir/bins.bed - cooler cload pairix --nproc ${NPROC} $tmp_dir/bins.bed $tmp_dir/contacts.sorted.txt.gz $tmp_dir/$out - - echo -e "\nZoomify .cool file ..." - if [[ $NORMALIZE == 1 ]]; then - cooler zoomify --nproc ${NPROC} --balance $tmp_dir/$out - else - cooler zoomify --nproc ${NPROC} $tmp_dir/$out - fi - out=$(basename $INPUT_HICPRO | sed -e 's/.allValidPairs.*/.mcool/') -fi - -## mv to out -mv $tmp_dir/*cool ${OUT}/ - -## clean -/bin/rm -rf $tmp_dir - -echo -e "\nCooler file generated with success ..." 
-echo "Please copy the file $out in your Higlass input directory and run :" -echo "sudo docker exec higlass-container python higlass-server/manage.py ingest_tileset --filename /tmp/$out --datatype matrix --filetype cooler" - - - diff --git a/bin/hicpro_merge_validpairs.sh b/bin/hicpro_merge_validpairs.sh new file mode 100755 index 0000000000000000000000000000000000000000..e6c5200bb9e09fc9b21302c6e6aae56a5468276a --- /dev/null +++ b/bin/hicpro_merge_validpairs.sh @@ -0,0 +1,64 @@ +#!/bin/bash +set -e + +## +## HiC-Pro +## Internal function +## Merge valid interactions files and remove duplicates +## + +rmDup=0 +prefix="" +while getopts ":dp:" opt; do + case "$opt" in + d) rmDup=1 ;; + p) prefix=$OPTARG ;; + esac +done +shift $(( OPTIND - 1 )) + +vpairs="$@" +vpairs_sorted=$(echo $vpairs | sed -e 's/validPairs/sorted.validPairs/g') + +mkdir -p ./tmp/ + +if [[ ${rmDup} == 1 ]]; then + ## Sort individual validPairs files + fcounts=0 + for vfile in ${vpairs} + do + echo "Sorting ${vfile} ..." + fcounts=$((fcounts+1)) + ofile=$(echo ${vfile} | sed -e 's/validPairs/sorted.validPairs/') + #sort -k2,2V -k3,3n -k5,5V -k6,6n -T ./tmp/ -o ${ofile} ${vfile} + sort -k2,2 -k5,5 -k3,3n -k6,6n -T ./tmp/ -o ${ofile} ${vfile} + done + + if [[ $fcounts -gt 1 ]] + then + echo "Merging and removing the duplicates ..." + ## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs) + #sort -k2,2V -k3,3n -k5,5V -k6,6n -T ./tmp/ -m ${vpairs_sorted} | \ + sort -k2,2 -k5,5 -k3,3n -k6,6n -T ./tmp/ -m ${vpairs_sorted} | \ + awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs + else + echo "Removing the duplicates ..." + cat ${vpairs_sorted} | awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs + fi + + ## clean + /bin/rm -rf ${vpairs_sorted} +else + cat ${vpairs} > ${prefix}.allValidPairs +fi + +echo -e -n "valid_interaction\t" > ${prefix}_allValidPairs.mergestat +cat ${vpairs} | wc -l >> ${prefix}_allValidPairs.mergestat +echo -e -n "valid_interaction_rmdup\t" >> ${prefix}_allValidPairs.mergestat +cat ${prefix}.allValidPairs | wc -l >> ${prefix}_allValidPairs.mergestat + +## Count short range (<20000) vs long range contacts +awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} $2 == $5{cis=cis+1; d=$6>$3?$6-$3:$3-$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} $2!=$5{trans=trans+1}END{print "trans_interaction\t"trans"\ncis_interaction\t"cis"\ncis_shortRange\t"sr"\ncis_longRange\t"lr}' ${prefix}.allValidPairs >> ${prefix}_allValidPairs.mergestat + +## clean +/bin/rm -rf ./tmp/ diff --git a/bin/mapped_2hic_dnase.py b/bin/mapped_2hic_dnase.py index dd023b0023e0c0a7aa4780bcc04289e467ed877b..ff593666f03c5b82928ced06b941be296d6169ea 100755 --- a/bin/mapped_2hic_dnase.py +++ b/bin/mapped_2hic_dnase.py @@ -25,8 +25,12 @@ def usage(): print("-r/--mappedReadsFile <BAM/SAM file of mapped reads>") print("[-o/--outputDir] <Output directory. Default is current directory>") print("[-d/--minCisDist] <Minimum distance between intrachromosomal contact to consider>") - print("[-g/--gtag] <Genotype tag. If specified, this tag will be reported in the valid pairs output for allele specific classification>") - print("[-a/--all] <Write all additional output files, with information about the discarded reads (self-circle, dangling end, etc.)>") + print( + "[-g/--gtag] <Genotype tag. 
If specified, this tag will be reported in the valid pairs output for allele specific classification>" + ) + print( + "[-a/--all] <Write all additional output files, with information about the discarded reads (self-circle, dangling end, etc.)>" + ) print("[-v/--verbose] <Verbose>") print("[-h/--help] <Help>") return @@ -38,8 +42,8 @@ def get_args(): opts, args = getopt.getopt( sys.argv[1:], "r:o:d:g:avh", - ["mappedReadsFile=", - "outputDir=", "minDist=", "gatg", "all", "verbose", "help"]) + ["mappedReadsFile=", "outputDir=", "minDist=", "gatg", "all", "verbose", "help"], + ) except getopt.GetoptError: usage() sys.exit(-1) @@ -78,8 +82,8 @@ def get_read_pos(read, st="start"): list of aligned reads """ if st == "middle": - pos = read.reference_start + int(read.alen/2) - elif st =="start": + pos = read.reference_start + int(read.alen / 2) + elif st == "start": pos = get_read_start(read) elif st == "left": pos = read.reference_start @@ -88,11 +92,11 @@ def get_read_pos(read, st="start"): def get_read_start(read): - """ - Return the 5' end of the read + """ + Return the 5' end of the read """ if read.is_reverse: - pos = read.reference_start + read.alen -1 + pos = read.reference_start + read.alen - 1 else: pos = read.reference_start return pos @@ -125,7 +129,7 @@ def get_ordered_reads(read1, read2): def isIntraChrom(read1, read2): """ Return true is the reads pair is intrachromosomal - + read1 : [AlignedRead] read2 : [AlignedRead] @@ -163,23 +167,23 @@ def get_valid_orientation(read1, read2): def get_cis_dist(read1, read2): - """ - Calculte the size of the DNA fragment library + """ + Calculte the size of the DNA fragment library - read1 : [AlignedRead] - read2 : [AlignedRead] + read1 : [AlignedRead] + read2 : [AlignedRead] - """ - # Get oriented reads - ##r1, r2 = get_ordered_reads(read1, read2) - dist = None - if not r1.is_unmapped and not r2.is_unmapped: - ## Contact distances can be calculated for intrachromosomal reads only - if isIntraChrom(read1, read2): - r1pos = get_read_pos(read1) - r2pos = get_read_pos(read2) - dist = abs(r1pos - r2pos) - return dist + """ + # Get oriented reads + ##r1, r2 = get_ordered_reads(read1, read2) + dist = None + if not r1.is_unmapped and not r2.is_unmapped: + ## Contact distances can be calculated for intrachromosomal reads only + if isIntraChrom(read1, read2): + r1pos = get_read_pos(read1) + r2pos = get_read_pos(read2) + dist = abs(r1pos - r2pos) + return dist def get_read_tag(read, tag): @@ -255,15 +259,15 @@ if __name__ == "__main__": CF_ascounter = 0 baseReadsFile = os.path.basename(mappedReadsFile) - baseReadsFile = re.sub(r'\.bam$|\.sam$', '', baseReadsFile) + baseReadsFile = re.sub(r"\.bam$|\.sam$", "", baseReadsFile) # Open handlers for output files - handle_valid = open(outputDir + '/' + baseReadsFile + '.validPairs', 'w') + handle_valid = open(outputDir + "/" + baseReadsFile + ".validPairs", "w") if allOutput: - handle_dump = open(outputDir + '/' + baseReadsFile + '.DumpPairs', 'w') - handle_single = open(outputDir + '/' + baseReadsFile + '.SinglePairs','w') - handle_filt = open(outputDir + '/' + baseReadsFile + '.FiltPairs','w') + handle_dump = open(outputDir + "/" + baseReadsFile + ".DumpPairs", "w") + handle_single = open(outputDir + "/" + baseReadsFile + ".SinglePairs", "w") + handle_filt = open(outputDir + "/" + baseReadsFile + ".FiltPairs", "w") # Read the SAM/BAM file if verbose: @@ -306,7 +310,7 @@ if __name__ == "__main__": cur_handler = handle_single if allOutput else None # Check Distance criteria - Filter - if (minDist is not None and 
dist is not None and dist < int(minDist)): + if minDist is not None and dist is not None and dist < int(minDist): interactionType = "FILT" filt_counter += 1 cur_handler = handle_filt if allOutput else None @@ -330,13 +334,11 @@ if __name__ == "__main__": dump_counter += 1 cur_handler = handle_dump if allOutput else None - - # Split valid pairs based on XA tag if gtag is not None: r1as = get_read_tag(r1, gtag) r2as = get_read_tag(r2, gtag) - + if r1as == 1 and r2as == 1: G1G1_ascounter += 1 elif r1as == 2 and r2as == 2: @@ -357,11 +359,10 @@ if __name__ == "__main__": CF_ascounter += 1 else: UU_ascounter += 1 - - + if cur_handler is not None: if not r1.is_unmapped and not r2.is_unmapped: - + ##reorient reads to ease duplicates removal or1, or2 = get_ordered_reads(r1, r2) or1_chrom = samfile.get_reference_name(or1.reference_id) @@ -371,53 +372,93 @@ if __name__ == "__main__": r1as = get_read_tag(or1, gtag) r2as = get_read_tag(or2, gtag) if gtag is not None: - htag = str(r1as)+"-"+str(r2as) - + htag = str(r1as) + "-" + str(r2as) + cur_handler.write( - or1.query_name + "\t" + - or1_chrom + "\t" + - str(get_read_pos(or1)+1) + "\t" + - str(get_read_strand(or1)) + "\t" + - or2_chrom + "\t" + - str(get_read_pos(or2)+1) + "\t" + - str(get_read_strand(or2)) + "\t" + - "NA" + "\t" + ##dist - "NA" + "\t" + ##resfrag1 - "NA" + "\t" + ##resfrag2 - str(or1.mapping_quality) + "\t" + - str(or2.mapping_quality) + "\t" + - str(htag) + "\n") - + or1.query_name + + "\t" + + or1_chrom + + "\t" + + str(get_read_pos(or1) + 1) + + "\t" + + str(get_read_strand(or1)) + + "\t" + + or2_chrom + + "\t" + + str(get_read_pos(or2) + 1) + + "\t" + + str(get_read_strand(or2)) + + "\t" + + "NA" + + "\t" + + "NA" ##dist + + "\t" + + "NA" ##resfrag1 + + "\t" + + str(or1.mapping_quality) ##resfrag2 + + "\t" + + str(or2.mapping_quality) + + "\t" + + str(htag) + + "\n" + ) + elif r2.is_unmapped and not r1.is_unmapped: cur_handler.write( - r1.query_name + "\t" + - r1_chrom + "\t" + - str(get_read_pos(r1)+1) + "\t" + - str(get_read_strand(r1)) + "\t" + - "*" + "\t" + - "*" + "\t" + - "*" + "\t" + - "*" + "\t" + - "*" + "\t" + - "*" + "\t" + - str(r1.mapping_quality) + "\t" + - "*" + "\n") + r1.query_name + + "\t" + + r1_chrom + + "\t" + + str(get_read_pos(r1) + 1) + + "\t" + + str(get_read_strand(r1)) + + "\t" + + "*" + + "\t" + + "*" + + "\t" + + "*" + + "\t" + + "*" + + "\t" + + "*" + + "\t" + + "*" + + "\t" + + str(r1.mapping_quality) + + "\t" + + "*" + + "\n" + ) elif r1.is_unmapped and not r2.is_unmapped: cur_handler.write( - r2.query_name + "\t" + - "*" + "\t" + - "*" + "\t" + - "*" + "\t" + - r2_chrom + "\t" + - str(get_read_pos(r2)+1) + "\t" + - str(get_read_strand(r2)) + "\t" + - "*" + "\t" + - "*" + "\t" + - "*" + "\t" + - "*" + "\t" + - str(r2.mapping_quality) + "\n") - - if (reads_counter % 100000 == 0 and verbose): + r2.query_name + + "\t" + + "*" + + "\t" + + "*" + + "\t" + + "*" + + "\t" + + r2_chrom + + "\t" + + str(get_read_pos(r2) + 1) + + "\t" + + str(get_read_strand(r2)) + + "\t" + + "*" + + "\t" + + "*" + + "\t" + + "*" + + "\t" + + "*" + + "\t" + + str(r2.mapping_quality) + + "\n" + ) + + if reads_counter % 100000 == 0 and verbose: print("##", reads_counter) # Close handler @@ -428,7 +469,7 @@ if __name__ == "__main__": handle_filt.close() # Write stats file - with open(outputDir + '/' + baseReadsFile + '.RSstat', 'w') as handle_stat: + with open(outputDir + "/" + baseReadsFile + ".RSstat", "w") as handle_stat: handle_stat.write("## Hi-C processing - no restriction fragments\n") 
handle_stat.write("Valid_interaction_pairs\t" + str(valid_counter) + "\n") handle_stat.write("Valid_interaction_pairs_FF\t" + str(valid_counter_FF) + "\n") @@ -439,17 +480,24 @@ if __name__ == "__main__": handle_stat.write("Filtered_pairs\t" + str(filt_counter) + "\n") handle_stat.write("Dumped_pairs\t" + str(dump_counter) + "\n") - ## Write AS report + ## Write AS report if gtag is not None: handle_stat.write("## ======================================\n") handle_stat.write("## Allele specific information\n") handle_stat.write("Valid_pairs_from_ref_genome_(1-1)\t" + str(G1G1_ascounter) + "\n") - handle_stat.write("Valid_pairs_from_ref_genome_with_one_unassigned_mate_(0-1/1-0)\t" + str(UG1_ascounter+G1U_ascounter) + "\n") + handle_stat.write( + "Valid_pairs_from_ref_genome_with_one_unassigned_mate_(0-1/1-0)\t" + + str(UG1_ascounter + G1U_ascounter) + + "\n" + ) handle_stat.write("Valid_pairs_from_alt_genome_(2-2)\t" + str(G2G2_ascounter) + "\n") - handle_stat.write("Valid_pairs_from_alt_genome_with_one_unassigned_mate_(0-2/2-0)\t" + str(UG2_ascounter+G2U_ascounter) + "\n") - handle_stat.write("Valid_pairs_from_alt_and_ref_genome_(1-2/2-1)\t" + str(G1G2_ascounter+G2G1_ascounter) + "\n") + handle_stat.write( + "Valid_pairs_from_alt_genome_with_one_unassigned_mate_(0-2/2-0)\t" + + str(UG2_ascounter + G2U_ascounter) + + "\n" + ) + handle_stat.write( + "Valid_pairs_from_alt_and_ref_genome_(1-2/2-1)\t" + str(G1G2_ascounter + G2G1_ascounter) + "\n" + ) handle_stat.write("Valid_pairs_with_both_unassigned_mated_(0-0)\t" + str(UU_ascounter) + "\n") handle_stat.write("Valid_pairs_with_at_least_one_conflicting_mate_(3-)\t" + str(CF_ascounter) + "\n") - - - diff --git a/bin/mapped_2hic_fragments.py b/bin/mapped_2hic_fragments.py index e823ee02cce862b704c2b6939d1642db579665be..cc0e40b472dfb764ba1bf2f550c00c79bce7bd3f 100755 --- a/bin/mapped_2hic_fragments.py +++ b/bin/mapped_2hic_fragments.py @@ -32,8 +32,12 @@ def usage(): print("[-t/--shortestFragmentLength] <Shortest restriction fragment length to consider>") print("[-m/--longestFragmentLength] <Longest restriction fragment length to consider>") print("[-d/--minCisDist] <Minimum distance between intrachromosomal contact to consider>") - print("[-g/--gtag] <Genotype tag. If specified, this tag will be reported in the valid pairs output for allele specific classification>") - print("[-a/--all] <Write all additional output files, with information about the discarded reads (self-circle, dangling end, etc.)>") + print( + "[-g/--gtag] <Genotype tag. 
If specified, this tag will be reported in the valid pairs output for allele specific classification>" + ) + print( + "[-a/--all] <Write all additional output files, with information about the discarded reads (self-circle, dangling end, etc.)>" + ) print("[-S/--sam] <Output an additional SAM file with flag 'CT' for pairs classification>") print("[-v/--verbose] <Verbose>") print("[-h/--help] <Help>") @@ -46,13 +50,22 @@ def get_args(): opts, args = getopt.getopt( sys.argv[1:], "f:r:o:s:l:t:m:d:g:Svah", - ["fragmentFile=", - "mappedReadsFile=", - "outputDir=", - "minInsertSize=", "maxInsertSize", - "minFragSize", "maxFragSize", - "minDist", - "gatg", "sam", "verbose", "all", "help"]) + [ + "fragmentFile=", + "mappedReadsFile=", + "outputDir=", + "minInsertSize=", + "maxInsertSize", + "minFragSize", + "maxFragSize", + "minDist", + "gatg", + "sam", + "verbose", + "all", + "help", + ], + ) except getopt.GetoptError: usage() sys.exit(-1) @@ -66,7 +79,7 @@ def timing(function, *args): """ startTime = time.time() result = function(*args) - print('{} function took {:.3f}ms'.format(function.__name__, (time.time() - startTime) * 1000)) + print("{} function took {:.3f}ms".format(function.__name__, (time.time() - startTime) * 1000)) return result @@ -88,7 +101,7 @@ def get_read_strand(read): def isIntraChrom(read1, read2): """ Return true is the reads pair is intrachromosomal - + read1 : [AlignedRead] read2 : [AlignedRead] @@ -99,22 +112,22 @@ def isIntraChrom(read1, read2): def get_cis_dist(read1, read2): - """ - Calculte the contact distance between two intrachromosomal reads + """ + Calculte the contact distance between two intrachromosomal reads - read1 : [AlignedRead] - read2 : [AlignedRead] + read1 : [AlignedRead] + read2 : [AlignedRead] - """ - # Get oriented reads - ##r1, r2 = get_ordered_reads(read1, read2) - dist = None - if not read1.is_unmapped and not read2.is_unmapped: - ## Contact distances can be calculated for intrachromosomal reads only - if isIntraChrom(read1, read2): - r1pos, r2pos = get_read_pos(read1), get_read_pos(read2) - dist = abs(r1pos - r2pos) - return dist + """ + # Get oriented reads + ##r1, r2 = get_ordered_reads(read1, read2) + dist = None + if not read1.is_unmapped and not read2.is_unmapped: + ## Contact distances can be calculated for intrachromosomal reads only + if isIntraChrom(read1, read2): + r1pos, r2pos = get_read_pos(read1), get_read_pos(read2) + dist = abs(r1pos - r2pos) + return dist def get_read_pos(read, st="start"): @@ -135,12 +148,12 @@ def get_read_pos(read, st="start"): """ if st == "middle": - pos = read.reference_start + int(read.alen/2) - elif st =="start": + pos = read.reference_start + int(read.alen / 2) + elif st == "start": pos = get_read_start(read) elif st == "left": pos = read.reference_start - + return pos @@ -149,11 +162,12 @@ def get_read_start(read): Return the 5' end of the read """ if read.is_reverse: - pos = read.reference_start + read.alen -1 + pos = read.reference_start + read.alen - 1 else: pos = read.reference_start return pos + def get_ordered_reads(read1, read2): """ Reorient reads @@ -183,9 +197,10 @@ def get_ordered_reads(read1, read2): r1, r2 = read1, read2 else: r1, r2 = read2, read1 - + return r1, r2 + def load_restriction_fragment(in_file, minfragsize=None, maxfragsize=None, verbose=False): """ Read a BED file and store the intervals in a tree @@ -204,37 +219,37 @@ def load_restriction_fragment(in_file, minfragsize=None, maxfragsize=None, verbo nline = 0 nfilt = 0 for line in bed_handle: - nline += 1 - bedtab = 
line.split("\t") - try: - chromosome, start, end, name = bedtab[:4] - except ValueError: - print("Warning : wrong input format in line {}. Not a BED file ?!".format(nline)) - continue + nline += 1 + bedtab = line.split("\t") + try: + chromosome, start, end, name = bedtab[:4] + except ValueError: + print("Warning : wrong input format in line {}. Not a BED file ?!".format(nline)) + continue # BED files are zero-based as Intervals objects - start = int(start) # + 1 - end = int(end) - fragl = abs(end - start) - name = name.strip() - - ## Discard fragments outside the size range - filt = False - if minfragsize != None and int(fragl) < int(minfragsize): - nfilt += 1 - filt = True - elif maxfragsize != None and int(fragl) > int(maxfragsize): - nfilt += 1 - filt = True - - if chromosome in resFrag: - tree = resFrag[chromosome] - tree.add_interval(Interval(start, end, value={'name': name, 'filter': filt})) - else: - tree = Intersecter() - tree.add_interval(Interval(start, end, value={'name': name, 'filter': filt})) - resFrag[chromosome] = tree - + start = int(start) # + 1 + end = int(end) + fragl = abs(end - start) + name = name.strip() + + ## Discard fragments outside the size range + filt = False + if minfragsize != None and int(fragl) < int(minfragsize): + nfilt += 1 + filt = True + elif maxfragsize != None and int(fragl) > int(maxfragsize): + nfilt += 1 + filt = True + + if chromosome in resFrag: + tree = resFrag[chromosome] + tree.add_interval(Interval(start, end, value={"name": name, "filter": filt})) + else: + tree = Intersecter() + tree.add_interval(Interval(start, end, value={"name": name, "filter": filt})) + resFrag[chromosome] = tree + if nfilt > 0: print("Warning : {} fragment(s) outside of range and discarded. {} remaining.".format(nfilt, nline - nfilt)) bed_handle.close() @@ -253,10 +268,10 @@ def get_overlapping_restriction_fragment(resFrag, chrom, read): """ # Get read position (middle or start) pos = get_read_pos(read, st="middle") - + if chrom in resFrag: # Overlap with the position of the read (zero-based) - resfrag = resFrag[chrom].find(pos, pos+1) + resfrag = resFrag[chrom].find(pos, pos + 1) if len(resfrag) > 1: print("Warning : {} restictions fragments found for {} -skipped".format(len(resfrag), read.query_name)) return None @@ -271,21 +286,22 @@ def get_overlapping_restriction_fragment(resFrag, chrom, read): def are_contiguous_fragments(frag1, frag2, chr1, chr2): - ''' + """ Compare fragment positions to check if they are contiguous - ''' + """ ret = False if chr1 == chr2: if int(frag1.start) < int(frag2.start): d = int(frag2.start) - int(frag1.end) else: d = int(frag1.start) - int(frag2.end) - + if d == 0: ret = True - + return ret + def is_religation(read1, read2, frag1, frag2): """ Reads are expected to map adjacent fragments @@ -294,8 +310,8 @@ def is_religation(read1, read2, frag1, frag2): """ ret = False if are_contiguous_fragments(frag1, frag2, read1.tid, read2.tid): - #r1, r2 = get_ordered_reads(read1, read2) - #if get_read_strand(r1) == "+" and get_read_strand(r2) == "-": + # r1, r2 = get_ordered_reads(read1, read2) + # if get_read_strand(r1) == "+" and get_read_strand(r2) == "-": ret = True return ret @@ -405,8 +421,7 @@ def get_PE_fragment_size(read1, read2, resFrag1, resFrag2, interactionType): return fragmentsize -def get_interaction_type(read1, read1_chrom, resfrag1, read2, - read2_chrom, resfrag2, verbose): +def get_interaction_type(read1, read1_chrom, resfrag1, read2, read2_chrom, resfrag2, verbose): """ Returns the interaction type @@ -433,7 +448,7 @@ def 
get_interaction_type(read1, read1_chrom, resfrag1, read2, # If returned InteractionType=None -> Same restriction fragment # and same strand = Dump interactionType = None - + if not read1.is_unmapped and not read2.is_unmapped and resfrag1 is not None and resfrag2 is not None: # same restriction fragment if resfrag1 == resfrag2: @@ -549,29 +564,29 @@ if __name__ == "__main__": CF_ascounter = 0 baseReadsFile = os.path.basename(mappedReadsFile) - baseReadsFile = re.sub(r'\.bam$|\.sam$', '', baseReadsFile) + baseReadsFile = re.sub(r"\.bam$|\.sam$", "", baseReadsFile) # Open handlers for output files - handle_valid = open(outputDir + '/' + baseReadsFile + '.validPairs', 'w') + handle_valid = open(outputDir + "/" + baseReadsFile + ".validPairs", "w") if allOutput: - handle_de = open(outputDir + '/' + baseReadsFile + '.DEPairs', 'w') - handle_re = open(outputDir + '/' + baseReadsFile + '.REPairs', 'w') - handle_sc = open(outputDir + '/' + baseReadsFile + '.SCPairs', 'w') - handle_dump = open(outputDir + '/' + baseReadsFile + '.DumpPairs', 'w') - handle_single = open(outputDir + '/' + baseReadsFile + '.SinglePairs', 'w') - handle_filt = open(outputDir + '/' + baseReadsFile + '.FiltPairs', 'w') + handle_de = open(outputDir + "/" + baseReadsFile + ".DEPairs", "w") + handle_re = open(outputDir + "/" + baseReadsFile + ".REPairs", "w") + handle_sc = open(outputDir + "/" + baseReadsFile + ".SCPairs", "w") + handle_dump = open(outputDir + "/" + baseReadsFile + ".DumpPairs", "w") + handle_single = open(outputDir + "/" + baseReadsFile + ".SinglePairs", "w") + handle_filt = open(outputDir + "/" + baseReadsFile + ".FiltPairs", "w") # Read the BED file resFrag = timing(load_restriction_fragment, fragmentFile, minFragSize, maxFragSize, verbose) - + # Read the SAM/BAM file if verbose: print("## Opening SAM/BAM file {} ...".format(mappedReadsFile)) samfile = pysam.Samfile(mappedReadsFile, "rb") if samOut: - handle_sam = pysam.AlignmentFile(outputDir + '/' + baseReadsFile + '_interaction.bam', "wb", template=samfile) + handle_sam = pysam.AlignmentFile(outputDir + "/" + baseReadsFile + "_interaction.bam", "wb", template=samfile) # Reads are 0-based too (for both SAM and BAM format) # Loop on all reads @@ -608,22 +623,24 @@ if __name__ == "__main__": interactionType = get_interaction_type(r1, r1_chrom, r1_resfrag, r2, r2_chrom, r2_resfrag, verbose) dist = get_PE_fragment_size(r1, r2, r1_resfrag, r2_resfrag, interactionType) cdist = get_cis_dist(r1, r2) - + ## Filter based on restriction fragments - if (r1_resfrag is not None and r1_resfrag.value['filter'] == True) or (r2_resfrag is not None and r2_resfrag.value['filter']) == True: + if (r1_resfrag is not None and r1_resfrag.value["filter"] == True) or ( + r2_resfrag is not None and r2_resfrag.value["filter"] + ) == True: interactionType = "FILT" - + # Check Insert size criteria - FILT - if (minInsertSize is not None and dist is not None and - dist < int(minInsertSize)) or \ - (maxInsertSize is not None and dist is not None and dist > int(maxInsertSize)): + if (minInsertSize is not None and dist is not None and dist < int(minInsertSize)) or ( + maxInsertSize is not None and dist is not None and dist > int(maxInsertSize) + ): interactionType = "FILT" # Check Distance criteria - FILT # Done for VI otherwise this criteria will overwrite all other invalid classification - if (interactionType == "VI" and minDist is not None and cdist is not None and cdist < int(minDist)): + if interactionType == "VI" and minDist is not None and cdist is not None and cdist < int(minDist): 
interactionType = "FILT" - + if interactionType == "VI": valid_counter += 1 cur_handler = handle_valid @@ -677,11 +694,11 @@ if __name__ == "__main__": elif interactionType == "SI": single_counter += 1 cur_handler = handle_single if allOutput else None - + elif interactionType == "FILT": filt_counter += 1 cur_handler = handle_filt if allOutput else None - + else: interactionType = "DUMP" dump_counter += 1 @@ -694,17 +711,17 @@ if __name__ == "__main__": ## Write results in right handler if cur_handler is not None: - if not r1.is_unmapped and not r2.is_unmapped: + if not r1.is_unmapped and not r2.is_unmapped: ##reorient reads to ease duplicates removal or1, or2 = get_ordered_reads(r1, r2) or1_chrom = samfile.get_reference_name(or1.tid) or2_chrom = samfile.get_reference_name(or2.tid) - + ##reset as tag now that the reads are oriented r1as = get_read_tag(or1, gtag) r2as = get_read_tag(or2, gtag) if gtag is not None: - htag = str(r1as)+"-"+str(r2as) + htag = str(r1as) + "-" + str(r2as) ##get fragment name and reorient if necessary if or1 == r1 and or2 == r2: @@ -715,73 +732,113 @@ if __name__ == "__main__": or2_resfrag = r1_resfrag if or1_resfrag is not None: - or1_fragname = or1_resfrag.value['name'] + or1_fragname = or1_resfrag.value["name"] else: - or1_fragname = 'None' - + or1_fragname = "None" + if or2_resfrag is not None: - or2_fragname = or2_resfrag.value['name'] + or2_fragname = or2_resfrag.value["name"] else: - or2_fragname = 'None' - + or2_fragname = "None" + cur_handler.write( - or1.query_name + "\t" + - or1_chrom + "\t" + - str(get_read_pos(or1)+1) + "\t" + - str(get_read_strand(or1)) + "\t" + - or2_chrom + "\t" + - str(get_read_pos(or2)+1) + "\t" + - str(get_read_strand(or2)) + "\t" + - str(dist) + "\t" + - or1_fragname + "\t" + - or2_fragname + "\t" + - str(or1.mapping_quality) + "\t" + - str(or2.mapping_quality) + "\t" + - str(htag) + "\n") + or1.query_name + + "\t" + + or1_chrom + + "\t" + + str(get_read_pos(or1) + 1) + + "\t" + + str(get_read_strand(or1)) + + "\t" + + or2_chrom + + "\t" + + str(get_read_pos(or2) + 1) + + "\t" + + str(get_read_strand(or2)) + + "\t" + + str(dist) + + "\t" + + or1_fragname + + "\t" + + or2_fragname + + "\t" + + str(or1.mapping_quality) + + "\t" + + str(or2.mapping_quality) + + "\t" + + str(htag) + + "\n" + ) elif r2.is_unmapped and not r1.is_unmapped: if r1_resfrag is not None: - r1_fragname = r1_resfrag.value['name'] - + r1_fragname = r1_resfrag.value["name"] + cur_handler.write( - r1.query_name + "\t" + - r1_chrom + "\t" + - str(get_read_pos(r1)+1) + "\t" + - str(get_read_strand(r1)) + "\t" + - "*" + "\t" + - "*" + "\t" + - "*" + "\t" + - "*" + "\t" + - r1_fragname + "\t" + - "*" + "\t" + - str(r1.mapping_quality) + "\t" + - "*" + "\n") + r1.query_name + + "\t" + + r1_chrom + + "\t" + + str(get_read_pos(r1) + 1) + + "\t" + + str(get_read_strand(r1)) + + "\t" + + "*" + + "\t" + + "*" + + "\t" + + "*" + + "\t" + + "*" + + "\t" + + r1_fragname + + "\t" + + "*" + + "\t" + + str(r1.mapping_quality) + + "\t" + + "*" + + "\n" + ) elif r1.is_unmapped and not r2.is_unmapped: if r2_resfrag is not None: - r2_fragname = r2_resfrag.value['name'] - + r2_fragname = r2_resfrag.value["name"] + cur_handler.write( - r2.query_name + "\t" + - "*" + "\t" + - "*" + "\t" + - "*" + "\t" + - r2_chrom + "\t" + - str(get_read_pos(r2)+1) + "\t" + - str(get_read_strand(r2)) + "\t" + - "*" + "\t" + - "*" + "\t" + - r2_fragname + "\t" + - "*" + "\t" + - str(r2.mapping_quality) + "\n") - - ## Keep initial order + r2.query_name + + "\t" + + "*" + + "\t" + + "*" + + "\t" + + "*" 
+ + "\t" + + r2_chrom + + "\t" + + str(get_read_pos(r2) + 1) + + "\t" + + str(get_read_strand(r2)) + + "\t" + + "*" + + "\t" + + "*" + + "\t" + + r2_fragname + + "\t" + + "*" + + "\t" + + str(r2.mapping_quality) + + "\n" + ) + + ## Keep initial order if samOut: - r1.tags = r1.tags + [('CT', str(interactionType))] - r2.tags = r2.tags + [('CT', str(interactionType))] + r1.tags = r1.tags + [("CT", str(interactionType))] + r2.tags = r2.tags + [("CT", str(interactionType))] handle_sam.write(r1) handle_sam.write(r2) - if (reads_counter % 100000 == 0 and verbose): + if reads_counter % 100000 == 0 and verbose: print("##", reads_counter) # Close handler @@ -794,9 +851,8 @@ if __name__ == "__main__": handle_single.close() handle_filt.close() - # Write stats file - handle_stat = open(outputDir + '/' + baseReadsFile + '.RSstat', 'w') + handle_stat = open(outputDir + "/" + baseReadsFile + ".RSstat", "w") handle_stat.write("## Hi-C processing\n") handle_stat.write("Valid_interaction_pairs\t" + str(valid_counter) + "\n") handle_stat.write("Valid_interaction_pairs_FF\t" + str(valid_counter_FF) + "\n") @@ -815,10 +871,20 @@ if __name__ == "__main__": handle_stat.write("## ======================================\n") handle_stat.write("## Allele specific information\n") handle_stat.write("Valid_pairs_from_ref_genome_(1-1)\t" + str(G1G1_ascounter) + "\n") - handle_stat.write("Valid_pairs_from_ref_genome_with_one_unassigned_mate_(0-1/1-0)\t" + str(UG1_ascounter+G1U_ascounter) + "\n") + handle_stat.write( + "Valid_pairs_from_ref_genome_with_one_unassigned_mate_(0-1/1-0)\t" + + str(UG1_ascounter + G1U_ascounter) + + "\n" + ) handle_stat.write("Valid_pairs_from_alt_genome_(2-2)\t" + str(G2G2_ascounter) + "\n") - handle_stat.write("Valid_pairs_from_alt_genome_with_one_unassigned_mate_(0-2/2-0)\t" + str(UG2_ascounter+G2U_ascounter) + "\n") - handle_stat.write("Valid_pairs_from_alt_and_ref_genome_(1-2/2-1)\t" + str(G1G2_ascounter+G2G1_ascounter) + "\n") + handle_stat.write( + "Valid_pairs_from_alt_genome_with_one_unassigned_mate_(0-2/2-0)\t" + + str(UG2_ascounter + G2U_ascounter) + + "\n" + ) + handle_stat.write( + "Valid_pairs_from_alt_and_ref_genome_(1-2/2-1)\t" + str(G1G2_ascounter + G2G1_ascounter) + "\n" + ) handle_stat.write("Valid_pairs_with_both_unassigned_mated_(0-0)\t" + str(UU_ascounter) + "\n") handle_stat.write("Valid_pairs_with_at_least_one_conflicting_mate_(3-)\t" + str(CF_ascounter) + "\n") diff --git a/bin/markdown_to_html.py b/bin/markdown_to_html.py deleted file mode 100755 index a26d1ff5e6de3c09385760e76cc40f11a512b3a4..0000000000000000000000000000000000000000 --- a/bin/markdown_to_html.py +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function -import argparse -import markdown -import os -import sys -import io - - -def convert_markdown(in_fn): - input_md = io.open(in_fn, mode="r", encoding="utf-8").read() - html = markdown.markdown( - "[TOC]\n" + input_md, - extensions=["pymdownx.extra", "pymdownx.b64", "pymdownx.highlight", "pymdownx.emoji", "pymdownx.tilde", "toc"], - extension_configs={ - "pymdownx.b64": {"base_path": os.path.dirname(in_fn)}, - "pymdownx.highlight": {"noclasses": True}, - "toc": {"title": "Table of Contents"}, - }, - ) - return html - - -def wrap_html(contents): - header = """<!DOCTYPE html><html> - <head> - <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css" integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous"> - <style> - body { - 
font-family: -apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji"; - padding: 3em; - margin-right: 350px; - max-width: 100%; - } - .toc { - position: fixed; - right: 20px; - width: 300px; - padding-top: 20px; - overflow: scroll; - height: calc(100% - 3em - 20px); - } - .toctitle { - font-size: 1.8em; - font-weight: bold; - } - .toc > ul { - padding: 0; - margin: 1rem 0; - list-style-type: none; - } - .toc > ul ul { padding-left: 20px; } - .toc > ul > li > a { display: none; } - img { max-width: 800px; } - pre { - padding: 0.6em 1em; - } - h2 { - - } - </style> - </head> - <body> - <div class="container"> - """ - footer = """ - </div> - </body> - </html> - """ - return header + contents + footer - - -def parse_args(args=None): - parser = argparse.ArgumentParser() - parser.add_argument("mdfile", type=argparse.FileType("r"), nargs="?", help="File to convert. Defaults to stdin.") - parser.add_argument( - "-o", "--out", type=argparse.FileType("w"), default=sys.stdout, help="Output file name. Defaults to stdout." - ) - return parser.parse_args(args) - - -def main(args=None): - args = parse_args(args) - converted_md = convert_markdown(args.mdfile.name) - html = wrap_html(converted_md) - args.out.write(html) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/bin/mergeSAM.py b/bin/mergeSAM.py index a907fd77438307ffc808ce7d5ac0d7684c22f5f8..82ab8c34d979240d12de57b1f2510a54a79d2ca9 100755 --- a/bin/mergeSAM.py +++ b/bin/mergeSAM.py @@ -1,7 +1,7 @@ #!/usr/bin/env python ## HiC-Pro -## Copyright (c) 2015 Institut Curie +## Copyright (c) 2015 Institut Curie ## Author(s): Nicolas Servant, Eric Viara ## Contact: nicolas.servant@curie.fr ## This software is distributed without any guarantee under the terms of the BSD-3 licence. 
@@ -20,6 +20,7 @@ import os import re import pysam + def usage(): """Usage function""" print("Usage : python mergeSAM.py") @@ -41,10 +42,8 @@ def get_args(): opts, args = getopt.getopt( sys.argv[1:], "f:r:o:q:smtvh", - ["forward=", - "reverse=", - "output=", "qual=", - "single", "multi", "stat", "verbose", "help"]) + ["forward=", "reverse=", "output=", "qual=", "single", "multi", "stat", "verbose", "help"], + ) except getopt.GetoptError: usage() sys.exit(-1) @@ -53,24 +52,26 @@ def get_args(): def is_unique_bowtie2(read): ret = False - if not read.is_unmapped and read.has_tag('AS'): - if read.has_tag('XS'): - primary = read.get_tag('AS') - secondary = read.get_tag('XS') - if (primary > secondary): + if not read.is_unmapped and read.has_tag("AS"): + if read.has_tag("XS"): + primary = read.get_tag("AS") + secondary = read.get_tag("XS") + if primary > secondary: ret = True else: ret = True return ret + ## Remove everything after "/" or " " in read's name def get_read_name(read): name = read.query_name - #return name.split("/",1)[0] - return re.split('/| ', name)[0] + # return name.split("/",1)[0] + return re.split("/| ", name)[0] + def sam_flag(read1, read2, hr1, hr2): - + f1 = read1.flag f2 = read2.flag @@ -81,7 +82,7 @@ def sam_flag(read1, read2, hr1, hr2): if r2.is_unmapped == False: r2_chrom = hr2.get_reference_name(r2.reference_id) else: - r2_chrom="*" + r2_chrom = "*" ##Relevant bitwise flags (flag in an 11-bit binary number) ##1 The read is one of a pair @@ -92,54 +93,53 @@ def sam_flag(read1, read2, hr1, hr2): ##32 The other mate in the paired-end alignment is aligned to the reverse reference strand ##64 The read is the first (#1) mate in a pair ##128 The read is the second (#2) mate in a pair - - ##The reads were mapped as single-end data, so should expect flags of + + ##The reads were mapped as single-end data, so should expect flags of ##0 (map to the '+' strand) or 16 (map to the '-' strand) - ##Output example: a paired-end read that aligns to the reverse strand + ##Output example: a paired-end read that aligns to the reverse strand ##and is the first mate in the pair will have flag 83 (= 64 + 16 + 2 + 1) - + if f1 & 0x4: f1 = f1 | 0x8 if f2 & 0x4: f2 = f2 | 0x8 - - if (not (f1 & 0x4) and not (f2 & 0x4)): + + if not (f1 & 0x4) and not (f2 & 0x4): ##The flag should now indicate this is paired-end data f1 = f1 | 0x1 f1 = f1 | 0x2 f2 = f2 | 0x1 - f2 = f2 | 0x2 - + f2 = f2 | 0x2 + ##Indicate if the pair is on the reverse strand if f1 & 0x10: f2 = f2 | 0x20 - + if f2 & 0x10: f1 = f1 | 0x20 - + ##Is this first or the second pair? f1 = f1 | 0x40 f2 = f2 | 0x80 - + ##Insert the modified bitwise flags into the reads read1.flag = f1 read2.flag = f2 - + ##Determine the RNEXT and PNEXT values (i.e. 
the positional values of a read's pair) - #RNEXT + # RNEXT if r1_chrom == r2_chrom: read1.next_reference_id = r1.reference_id read2.next_reference_id = r1.reference_id else: read1.next_reference_id = r2.reference_id read2.next_reference_id = r1.reference_id - #PNEXT + # PNEXT read1.next_reference_start = read2.reference_start read2.next_reference_start = read1.reference_start - return(read1, read2) - + return (read1, read2) if __name__ == "__main__": @@ -196,13 +196,13 @@ if __name__ == "__main__": tot_pairs_counter = 0 multi_pairs_counter = 0 uniq_pairs_counter = 0 - unmapped_pairs_counter = 0 + unmapped_pairs_counter = 0 lowq_pairs_counter = 0 multi_singles_counter = 0 uniq_singles_counter = 0 lowq_singles_counter = 0 - #local_counter = 0 + # local_counter = 0 paired_reads_counter = 0 singleton_counter = 0 reads_counter = 0 @@ -213,31 +213,31 @@ if __name__ == "__main__": ## Loop on all reads if verbose: print("## Merging forward and reverse tags ...") - - with pysam.Samfile(R1file, "rb") as hr1, pysam.Samfile(R2file, "rb") as hr2: + + with pysam.Samfile(R1file, "rb") as hr1, pysam.Samfile(R2file, "rb") as hr2: if output == "-": outfile = pysam.AlignmentFile(output, "w", template=hr1) else: outfile = pysam.AlignmentFile(output, "wb", template=hr1) - + for r1, r2 in zip(hr1.fetch(until_eof=True), hr2.fetch(until_eof=True)): - reads_counter +=1 - if (reads_counter % 1000000 == 0 and verbose): + reads_counter += 1 + if reads_counter % 1000000 == 0 and verbose: print("##", reads_counter) - + if get_read_name(r1) == get_read_name(r2): ## both unmapped if r1.is_unmapped == True and r2.is_unmapped == True: unmapped_pairs_counter += 1 continue - + ## both mapped elif r1.is_unmapped == False and r2.is_unmapped == False: ## quality if mapq != None and (r1.mapping_quality < int(mapq) or r2.mapping_quality < int(mapq)): lowq_pairs_counter += 1 continue - + ## Unique mapping if is_unique_bowtie2(r1) == True and is_unique_bowtie2(r2) == True: uniq_pairs_counter += 1 @@ -253,7 +253,7 @@ if __name__ == "__main__": continue if r1.is_unmapped == False: ## first end is mapped, second is not ## quality - if mapq != None and (r1.mapping_quality < int(mapq)): + if mapq != None and (r1.mapping_quality < int(mapq)): lowq_singles_counter += 1 continue ## Unique mapping @@ -265,7 +265,7 @@ if __name__ == "__main__": continue else: ## second end is mapped, first is not ## quality - if mapq != None and (r2.mapping_quality < int(mapq)): + if mapq != None and (r2.mapping_quality < int(mapq)): lowq_singles_counter += 1 continue ## Unique mapping @@ -276,34 +276,95 @@ if __name__ == "__main__": if report_multi == False: continue - tot_pairs_counter += 1 - (r1, r2) = sam_flag(r1,r2, hr1, hr2) + tot_pairs_counter += 1 + (r1, r2) = sam_flag(r1, r2, hr1, hr2) ## Write output outfile.write(r1) outfile.write(r2) - + else: - print("Forward and reverse reads not paired. Check that BAM files have the same read names and are sorted.") + print( + "Forward and reverse reads not paired. Check that BAM files have the same read names and are sorted." 
+ ) sys.exit(1) if stat: - if output == '-': + if output == "-": statfile = "pairing.stat" else: - statfile = re.sub('\.bam$', '.pairstat', output) - with open(statfile, 'w') as handle_stat: - handle_stat.write("Total_pairs_processed\t" + str(reads_counter) + "\t" + str(round(float(reads_counter)/float(reads_counter)*100,3)) + "\n") - handle_stat.write("Unmapped_pairs\t" + str(unmapped_pairs_counter) + "\t" + str(round(float(unmapped_pairs_counter)/float(reads_counter)*100,3)) + "\n") - handle_stat.write("Low_qual_pairs\t" + str(lowq_pairs_counter) + "\t" + str(round(float(lowq_pairs_counter)/float(reads_counter)*100,3)) + "\n") - handle_stat.write("Unique_paired_alignments\t" + str(uniq_pairs_counter) + "\t" + str(round(float(uniq_pairs_counter)/float(reads_counter)*100,3)) + "\n") - handle_stat.write("Multiple_pairs_alignments\t" + str(multi_pairs_counter) + "\t" + str(round(float(multi_pairs_counter)/float(reads_counter)*100,3)) + "\n") - handle_stat.write("Pairs_with_singleton\t" + str(singleton_counter) + "\t" + str(round(float(singleton_counter)/float(reads_counter)*100,3)) + "\n") - handle_stat.write("Low_qual_singleton\t" + str(lowq_singles_counter) + "\t" + str(round(float(lowq_singles_counter)/float(reads_counter)*100,3)) + "\n") - handle_stat.write("Unique_singleton_alignments\t" + str(uniq_singles_counter) + "\t" + str(round(float(uniq_singles_counter)/float(reads_counter)*100,3)) + "\n") - handle_stat.write("Multiple_singleton_alignments\t" + str(multi_singles_counter) + "\t" + str(round(float(multi_singles_counter)/float(reads_counter)*100,3)) + "\n") - handle_stat.write("Reported_pairs\t" + str(tot_pairs_counter) + "\t" + str(round(float(tot_pairs_counter)/float(reads_counter)*100,3)) + "\n") + statfile = re.sub("\.bam$", ".pairstat", output) + with open(statfile, "w") as handle_stat: + handle_stat.write( + "Total_pairs_processed\t" + + str(reads_counter) + + "\t" + + str(round(float(reads_counter) / float(reads_counter) * 100, 3)) + + "\n" + ) + handle_stat.write( + "Unmapped_pairs\t" + + str(unmapped_pairs_counter) + + "\t" + + str(round(float(unmapped_pairs_counter) / float(reads_counter) * 100, 3)) + + "\n" + ) + handle_stat.write( + "Low_qual_pairs\t" + + str(lowq_pairs_counter) + + "\t" + + str(round(float(lowq_pairs_counter) / float(reads_counter) * 100, 3)) + + "\n" + ) + handle_stat.write( + "Unique_paired_alignments\t" + + str(uniq_pairs_counter) + + "\t" + + str(round(float(uniq_pairs_counter) / float(reads_counter) * 100, 3)) + + "\n" + ) + handle_stat.write( + "Multiple_pairs_alignments\t" + + str(multi_pairs_counter) + + "\t" + + str(round(float(multi_pairs_counter) / float(reads_counter) * 100, 3)) + + "\n" + ) + handle_stat.write( + "Pairs_with_singleton\t" + + str(singleton_counter) + + "\t" + + str(round(float(singleton_counter) / float(reads_counter) * 100, 3)) + + "\n" + ) + handle_stat.write( + "Low_qual_singleton\t" + + str(lowq_singles_counter) + + "\t" + + str(round(float(lowq_singles_counter) / float(reads_counter) * 100, 3)) + + "\n" + ) + handle_stat.write( + "Unique_singleton_alignments\t" + + str(uniq_singles_counter) + + "\t" + + str(round(float(uniq_singles_counter) / float(reads_counter) * 100, 3)) + + "\n" + ) + handle_stat.write( + "Multiple_singleton_alignments\t" + + str(multi_singles_counter) + + "\t" + + str(round(float(multi_singles_counter) / float(reads_counter) * 100, 3)) + + "\n" + ) + handle_stat.write( + "Reported_pairs\t" + + str(tot_pairs_counter) + + "\t" + + str(round(float(tot_pairs_counter) / float(reads_counter) * 100, 3)) 
+ + "\n" + ) hr1.close() hr2.close() outfile.close() - diff --git a/bin/merge_statfiles.py b/bin/merge_statfiles.py index dc11bf75d31973df86a0eaae0aa1c4b37e004e27..c3986e1e6534eef84c0d11a7e95ee608dc571de2 100755 --- a/bin/merge_statfiles.py +++ b/bin/merge_statfiles.py @@ -1,7 +1,7 @@ #!/usr/bin/env python ## nf-core-hic -## Copyright (c) 2020 Institut Curie +## Copyright (c) 2020 Institut Curie ## Author(s): Nicolas Servant ## Contact: nicolas.servant@curie.fr ## This software is distributed without any guarantee under the terms of the BSD-3 licence. @@ -17,6 +17,7 @@ import glob import os from collections import OrderedDict + def num(s): try: return int(s) @@ -26,30 +27,30 @@ def num(s): if __name__ == "__main__": ## Read command line arguments - parser = argparse.ArgumentParser() - parser.add_argument("-f", "--files", help="List of input file(s)", type=str, nargs='+') - parser.add_argument("-v", "--verbose", help="verbose mode", action='store_true') + parser = argparse.ArgumentParser() + parser.add_argument("-f", "--files", help="List of input file(s)", type=str, nargs="+") + parser.add_argument("-v", "--verbose", help="verbose mode", action="store_true") args = parser.parse_args() - + infiles = args.files li = len(infiles) if li > 0: if args.verbose: print("## merge_statfiles.py") - print("## Merging "+ str(li)+" files") - + print("## Merging " + str(li) + " files") + ## Reading first file to get the template template = OrderedDict() if args.verbose: - print("## Use "+infiles[0]+" as template") + print("## Use " + infiles[0] + " as template") with open(infiles[0]) as f: for line in f: if not line.startswith("#"): lsp = line.strip().split("\t") - data = map(num, lsp[1:len(lsp)]) + data = map(num, lsp[1 : len(lsp)]) template[str(lsp[0])] = list(data) - + if len(template) == 0: print("Cannot find template files !") sys.exit(1) @@ -63,20 +64,21 @@ if __name__ == "__main__": if lsp[0] in template: for i in list(range(1, len(lsp))): if isinstance(num(lsp[i]), int): - template[lsp[0]][i-1] += num(lsp[i]) + template[lsp[0]][i - 1] += num(lsp[i]) else: - template[lsp[0]][i-1] = round((template[lsp[0]][i-1] + num(lsp[i]))/2,3) + template[lsp[0]][i - 1] = round((template[lsp[0]][i - 1] + num(lsp[i])) / 2, 3) else: - sys.stderr.write("Warning : '"+lsp[0]+"' not found in template ["+infiles[fidx]+"]\n") - + sys.stderr.write( + "Warning : '" + lsp[0] + "' not found in template [" + infiles[fidx] + "]\n" + ) + ## Print template for x in template: sys.stdout.write(x) for y in template[x]: - sys.stdout.write("\t"+str(y)) + sys.stdout.write("\t" + str(y)) sys.stdout.write("\n") else: print("No files to merge - stop") sys.exit(1) - diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py deleted file mode 100755 index 5ff3fcfe270923ed0aeeec220e82a348a529b3e4..0000000000000000000000000000000000000000 --- a/bin/scrape_software_versions.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function -from collections import OrderedDict -import re - -# Add additional regexes for new tools in process get_software_versions -regexes = { - 'nf-core/hic': ['v_pipeline.txt', r"(\S+)"], - 'Nextflow': ['v_nextflow.txt', r"(\S+)"], - 'Bowtie2': ['v_bowtie2.txt', r"bowtie2-align-s version (\S+)"], - 'Python': ['v_python.txt', r"Python (\S+)"], - 'Samtools': ['v_samtools.txt', r"samtools (\S+)"], - 'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"], -} -results = OrderedDict() -results['nf-core/hic'] = '<span style="color:#999999;\">N/A</span>' 
-results['Nextflow'] = '<span style="color:#999999;\">N/A</span>' -results['Bowtie2'] = '<span style="color:#999999;\">N/A</span>' -results['Python'] = '<span style="color:#999999;\">N/A</span>' -results['Samtools'] = '<span style="color:#999999;\">N/A</span>' -results['MultiQC'] = '<span style="color:#999999;\">N/A</span>' - -# Search each file using its regex -for k, v in regexes.items(): - try: - with open(v[0]) as x: - versions = x.read() - match = re.search(v[1], versions) - if match: - results[k] = "v{}".format(match.group(1)) - except IOError: - results[k] = False - -# Remove software set to false in results -for k in list(results): - if not results[k]: - del results[k] - -# Dump to YAML -print( - """ -id: 'software_versions' -section_name: 'nf-core/hic Software Versions' -section_href: 'https://github.com/nf-core/hic' -plot_type: 'html' -description: 'are collected at run time from the software output.' -data: | - <dl class="dl-horizontal"> -""" -) -for k, v in results.items(): - print(" <dt>{}</dt><dd><samp>{}</samp></dd>".format(k, v)) -print(" </dl>") - -# Write out regexes as csv file: -with open("software_versions.csv", "w") as f: - for k, v in results.items(): - f.write("{}\t{}\n".format(k, v)) diff --git a/bin/src/cutsite_trimming.cpp b/bin/src/cutsite_trimming.cpp index ef3fa869cd3bfe5f4e473908224cb42c2b99cbfe..c7b6608123666e18364f379e3a48de03203102b7 100644 --- a/bin/src/cutsite_trimming.cpp +++ b/bin/src/cutsite_trimming.cpp @@ -1,13 +1,11 @@ // HiC-Pro -// Copyright 2015 Institut Curie +// Copyright 2015 Institut Curie // Author(s): Nicolas Servant // Contact: nicolas.servant@curie.fr // This software is distributed without any guarantee under the terms of the BSD-3 licence - // g++ -std=c++0x -o cutsite_trimming cutsite_trimming.cpp //./cutsite_trimming -fastq fastq -cutsite AGCTT - #include <iostream> // std::cout #include <stdlib.h> #include <string.h> @@ -18,136 +16,128 @@ static const char* prog; static int usage(int ret=1) { - std::cerr << "usage: " << prog << " --fastq FASTQFILE --cutsite CUTSITE --out OUTFILE [--rmuntrim] \n"; - std::cerr << "usage: " << prog << " --help\n"; - return ret; + std::cerr << "usage: " << prog << " --fastq FASTQFILE --cutsite CUTSITE --out OUTFILE [--rmuntrim] \n"; + std::cerr << "usage: " << prog << " --help\n"; + return ret; } -static int get_options(int argc, char* argv[], std::string& fastqFile, - std::vector<std::string>& cutSites, std::string& output, bool& rmuntrim) +static int get_options(int argc, char* argv[], std::string& fastqFile, std::vector<std::string>& cutSites, std::string& output, bool& rmuntrim) { - prog = argv[0]; - if (argc == 1){ - exit(usage()); - } - for (int ac = 1; ac < argc; ++ac) { - const char* opt = argv[ac]; - if (*opt == '-') { - if (!strcmp(opt, "--fastq")) { - fastqFile = std::string(argv[++ac]); - } else if (!strcmp(opt, "--cutsite")) { - - std::string cutSitesSequence; - cutSitesSequence = std::string(argv[++ac]); - size_t pos = cutSitesSequence.find(","); - size_t begin = 0; - while(pos != std::string::npos){ - cutSites.push_back(cutSitesSequence.substr(begin, pos - begin)); - begin = pos + 1; - pos = cutSitesSequence.find(",", begin + 1); + prog = argv[0]; + if (argc == 1){ + exit(usage()); + } + for (int ac = 1; ac < argc; ++ac) { + const char* opt = argv[ac]; + if (*opt == '-') { + if (!strcmp(opt, "--fastq")) { + fastqFile = std::string(argv[++ac]); + } + else if (!strcmp(opt, "--cutsite")) { + std::string cutSitesSequence; + cutSitesSequence = std::string(argv[++ac]); + size_t pos = 
cutSitesSequence.find(","); + size_t begin = 0; + while(pos != std::string::npos){ + cutSites.push_back(cutSitesSequence.substr(begin, pos - begin)); + begin = pos + 1; + pos = cutSitesSequence.find(",", begin + 1); + } + cutSites.push_back(cutSitesSequence.substr(begin, pos)); + } + else if (!strcmp(opt, "--out")) { + output = std::string(argv[++ac]); + } + else if (!strcmp(opt, "--rmuntrim")) { + rmuntrim = true; + } + }else { + std::cerr << prog << ": unknown option " << opt << std::endl; + return usage(); } - cutSites.push_back(cutSitesSequence.substr(begin, pos)); - - } - else if (!strcmp(opt, "--out")) { - output = std::string(argv[++ac]); - } - else if (!strcmp(opt, "--rmuntrim")) { - rmuntrim = true; - } - }else { - std::cerr << prog << ": unknown option " << opt << std::endl; - return usage(); - } - } - return 0; + } + return 0; } -static int trim_fastq(std::string& fastqFile, - std::vector<std::string>& cutSites, - std::string& outFile, bool& rmuntrim) -{ - - int trim_count=0; - std::string ID; - std::ifstream ifs (fastqFile); - std::ofstream ofs (outFile); - - if (ifs.is_open()){ - while (getline(ifs, ID)) { - std::string seq; - std::string dummy; - std::string qual; - - getline(ifs, seq); - getline(ifs, dummy); - getline(ifs, qual); +static int trim_fastq(std::string& fastqFile, std::vector<std::string>& cutSites, std::string& outFile, bool& rmuntrim){ + int trim_count=0; + std::string ID; + std::ifstream ifs (fastqFile); + std::ofstream ofs (outFile); + + if (ifs.is_open()){ + while (getline(ifs, ID)) { + std::string seq; + std::string dummy; + std::string qual; + + getline(ifs, seq); + getline(ifs, dummy); + getline(ifs, qual); + + bool find_pos = false; + size_t pos = std::string::npos; + for (std::vector<std::string>::iterator it = cutSites.begin(); it != cutSites.end(); ++it){ + size_t tmp_pos = seq.find(*it); + if (tmp_pos != std::string::npos) { + // If find_pos is alread True, there is a problem (there are two cut + // sites in the same read).) + if (find_pos == true){ + if(tmp_pos < pos) { + pos = tmp_pos; + } + } else { + find_pos = true; + pos = tmp_pos; + } + } + } - bool find_pos = false; - size_t pos = std::string::npos; - for (std::vector<std::string>::iterator it = cutSites.begin(); it != cutSites.end(); ++it){ - size_t tmp_pos = seq.find(*it); - if (tmp_pos != std::string::npos) { - // If find_pos is alread True, there is a problem (there are two cut - // sites in the same read).) 
- if (find_pos == true){ - if(tmp_pos < pos) { - pos = tmp_pos; + if (pos != std::string::npos) { + trim_count++; + ofs << ID << '\n'; + ofs << seq.substr(0, pos) << '\n'; + ofs << "+\n"; + ofs << qual.substr(0, pos) << '\n'; + } else { + if (!rmuntrim){ + ofs << ID << '\n'; + ofs << seq << '\n'; + ofs << "+\n"; + ofs << qual << '\n'; + } } - } else { - find_pos = true; - pos = tmp_pos; - } - } - } - - if (pos != std::string::npos) { - trim_count++; - ofs << ID << '\n'; - ofs << seq.substr(0, pos) << '\n'; - ofs << "+\n"; - ofs << qual.substr(0, pos) << '\n'; - } else { - if (!rmuntrim){ - ofs << ID << '\n'; - ofs << seq << '\n'; - ofs << "+\n"; - ofs << qual << '\n'; + find_pos = false; } - } - find_pos = false; + }else{ + std::cerr << "Error : Cannot open file : " << fastqFile; } - }else{ - std::cerr << "Error : Cannot open file : " << fastqFile; - } - return trim_count; + return trim_count; } int main(int argc, char* argv[]) { - - std::string fastqFile; - std::vector<std::string> cutSites; - std::string outFile; - bool rmuntrim = false; - - int ret = get_options(argc, argv, fastqFile, cutSites, outFile, rmuntrim); - printf("##Fastq file: %s\n", fastqFile.c_str()); - printf("##Restriction sites:\n"); - for(std::vector<std::string>::iterator it = cutSites.begin(); it != cutSites.end(); ++it){ - std::cout << *it << std::endl; - } - printf("##Output File: %s\n", outFile.c_str()); - - if (fastqFile.empty() || cutSites.size() == 0 || outFile.empty()){ - usage(); - exit(ret); - } + std::string fastqFile; + std::vector<std::string> cutSites; + std::string outFile; + bool rmuntrim = false; + + int ret = get_options(argc, argv, fastqFile, cutSites, outFile, rmuntrim); + printf("##Fastq file: %s\n", fastqFile.c_str()); + printf("##Restriction sites:\n"); + for(std::vector<std::string>::iterator it = cutSites.begin(); it != cutSites.end(); ++it){ + std::cout << *it << std::endl; + } + printf("##Output File: %s\n", outFile.c_str()); - int trim_count=trim_fastq(fastqFile, cutSites, outFile, rmuntrim); - printf("\n##Trimmed reads: %d\n", trim_count); - return(0); - } + if (fastqFile.empty() || cutSites.size() == 0 || outFile.empty()){ + usage(); + exit(ret); + } + int trim_count=trim_fastq(fastqFile, cutSites, outFile, rmuntrim); + printf("\n##Trimmed reads: %d\n", trim_count); + return(0); +} diff --git a/conf/base.config b/conf/base.config index ddec1a8507ded18a2d81923bc87daea40963c346..6808dbe2205a3cbe53975d4d2d2ec2fbc1998ec7 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,46 +1,63 @@ /* - * ------------------------------------------------- - * nf-core/hic Nextflow base config file - * ------------------------------------------------- - * A 'blank slate' config file, appropriate for general - * use on most high performace compute environments. - * Assumes that all software is installed and available - * on the PATH. Runs in `local` mode - all jobs will be - * run on the logged in environment. - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/hic Nextflow base config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on + the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. 
+---------------------------------------------------------------------------------------- +*/ process { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 7.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } - maxRetries = 1 - maxErrors = '-1' - - withLabel:process_low { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 4.GB * task.attempt, 'memory' ) } - time = { check_max( 6.h * task.attempt, 'time' ) } - } - withLabel:process_medium { - cpus = { check_max( 4 * task.attempt, 'cpus' ) } + cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } - } - withLabel:process_high { - cpus = { check_max( 8 * task.attempt, 'cpus' ) } - memory = { check_max( 64.GB * task.attempt, 'memory' ) } - time = { check_max( 10.h * task.attempt, 'time' ) } - } - withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } - } - withLabel:process_highmem { - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - } - withName:get_software_versions { - cache = false - } + time = { check_max( 12.h * task.attempt, 'time' ) } + + errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' + // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. + // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_medium { + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withLabel:process_high { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 64.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } + } + withLabel:process_long { + time = { check_max( 20.h * task.attempt, 'time' ) } + } + withLabel:process_high_memory { + memory = { check_max( 24.GB * task.attempt, 'memory' ) } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } + withName:CUSTOM_DUMPSOFTWAREVERSIONS { + cache = false + } } diff --git a/conf/igenomes.config b/conf/igenomes.config index 1ba2588593f4e1940dc0bf3a3380f0114a71684e..8b49d0827c7f45bb7f874427d7210c7f1eda3421 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -1,162 +1,163 @@ /* - * ------------------------------------------------- - * Nextflow config file for iGenomes paths - * ------------------------------------------------- - * Defines reference genomes, using iGenome paths - * Can be used by any config that customises the base - * path using $params.igenomes_base / --igenomes_base - */ 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for iGenomes paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines reference genomes using iGenome paths. + Can be used by any config that customises the base path using: + $params.igenomes_base / --igenomes_base +---------------------------------------------------------------------------------------- +*/ params { - // illumina iGenomes reference file paths - genomes { - 'GRCh37' { - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" + // illumina iGenomes reference file paths + genomes { + 'GRCh37' { + fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" + } + 'GRCh38' { + fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" + } + 'GRCm38' { + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + } + 'TAIR10' { + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + } + 'EB2' { + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" + } + 'UMD3.1' { + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + } + 'WBcel235' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + } + 'CanFam3.1' { + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + } + 'GRCz10' { + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + } + 'BDGP6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + } + 'EquCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" + } + 'EB1' { + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + } + 'Galgal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = 
"${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" + } + 'Gm01' { + fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" + } + 'Mmul_1' { + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + } + 'IRGSP-1.0' { + fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" + } + 'CHIMP2.1.4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + } + 'Rnor_6.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" + } + 'R64-1-1' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + } + 'EF2' { + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + } + 'Sbi1' { + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" + } + 'Sscrofa10.2' { + fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" + } + 'AGPv3' { + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + } + 'hg38' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + } + 'hg19' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + } + 'mm10' { + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + } + 'bosTau8' { + fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" + } + 'ce10' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" + } + 'canFam3' { + fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + } + 'danRer10' { + fasta = 
"${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" + } + 'dm6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" + } + 'equCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" + } + 'galGal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" + } + 'panTro4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" + } + 'rn6' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" + } + 'sacCer3' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" + } + 'susScr3' { + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" + } } - 'GRCh38' { - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" - } - 'GRCm38' { - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" - } - 'TAIR10' { - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" - } - 'EB2' { - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" - } - 'UMD3.1' { - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" - } - 'WBcel235' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" - } - 'CanFam3.1' { - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" - } - 'GRCz10' { - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" - } - 'BDGP6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = 
"${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" - } - 'EquCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" - } - 'EB1' { - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" - } - 'Galgal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" - } - 'Gm01' { - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" - } - 'Mmul_1' { - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" - } - 'IRGSP-1.0' { - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" - } - 'CHIMP2.1.4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" - } - 'Rnor_6.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" - } - 'R64-1-1' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" - } - 'EF2' { - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" - } - 'Sbi1' { - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" - } - 'Sscrofa10.2' { - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" - } - 'AGPv3' { - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" - } - 'hg38' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" - } - 'hg19' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" - } - 'mm10' { - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" - } - 'bosTau8' { - fasta = 
"${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" - } - 'ce10' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" - } - 'canFam3' { - fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" - } - 'danRer10' { - fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" - } - 'dm6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" - } - 'equCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" - } - 'galGal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" - } - 'panTro4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" - } - 'rn6' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" - } - 'sacCer3' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" - } - 'susScr3' { - fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" - } - } } diff --git a/conf/modules.config b/conf/modules.config new file mode 100644 index 0000000000000000000000000000000000000000..096a86006168e216e1f68863f65e4dd2d5d96c7d --- /dev/null +++ b/conf/modules.config @@ -0,0 +1,289 @@ +process { + + //Default + publishDir = [ + path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + + withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + mode: 'copy', + pattern: '*_versions.yml' + ] + } + + //********************************************** + // PREPARE_GENOME + withName: 'BOWTIE2_BUILD' { + publishDir = [ + path: { "${params.outdir}/genome/bowtie2" }, + mode: 'copy', + enabled: params.save_reference + ] + } + + withName: 'CUSTOM_GETCHROMSIZES' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: 'copy', + enabled: params.save_reference + ] + } + + withName: 'GET_RESTRICTION_FRAGMENTS' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: 'copy', + enabled: params.save_reference + ] + } + + //******************************************* + // HICPRO + withName: 'BOWTIE2_ALIGN' { + publishDir = [ + path: { "${params.outdir}/hicpro/mapping" }, + mode: 'copy', + enabled: params.save_aligned_intermediates + ] + ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" } + ext.args = params.bwt2_opts_end2end ?: '' + ext.args2 = !params.dnase ? "-F 4" :"" + } + + withName: 'TRIM_READS' { + publishDir = [ + path: { "${params.outdir}/hicpro/mapping/" }, + mode: 'copy', + enabled: params.save_aligned_intermediates + ] + } + + withName: 'BOWTIE2_ALIGN_TRIMMED' { + publishDir = [ + path: { "${params.outdir}/hicpro/mapping" }, + mode: 'copy', + enabled: params.save_aligned_intermediates + ] + ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}_trimmed" } + ext.args = params.bwt2_opts_trimmed ?: '' + ext.args2 = "" + } + + withName: 'MERGE_BOWTIE2' { + publishDir = [ + path: { "${params.outdir}/hicpro/mapping" }, + mode: 'copy', + enabled: params.save_aligned_intermediates + ] + ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" } + } + + withName: 'COMBINE_MATES' { + publishDir = [ + path: { "${params.outdir}/hicpro/mapping" }, + mode: 'copy', + pattern: '*.bam' + ] + ext.args = [ + "-t", + params.keep_multi ? "--multi" : "", + params.min_mapq ? "-q ${params.min_mapq}" : "" + ].join(' ').trim() + ext.prefix = { "${meta.id}_${meta.chunk}" } + } + + withName: 'GET_VALID_INTERACTION' { + publishDir = [ + path: { "${params.outdir}/hicpro/valid_pairs" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename}, + mode: 'copy', + enabled: params.save_pairs_intermediates + ] + ext.args = { [ + params.min_cis_dist > 0 ? " -d ${params.min_cis_dist}" : '', + params.min_insert_size > 0 ? " -s ${params.min_insert_size}" : '', + params.max_insert_size > 0 ? " -l ${params.max_insert_size}" : '', + params.min_restriction_fragment_size > 0 ? " -t ${params.min_restriction_fragment_size}" : '', + params.max_restriction_fragment_size > 0 ? " -m ${params.max_restriction_fragment_size}" : '', + params.save_interaction_bam ? " --sam" : '' + ].join(' ').trim() } + } + + withName: 'GET_VALID_INTERACTION_DNASE' { + publishDir = [ + path: { "${params.outdir}/hicpro/valid_pairs" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + mode: 'copy', + enabled: params.save_pairs_intermediates + ] + ext.args = { params.min_cis_dist > 0 ? " -d ${params.min_cis_dist}" : "" } + } + + withName: 'MERGE_VALID_INTERACTION' { + publishDir = [ + [ + path: { "${params.outdir}/hicpro/stats/${meta.id}" }, + mode: 'copy', + pattern: "*stat" + ], + [ + path: { "${params.outdir}/hicpro/valid_pairs" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + pattern: "*Pairs" + ] + ] + ext.args = { params.keep_dups ? 
'' : '-d' } + } + + withName: 'MERGE_STATS' { + publishDir = [ + path: { "${params.outdir}/hicpro/stats/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + mode: 'copy', + pattern: "*stat" + ] + } + + withName: 'HICPRO2PAIRS' { + publishDir = [ + path: { "${params.outdir}/hicpro/valid_pairs/pairix/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + mode: 'copy' + ] + } + + withName: 'BUILD_CONTACT_MAPS' { + publishDir = [ + path: { "${params.outdir}/hicpro/matrix/raw" }, + mode: 'copy', + enabled: params.hicpro_maps + ] + ext.prefix = { "${meta.id}.${resolution}" } + } + + withName: 'ICE_NORMALIZATION' { + publishDir = [ + path: { "${params.outdir}/hicpro/matrix/iced" }, + mode: 'copy', + enabled: params.hicpro_maps + ] + } + + //***************************************** + // QUALITY METRICS + + withName: 'HIC_PLOT_DIST_VS_COUNTS'{ + publishDir = [ + path: { "${params.outdir}/distance_decay/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + mode: 'copy' + ] + } + + //***************************************** + // COOLER + + withName: 'COOLER_MAKEBINS' { + publishDir = [ + path: { "${params.outdir}/contact_maps/bins/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + mode: 'copy' + ] + ext.prefix={ "cooler_bins_${cool_bin}" } + } + + withName: 'COOLER_CLOAD' { + publishDir = [ + path: { "${params.outdir}/contact_maps/cool/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + mode: 'copy', + enabled : params.save_raw_maps + ] + ext.prefix = { "${meta.id}.${cool_bin}" } + ext.args = "pairs -c1 2 -p1 3 -c2 4 -p2 5" + } + + withName: 'COOLER_BALANCE' { + publishDir = [ + path: { "${params.outdir}/contact_maps/cool/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + mode: 'copy' + ] + ext.args = '--force' + ext.prefix = { "${cool.baseName}_balanced" } + } + + withName: 'COOLER_DUMP' { + publishDir = [ + enabled: false + ] + ext.prefix = { "${cool.baseName}" } + ext.args = "--one-based-ids --balanced --na-rep 0" + } + + withName:'SPLIT_COOLER_DUMP' { + publishDir = [ + [ + path: { "${params.outdir}/contact_maps/txt/" }, + mode: 'copy', + pattern: "*_raw.txt", + enabled: params.save_raw_maps + ], + [ + path: { "${params.outdir}/contact_maps/txt/" }, + mode: 'copy', + pattern: "*_balanced.txt" + ] + ] + } + + withName: 'COOLER_ZOOMIFY' { + publishDir = [ + path: { "${params.outdir}/contact_maps/cool/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + mode: 'copy' + ] + ext.args = "--balance" + } + + //******************************** + // COMPARTMENTS + + withName: 'COOLTOOLS_EIGSCIS' { + publishDir = [ + path: { "${params.outdir}/compartments/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + mode: 'copy' + ] + ext.args = '--bigwig' + ext.prefix = { "${meta.id}.${resolution}" } + } + + //******************************** + // TADS + + withName: 'COOLTOOLS_INSULATION' { + publishDir = [ + path: { "${params.outdir}/tads/insulation/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + mode: 'copy' + ] + ext.args = '15 25 50 --window-pixels' + ext.prefix = { "${cool.baseName}" } + } + + withName: 'HIC_FIND_TADS' { + publishDir = [ + path: { "${params.outdir}/tads/hicExplorer" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + mode: 'copy' + ] + ext.args = '--correctForMultipleTesting fdr' + ext.prefix = { "${cool.baseName}" } + } +} diff --git a/conf/test.config b/conf/test.config index 5c5fc84c35989f039418aeba4bc5b5b1c10da1a6..1501b027bf9a0af965f021c6d85604de690834ff 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,41 +1,42 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/hic -profile test,<docker/singularity> - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/hic -profile test,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ params { - config_profile_name = 'Hi-C test data from Schalbetter et al. (2017)' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Limit resources so that this can run on Travis - max_cpus = 2 - max_memory = 4.GB - max_time = 1.h - - // Input data - input_paths = [ - ['SRR4292758_00', ['https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R2.fastq.gz']] - ] - - // Annotations - fasta = 'https://github.com/nf-core/test-datasets/raw/hic/reference/W303_SGD_2015_JRIU00000000.fsa' - digestion = 'hindiii' - min_mapq = 10 - min_restriction_fragment_size = 100 - max_restriction_fragment_size = 100000 - min_insert_size = 100 - max_insert_size = 600 - - bin_size = '1000' - res_dist_decay = '1000' - res_tads = '1000' - tads_caller = 'insulation,hicexplorer' - res_compartments = '1000' - - // Ignore `--input` as otherwise the parameter validation will throw an error - schema_ignore_params = 'genomes,digest,input_paths,input' + config_profile_name = 'Hi-C test data from Schalbetter et al. 
(2017)' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on Travis + max_cpus = 2 + max_memory = 4.GB + max_time = 1.h + + // Input data + input = "${baseDir}/assets/samplesheet.csv" + + // Annotations + fasta = 'https://github.com/nf-core/test-datasets/raw/hic/reference/W303_SGD_2015_JRIU00000000.fsa' + digestion = 'hindiii' + min_mapq = 10 + min_restriction_fragment_size = 100 + max_restriction_fragment_size = 100000 + min_insert_size = 100 + max_insert_size = 600 + + bin_size = '2000,1000' + res_dist_decay = '1000' + res_tads = '1000' + tads_caller = 'insulation,hicexplorer' + res_compartments = '2000' + + // Ignore `--input` as otherwise the parameter validation will throw an error + schema_ignore_params = 'genomes,digest,input_paths,input' } diff --git a/conf/test_full.config b/conf/test_full.config index 1e793cc57628bdbed6bbe322e558bffc0e15a3d1..f7b39d4fe2c22e98608b0ce00dec75274bde30ca 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -1,36 +1,27 @@ /* - * ------------------------------------------------- - * Nextflow config file for running full-size tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a full size pipeline test. Use as follows: - * nextflow run nf-core/hic -profile test_full,<docker/singularity> - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. + + Use as follows: + nextflow run nf-core/hic -profile test_full,<docker/singularity> --outdir <OUTDIR> +---------------------------------------------------------------------------------------- +*/ params { - config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset to check pipeline function' + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' - // Input data for full size test - input_paths = [ - ['SRR4292758_00', ['https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R2.fastq.gz']] - ] + // Input data for full size test + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/hic/samplesheet/samplesheet_HiC_mESC_full_test.csv' - // Annotations - fasta = 'https://github.com/nf-core/test-datasets/raw/hic/reference/W303_SGD_2015_JRIU00000000.fsa' - digestion = 'hindiii' - min_mapq = 10 - min_restriction_fragment_size = 100 - max_restriction_fragment_size = 100000 - min_insert_size = 100 - max_insert_size = 600 + // Genome references + genome = 'mm10' - bin_size = '1000' - res_dist_decay = '1000' - res_tads = '1000' - tads_caller = 'insulation,hicexplorer' - res_compartments = '1000' - - // Ignore `--input` as otherwise the parameter validation will throw an error - schema_ignore_params = 'genomes,digest,input_paths,input' + // Other options + digestion = 'dpnii' + bin_size = '40000,250000,500000,1000000' + res_compartments = '500000,250000' + res_tads = '40000,20000' } diff --git a/docs/README.md b/docs/README.md index a6889549c7f27bda0aed81947685713781fe2d1b..d673c508157b82e49fb5a9b1390d905734671cd0 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,9 +2,9 @@ The nf-core/hic documentation 
is split into the following pages: -* [Usage](usage.md) - * An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. -* [Output](output.md) - * An overview of the different results produced by the pipeline and how to interpret them. +- [Usage](usage.md) + - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. +- [Output](output.md) + - An overview of the different results produced by the pipeline and how to interpret them. You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re) diff --git a/docs/images/mqc_fastqc_adapter.png b/docs/images/mqc_fastqc_adapter.png new file mode 100755 index 0000000000000000000000000000000000000000..361d0e47acfb424dea1f326590d1eb2f6dfa26b5 Binary files /dev/null and b/docs/images/mqc_fastqc_adapter.png differ diff --git a/docs/images/mqc_fastqc_counts.png b/docs/images/mqc_fastqc_counts.png new file mode 100755 index 0000000000000000000000000000000000000000..cb39ebb80a71dc4cdeee076c107e30a6c944441b Binary files /dev/null and b/docs/images/mqc_fastqc_counts.png differ diff --git a/docs/images/mqc_fastqc_quality.png b/docs/images/mqc_fastqc_quality.png new file mode 100755 index 0000000000000000000000000000000000000000..a4b89bf56ab2ba88cab87841916eb680a816deae Binary files /dev/null and b/docs/images/mqc_fastqc_quality.png differ diff --git a/docs/images/nf-core-hic_logo.png b/docs/images/nf-core-hic_logo.png deleted file mode 100644 index 274eb3dc3f3db879c7f3cbc3fd8f49a705a9a3fb..0000000000000000000000000000000000000000 Binary files a/docs/images/nf-core-hic_logo.png and /dev/null differ diff --git a/docs/images/nf-core-hic_logo_dark.png b/docs/images/nf-core-hic_logo_dark.png new file mode 100644 index 0000000000000000000000000000000000000000..e245502fd09cb0c22db3025a4e6c9ac4476a77a1 Binary files /dev/null and b/docs/images/nf-core-hic_logo_dark.png differ diff --git a/docs/images/nf-core-hic_logo_light.png b/docs/images/nf-core-hic_logo_light.png new file mode 100644 index 0000000000000000000000000000000000000000..5601950978b384469e001f12c14318c51dfb064d Binary files /dev/null and b/docs/images/nf-core-hic_logo_light.png differ diff --git a/docs/output.md b/docs/output.md index 8b3fd0a40579b5ee19f107acdf6f531a8d98702f..9f6f7033efa9e7f87d190d5a6c1bdb548eeff9e9 100644 --- a/docs/output.md +++ b/docs/output.md @@ -7,32 +7,34 @@ The directories listed below will be created in the results directory after the ## Pipeline overview -The pipeline is built using [Nextflow](https://www.nextflow.io/) -and processes data using the following steps: - -* [HiC-Pro](#hicpro) - * [Reads alignment](#reads-alignment) - * [Valid pairs detection](#valid-pairs-detection) - * [Duplicates removal](#duplicates-removal) - * [Contact maps](#hicpro-contact-maps) -* [Hi-C contact maps](#hic-contact-maps) -* [Downstream analysis](#downstream-analysis) - * [Distance decay](#distance-decay) - * [Compartments calling](#compartments-calling) - * [TADs calling](#tads-calling) -* [MultiQC](#multiqc) - aggregate report and quality controls, describing -results of the whole pipeline -* [Export](#exprot) - additionnal export for compatibility with downstream -analysis tool and visualization - -## HiC-Pro +The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: + +- [From raw data to valid pairs](#from-raw-data-to-valid-pairs) + - 
[HiC-Pro](#hicpro) + - [Reads alignment](#reads-alignment) + - [Valid pairs detection](#valid-pairs-detection) + - [Duplicates removal](#duplicates-removal) + - [Contact maps](#hicpro-contact-maps) +- [Hi-C contact maps](#hic-contact-maps) +- [Downstream analysis](#downstream-analysis) + - [Distance decay](#distance-decay) + - [Compartments calling](#compartments-calling) + - [TADs calling](#tads-calling) +- [MultiQC](#multiqc) - aggregate report and quality controls, describing + results of the whole pipeline +- [Export](#exprot) - additionnal export for compatibility with downstream + analysis tool and visualization + +## From raw data to valid pairs + +### HiC-Pro The current version is mainly based on the [HiC-Pro](https://github.com/nservant/HiC-Pro) pipeline. For details about the workflow, see [Servant et al. 2015](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-015-0831-x) -### Reads alignment +#### Reads alignment Using Hi-C data, each reads mate has to be independantly aligned on the reference genome. @@ -42,57 +44,55 @@ Second, reads spanning the ligation junction are trimmmed from their 3' end, and aligned back on the genome. Aligned reads for both fragment mates are then paired in a single paired-end BAM file. -Singletons are discarded, and multi-hits are filtered according to the -configuration parameters (`--rm-multi`). +Singletons and low quality mapped reads are filtered (`--min_mapq`). Note that if the `--dnase` mode is activated, HiC-Pro will skip the second mapping step. **Output directory: `results/hicpro/mapping`** -* `*bwt2pairs.bam` - final BAM file with aligned paired data -* `*.pairstat` - mapping statistics +- `*bwt2pairs.bam` - final BAM file with aligned paired data -if `--saveAlignedIntermediates` is specified, additional mapping file results +if `--save_aligned_intermediates` is specified, additional mapping file results are available ; -* `*.bam` - Aligned reads (R1 and R2) from end-to-end alignment -* `*_unmap.fastq` - Unmapped reads after end-to-end alignment -* `*_trimmed.fastq` - Trimmed reads after end-to-end alignment -* `*_trimmed.bam` - Alignment of trimmed reads -* `*bwt2merged.bam` - merged BAM file after the two-steps alignment -* `*.mapstat` - mapping statistics per read mate +- `*.bam` - Aligned reads (R1 and R2) from end-to-end alignment +- `*_unmap.fastq` - Unmapped reads after end-to-end alignment +- `*_trimmed.fastq` - Trimmed reads after end-to-end alignment +- `*_trimmed.bam` - Alignment of trimmed reads +- `*bwt2merged.bam` - merged BAM file after the two-steps alignment +- `*.mapstat` - mapping statistics per read mate Usually, a high fraction of reads is expected to be aligned on the genome (80-90%). Among them, we usually observed a few percent (around 10%) of step 2 aligned reads. Those reads are chimeric fragments for which we detect a ligation junction. An abnormal level of chimeric reads can reflect a ligation issue during the library preparation. -The fraction of singleton or multi-hits depends on the genome complexity and +The fraction of singleton or low quality reads depends on the genome complexity and the fraction of unmapped reads. The fraction of singleton is usually close to the sum of unmapped R1 and R2 reads, as it is unlikely that both mates from the same pair were unmapped. -### Valid pairs detection with HiC-Pro +#### Valid pairs detection with HiC-Pro Each aligned reads can be assigned to one restriction fragment according to the reference genome and the digestion protocol. 
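+
+If you want to sanity-check this assignment for a few reads yourself, one option (outside of the pipeline) is to intersect the aligned reads with the restriction fragment BED file that the pipeline can export (see `--save_reference`). The sketch below is only illustrative: it assumes `bedtools` is installed, and the file names are placeholders for your own run.
+
+```bash
+# Report, for each aligned read, the restriction fragment it overlaps
+bedtools bamtobed -i sample_R1.bam \
+  | bedtools intersect -a stdin -b restriction_fragments.bed -wa -wb \
+  | head
+```
+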
Invalid pairs are classified as follow: -* Dangling end, i.e. unligated fragments (both reads mapped on the same -restriction fragment) -* Self circles, i.e. fragments ligated on themselves (both reads mapped on the -same restriction fragment in inverted orientation) -* Religation, i.e. ligation of juxtaposed fragments -* Filtered pairs, i.e. any pairs that do not match the filtering criteria on -inserts size, restriction fragments size -* Dumped pairs, i.e. any pairs for which we were not able to reconstruct the -ligation product. +- Dangling end, i.e. unligated fragments (both reads mapped on the same + restriction fragment) +- Self circles, i.e. fragments ligated on themselves (both reads mapped on the + same restriction fragment in inverted orientation) +- Religation, i.e. ligation of juxtaposed fragments +- Filtered pairs, i.e. any pairs that do not match the filtering criteria on + inserts size, restriction fragments size +- Dumped pairs, i.e. any pairs for which we were not able to reconstruct the + ligation product. Only valid pairs involving two different restriction fragments are used to build the contact maps. Duplicated valid pairs associated to PCR artefacts are discarded -(see `--rm_dup`). +(see `--keep_dup` to not discard them). In case of Hi-C protocols that do not require a restriction enzyme such as DNase Hi-C or micro Hi-C, the assignment to a restriction is not possible @@ -102,19 +102,20 @@ can thus be discarded using the `--min_cis_dist` parameter. **Output directory: `results/hicpro/valid_pairs`** -* `*.validPairs` - List of valid ligation products -* `*.DEpairs` - List of dangling-end products -* `*.SCPairs` - List of self-circle products -* `*.REPairs` - List of religation products -* `*.FiltPairs` - List of filtered pairs -* `*RSstat` - Statitics of number of read pairs falling in each category +- `*.validPairs` - List of valid ligation products +- `*.DEpairs` - List of dangling-end products +- `*.SCPairs` - List of self-circle products +- `*.REPairs` - List of religation products +- `*.FiltPairs` - List of filtered pairs +- `*RSstat` - Statitics of number of read pairs falling in each category -The validPairs are stored using a simple tab-delimited text format ; +Of note, these results are saved only if `--save_pairs_intermediates` is used. +The `validPairs` are stored using a simple tab-delimited text format ; ```bash read name / chr_reads1 / pos_reads1 / strand_reads1 / chr_reads2 / pos_reads2 / strand_reads2 / fragment_size / res frag name R1 / res frag R2 / mapping qual R1 -/ mapping qual R2 [/ allele_specific_tag] +/ mapping qual R2 ``` The ligation efficiency can be assessed using the filtering of valid and @@ -128,16 +129,16 @@ is skipped. The aligned pairs are therefore directly used to generate the contact maps. A filter of the short range contact (typically <1kb) is recommanded as this pairs are likely to be self ligation products. -### Duplicates removal +#### Duplicates removal -Note that validPairs file are generated per reads chunck. -These files are then merged in the allValidPairs file, and duplicates are -removed if the `--rm_dup` parameter is used. +Note that `validPairs` file are generated per reads chunck (and saved only if +`--save_pairs_intermediates` is specified). +These files are then merged in the `allValidPairs` file, and duplicates are +removed (see `--keep_dups` to disable duplicates filtering). 
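+
+Conceptually, this merge and duplicate removal can be thought of as a coordinate sort of all per-chunk `validPairs` files followed by keeping a single pair per combination of chromosome, position and strand on both mates. The command below is only a sketch of that idea, not the exact command run by the pipeline; column numbers refer to the tab-delimited `validPairs` format shown above, and file names are placeholders.
+
+```bash
+# Merge all chunks, sort on both mates' coordinates, and keep one pair per position/strand combination
+sort -k2,2 -k3,3n -k4,4 -k5,5 -k6,6n -k7,7 sample_*.validPairs \
+  | awk -F'\t' '!seen[$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7]++' \
+  > sample.allValidPairs
+```
+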
**Output directory: `results/hicpro/valid_pairs`** -* `*allValidPairs` - combined valid pairs from all read chunks -* `*mergestat` - statistics about duplicates removal and valid pairs information +- `*allValidPairs` - combined valid pairs from all read chunks Additional quality controls such as fragment size distribution can be extracted from the list of valid interaction products. @@ -145,11 +146,35 @@ We usually expect to see a distribution centered around 300 pb which correspond to the paired-end insert size commonly used. The fraction of dplicates is also presented. A high level of duplication indicates a poor molecular complexity and a potential PCR bias. -Finaly, an important metric is to look at the fraction of intra and +Finally, an important metric is to look at the fraction of intra and inter-chromosomal interactions, as well as long range (>20kb) versus short range (<20kb) intra-chromosomal interactions. -### Contact maps +#### Pairs file + +`.pairs` is a standard tabular format proposed by the 4DN Consortium +for storing DNA contacts detected in a Hi-C experiment +(see https://pairtools.readthedocs.io/en/latest/formats.html). +This format is the entry point of the downstream steps of the pipeline after +detection of valid pairs. + +**Output directory: `results/hicpro/valid_pairs/pairix`** + +- `*pairix` - compressed and indexed pairs file + +#### Statistics + +Various statistics files are generated all along the data processing. +All results are available in `results/hicpro/stats`. + +**Output directory: `results/hicpro/stats`** + +- \*mapstat - mapping statistics per read mate +- \*pairstat - R1/R2 pairing statistics +- \*RSstat - Statitics of number of read pairs falling in each category +- \*mergestat - statistics about duplicates removal and valid pairs information + +#### Contact maps Intra et inter-chromosomal contact maps are build for all specified resolutions. The genome is splitted into bins of equal size. Each valid interaction is @@ -167,15 +192,15 @@ is specified on the command line. **Output directory: `results/hicpro/matrix`** -* `*.matrix` - genome-wide contact maps -* `*_iced.matrix` - genome-wide iced contact maps +- `*.matrix` - genome-wide contact maps +- `*_iced.matrix` - genome-wide iced contact maps The contact maps are generated for all specified resolutions (see `--bin_size` argument). A contact map is defined by : -* A list of genomic intervals related to the specified resolution (BED format). -* A matrix, stored as standard triplet sparse format (i.e. list format). +- A list of genomic intervals related to the specified resolution (BED format). +- A matrix, stored as standard triplet sparse format (i.e. list format). Based on the observation that a contact map is symmetric and usually sparse, only non-zero values are stored for half of the matrix. The user can specified @@ -196,15 +221,16 @@ downstream analysis. ## Hi-C contact maps Contact maps are usually stored as simple txt (`HiC-Pro`), .hic (`Juicer/Juicebox`) and .(m)cool (`cooler/Higlass`) formats. -Note that .cool and .hic format are compressed and usually much more efficient that the txt format. +The .cool and .hic format are compressed and indexed and usually much more efficient that the txt format. In the current workflow, we propose to use the `cooler` format as a standard to build the raw and normalized maps after valid pairs detection as it is used by several downstream analysis and visualization tools. 
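+
+For example, the `.cool` files produced by the workflow can be explored directly with the `cooler` command line tools. This is an optional, manual check; the file names, resolution and region below are placeholders following the `<sample>.<resolution>` naming used by the pipeline.
+
+```bash
+# Show metadata of a contact map: bin size, number of bins, chromosomes, etc.
+cooler info sample.1000000.cool
+
+# Dump the balanced contact counts of a region as a text table
+cooler dump --balanced -r 'chr1:0-2000000' sample.1000000_balanced.cool | head
+```
+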
Raw contact maps are therefore in **`results/contact_maps/raw`** which contains the different maps in `txt` and `cool` formats, at various resolutions. Normalized contact maps are stored in **`results/contact_maps/norm`** which contains the different maps in `txt`, `cool`, and `mcool` format. +The bin coordinates used for all resolutions are available in **`results/contact_maps/bins`**. Note that `txt` contact maps generated with `cooler` are identical to those generated by `HiC-Pro`. -However, differences can be observed on the normalized contact maps as the balancing algorithm is not the same. +However, differences can be observed on the normalized contact maps as the balancing algorithm is not exactly the same. ## Downstream analysis @@ -228,8 +254,8 @@ Here, we use the implementation available in the [`cooltools`](https://cooltools Results are available in **`results/compartments/`** folder and includes : -* `*cis.vecs.tsv`: eigenvectors decomposition along the genome -* `*cis.lam.txt`: eigenvalues associated with the eigenvectors +- `*cis.vecs.tsv`: eigenvectors decomposition along the genome +- `*cis.lam.txt`: eigenvalues associated with the eigenvectors ### TADs calling @@ -240,38 +266,37 @@ TADs calling remains a challenging task, and even if many methods have been prop Currently, the pipeline proposes two approaches : -* Insulation score using the [`cooltools`](https://cooltools.readthedocs.io/en/latest/cli.html#cooltools-diamond-insulation) package. Results are availabe in **`results/tads/insulation`**. -* [`HiCExplorer TADs calling`](https://hicexplorer.readthedocs.io/en/latest/content/tools/hicFindTADs.html). Results are available at **`results/tads/hicexplorer`**. +- Insulation score using the [`cooltools`](https://cooltools.readthedocs.io/en/latest/cli.html#cooltools-diamond-insulation) package. Results are availabe in **`results/tads/insulation`**. +- [`HiCExplorer TADs calling`](https://hicexplorer.readthedocs.io/en/latest/content/tools/hicFindTADs.html). Results are available at **`results/tads/hicexplorer`**. Usually, TADs results are presented as simple BED files, or bigWig files, with the position of boundaries along the genome. -## MultiQC +### MultiQC -[MultiQC](http://multiqc.info) is a visualisation tool that generates a single -HTML report summarising all samples in your project. Most of the pipeline QC -results are visualised in the report and further statistics are available in -within the report data directory. +<details markdown="1"> +<summary>Output files</summary> -The pipeline has special steps which allow the software versions used to be -reported in the MultiQC output for future traceability. +- `multiqc/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. -**Output files:** +</details> -* `multiqc/` - * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - * `multiqc_plots/`: directory containing static images from the report in various formats. +[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. 
Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. -## Pipeline information +Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see <http://multiqc.info>. -[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. +### Pipeline information -**Output files:** +<details markdown="1"> +<summary>Output files</summary> -* `pipeline_info/` - * Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, - `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - * Reports generated by the pipeline: `pipeline_report.html`, - `pipeline_report.txt` and `software_versions.csv`. - * Documentation for interpretation of results in HTML format: - `results_description.html`. +- `pipeline_info/` + - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. + - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + +</details> + +[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/docs/usage.md b/docs/usage.md index 800d44713563554482d79b8e165d06514ad921e3..961766cbf434073443c9147dd4afee2ef8aa6fcb 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,12 +6,50 @@ ## Introduction +## Samplesheet input + +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. + +```bash +--input '[path to samplesheet file]' +``` + +### Multiple runs of the same sample + +The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. Below is an example for the same sample sequenced across 3 lanes: + +```console +sample,fastq_1,fastq_2 +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz +CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz +CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz +``` + +### Full samplesheet + +The `nf-core-hic` pipeline is designed to work only with paired-end data. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. 
+ +```console +sample,fastq_1,fastq_2 +SAMPLE_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz +SAMPLE_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz +SAMPLE_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz +``` + +| Column | Description | +| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | + +An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. + ## Running the pipeline The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/hic --input '*_R{1,2}.fastq.gz' -profile docker +nextflow run nf-core/hic --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile docker ``` This will launch the pipeline with the `docker` configuration profile. @@ -20,9 +58,9 @@ See below for more information about profiles. Note that the pipeline will create the following files in your working directory: ```bash -work # Directory containing the nextflow working files -results # Finished results (configurable, see below) -.nextflow_log # Log file from Nextflow +work # Directory containing the nextflow working files +<OUTDIR> # Finished results in specified location (defined with --outdir) +.nextflow_log # Log file from Nextflow # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` @@ -36,29 +74,16 @@ nextflow pull nf-core/hic ### Reproducibility -It's a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. +It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -First, go to the -[nf-core/hic releases page](https://github.com/nf-core/hic/releases) and find -the latest version number - numeric only (eg. `1.3.1`). -Then specify this when running the pipeline with `-r` (one hyphen) -eg. `-r 1.3.1`. +First, go to the [nf-core/hic releases page](https://github.com/nf-core/hic/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. -This version number will be logged in reports when you run the pipeline, so -that you'll know what you used when you look back in the future. - -### Automatic resubmission - -Each step in the pipeline has a default set of requirements for number of CPUs, -memory and time. 
For most of the steps in the pipeline, if the job exits with -an error code of `143` (exceeded requested resources) it will automatically -resubmit with higher requests (2 x original, then 3 x original). If it still -fails after three times then the pipeline is stopped. +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. ## Core Nextflow arguments > **NB:** These options are part of Nextflow and use a _single_ hyphen -(pipeline parameters use a double-hyphen). +> (pipeline parameters use a double-hyphen). ### `-profile` @@ -68,7 +93,7 @@ configuration presets for different compute environments. Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. > We highly recommend the use of Docker or Singularity containers for full -pipeline reproducibility, however when this is not possible, Conda is also supported. +> pipeline reproducibility, however when this is not possible, Conda is also supported. The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) @@ -83,40 +108,29 @@ the order of arguments is important! They are loaded in sequence, so later profiles can overwrite earlier profiles. -If `-profile` is not specified, the pipeline will run locally and -expect all software to be -installed and available on the `PATH`. This is _not_ recommended. - -* `docker` - * A generic configuration profile to be used with [Docker](https://docker.com/) - * Pulls software from Docker Hub: [`nfcore/hic`](https://hub.docker.com/r/nfcore/hic/) -* `singularity` - * A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) - * Pulls software from Docker Hub: [`nfcore/hic`](https://hub.docker.com/r/nfcore/hic/) -* `podman` - * A generic configuration profile to be used with [Podman](https://podman.io/) - * Pulls software from Docker Hub: [`nfcore/hic`](https://hub.docker.com/r/nfcore/hic/) -* `shifter` - * A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) - * Pulls software from Docker Hub: [`nfcore/hic`](https://hub.docker.com/r/nfcore/hic/) -* `charliecloud` - * A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) - * Pulls software from Docker Hub: [`nfcore/hic`](https://hub.docker.com/r/nfcore/hic/) -* `conda` - * Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. - * A generic configuration profile to be used with [Conda](https://conda.io/docs/) - * Pulls most software from [Bioconda](https://bioconda.github.io/) -* `test` - * A profile with a complete configuration for automated testing - * Includes links to test data so needs no other parameters +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer enviroment. 
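+
+For example, the bundled test configuration can be combined with Docker (both profiles are described below), with the output directory set to any location you choose:
+
+```bash
+nextflow run nf-core/hic -profile test,docker --outdir ./results
+```
+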
+ +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters +- `docker` + - A generic configuration profile to be used with [Docker](https://docker.com/) +- `singularity` + - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) +- `podman` + - A generic configuration profile to be used with [Podman](https://podman.io/) +- `shifter` + - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) +- `charliecloud` + - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `conda` + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. ### `-resume` -Specify this when restarting a pipeline. Nextflow will used cached results from -any pipeline steps where the inputs are the same, continuing from where it got -to previously. -You can also supply a run name to resume a specific run: `-resume [run-name]`. -Use the `nextflow log` command to show previous run names. +Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). + +You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. ### `-c` @@ -124,38 +138,128 @@ Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. -#### Custom resource requests +## Custom configuration + +### Resource requests + +Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. + +For example, if the nf-core/rnaseq pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue: + +```console +[62/149eb0] NOTE: Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1) +Error executing process > 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' + +Caused by: + Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -Each step in the pipeline has a default set of requirements for number of CPUs, -memory and time. 
For most of the steps in the pipeline, if the job exits with -an error code of `143` (exceeded requested resources) it will automatically resubmit -with higher requests (2 x original, then 3 x original). If it still fails after three -times then the pipeline is stopped. +Command executed: + STAR \ + --genomeDir star \ + --readFilesIn WT_REP1_trimmed.fq.gz \ + --runThreadN 2 \ + --outFileNamePrefix WT_REP1. \ + <TRUNCATED> -Whilst these default requirements will hopefully work for most people with most data, -you may find that you want to customise the compute resources that the pipeline requests. -You can do this by creating a custom config file. For example, to give the workflow -process `star` 32GB of memory, you could use the following config: +Command exit status: + 137 + +Command output: + (empty) + +Command error: + .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1. <TRUNCATED> +Work dir: + /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb + +Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` +``` + +#### For beginners + +A first step to bypass this error, you could try to increase the amount of CPUs, memory, and time for the whole pipeline. Therefor you can try to increase the resource for the parameters `--max_cpus`, `--max_memory`, and `--max_time`. Based on the error above, you have to increase the amount of memory. Therefore you can go to the [parameter documentation of rnaseq](https://nf-co.re/rnaseq/3.9/parameters) and scroll down to the `show hidden parameter` button to get the default value for `--max_memory`. In this case 128GB, you than can try to run your pipeline again with `--max_memory 200GB -resume` to skip all process, that were already calculated. If you can not increase the resource of the complete pipeline, you can try to adapt the resource for a single process as mentioned below. + +#### Advanced option on process level + +To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). +We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/star/align/main.nf`. +If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). +The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. +The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. 
+Providing you haven't set any other standard nf-core parameters to **cap** the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `STAR_ALIGN` process failure by creating a custom config file that sets at least 72GB of memory, in this case increased to 100GB. +The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections. ```nextflow process { - withName: star { - memory = 32.GB - } + withName: 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN' { + memory = 100.GB + } } ``` -To find the exact name of a process you wish to modify the compute resources, check the live-status of a nextflow run displayed on your terminal or check the nextflow error for a line like so: `Error executing process > 'bowtie2_end_to_end'`. In this case the name to specify in the custom config file is `bowtie2_end_to_end`. +> **NB:** We specify the full process name i.e. `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden. +> +> If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. + +### Updating containers (advanced users) + +The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. + +1. Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19) +2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags) +3. Create the custom config accordingly: + + - For Docker: -See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information. + ```nextflow + process { + withName: PANGOLIN { + container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0' + } + } + ``` -If you are likely to be running `nf-core` pipelines regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter (see definition above). 
You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. + - For Singularity: + + ```nextflow + process { + withName: PANGOLIN { + container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' + } + } + ``` + + - For Conda: + + ```nextflow + process { + withName: PANGOLIN { + conda = 'bioconda::pangolin=3.0.5' + } + } + ``` + +> **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. + +### nf-core/configs + +In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. + +See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files. If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). -### Running in the background +## Azure Resource Requests + +To be used with the `azurebatch` profile by specifying the `-profile azurebatch`. +We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required. + +Note that the choice of VM size depends on your quota and the overall workload during the analysis. +For a thorough list, please refer the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). + +## Running in the background Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. @@ -169,7 +273,7 @@ session which you can log back into at a later time. Some HPC setups also allow you to run nextflow within a cluster job submitted your job scheduler (from where it submits more jobs). -#### Nextflow memory requirements +## Nextflow memory requirements In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. @@ -219,15 +323,15 @@ Please note the following requirements: 1. The path must be enclosed in quotes 2. The path must have at least one `*` wildcard character 3. When using the pipeline with paired end data, the path must use `{1,2}` -notation to specify read pairs. + notation to specify read pairs. 
If left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz` -Note that the Hi-C data analysis requires paired-end data. +Note that the Hi-C data analysis workflow requires paired-end data. ## Reference genomes -The pipeline config files come bundled with paths to the illumina iGenomes reference +The pipeline config files come bundled with paths to the Illumina iGenomes reference index files. If running with docker or AWS, the configuration is set up to use the [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/) resource. @@ -237,7 +341,7 @@ There are many different species supported in the iGenomes references. To run the pipeline, you must specify which to use with the `--genome` flag. You can find the keys to specify the genomes in the -[iGenomes config file](../conf/igenomes.config). +[iGenomes config file](https://github.com/nf-core/hic/blob/master/conf/igenomes.config). ### `--fasta` @@ -251,7 +355,7 @@ run the pipeline: ### `--bwt2_index` The bowtie2 indexes are required to align the data with the HiC-Pro workflow. If the -`--bwt2_index` is not specified, the pipeline will either use the igenome +`--bwt2_index` is not specified, the pipeline will either use the iGenomes bowtie2 indexes (see `--genome` option) or build the indexes on-the-fly (see `--fasta` option) @@ -261,8 +365,8 @@ bowtie2 indexes (see `--genome` option) or build the indexes on-the-fly ### `--chromosome_size` -The Hi-C pipeline will also requires a two-columns text file with the -chromosome name and its size (tab separated). +The Hi-C pipeline also requires a two-column text file with the +chromosome name and the chromosome size (tab-separated). If not specified, this file will be automatically created by the pipeline. In the latter case, the `--fasta` reference genome has to be specified. @@ -286,7 +390,7 @@ In the latter case, the `--fasta` reference genome has to be specified. ### `--restriction_fragments` -Finally, Hi-C experiments based on restriction enzyme digestion requires a BED +Finally, Hi-C experiments based on restriction enzyme digestion require a BED file with coordinates of restriction fragments. ```bash @@ -303,23 +407,23 @@ file with coordinates of restriction fragments. (...) ``` -If not specified, this file will be automatically created by the pipline. +If not specified, this file will be automatically created by the pipeline. In this case, the `--fasta` reference genome will be used. -Note that the `digestion` or `--restriction_site` parameter is mandatory to create this file. +Note that the `--digestion` or `--restriction_site` parameter is mandatory to create this file. ## Hi-C specific options The following options are defined in the `nextflow.config` file, and can be updated either using a custom configuration file (see `-c` option) or using -command line parameter. +command line parameters. ### HiC-pro mapping The reads mapping is currently based on the two-steps strategy implemented in the HiC-pro pipeline. The idea is to first align reads from end-to-end. -Reads that do not aligned are then trimmed at the ligation site, and their 5' +Reads that do not align are then trimmed at the ligation site, and their 5' end is re-aligned to the reference genome. -Note that the default option are quite stringent, and can be updated according +Note that the default options are quite stringent, and can be updated according to the reads quality or the reference genome. #### `--bwt2_opts_end2end` @@ -356,7 +460,7 @@ Minimum mapping quality. Reads with lower quality are discarded. 
Default: 10 This parameter allows to automatically set the `--restriction_site` and `--ligation_site` parameter according to the restriction enzyme you used. -Available keywords are 'hindiii', 'dpnii', 'mboi', 'arima'. +Available keywords are 'hindiii', 'dpnii', 'mboi', 'arima'. ```bash --digestion 'hindiii' @@ -365,17 +469,17 @@ Available keywords are 'hindiii', 'dpnii', 'mboi', 'arima'. #### `--restriction_site` If the restriction enzyme is not available through the `--digestion` -parameter, you can also defined manually the restriction motif(s) for +parameter, you can also define manually the restriction motif(s) for Hi-C digestion protocol. The restriction motif(s) is(are) used to generate the list of restriction fragments. The precise cutting site of the restriction enzyme has to be specified using the '^' character. Default: 'A^AGCTT' Here are a few examples: -* MboI: ^GATC -* DpnII: ^GATC -* HindIII: A^AGCTT -* ARIMA kit: ^GATC,G^ANTC +- MboI: ^GATC +- DpnII: ^GATC +- HindIII: A^AGCTT +- ARIMA kit: ^GATC,G^ANTC Note that multiples restriction motifs can be provided (comma-separated) and that 'N' base are supported. @@ -388,7 +492,7 @@ that 'N' base are supported. Ligation motif after reads ligation. This motif is used for reads trimming and depends on the fill in strategy. -Note that multiple ligation sites can be specified (comma separated) and that +Note that multiple ligation sites can be specified (comma-separated) and that 'N' base is interpreted and replaced by 'A','C','G','T'. Default: 'AAGCTAGCTT' @@ -404,11 +508,11 @@ Exemple of the ARIMA kit: GATCGATC,GANTGATC,GANTANTC,GATCANTC In DNAse Hi-C mode, all options related to digestion Hi-C (see previous section) are ignored. -In this case, it is highly recommanded to use the `--min_cis_dist` parameter +In this case, it is highly recommended to use the `--min_cis_dist` parameter to remove spurious ligation products. ```bash ---dnase' +--dnase ``` ### HiC-pro processing @@ -460,7 +564,7 @@ Mainly useful for DNase Hi-C. Default: '0' #### `--keep_dups` -If specified, duplicates reads are not discarded before building contact maps. +If specified, duplicate reads are not discarded before building contact maps. ```bash --keep_dups @@ -484,7 +588,7 @@ framework to build the raw and balanced contact maps in txt and (m)cool formats. ### `--bin_size` -Resolution of contact maps to generate (comma separated). +Resolution of contact maps to generate (comma-separated). Default:'1000000,500000' ```bash @@ -525,7 +629,7 @@ Default: 100 #### `--ice_filer_low_count_perc` -Define which pourcentage of bins with low counts should be force to zero. +Define which percentage of bins with low counts should be forced to zero. Default: 0.02 ```bash @@ -534,7 +638,7 @@ Default: 0.02 #### `--ice_filer_high_count_perc` -Define which pourcentage of bins with low counts should be discarded before +Define which percentage of bins with low counts should be discarded before normalization. Default: 0 ```bash @@ -557,7 +661,7 @@ normalization. Default: 0.1 #### `--res_dist_decay` Generates distance vs Hi-C counts plots at a given resolution using `HiCExplorer`. -Several resolution can be specified (comma separeted). Default: '250000' +Several resolutions can be specified (comma-separeted). Default: '250000' ```bash --res_dist_decay '[string]' @@ -569,7 +673,7 @@ Call open/close compartments for each chromosome, using the `cooltools` command. #### `--res_compartments` -Resolution to call the chromosome compartments (comma separated). 
+Resolution to call the chromosome compartments (comma-separated). Default: '250000' ```bash @@ -582,7 +686,7 @@ Default: '250000' TADs calling can be performed using different approaches. Currently available options are `insulation` and `hicexplorer`. -Note that all options can be specified (comma separated). +Note that all options can be specified (comma-separated). Default: 'insulation' ```bash @@ -591,7 +695,7 @@ Default: 'insulation' #### `--res_tads` -Resolution to run the TADs calling analysis (comma separated). +Resolution to run the TADs calling analysis (comma-separated). Default: '40000,20000' ```bash @@ -634,7 +738,7 @@ results folder. Default: false ### `--save_interaction_bam` -If specified, write a BAM file with all classified reads (valid paires, +If specified, write a BAM file with all classified reads (valid pairs, dangling end, self-circle, etc.) and its tags. ```bash @@ -646,7 +750,7 @@ dangling end, self-circle, etc.) and its tags. ### `--skip_maps` If defined, the workflow stops with the list of valid interactions, and the -genome-wide maps are not built. Usefult for capture-C analysis. Default: false +genome-wide maps are not built. Useful for capture-C analysis. Default: false ```bash --skip_maps @@ -669,7 +773,7 @@ If defined, cooler files are not generated. Default: false --skip_cool ``` -### `skip_dist_decay` +### `--skip_dist_decay` Do not run distance decay plots. Default: false @@ -677,7 +781,7 @@ Do not run distance decay plots. Default: false --skip_dist_decay ``` -### `skip_compartments` +### `--skip_compartments` Do not call compartments. Default: false @@ -685,7 +789,7 @@ Do not call compartments. Default: false --skip_compartments ``` -### `skip_tads` +### `--skip_tads` Do not call TADs. Default: false diff --git a/environment.yml b/environment.yml index 9d357598b764a0dbf9adbeccd3fe6767828fa844..b8abcdfc65aae3942b48c43e1fbb5fe7f2bb8bb1 100644 --- a/environment.yml +++ b/environment.yml @@ -1,31 +1,32 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: nf-core-hic-1.3.0 +name: nf-core-hic-2.0.0 channels: - conda-forge - bioconda - defaults dependencies: - - conda-forge::python=3.7.6 - - pip=20.0.1 - - conda-forge::tbb=2020.2=hc9558a2_0 - - conda-forge::scipy=1.4.1 - - conda-forge::numpy=1.18.1 - - bioconda::iced=0.5.6 - - bioconda::bx-python=0.8.8 - - bioconda::pysam=0.15.4 - - conda-forge::pymdown-extensions=7.1 - - bioconda::cooler=0.8.6 - - bioconda::bowtie2=2.3.5 - - bioconda::samtools=1.9 - - bioconda::multiqc=1.8 + - conda-forge::python=3.9.12=h9a8a25e_1_cpython + - pip=22.0.4=pyhd8ed1ab_0 + - conda-forge::tbb=2020.2=hc9558a2_0 + - conda-forge::scipy=1.8.0=py39hee8e79c_1 + - conda-forge::numpy=1.22.3=py39hc58783e_2 + - bioconda::iced=0.5.10=py39h919a90d_1 + - bioconda::bx-python=0.8.13=py39h6471ffd_1 + - bioconda::pysam=0.19.0=py39h5030a8b_0 + - conda-forge::pymdown-extensions=7.1=pyh9f0ad1d_0 + - bioconda::cooler=0.8.11=pyh5e36f6f_1 + - bioconda::cooltools=0.5.1=py39h5371cbf_1 + - bioconda::bowtie2=2.4.5=py39hd2f7db1_2 + - bioconda::samtools=1.15.1=h1170115_0 + - bioconda::multiqc=1.12=pyhdfd78af_0 + - bioconda::fastqc=0.11.9=hdfd78af_1 -## Dev tools - - bioconda::hicexplorer=3.4.3 - - bioconda::bioconductor-hitc=1.32.0 - - conda-forge::r-optparse=1.6.6 - - bioconda::ucsc-bedgraphtobigwig=357 - - conda-forge::cython=0.29.19 + ## Dev tools + - bioconda::hicexplorer=3.7.2=pyhdfd78af_1 + - bioconda::bioconductor-hitc=1.38.0=r41hdfd78af_0 + - 
conda-forge::r-optparse=1.7.1=r41hc72bb7e_0 + - bioconda::ucsc-bedgraphtobigwig=377=ha8a8165_3 + - conda-forge::cython=0.29.28=py39h5a03fae_2 - pip: - - cooltools==0.4.0 - - fanc==0.8.30 \ No newline at end of file + - fanc==0.9.23 diff --git a/lib/Headers.groovy b/lib/Headers.groovy deleted file mode 100644 index 15d1d388006df42e226aea961f0d21dbdabaa8cb..0000000000000000000000000000000000000000 --- a/lib/Headers.groovy +++ /dev/null @@ -1,43 +0,0 @@ -/* - * This file holds several functions used to render the nf-core ANSI header. - */ - -class Headers { - - private static Map log_colours(Boolean monochrome_logs) { - Map colorcodes = [:] - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['yellow_bold'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - colorcodes['red'] = monochrome_logs ? '' : "\033[1;91m" - return colorcodes - } - - static String dashed_line(monochrome_logs) { - Map colors = log_colours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - static String nf_core(workflow, monochrome_logs) { - Map colors = log_colours(monochrome_logs) - String.format( - """\n - ${dashed_line(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} - ${dashed_line(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy old mode 100644 new mode 100755 index 52ee730432905c5f6dc3e2c89352bbaee6ea145b..33cd4f6e8df62276afa55ab4c00cd59900ea013e --- a/lib/NfcoreSchema.groovy +++ b/lib/NfcoreSchema.groovy @@ -1,6 +1,6 @@ -/* - * This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. - */ +// +// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. 
+// import org.everit.json.schema.Schema import org.everit.json.schema.loader.SchemaLoader @@ -13,16 +13,23 @@ import groovy.json.JsonBuilder class NfcoreSchema { - /* - * Function to loop over all parameters defined in schema and check - * whether the given paremeters adhere to the specificiations - */ + // + // Resolve Schema path relative to main workflow directory + // + public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { + return "${workflow.projectDir}/${schema_filename}" + } + + // + // Function to loop over all parameters defined in schema and check + // whether the given parameters adhere to the specifications + // /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - private static void validateParameters(params, jsonSchema, log) { + public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { def has_error = false - //=====================================================================// + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// // Check for nextflow core params and unexpected params - def json = new File(jsonSchema).text + def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') def nf_params = [ // Options for base `nextflow` command @@ -39,7 +46,6 @@ class NfcoreSchema { 'quiet', 'syslog', 'v', - 'version', // Options for `nextflow run` command 'ansi', @@ -98,9 +104,13 @@ class NfcoreSchema { // Collect expected parameters from the schema def expectedParams = [] + def enums = [:] for (group in schemaParams) { for (p in group.value['properties']) { expectedParams.push(p.key) + if (group.value['properties'][p.key].containsKey('enum')) { + enums[p.key] = group.value['properties'][p.key]['enum'] + } } } @@ -114,7 +124,8 @@ class NfcoreSchema { def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !expectedParamsLowerCase.contains(specifiedParamLowerCase)) { + def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) + if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { // Temporarily remove camelCase/camel-case params #1035 def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ @@ -123,38 +134,38 @@ class NfcoreSchema { } } - //=====================================================================// + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// // Validate parameters against the schema - InputStream inputStream = new File(jsonSchema).newInputStream() - JSONObject rawSchema = new JSONObject(new JSONTokener(inputStream)) + InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() + JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) // Remove anything that's in params.schema_ignore_params - rawSchema = removeIgnoredParams(rawSchema, params) + raw_schema = removeIgnoredParams(raw_schema, params) - Schema schema = 
SchemaLoader.load(rawSchema) + Schema schema = SchemaLoader.load(raw_schema) // Clean the parameters def cleanedParams = cleanParameters(params) // Convert to JSONObject def jsonParams = new JsonBuilder(cleanedParams) - JSONObject paramsJSON = new JSONObject(jsonParams.toString()) + JSONObject params_json = new JSONObject(jsonParams.toString()) // Validate try { - schema.validate(paramsJSON) + schema.validate(params_json) } catch (ValidationException e) { println '' log.error 'ERROR: Validation of pipeline parameters failed!' JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, paramsJSON, log) + printExceptions(exceptionJSON, params_json, log, enums) println '' has_error = true } // Check for unexpected parameters if (unexpectedParams.size() > 0) { - Map colors = log_colours(params.monochrome_logs) + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) println '' def warn_msg = 'Found unexpected parameters:' for (unexpectedParam in unexpectedParams) { @@ -170,266 +181,17 @@ class NfcoreSchema { } } - // Loop over nested exceptions and print the causingException - private static void printExceptions(exJSON, paramsJSON, log) { - def causingExceptions = exJSON['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = exJSON['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (exJSON['pointerToViolation'] == '#') { - log.error "* ${exJSON['message']}" - } - // Error with specific param - else { - def param = exJSON['pointerToViolation'] - ~/^#\// - def param_val = paramsJSON[param].toString() - log.error "* --${param}: ${exJSON['message']} (${param_val})" - } - } - for (ex in causingExceptions) { - printExceptions(ex, paramsJSON, log) - } - } - - // Remove an element from a JSONArray - private static JSONArray removeElement(jsonArray, element){ - def list = [] - int len = jsonArray.length() - for (int i=0;i<len;i++){ - list.add(jsonArray.get(i).toString()) - } - list.remove(element) - JSONArray jsArray = new JSONArray(list) - return jsArray - } - - private static JSONObject removeIgnoredParams(rawSchema, params){ - // Remove anything that's in params.schema_ignore_params - params.schema_ignore_params.split(',').each{ ignore_param -> - if(rawSchema.keySet().contains('definitions')){ - rawSchema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(rawSchema.keySet().contains('properties') && rawSchema.get('properties').keySet().contains(ignore_param)) { - rawSchema.get("properties").remove(ignore_param) - } - if(rawSchema.keySet().contains('required') && rawSchema.required.contains(ignore_param)) { - def cleaned_required = removeElement(rawSchema.required, ignore_param) - rawSchema.put("required", cleaned_required) - } - } - return rawSchema - } - - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - 
if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - /* - * This method tries to read a JSON params file - */ - private static LinkedHashMap params_load(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = params_read(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - private static Map log_colours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? 
'' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - static String dashed_line(monochrome_logs) { - Map colors = log_colours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - /* - Method to actually read in JSON file using Groovy. - Group (as Key), values are all parameters - - Parameter1 as Key, Description as Value - - Parameter2 as Key, Description as Value - .... - Group - - - */ - private static LinkedHashMap params_read(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - /* - * Get maximum number of characters across all parameter names - */ - private static Integer params_max_chars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } - - /* - * Beautify parameters for --help - */ - private static String params_help(workflow, params, json_schema, command) { - Map colors = log_colours(params.monochrome_logs) + // + // Beautify parameters for --help + // + public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) Integer num_hidden = 0 String output = '' output += 'Typical pipeline command:\n\n' output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = params_load(json_schema) - Integer max_chars = params_max_chars(params_map) + 1 + Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) + Integer max_chars = paramsMaxChars(params_map) + 1 Integer desc_indent = max_chars + 14 Integer dec_linewidth = 160 - desc_indent for (group in params_map.keySet()) { @@ -443,7 +205,7 @@ class NfcoreSchema { } def type = '[' + group_params.get(param).type + ']' def description = 
group_params.get(param).description - def defaultValue = group_params.get(param).default ? " [default: " + group_params.get(param).default.toString() + "]" : '' + def defaultValue = group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' def description_default = description + colors.dim + defaultValue + colors.reset // Wrap long description texts // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap @@ -469,18 +231,17 @@ class NfcoreSchema { output += group_output } } - output += dashed_line(params.monochrome_logs) if (num_hidden > 0){ - output += colors.dim + "\n Hiding $num_hidden params, use --show_hidden_params to show.\n" + colors.reset - output += dashed_line(params.monochrome_logs) + output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset } + output += NfcoreTemplate.dashedLine(params.monochrome_logs) return output } - /* - * Groovy Map summarising parameters/workflow options used by the pipeline - */ - private static LinkedHashMap params_summary_map(workflow, params, json_schema) { + // + // Groovy Map summarising parameters/workflow options used by the pipeline + // + public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { // Get a selection of core Nextflow workflow options def Map workflow_summary = [:] if (workflow.revision) { @@ -502,13 +263,12 @@ class NfcoreSchema { // Get pipeline parameters defined in JSON Schema def Map params_summary = [:] - def blacklist = ['hostnames'] - def params_map = params_load(json_schema) + def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) for (group in params_map.keySet()) { def sub_params = new LinkedHashMap() def group_params = params_map.get(group) // This gets the parameters of that particular group for (param in group_params.keySet()) { - if (params.containsKey(param) && !blacklist.contains(param)) { + if (params.containsKey(param)) { def params_value = params.get(param) def schema_value = group_params.get(param).default def param_type = group_params.get(param).type @@ -546,14 +306,14 @@ class NfcoreSchema { return [ 'Core Nextflow options' : workflow_summary ] << params_summary } - /* - * Beautify parameters for summary and return as string - */ - private static String params_summary_log(workflow, params, json_schema) { - Map colors = log_colours(params.monochrome_logs) + // + // Beautify parameters for summary and return as string + // + public static String paramsSummaryLog(workflow, params) { + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) String output = '' - def params_map = params_summary_map(workflow, params, json_schema) - def max_chars = params_max_chars(params_map) + def params_map = paramsSummaryMap(workflow, params) + def max_chars = paramsMaxChars(params_map) for (group in params_map.keySet()) { def group_params = params_map.get(group) // This gets the parameters of that particular group if (group_params) { @@ -564,10 +324,205 @@ class NfcoreSchema { output += '\n' } } - output += dashed_line(params.monochrome_logs) - output += colors.dim + "\n Only displaying parameters that differ from defaults.\n" + colors.reset - output += dashed_line(params.monochrome_logs) + output += "!! 
Only displaying parameters that differ from the pipeline defaults !!\n" + output += NfcoreTemplate.dashedLine(params.monochrome_logs) return output } + // + // Loop over nested exceptions and print the causingException + // + private static void printExceptions(ex_json, params_json, log, enums, limit=5) { + def causingExceptions = ex_json['causingExceptions'] + if (causingExceptions.length() == 0) { + def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ + // Missing required param + if (m.matches()) { + log.error "* Missing required parameter: --${m[0][1]}" + } + // Other base-level error + else if (ex_json['pointerToViolation'] == '#') { + log.error "* ${ex_json['message']}" + } + // Error with specific param + else { + def param = ex_json['pointerToViolation'] - ~/^#\// + def param_val = params_json[param].toString() + if (enums.containsKey(param)) { + def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" + if (enums[param].size() > limit) { + log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... )" + } else { + log.error "${error_msg}: ${enums[param].join(', ')})" + } + } else { + log.error "* --${param}: ${ex_json['message']} (${param_val})" + } + } + } + for (ex in causingExceptions) { + printExceptions(ex, params_json, log, enums) + } + } + + // + // Remove an element from a JSONArray + // + private static JSONArray removeElement(json_array, element) { + def list = [] + int len = json_array.length() + for (int i=0;i<len;i++){ + list.add(json_array.get(i).toString()) + } + list.remove(element) + JSONArray jsArray = new JSONArray(list) + return jsArray + } + + // + // Remove ignored parameters + // + private static JSONObject removeIgnoredParams(raw_schema, params) { + // Remove anything that's in params.schema_ignore_params + params.schema_ignore_params.split(',').each{ ignore_param -> + if(raw_schema.keySet().contains('definitions')){ + raw_schema.definitions.each { definition -> + for (key in definition.keySet()){ + if (definition[key].get("properties").keySet().contains(ignore_param)){ + // Remove the param to ignore + definition[key].get("properties").remove(ignore_param) + // If the param was required, change this + if (definition[key].has("required")) { + def cleaned_required = removeElement(definition[key].required, ignore_param) + definition[key].put("required", cleaned_required) + } + } + } + } + } + if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { + raw_schema.get("properties").remove(ignore_param) + } + if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { + def cleaned_required = removeElement(raw_schema.required, ignore_param) + raw_schema.put("required", cleaned_required) + } + } + return raw_schema + } + + // + // Clean and check parameters relative to Nextflow native classes + // + private static Map cleanParameters(params) { + def new_params = params.getClass().newInstance(params) + for (p in params) { + // remove anything evaluating to false + if (!p['value']) { + new_params.remove(p.key) + } + // Cast MemoryUnit to String + if (p['value'].getClass() == nextflow.util.MemoryUnit) { + new_params.replace(p.key, p['value'].toString()) + } + // Cast Duration to String + if (p['value'].getClass() == nextflow.util.Duration) { + new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) + } + // Cast LinkedHashMap to String + if (p['value'].getClass() == 
LinkedHashMap) { + new_params.replace(p.key, p['value'].toString()) + } + } + return new_params + } + + // + // This function tries to read a JSON params file + // + private static LinkedHashMap paramsLoad(String json_schema) { + def params_map = new LinkedHashMap() + try { + params_map = paramsRead(json_schema) + } catch (Exception e) { + println "Could not read parameters settings from JSON. $e" + params_map = new LinkedHashMap() + } + return params_map + } + + // + // Method to actually read in JSON file using Groovy. + // Group (as Key), values are all parameters + // - Parameter1 as Key, Description as Value + // - Parameter2 as Key, Description as Value + // .... + // Group + // - + private static LinkedHashMap paramsRead(String json_schema) throws Exception { + def json = new File(json_schema).text + def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') + def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') + /* Tree looks like this in nf-core schema + * definitions <- this is what the first get('definitions') gets us + group 1 + title + description + properties + parameter 1 + type + description + parameter 2 + type + description + group 2 + title + description + properties + parameter 1 + type + description + * properties <- parameters can also be ungrouped, outside of definitions + parameter 1 + type + description + */ + + // Grouped params + def params_map = new LinkedHashMap() + schema_definitions.each { key, val -> + def Map group = schema_definitions."$key".properties // Gets the property object of the group + def title = schema_definitions."$key".title + def sub_params = new LinkedHashMap() + group.each { innerkey, value -> + sub_params.put(innerkey, value) + } + params_map.put(title, sub_params) + } + + // Ungrouped params + def ungrouped_params = new LinkedHashMap() + schema_properties.each { innerkey, value -> + ungrouped_params.put(innerkey, value) + } + params_map.put("Other parameters", ungrouped_params) + + return params_map + } + + // + // Get maximum number of characters across all parameter names + // + private static Integer paramsMaxChars(params_map) { + Integer max_chars = 0 + for (group in params_map.keySet()) { + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (param.size() > max_chars) { + max_chars = param.size() + } + } + } + return max_chars + } } diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy new file mode 100755 index 0000000000000000000000000000000000000000..25a0a74a645332b1cacfc99b6e5c9c4500c49423 --- /dev/null +++ b/lib/NfcoreTemplate.groovy @@ -0,0 +1,336 @@ +// +// This file holds several functions used within the nf-core pipeline template. +// + +import org.yaml.snakeyaml.Yaml + +class NfcoreTemplate { + + // + // Check AWS Batch related parameters have been specified correctly + // + public static void awsBatch(workflow, params) { + if (workflow.profile.contains('awsbatch')) { + // Check params.awsqueue and params.awsregion have been set if running on AWSBatch + assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" + // Check outdir paths to be S3 buckets if running on AWSBatch + assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" 
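For illustration only, a command line that would satisfy the `awsBatch()` checks above could look like the sketch below; the queue, region and bucket names are invented and the usual pipeline options are omitted:

```bash
nextflow run nf-core/hic -profile awsbatch \
    --awsqueue 'my-batch-queue' \
    --awsregion 'eu-west-1' \
    --outdir 's3://my-bucket/hic-results'
```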
+ } + } + + // + // Warn if a -profile or Nextflow config has not been provided to run the pipeline + // + public static void checkConfigProvided(workflow, log) { + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + } + } + + // + // Generate version string + // + public static String version(workflow) { + String version_string = "" + + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string + } + + // + // Construct and send completion email + // + public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = NfcoreTemplate.version(workflow) + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn 
"[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + + // Check if we are only sending emails on failure + def email_address = params.email + if (!params.email && params.email_on_fail && !workflow.success) { + email_address = params.email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("$projectDir/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("$projectDir/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("$projectDir/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(params.monochrome_logs) + if (email_address) { + try { + if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + mail_cmd += [ '-A', mqc_report ] + } + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_d = new File("${params.outdir}/pipeline_info/") + if (!output_d.exists()) { + output_d.mkdirs() + } + def output_hf = new File(output_d, "pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + def output_tf = new File(output_d, "pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + } + + // + // Construct and send a notification to a web server as JSON + // e.g. 
Microsoft Teams and Slack + // + public static void IM_notification(workflow, params, summary_params, projectDir, log) { + def hook_url = params.hook_url + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = NfcoreTemplate.version(workflow) + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("$projectDir/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } + } + + // + // Print pipeline summary on completion + // + public static void summary(workflow, params, log) { + Map colors = logColours(params.monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } + } + + // + // ANSII Colours used for terminal logging + // + public static Map logColours(Boolean monochrome_logs) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? 
'' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? 
'' : "\033[1;97m" + + return colorcodes + } + + // + // Does what is says on the tin + // + public static String dashedLine(monochrome_logs) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" + } + + // + // nf-core logo + // + public static String logo(workflow, monochrome_logs) { + Map colors = logColours(monochrome_logs) + String workflow_version = NfcoreTemplate.version(workflow) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) + } +} diff --git a/lib/Utils.groovy b/lib/Utils.groovy new file mode 100644 index 0000000000000000000000000000000000000000..8d030f4e844bb87ca93fbe4f905e53a833b40840 --- /dev/null +++ b/lib/Utils.groovy @@ -0,0 +1,47 @@ +// +// This file holds several Groovy functions that could be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml + +class Utils { + + // + // When running with -profile conda, warn if channels have not been set-up appropriately + // + public static void checkCondaChannels(log) { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. 
+ def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } + } +} diff --git a/lib/WorkflowHic.groovy b/lib/WorkflowHic.groovy new file mode 100755 index 0000000000000000000000000000000000000000..3dea732db2bb9c05d5002920297d8d727efea9d0 --- /dev/null +++ b/lib/WorkflowHic.groovy @@ -0,0 +1,84 @@ +// +// This file holds several functions specific to the workflow/hic.nf in the nf-core/hic pipeline +// + +import groovy.text.SimpleTemplateEngine + +class WorkflowHic { + + // + // Check and validate parameters + // + public static void initialise(params, log) { + genomeExistsError(params, log) + + // digestion parameters + if (params.digest && params.digestion && !params.digest.containsKey(params.digestion)) { + log.error "Unknown digestion protocol. Currently, the available digestion options are ${params.digest.keySet().join(", ")}. Please set manually the '--restriction_site' and '--ligation_site' parameters." + System.exit(1) + } + + // Check Digestion or DNase Hi-C mode + //if (!params.dnase && !params.ligation_site) { + // log.error "Ligation motif not found. Please either use the `--digestion` parameters or specify the `--restriction_site` and `--ligation_site`. 
For DNase Hi-C, please use '--dnase' option" + // System.exit(1) + //} + + } + + // + // Get workflow summary for MultiQC + // + public static String paramsSummaryMultiqc(workflow, summary) { + String summary_section = '' + for (group in summary.keySet()) { + def group_params = summary.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += " <p style=\"font-size:110%\"><b>$group</b></p>\n" + summary_section += " <dl class=\"dl-horizontal\">\n" + for (param in group_params.keySet()) { + summary_section += " <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n" + } + summary_section += " </dl>\n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + return yaml_file_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = run_workflow.toMap() + meta["manifest_map"] = run_workflow.manifest.toMap() + + meta["doi_text"] = meta.manifest_map.doi ? "(doi: <a href=\'https://doi.org/${meta.manifest_map.doi}\'>${meta.manifest_map.doi}</a>)" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. 
</li>" + + def methods_text = mqc_methods_yaml.text + + def engine = new SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html + }// + // Exit pipeline if incorrect --genome key provided + // + private static void genomeExistsError(params, log) { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + System.exit(1) + } + } +} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy new file mode 100755 index 0000000000000000000000000000000000000000..239d4fe7253d3a060bd99bff6b7842cdeb2891c8 --- /dev/null +++ b/lib/WorkflowMain.groovy @@ -0,0 +1,98 @@ +// +// This file holds several functions specific to the main.nf workflow in the nf-core/hic pipeline +// + +class WorkflowMain { + + // + // Citation string for pipeline + // + public static String citation(workflow) { + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + " https://doi.org/10.5281/zenodo.2669513\n\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" + } + + // + // Generate help string + // + public static String help(workflow, params, log) { + def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" + def help_string = '' + help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) + help_string += NfcoreSchema.paramsHelp(workflow, params, command) + help_string += '\n' + citation(workflow) + '\n' + help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) + return help_string + } + + // + // Generate parameter summary log string + // + public static String paramsSummaryLog(workflow, params, log) { + def summary_log = '' + summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) + summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) + summary_log += '\n' + citation(workflow) + '\n' + summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) + return summary_log + } + + // + // Validate parameters and print summary to screen + // + public static void initialise(workflow, params, log) { + // Print help to screen if required + if (params.help) { + log.info help(workflow, params, log) + System.exit(0) + } + + // Print workflow version and exit on --version + if (params.version) { + String workflow_version = NfcoreTemplate.version(workflow) + log.info "${workflow.manifest.name} ${workflow_version}" + System.exit(0) + } + + // Print parameter summary log to screen + log.info paramsSummaryLog(workflow, params, log) + + // Validate workflow parameters via the JSON schema + if (params.validate_params) { + NfcoreSchema.validateParameters(workflow, params, log) + } + + // Check that a -profile or Nextflow config has been provided to run the pipeline + NfcoreTemplate.checkConfigProvided(workflow, log) + + // Check that conda channels are set-up correctly + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + Utils.checkCondaChannels(log) + } 
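If `Utils.checkCondaChannels(log)` warns about the channel set-up, the usual remedy (following the standard bioconda instructions, shown here only as a sketch) is to register the channels so that conda-forge ends up with the highest priority, matching the `required_channels_in_order` list checked above:

```bash
conda config --add channels defaults
conda config --add channels bioconda
conda config --add channels conda-forge

# inspect the resulting order (highest priority listed first)
conda config --show channels
```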
+ + // Check AWS batch settings + NfcoreTemplate.awsBatch(workflow, params) + + // Check input has been provided + if (!params.input) { + log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" + System.exit(1) + } + } + // + // Get attribute from genome config file e.g. fasta + // + public static Object getGenomeAttribute(params, attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null + } +} diff --git a/main.nf b/main.nf index a8611d564b4a7d9d029e0549976ceaa69afb93f4..dd564aa4e80652c1a21ba504c0564faddfb74bcd 100644 --- a/main.nf +++ b/main.nf @@ -1,1236 +1,65 @@ #!/usr/bin/env nextflow /* -======================================================================================== - nf-core/hic -======================================================================================== - nf-core/hic Analysis Pipeline. - #### Homepage / Documentation - https://github.com/nf-core/hic +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/hic +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Github : https://github.com/nf-core/hic + + Website: https://nf-co.re/hic + Slack : https://nfcore.slack.com/channels/hic ---------------------------------------------------------------------------------------- */ -log.info Headers.nf_core(workflow, params.monochrome_logs) - -//////////////////////////////////////////////////// -/* -- PRINT HELP -- */ -////////////////////////////////////////////////////+ -def json_schema = "$projectDir/nextflow_schema.json" -if (params.help) { - def command = "nextflow run nf-core/hic --input '*_R{1,2}.fastq.gz' -profile docker" - log.info NfcoreSchema.params_help(workflow, params, json_schema, command) - exit 0 -} - -//////////////////////////////////////////////////// -/* -- VALIDATE PARAMETERS -- */ -////////////////////////////////////////////////////+ -if (params.validate_params) { - NfcoreSchema.validateParameters(params, json_schema, log) -} - -// Check if genome exists in the config file -if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. Currently the available genomes are ${params.genomes.keySet().join(', ')}" -} - -if (params.digest && params.digestion && !params.digest.containsKey(params.digestion)) { - exit 1, "Unknown digestion protocol. Currently, the available digestion options are ${params.digest.keySet().join(", ")}. Please set manually the '--restriction_site' and '--ligation_site' parameters." -} - -params.restriction_site = params.digestion ? params.digest[ params.digestion ].restriction_site ?: false : false -params.ligation_site = params.digestion ? params.digest[ params.digestion ].ligation_site ?: false : false - -// Check Digestion or DNase Hi-C mode -if (!params.dnase && !params.ligation_site) { - exit 1, "Ligation motif not found. Please either use the `--digestion` parameters or specify the `--restriction_site` and `--ligation_site`. For DNase Hi-C, please use '--dnase' option" -} - -// Reference index path configuration -params.bwt2_index = params.genome ? params.genomes[ params.genome ].bowtie2 ?: false : false -params.fasta = params.genome ? 
params.genomes[ params.genome ].fasta ?: false : false - - -//////////////////////////////////////////////////// -/* -- Collect configuration parameters -- */ -//////////////////////////////////////////////////// - -// Check AWS batch settings -if (workflow.profile.contains('awsbatch')) { - // AWSBatch sanity checking - if (!params.awsqueue || !params.awsregion) exit 1, 'Specify correct --awsqueue and --awsregion parameters on AWSBatch!' - // Check outdir paths to be S3 buckets if running on AWSBatch - // related: https://github.com/nextflow-io/nextflow/issues/813 - if (!params.outdir.startsWith('s3:')) exit 1, 'Outdir not on S3 - specify S3 Bucket to run on AWSBatch!' - // Prevent trace files to be stored on S3 since S3 does not support rolling files. - if (params.tracedir.startsWith('s3:')) exit 1, 'Specify a local tracedir or run without trace! S3 cannot be used for tracefiles.' -} - -// Stage config files -ch_multiqc_config = file("$projectDir/assets/multiqc_config.yaml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() -ch_output_docs = file("$projectDir/docs/output.md", checkIfExists: true) -ch_output_docs_images = file("$projectDir/docs/images/", checkIfExists: true) - -/* - * input read files - */ - -if (params.input_paths){ - - raw_reads = Channel.create() - raw_reads_2 = Channel.create() - - Channel - .from( params.input_paths ) - .map { row -> [ row[0], [file(row[1][0]), file(row[1][1])]] } - .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0] + "_R1", a[1][0]), tuple(a[0] + "_R2", a[1][1])] } - -}else{ - raw_reads = Channel.create() - raw_reads_2 = Channel.create() - - if ( params.split_fastq ){ - Channel - .fromFilePairs( params.input, flat:true ) - .splitFastq( by: params.fastq_chunks_size, pe:true, file: true, compress:true) - .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0] + "_R1", a[1]), tuple(a[0] + "_R2", a[2])] } - }else{ - Channel - .fromFilePairs( params.input ) - .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0] + "_R1", a[1][0]), tuple(a[0] + "_R2", a[1][1])] } - } -} - -// Update sample name if splitFastq is used -def updateSampleName(x) { - if ((matcher = x[1] =~ /\s*(\.[\d]+).fastq.gz/)) { - res = matcher[0][1] - } - return [x[0] + res, x[1]] -} - -if (params.split_fastq ){ - raw_reads = raw_reads.concat( raw_reads_2 ).map{it -> updateSampleName(it)}.dump(tag:'input') -}else{ - raw_reads = raw_reads.concat( raw_reads_2 ).dump(tag:'input') -} - -/* - * Other input channels - */ - -// Reference genome -if ( params.bwt2_index ){ - - Channel.fromPath( params.bwt2_index , checkIfExists: true) - .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwt2_index}" } - .into { bwt2_index_end2end; bwt2_index_trim } - -} -else if ( params.fasta ) { - Channel.fromPath( params.fasta ) - .ifEmpty { exit 1, "Genome index: Fasta file not found: ${params.fasta}" } - .into { fasta_for_index } -} -else { - exit 1, "No reference genome specified!" -} - -// Chromosome size -if ( params.chromosome_size ){ - Channel.fromPath( params.chromosome_size , checkIfExists: true) - .into {chrsize; chrsize_build; chrsize_raw; chrsize_balance; chrsize_zoom; chrsize_compartments} -} -else if ( params.fasta ){ - Channel.fromPath( params.fasta ) - .ifEmpty { exit 1, "Chromosome sizes: Fasta file not found: ${params.fasta}" } - .set { fasta_for_chromsize } -} -else { - exit 1, "No chromosome size specified!" 
-} - -// Restriction fragments -if ( params.restriction_fragments ){ - Channel.fromPath( params.restriction_fragments, checkIfExists: true ) - .set {res_frag_file} -} -else if ( params.fasta && params.restriction_site ){ - Channel.fromPath( params.fasta ) - .ifEmpty { exit 1, "Restriction fragments: Fasta file not found: ${params.fasta}" } - .set { fasta_for_resfrag } -} -else if (! params.dnase) { - exit 1, "No restriction fragments file specified!" -} - -// Resolutions for contact maps -map_res = Channel.from( params.bin_size ).splitCsv().flatten() -all_res = params.bin_size -if (params.res_tads && !params.skip_tads){ - Channel.from( "${params.res_tads}" ) - .splitCsv() - .flatten() - .into {tads_bin; tads_res_hicexplorer; tads_res_insulation} - map_res = map_res.concat(tads_bin) - all_res = all_res + ',' + params.res_tads -}else{ - tads_res_hicexplorer=Channel.empty() - tads_res_insulation=Channel.empty() - tads_bin=Channel.empty() - if (!params.skip_tads){ - log.warn "[nf-core/hic] Hi-C resolution for TADs calling not specified. See --res_tads" - } -} - -if (params.res_dist_decay && !params.skip_dist_decay){ - Channel.from( "${params.res_dist_decay}" ) - .splitCsv() - .flatten() - .into {ddecay_res; ddecay_bin } - map_res = map_res.concat(ddecay_bin) - all_res = all_res + ',' + params.res_dist_decay -}else{ - ddecay_res = Channel.create() - ddecay_bin = Channel.create() - if (!params.skip_dist_decay){ - log.warn "[nf-core/hic] Hi-C resolution for distance decay not specified. See --res_dist_decay" - } -} - -if (params.res_compartments && !params.skip_compartments){ - Channel.fromPath( params.fasta ) - .ifEmpty { exit 1, "Compartments calling: Fasta file not found: ${params.fasta}" } - .set { fasta_for_compartments } - Channel.from( "${params.res_compartments}" ) - .splitCsv() - .flatten() - .into {comp_bin; comp_res} - map_res = map_res.concat(comp_bin) - all_res = all_res + ',' + params.res_compartments -}else{ - fasta_for_compartments = Channel.empty() - comp_res = Channel.create() - if (!params.skip_compartments){ - log.warn "[nf-core/hic] Hi-C resolution for compartment calling not specified. See --res_compartments" - } -} - -map_res - .unique() - .into { map_res_summary; map_res; map_res_cool; map_comp } - - -//////////////////////////////////////////////////// -/* -- PRINT PARAMETER SUMMARY -- */ -//////////////////////////////////////////////////// -log.info NfcoreSchema.params_summary_log(workflow, params, json_schema) - -// Header log info -def summary = [:] -if (workflow.revision) summary['Pipeline Release'] = workflow.revision -summary['Run Name'] = workflow.runName -summary['Input'] = params.input -summary['splitFastq'] = params.split_fastq -if (params.split_fastq) - summary['Read chunks Size'] = params.fastq_chunks_size -summary['Fasta Ref'] = params.fasta -if (params.restriction_site){ - summary['Digestion'] = params.digestion - summary['Restriction Motif']= params.restriction_site - summary['Ligation Motif'] = params.ligation_site - summary['Min Fragment Size']= params.min_restriction_fragment_size - summary['Max Fragment Size']= params.max_restriction_fragment_size - summary['Min Insert Size'] = params.min_insert_size - summary['Max Insert Size'] = params.max_insert_size -}else{ - summary['DNase Mode'] = params.dnase - summary['Min CIS dist'] = params.min_cis_dist -} -summary['Min MAPQ'] = params.min_mapq -summary['Keep Duplicates'] = params.keep_dups ? 'Yes' : 'No' -summary['Keep Multihits'] = params.keep_multi ? 
'Yes' : 'No' -summary['Maps resolution'] = all_res -summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" -if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" -summary['Output dir'] = params.outdir -summary['Launch dir'] = workflow.launchDir -summary['Working dir'] = workflow.workDir -summary['Script dir'] = workflow.projectDir -summary['User'] = workflow.userName -if (workflow.profile.contains('awsbatch')) { - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue - summary['AWS CLI'] = params.awscli -} -summary['Config Profile'] = workflow.profile -if (params.config_profile_description) summary['Config Profile Description'] = params.config_profile_description -if (params.config_profile_contact) summary['Config Profile Contact'] = params.config_profile_contact -if (params.config_profile_url) summary['Config Profile URL'] = params.config_profile_url -summary['Config Files'] = workflow.configFiles.join(', ') -if (params.email || params.email_on_fail) { - summary['E-mail Address'] = params.email - summary['E-mail on failure'] = params.email_on_fail - summary['MultiQC maxsize'] = params.max_multiqc_email_size -} - -// Check the hostnames against configured profiles -checkHostname() - -Channel.from(summary.collect{ [it.key, it.value] }) - .map { k,v -> "<dt>$k</dt><dd><samp>${v ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>" } - .reduce { a, b -> return [a, b].join("\n ") } - .map { x -> """ - id: 'nf-core-hic-summary' - description: " - this information is collected when the pipeline is started." - section_name: 'nf-core/hic Workflow Summary' - section_href: 'https://github.com/nf-core/hic' - plot_type: 'html' - data: | - <dl class=\"dl-horizontal\"> - $x - </dl> - """.stripIndent() } - .set { ch_workflow_summary } +nextflow.enable.dsl = 2 /* - * Parse software version numbers - */ - -process get_software_versions { - publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode, - saveAs: { filename -> if (filename.indexOf('.csv') > 0) filename else null } - - output: - file 'software_versions_mqc.yaml' into ch_software_versions_yaml - file 'software_versions.csv' - - script: - """ - echo $workflow.manifest.version > v_pipeline.txt - echo $workflow.nextflow.version > v_nextflow.txt - bowtie2 --version > v_bowtie2.txt - python --version > v_python.txt 2>&1 - samtools --version > v_samtools.txt - multiqc --version > v_multiqc.txt - scrape_software_versions.py &> software_versions_mqc.yaml - """ -} - -/**************************************************** - * PRE-PROCESSING - */ - -if(!params.bwt2_index && params.fasta){ - process makeBowtie2Index { - tag "$fasta_base" - label 'process_highmem' - publishDir path: { params.save_reference ? "${params.outdir}/reference_genome" : params.outdir }, - saveAs: { params.save_reference ? it : null }, mode: params.publish_dir_mode - - input: - file fasta from fasta_for_index - - output: - file "bowtie2_index" into bwt2_index_end2end - file "bowtie2_index" into bwt2_index_trim - - script: - fasta_base = fasta.toString() - ~/(\.fa)?(\.fasta)?(\.fas)?(\.fsa)?$/ - """ - mkdir bowtie2_index - bowtie2-build ${fasta} bowtie2_index/${fasta_base} - """ - } - } - - -if(!params.chromosome_size && params.fasta){ - process makeChromSize { - tag "$fasta" - label 'process_low' - publishDir path: { params.save_reference ? "${params.outdir}/reference_genome" : params.outdir }, - saveAs: { params.save_reference ? 
it : null }, mode: params.publish_dir_mode - - input: - file fasta from fasta_for_chromsize - - output: - file "*.size" into chrsize, chrsize_build, chrsize_raw, chrsize_balance, chrsize_zoom, chrsize_compartments - - script: - """ - samtools faidx ${fasta} - cut -f1,2 ${fasta}.fai > chrom.size - """ - } - } - -if(!params.restriction_fragments && params.fasta && !params.dnase){ - process getRestrictionFragments { - tag "$fasta ${params.restriction_site}" - label 'process_low' - publishDir path: { params.save_reference ? "${params.outdir}/reference_genome" : params.outdir }, - saveAs: { params.save_reference ? it : null }, mode: params.publish_dir_mode - - input: - file fasta from fasta_for_resfrag - - output: - file "*.bed" into res_frag_file - - script: - """ - digest_genome.py -r ${params.restriction_site} -o restriction_fragments.bed ${fasta} - """ - } - } - -/**************************************************** - * MAIN WORKFLOW - */ - -/* - * HiC-pro - Two-steps Reads Mapping - */ - -process bowtie2_end_to_end { - tag "$sample" - label 'process_medium' - publishDir path: { params.save_aligned_intermediates ? "${params.outdir}/mapping/bwt2_end2end" : params.outdir }, - saveAs: { filename -> if (params.save_aligned_intermediates) filename }, mode: params.publish_dir_mode - - input: - set val(sample), file(reads) from raw_reads - file index from bwt2_index_end2end.collect() - - output: - set val(sample), file("${prefix}_unmap.fastq") into unmapped_end_to_end - set val(sample), file("${prefix}.bam") into end_to_end_bam - - script: - prefix = reads.toString() - ~/(\.fq)?(\.fastq)?(\.gz)?$/ - def bwt2_opts = params.bwt2_opts_end2end - if (!params.dnase){ - """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` - bowtie2 --rg-id BMG --rg SM:${prefix} \\ - ${bwt2_opts} \\ - -p ${task.cpus} \\ - -x \${INDEX} \\ - --un ${prefix}_unmap.fastq \\ - -U ${reads} | samtools view -F 4 -bS - > ${prefix}.bam - """ - }else{ - """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` - bowtie2 --rg-id BMG --rg SM:${prefix} \\ - ${bwt2_opts} \\ - -p ${task.cpus} \\ - -x \${INDEX} \\ - --un ${prefix}_unmap.fastq \\ - -U ${reads} > ${prefix}.bam - """ - } -} - -process trim_reads { - tag "$sample" - label 'process_low' - publishDir path: { params.save_aligned_intermediates ? "${params.outdir}/mapping/bwt2_trimmed" : params.outdir }, - saveAs: { filename -> if (params.save_aligned_intermediates) filename }, mode: params.publish_dir_mode - - when: - !params.dnase - - input: - set val(sample), file(reads) from unmapped_end_to_end - - output: - set val(sample), file("${prefix}_trimmed.fastq") into trimmed_reads - - script: - prefix = reads.toString() - ~/(\.fq)?(\.fastq)?(\.gz)?$/ - """ - cutsite_trimming --fastq $reads \\ - --cutsite ${params.ligation_site} \\ - --out ${prefix}_trimmed.fastq - """ -} - -process bowtie2_on_trimmed_reads { - tag "$sample" - label 'process_medium' - publishDir path: { params.save_aligned_intermediates ? 
"${params.outdir}/mapping/bwt2_trimmed" : params.outdir }, - saveAs: { filename -> if (params.save_aligned_intermediates) filename }, mode: params.publish_dir_mode - - when: - !params.dnase - - input: - set val(sample), file(reads) from trimmed_reads - file index from bwt2_index_trim.collect() - - output: - set val(sample), file("${prefix}_trimmed.bam") into trimmed_bam - - script: - prefix = reads.toString() - ~/(_trimmed)?(\.fq)?(\.fastq)?(\.gz)?$/ - """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` - bowtie2 --rg-id BMG --rg SM:${prefix} \\ - ${params.bwt2_opts_trimmed} \\ - -p ${task.cpus} \\ - -x \${INDEX} \\ - -U ${reads} | samtools view -bS - > ${prefix}_trimmed.bam - """ -} - -if (!params.dnase){ - process bowtie2_merge_mapping_steps{ - tag "$prefix = $bam1 + $bam2" - label 'process_medium' - publishDir "${params.outdir}/hicpro/mapping", mode: params.publish_dir_mode, - saveAs: { filename -> if (params.save_aligned_intermediates && filename.endsWith("stat")) "stats/$filename" - else if (params.save_aligned_intermediates) filename} - - input: - set val(prefix), file(bam1), file(bam2) from end_to_end_bam.join( trimmed_bam ).dump(tag:'merge') - - output: - set val(sample), file("${prefix}_bwt2merged.bam") into bwt2_merged_bam - set val(oname), file("${prefix}.mapstat") into all_mapstat - - script: - sample = prefix.toString() - ~/(_R1|_R2)/ - tag = prefix.toString() =~/_R1/ ? "R1" : "R2" - oname = prefix.toString() - ~/(\.[0-9]+)$/ - """ - samtools merge -@ ${task.cpus} \\ - -f ${prefix}_bwt2merged.bam \\ - ${bam1} ${bam2} - - samtools sort -@ ${task.cpus} -m 800M \\ - -n \\ - -o ${prefix}_bwt2merged.sorted.bam \\ - ${prefix}_bwt2merged.bam - - mv ${prefix}_bwt2merged.sorted.bam ${prefix}_bwt2merged.bam - - echo "## ${prefix}" > ${prefix}.mapstat - echo -n "total_${tag}\t" >> ${prefix}.mapstat - samtools view -c ${prefix}_bwt2merged.bam >> ${prefix}.mapstat - echo -n "mapped_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${prefix}_bwt2merged.bam >> ${prefix}.mapstat - echo -n "global_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${bam1} >> ${prefix}.mapstat - echo -n "local_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${bam2} >> ${prefix}.mapstat - """ - } -}else{ - process dnase_mapping_stats{ - tag "$sample = $bam" - label 'process_medium' - publishDir "${params.outdir}/hicpro/mapping", mode: params.publish_dir_mode, - saveAs: { filename -> if (params.save_aligned_intermediates && filename.endsWith("stat")) "stats/$filename" - else if (params.save_aligned_intermediates) filename} - - input: - set val(prefix), file(bam) from end_to_end_bam - - output: - set val(sample), file(bam) into bwt2_merged_bam - set val(oname), file("${prefix}.mapstat") into all_mapstat - - script: - sample = prefix.toString() - ~/(_R1|_R2)/ - tag = prefix.toString() =~/_R1/ ? "R1" : "R2" - oname = prefix.toString() - ~/(\.[0-9]+)$/ - """ - echo "## ${prefix}" > ${prefix}.mapstat - echo -n "total_${tag}\t" >> ${prefix}.mapstat - samtools view -c ${bam} >> ${prefix}.mapstat - echo -n "mapped_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${bam} >> ${prefix}.mapstat - echo -n "global_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${bam} >> ${prefix}.mapstat - echo -n "local_${tag}\t0" >> ${prefix}.mapstat - """ - } -} - -process combine_mates{ - tag "$sample = $r1_prefix + $r2_prefix" - label 'process_low' - publishDir "${params.outdir}/hicpro/mapping", mode: params.publish_dir_mode, - saveAs: {filename -> filename.endsWith(".pairstat") ? 
"stats/$filename" : "$filename"} - - input: - set val(sample), file(aligned_bam) from bwt2_merged_bam.groupTuple() - - output: - set val(oname), file("${sample}_bwt2pairs.bam") into paired_bam - set val(oname), file("*.pairstat") into all_pairstat - - script: - r1_bam = aligned_bam[0] - r1_prefix = r1_bam.toString() - ~/_bwt2merged.bam$/ - r2_bam = aligned_bam[1] - r2_prefix = r2_bam.toString() - ~/_bwt2merged.bam$/ - oname = sample.toString() - ~/(\.[0-9]+)$/ - - def opts = "-t" - if (params.keep_multi) { - opts="${opts} --multi" - }else if (params.min_mapq){ - opts="${opts} -q ${params.min_mapq}" - } - """ - mergeSAM.py -f ${r1_bam} -r ${r2_bam} -o ${sample}_bwt2pairs.bam ${opts} - """ -} - -/* - * HiC-Pro - detect valid interaction from aligned data - */ - -if (!params.dnase){ - process get_valid_interaction{ - tag "$sample" - label 'process_low' - publishDir "${params.outdir}/hicpro/valid_pairs", mode: params.publish_dir_mode, - saveAs: {filename -> if (filename.endsWith("RSstat")) "stats/$filename" - else if (filename.endsWith(".validPairs")) filename - else if (params.save_nonvalid_pairs) filename} - - input: - set val(sample), file(pe_bam) from paired_bam - file frag_file from res_frag_file.collect() - - output: - set val(sample), file("*.validPairs") into valid_pairs - set val(sample), file("*.validPairs") into valid_pairs_4cool - set val(sample), file("*.DEPairs") into de_pairs - set val(sample), file("*.SCPairs") into sc_pairs - set val(sample), file("*.REPairs") into re_pairs - set val(sample), file("*.FiltPairs") into filt_pairs - set val(sample), file("*RSstat") into all_rsstat - - script: - if (params.split_fastq){ - sample = sample.toString() - ~/(\.[0-9]+)$/ - } - - def opts = "" - opts += params.min_cis_dist > 0 ? " -d ${params.min_cis_dist}" : '' - opts += params.min_insert_size > 0 ? " -s ${params.min_insert_size}" : '' - opts += params.max_insert_size > 0 ? " -l ${params.max_insert_size}" : '' - opts += params.min_restriction_fragment_size > 0 ? " -t ${params.min_restriction_fragment_size}" : '' - opts += params.max_restriction_fragment_size > 0 ? " -m ${params.max_restriction_fragment_size}" : '' - opts += params.save_interaction_bam ? " --sam" : '' - prefix = pe_bam.toString() - ~/.bam/ - """ - mapped_2hic_fragments.py -f ${frag_file} -r ${pe_bam} --all ${opts} - sort -k2,2V -k3,3n -k5,5V -k6,6n -o ${prefix}.validPairs ${prefix}.validPairs - """ - } -} -else{ - process get_valid_interaction_dnase{ - tag "$sample" - label 'process_low' - publishDir "${params.outdir}/hicpro/valid_pairs", mode: params.publish_dir_mode, - saveAs: {filename -> if (filename.endsWith("RSstat")) "stats/$filename" - else filename} - - input: - set val(sample), file(pe_bam) from paired_bam - - output: - set val(sample), file("*.validPairs") into valid_pairs - set val(sample), file("*.validPairs") into valid_pairs_4cool - set val(sample), file("*RSstat") into all_rsstat - - script: - if (params.split_fastq){ - sample = sample.toString() - ~/(\.[0-9]+)$/ - } - - opts = params.min_cis_dist > 0 ? 
" -d ${params.min_cis_dist}" : '' - prefix = pe_bam.toString() - ~/.bam/ - """ - mapped_2hic_dnase.py -r ${pe_bam} ${opts} - sort -k2,2V -k3,3n -k5,5V -k6,6n -o ${prefix}.validPairs ${prefix}.validPairs - """ - } -} - -/* - * Remove duplicates - */ - -process remove_duplicates { - tag "$sample" - label 'process_highmem' - publishDir "${params.outdir}/hicpro/valid_pairs", mode: params.publish_dir_mode, - saveAs: {filename -> if (filename.endsWith("mergestat")) "stats/$filename" - else if (filename.endsWith("allValidPairs")) "$filename"} - input: - set val(sample), file(vpairs) from valid_pairs.groupTuple() - - output: - set val(sample), file("*.allValidPairs") into ch_vpairs, ch_vpairs_cool - file("stats/") into mqc_mergestat - file("*mergestat") into all_mergestat - - script: - if ( ! params.keep_dups ){ - """ - mkdir -p stats/${sample} - - ## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs) - sort -S 50% -k2,2V -k3,3n -k5,5V -k6,6n -m ${vpairs} | \\ - awk -F"\\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=\$2 || c2!=\$5 || s1!=\$3 || s2!=\$6){print;c1=\$2;c2=\$5;s1=\$3;s2=\$6}' > ${sample}.allValidPairs - - echo -n "valid_interaction\t" > ${sample}_allValidPairs.mergestat - cat ${vpairs} | wc -l >> ${sample}_allValidPairs.mergestat - echo -n "valid_interaction_rmdup\t" >> ${sample}_allValidPairs.mergestat - cat ${sample}.allValidPairs | wc -l >> ${sample}_allValidPairs.mergestat - - ## Count short range (<20000) vs long range contacts - awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${sample}.allValidPairs >> ${sample}_allValidPairs.mergestat - - ## For MultiQC - mkdir -p stats/${sample} - cp ${sample}_allValidPairs.mergestat stats/${sample}/ - """ - }else{ - """ - cat ${vpairs} > ${sample}.allValidPairs - echo -n "valid_interaction\t" > ${sample}_allValidPairs.mergestat - cat ${vpairs} | wc -l >> ${sample}_allValidPairs.mergestat - echo -n "valid_interaction_rmdup\t" >> ${sample}_allValidPairs.mergestat - cat ${sample}.allValidPairs | wc -l >> ${sample}_allValidPairs.mergestat - - ## Count short range (<20000) vs long range contacts - awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${sample}.allValidPairs >> ${sample}_allValidPairs.mergestat - - ## For MultiQC - mkdir -p stats/${sample} - cp ${sample}_allValidPairs.mergestat stats/${sample}/ - """ - } -} - -process merge_stats { - tag "$ext" - label 'process_low' - publishDir "${params.outdir}/hicpro/", mode: params.publish_dir_mode, - saveAs: {filename -> if (filename.endsWith("stat")) "stats/$filename"} - - input: - set val(prefix), file(fstat) from all_mapstat.groupTuple().concat(all_pairstat.groupTuple(), all_rsstat.groupTuple()) - - output: - file("stats/") into mqc_mstats - file("*stat") into all_mstats - - script: - sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2|_1|_2)/ - if ( (fstat =~ /.mapstat/) ){ ext = "mmapstat" } - if ( (fstat =~ /.pairstat/) ){ ext = "mpairstat" } - if ( (fstat =~ /.RSstat/) ){ ext = "mRSstat" } - """ - merge_statfiles.py -f ${fstat} > ${prefix}.${ext} - mkdir -p stats/${sample} - cp ${prefix}.${ext} stats/${sample}/ - """ -} - -/* - * HiC-Pro build matrix 
processes - * kept for backward compatibility - */ - - -process build_contact_maps{ - tag "$sample - $mres" - label 'process_highmem' - publishDir "${params.outdir}/hicpro/matrix/raw", mode: params.publish_dir_mode - - when: - !params.skip_maps && params.hicpro_maps - - input: - set val(sample), file(vpairs), val(mres) from ch_vpairs.combine(map_res) - file chrsize from chrsize.collect() - - output: - set val(sample), val(mres), file("*.matrix"), file("*.bed") into raw_maps, raw_maps_4cool - - script: - """ - build_matrix --matrix-format upper --binsize ${mres} --chrsizes ${chrsize} --ifile ${vpairs} --oprefix ${sample}_${mres} - """ -} - -process run_ice{ - tag "$rmaps" - label 'process_highmem' - publishDir "${params.outdir}/hicpro/matrix/iced", mode: params.publish_dir_mode - - when: - !params.skip_maps && !params.skip_balancing && params.hicpro_maps - - input: - set val(sample), val(res), file(rmaps), file(bed) from raw_maps - - output: - set val(sample), val(res), file("*iced.matrix"), file(bed) into hicpro_iced_maps - file ("*.biases") into hicpro_iced_bias - - script: - prefix = rmaps.toString() - ~/(\.matrix)?$/ - """ - ice --filter_low_counts_perc ${params.ice_filter_low_count_perc} \ - --results_filename ${prefix}_iced.matrix \ - --filter_high_counts_perc ${params.ice_filter_high_count_perc} \ - --max_iter ${params.ice_max_iter} --eps ${params.ice_eps} --remove-all-zeros-loci --output-bias 1 --verbose 1 ${rmaps} - """ -} - - -/* - * Cooler - */ - -process convert_to_pairs { - tag "$sample" - label 'process_medium' - - when: - !params.skip_maps - - input: - set val(sample), file(vpairs) from ch_vpairs_cool - file chrsize from chrsize_build.collect() - - output: - set val(sample), file("*.txt.gz") into cool_build, cool_build_zoom - - script: - """ - ## chr/pos/strand/chr/pos/strand - awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs > contacts.txt - gzip contacts.txt - """ -} - - -process cooler_raw { - tag "$sample - ${res}" - label 'process_medium' - - publishDir "${params.outdir}/contact_maps/", mode: 'copy', - saveAs: {filename -> filename.endsWith(".cool") ? "raw/cool/$filename" : "raw/txt/$filename"} - - input: - set val(sample), file(contacts), val(res) from cool_build.combine(map_res_cool) - file chrsize from chrsize_raw.collect() - - output: - set val(sample), val(res), file("*cool") into raw_cool_maps - set file("*.bed"), file("${sample}_${res}.txt") into raw_txt_maps - - script: - """ - cooler makebins ${chrsize} ${res} > ${sample}_${res}.bed - cooler cload pairs -c1 2 -p1 3 -c2 4 -p2 5 ${sample}_${res}.bed ${contacts} ${sample}_${res}.cool - cooler dump ${sample}_${res}.cool | awk '{OFS="\t"; print \$1+1,\$2+1,\$3}' > ${sample}_${res}.txt - """ -} - -process cooler_balance { - tag "$sample - ${res}" - label 'process_medium' - - publishDir "${params.outdir}/contact_maps/", mode: 'copy', - saveAs: {filename -> filename.endsWith(".cool") ? 
"norm/cool/$filename" : "norm/txt/$filename"} - - when: - !params.skip_balancing - - input: - set val(sample), val(res), file(cool) from raw_cool_maps - file chrsize from chrsize_balance.collect() - - output: - set val(sample), val(res), file("${sample}_${res}_norm.cool") into balanced_cool_maps - file("${sample}_${res}_norm.txt") into norm_txt_maps - - script: - """ - cp ${cool} ${sample}_${res}_norm.cool - cooler balance ${sample}_${res}_norm.cool -p ${task.cpus} --force - cooler dump ${sample}_${res}_norm.cool --balanced --na-rep 0 | awk '{OFS="\t"; print \$1+1,\$2+1,\$4}' > ${sample}_${res}_norm.txt - """ -} - -process cooler_zoomify { - tag "$sample" - label 'process_medium' - publishDir "${params.outdir}/contact_maps/norm/mcool", mode: 'copy' - - when: - !params.skip_mcool - - input: - set val(sample), file(contacts) from cool_build_zoom - file chrsize from chrsize_zoom.collect() - - output: - file("*mcool") into mcool_maps - - script: - """ - cooler makebins ${chrsize} ${params.res_zoomify} > bins.bed - cooler cload pairs -c1 2 -p1 3 -c2 4 -p2 5 bins.bed ${contacts} ${sample}.cool - cooler zoomify --nproc ${task.cpus} --balance ${sample}.cool - """ -} - - -/**************************************************** - * DOWNSTREAM ANALYSIS - */ - -(maps_cool_insulation, maps_cool_comp, maps_hicexplorer_ddecay, maps_hicexplorer_tads) = balanced_cool_maps.into(4) - -/* - * Counts vs distance QC - */ - -if (!params.skip_dist_decay){ - chddecay = maps_hicexplorer_ddecay.combine(ddecay_res).filter{ it[1] == it[3] }.dump(tag: "ddecay") -}else{ - chddecay = Channel.empty() -} - -process dist_decay { - tag "$sample" - label 'process_medium' - publishDir "${params.outdir}/dist_decay", mode: 'copy' - - when: - !params.skip_dist_decay - - input: - set val(sample), val(res), file(maps), val(r) from chddecay - - output: - file("*_distcount.txt") - file("*.png") - - - script: - """ - hicPlotDistVsCounts --matrices ${maps} \ - --plotFile ${maps.baseName}_distcount.png \ - --outFileData ${maps.baseName}_distcount.txt - """ -} - -/* - * Compartment calling - */ - -if(!params.skip_compartments){ - chcomp = maps_cool_comp.combine(comp_res).filter{ it[1] == it[3] }.dump(tag: "comp") -}else{ - chcomp = Channel.empty() -} - -process compartment_calling { - tag "$sample - $res" - label 'process_medium' - publishDir "${params.outdir}/compartments", mode: 'copy' - - when: - !params.skip_compartments - - input: - set val(sample), val(res), file(cool), val(r) from chcomp - file(fasta) from fasta_for_compartments.collect() - file(chrsize) from chrsize_compartments.collect() - - output: - file("*compartments*") optional true into out_compartments - - script: - """ - cooltools genome binnify --all-names ${chrsize} ${res} > genome_bins.txt - cooltools genome gc genome_bins.txt ${fasta} > genome_gc.txt - cooltools call-compartments --contact-type cis -o ${sample}_compartments ${cool} - awk -F"\t" 'NR>1{OFS="\t"; if(\$6==""){\$6=0}; print \$1,\$2,\$3,\$6}' ${sample}_compartments.cis.vecs.tsv | sort -k1,1 -k2,2n > ${sample}_compartments.cis.E1.bedgraph - """ -} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + GENOME PARAMETER VALUES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.bwt2_index = WorkflowMain.getGenomeAttribute(params, 'bowtie2') /* - * TADs calling - */ - -if (!params.skip_tads){ - chtads = maps_hicexplorer_tads.combine(tads_res_hicexplorer).filter{ 
it[1] == it[3] }.dump(tag: "hicexp") -}else{ - chtads = Channel.empty() -} - -process tads_hicexplorer { - tag "$sample - $res" - label 'process_medium' - publishDir "${params.outdir}/tads/hicexplorer", mode: 'copy' - - when: - !params.skip_tads && params.tads_caller =~ 'hicexplorer' - - input: - set val(sample), val(res), file(cool), val(r) from chtads - - output: - file("*.{bed,bedgraph,gff}") into hicexplorer_tads - - script: - """ - hicFindTADs --matrix ${cool} \ - --outPrefix tad \ - --correctForMultipleTesting fdr \ - --numberOfProcessors ${task.cpus} - """ -} - -if (!params.skip_tads){ - chIS = maps_cool_insulation.combine(tads_res_insulation).filter{ it[1] == it[3] }.dump(tag : "ins") -}else{ - chIS = Channel.empty() -} - -process tads_insulation { - tag "$sample - $res" - label 'process_medium' - publishDir "${params.outdir}/tads/insulation", mode: 'copy' - - when: - !params.skip_tads && params.tads_caller =~ 'insulation' - - input: - set val(sample), val(res), file(cool), val(r) from chIS - - output: - file("*tsv") into insulation_tads - - script: - """ - cooltools diamond-insulation --window-pixels ${cool} 15 25 50 > ${sample}_insulation.tsv - """ -} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE & PRINT PARAMETER SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +WorkflowMain.initialise(workflow, params, log) /* - * MultiQC - */ - -process multiqc { - label 'process_low' - publishDir "${params.outdir}/MultiQC", mode: params.publish_dir_mode - - when: - !params.skip_multiqc - - input: - file multiqc_config from ch_multiqc_config - file (mqc_custom_config) from ch_multiqc_custom_config.collect().ifEmpty([]) - file ('input_*/*') from mqc_mstats.concat(mqc_mergestat).collect() - file ('software_versions/*') from ch_software_versions_yaml - file workflow_summary from ch_workflow_summary.collect() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NAMED WORKFLOW FOR PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - output: - file "*multiqc_report.html" into multiqc_report - file "*_data" +include { HIC } from './workflows/hic' - script: - rtitle = '' - rfilename = '' - if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) { - rtitle = "--title \"${workflow.runName}\"" - rfilename = "--filename " + workflow.runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" - } - custom_config_file = params.multiqc_config ? "--config $mqc_custom_config" : '' - """ - multiqc -f $rtitle $rfilename $custom_config_file . 
- """ +// +// WORKFLOW: Run main nf-core/hic analysis pipeline +// +workflow NFCORE_HIC { + HIC () } /* - * Output Description HTML - */ -process output_documentation { - publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode - - input: - file output_docs from ch_output_docs - file images from ch_output_docs_images - - output: - file 'results_description.html' +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN ALL WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - script: - """ - markdown_to_html.py $output_docs -o results_description.html - """ +// +// WORKFLOW: Execute a single named workflow for the pipeline +// See: https://github.com/nf-core/rnaseq/issues/619 +// +workflow { + NFCORE_HIC () } /* - * Completion e-mail notification - */ - -workflow.onComplete { - - // Set up the e-mail variables - def subject = "[nf-core/hic] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[nf-core/hic] FAILED: $workflow.runName" - } - def email_fields = [:] - email_fields['version'] = workflow.manifest.version - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary - email_fields['summary']['Date Started'] = workflow.start - email_fields['summary']['Date Completed'] = workflow.complete - email_fields['summary']['Pipeline script file path'] = workflow.scriptFile - email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision - email_fields['summary']['Nextflow Version'] = workflow.nextflow.version - email_fields['summary']['Nextflow Build'] = workflow.nextflow.build - email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = ch_multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList) { - log.warn "[nf-core/hic] Found multiple reports from process 'multiqc', will use only one" - mqc_report = mqc_report[0] - } - } - } catch (all) { - log.warn "[nf-core/hic] Could not attach MultiQC report to summary email" - } - - // Check if we are only sending emails on failure - email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // 
Render the sendmail template - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: params.max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "[nf-core/hic] Sent summary e-mail to $email_address (sendmail)" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= params.max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "[nf-core/hic] Sent summary e-mail to $email_address (mail)" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - - c_green = params.monochrome_logs ? '' : "\033[0;32m"; - c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_red = params.monochrome_logs ? '' : "\033[0;31m"; - c_reset = params.monochrome_logs ? '' : "\033[0m"; - - if (workflow.stats.ignoredCount > 0 && workflow.success) { - log.info "-${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}-" - log.info "-${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}-" - log.info "-${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}-" - } - - if (workflow.success) { - log.info "-${c_purple}[nf-core/hic]${c_green} Pipeline completed successfully${c_reset}-" - } else { - checkHostname() - log.info "-${c_purple}[nf-core/hic]${c_red} Pipeline completed with errors${c_reset}-" - } -} - -workflow.onError { - // Print unexpected parameters - easiest is to just rerun validation - NfcoreSchema.validateParameters(params, json_schema, log) -} - -def checkHostname() { - def c_reset = params.monochrome_logs ? '' : "\033[0m" - def c_white = params.monochrome_logs ? '' : "\033[0;37m" - def c_red = params.monochrome_logs ? '' : "\033[1;91m" - def c_yellow_bold = params.monochrome_logs ? 
'' : "\033[1;93m" - if (params.hostnames) { - def hostname = 'hostname'.execute().text.trim() - params.hostnames.each { prof, hnames -> - hnames.each { hname -> - if (hostname.contains(hname) && !workflow.profile.contains(prof)) { - log.error "${c_red}====================================================${c_reset}\n" + - " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + - " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + - " ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" + - "${c_red}====================================================${c_reset}\n" - } - } - } - } -} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/modules.json b/modules.json new file mode 100644 index 0000000000000000000000000000000000000000..45540ca9ea0c4c8493525e58d8671176087348dc --- /dev/null +++ b/modules.json @@ -0,0 +1,65 @@ +{ + "name": "nf-core/hic", + "homePage": "https://github.com/nf-core/hic", + "repos": { + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "bowtie2/align": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "bowtie2/build": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "cooler/balance": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "cooler/cload": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "cooler/dump": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "cooler/makebins": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "cooler/zoomify": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "custom/getchromsizes": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "fastqc": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + } + } + }, + "subworkflows": { + "nf-core": {} + } + } + } +} diff --git a/modules/local/cooltools/eigscis.nf b/modules/local/cooltools/eigscis.nf new file mode 100644 index 0000000000000000000000000000000000000000..feaa30007099cae34a18eaa8105a3367e260e1b6 --- /dev/null +++ b/modules/local/cooltools/eigscis.nf @@ -0,0 +1,36 @@ +/* + * cooltools - call_compartments + */ + +process COOLTOOLS_EIGSCIS { + tag "${meta.id}" + label 'process_medium' + + conda "bioconda::cooltools=0.5.1 bioconda::ucsc-bedgraphtobigwig=377" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-c81d8d6b6acf4714ffaae1a274527a41958443f6:cc7ea58b8cefc76bed985dcfe261cb276ed9e0cf-0' : + 'quay.io/biocontainers/mulled-v2-c81d8d6b6acf4714ffaae1a274527a41958443f6:cc7ea58b8cefc76bed985dcfe261cb276ed9e0cf-0' }" + + input: + tuple val(meta), path(cool), val(resolution) + path(fasta) + path(chrsize) + + output: + path("*compartments*"), emit: results + path("versions.yml"), emit: versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cooltools genome binnify --all-names ${chrsize} ${resolution} > genome_bins.txt + cooltools genome gc genome_bins.txt ${fasta} > genome_gc.txt + cooltools eigs-cis ${args} -o ${prefix}_compartments ${cool} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooltools: \$(cooltools --version 2>&1 | grep version | sed 's/cooltools, version //') + END_VERSIONS + """ +} diff --git a/modules/local/cooltools/insulation.nf b/modules/local/cooltools/insulation.nf new file mode 100644 index 0000000000000000000000000000000000000000..8a9127ea47824f9541c4faf2263d8e61030468c3 --- /dev/null +++ b/modules/local/cooltools/insulation.nf @@ -0,0 +1,32 @@ +/* + * Cooltools - diamond-insulation + */ + +process COOLTOOLS_INSULATION { + tag "${meta.id}" + label 'process_medium' + + conda "bioconda::cooltools=0.5.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cooltools:0.5.1--py37h37892f8_0' : + 'quay.io/biocontainers/cooltools:0.5.1--py37h37892f8_0' }" + + input: + tuple val(meta), path(cool) + + output: + path("*tsv"), emit:tsv + path("versions.yml"), emit:versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cooltools insulation ${cool} ${args} > ${prefix}_insulation.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooltools: \$(cooltools --version 2>&1 | sed 's/cooltools, version //') + END_VERSIONS + """ +} diff --git a/modules/local/hicexplorer/hicFindTADs.nf b/modules/local/hicexplorer/hicFindTADs.nf new file mode 100644 index 0000000000000000000000000000000000000000..b6cae335c22111407b3d782e95a067ed087645e0 --- /dev/null +++ b/modules/local/hicexplorer/hicFindTADs.nf @@ -0,0 +1,34 @@ +/* + * hicexplorer - hicFindTADs + */ + +process HIC_FIND_TADS { + label 'process_medium' + + conda "bioconda::hicexplorer=3.7.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/hicexplorer:3.7.2--pyhdfd78af_1' : + 'quay.io/biocontainers/hicexplorer:3.7.2--pyhdfd78af_1' }" + + input: + tuple val(meta), path(cool) + + output: + path("*hicfindtads*"), emit:results + path("versions.yml"), emit:versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + hicFindTADs --matrix ${cool} \ + --outPrefix ${prefix}_hicfindtads \ + ${args} \ + --numberOfProcessors ${task.cpus} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hicexplorer: \$(hicFindTADs --version 2>&1 | sed 's/hicFindTADs //') + END_VERSIONS + """ +} diff --git a/modules/local/hicexplorer/hicPlotDistVsCounts.nf b/modules/local/hicexplorer/hicPlotDistVsCounts.nf new file mode 100644 index 0000000000000000000000000000000000000000..3e37d3fe7b466dbe45b2f19c220e2f9a65a07a9a --- /dev/null +++ b/modules/local/hicexplorer/hicPlotDistVsCounts.nf @@ -0,0 +1,34 @@ +/* + * hicexplorer - Genomic distance/counts plots + */ + +process HIC_PLOT_DIST_VS_COUNTS { + tag "${meta.id}" + label 'process_medium' + + conda "bioconda::hicexplorer=3.7.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hicexplorer:3.7.2--pyhdfd78af_1' : + 'quay.io/biocontainers/hicexplorer:3.7.2--pyhdfd78af_1' }" + + input: + tuple val(meta), path(cool) + + output: + path("*distcount*"), emit:results + path("versions.yml"), emit:versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + hicPlotDistVsCounts --matrices ${cool} \ + --plotFile ${prefix}_distcount.png \ + --outFileData ${prefix}_distcount.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hicexplorer: \$(hicPlotDistVsCounts --version 2>&1 | sed 's/hicPlotDistVsCounts //') + END_VERSIONS + """ +} diff --git a/modules/local/hicpro/bowtie2_merge.nf b/modules/local/hicpro/bowtie2_merge.nf new file mode 100644 index 0000000000000000000000000000000000000000..7a4e96be9aaf2470a10ca19672591e0f84853602 --- /dev/null +++ b/modules/local/hicpro/bowtie2_merge.nf @@ -0,0 +1,48 @@ +process MERGE_BOWTIE2{ + tag "${meta.id}" + label 'process_medium' + + conda "bioconda::samtools=1.15.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(bam1), path(bam2) + + output: + tuple val(meta), path("${prefix}_bwt2merged.bam"), emit: bam + tuple val(meta), path("${prefix}.mapstat"), emit: stats + path("versions.yml"), emit: versions + + script: + prefix = task.ext.prefix ?: "${meta.id}" + tag = meta.mates + """ + samtools merge -@ ${task.cpus} \\ + -f ${prefix}_bwt2merged.bam \\ + ${bam1} ${bam2} + + samtools sort -@ ${task.cpus} -m 800M \\ + -n \\ + -o ${prefix}_bwt2merged.sorted.bam \\ + ${prefix}_bwt2merged.bam + + mv ${prefix}_bwt2merged.sorted.bam ${prefix}_bwt2merged.bam + + echo "## ${prefix}" > ${prefix}.mapstat + echo -n "total_${tag}\t" >> ${prefix}.mapstat + samtools view -c ${prefix}_bwt2merged.bam >> ${prefix}.mapstat + echo -n "mapped_${tag}\t" >> ${prefix}.mapstat + samtools view -c -F 4 ${prefix}_bwt2merged.bam >> ${prefix}.mapstat + echo -n "global_${tag}\t" >> ${prefix}.mapstat + samtools view -c -F 4 ${bam1} >> ${prefix}.mapstat + echo -n "local_${tag}\t" >> ${prefix}.mapstat + samtools view -c -F 4 ${bam2} >> ${prefix}.mapstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/hicpro/build_contact_maps.nf b/modules/local/hicpro/build_contact_maps.nf new file mode 100644 index 0000000000000000000000000000000000000000..fd633a984f26b4cccc901a92e983cf0acfaa8eed --- /dev/null +++ b/modules/local/hicpro/build_contact_maps.nf @@ -0,0 +1,27 @@ +process BUILD_CONTACT_MAPS{ + tag "${meta.id}" + label 'process_high_memory' + + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + + input: + tuple val(meta), path(vpairs), val(resolution) + tuple val(meta2), path(chrsize) + + output: + tuple val(meta), val(resolution), path("*.matrix"), path("*.bed"), emit: maps + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + build_matrix \\ + --matrix-format upper \\ + --binsize ${resolution} \\ + --chrsizes ${chrsize} \\ + --ifile ${vpairs} \\ + --oprefix ${prefix} + """ +} diff --git a/modules/local/hicpro/combine_mates.nf b/modules/local/hicpro/combine_mates.nf new file mode 100644 index 0000000000000000000000000000000000000000..da95be819a03560d75db2bc80ab3e8b72bf774aa --- /dev/null +++ b/modules/local/hicpro/combine_mates.nf @@ -0,0 +1,29 @@ +process COMBINE_MATES { + tag "${meta.id}" + label 'process_low' + + conda "conda-forge::python=3.9 bioconda::pysam=0.19.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' : + 'quay.io/biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0'}" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*bwt2pairs.bam"), emit:bam + tuple val(meta), path("*.pairstat"), optional:true, emit:stats + path("versions.yml"), emit: versions + + script: + prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + """ + mergeSAM.py -f ${bam[0]} -r ${bam[1]} -o ${prefix}_bwt2pairs.bam ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(echo \$(python --version 2>&1) | sed 's/Python //') + END_VERSIONS + """ +} diff --git a/modules/local/hicpro/dnase_mapping_stats.nf b/modules/local/hicpro/dnase_mapping_stats.nf new file mode 100644 index 0000000000000000000000000000000000000000..64cb0792e4d93dfcc1b1e7d0b5d50bf7e0b75c73 --- /dev/null +++ b/modules/local/hicpro/dnase_mapping_stats.nf @@ -0,0 +1,31 @@ +process MAPPING_STATS_DNASE { + tag "$sample = $bam" + label 'process_medium' + + conda "bioconda::samtools=1.15.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path(bam), emit:bam + tuple val(meta), path("${prefix}.mapstat"), emit:stats + + script: + prefix = meta.id + "_" + meta.chunk + "_" + meta.mates + tag = meta.mates + """ + echo "## ${prefix}" > ${prefix}.mapstat + echo -n "total_${tag}\t" >> ${prefix}.mapstat + samtools view -c ${bam} >> ${prefix}.mapstat + echo -n "mapped_${tag}\t" >> ${prefix}.mapstat + samtools view -c -F 4 ${bam} >> ${prefix}.mapstat + echo -n "global_${tag}\t" >> ${prefix}.mapstat + samtools view -c -F 4 ${bam} >> ${prefix}.mapstat + echo -n "local_${tag}\t0" >> ${prefix}.mapstat + """ +} diff --git a/modules/local/hicpro/get_restriction_fragments.nf b/modules/local/hicpro/get_restriction_fragments.nf new file mode 100644 index 0000000000000000000000000000000000000000..affc2d986a339d239602c6e66ad6f0d50517505e --- /dev/null +++ b/modules/local/hicpro/get_restriction_fragments.nf @@ -0,0 +1,27 @@ +process GET_RESTRICTION_FRAGMENTS { + tag "$res_site" + label 'process_low' + + conda "conda-forge::python=3.9 conda-forge::numpy=1.22.3" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' : + 'quay.io/biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0'}" + + input: + tuple val(meta), path(fasta) + val(res_site) + + output: + tuple val(meta), path("*.bed"), emit: results + path("versions.yml"), emit: versions + + script: + """ + digest_genome.py -r ${res_site} -o restriction_fragments.bed ${fasta} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(echo \$(python --version 2>&1) | sed 's/Python //') + END_VERSIONS + """ +} diff --git a/modules/local/hicpro/get_valid_interaction.nf b/modules/local/hicpro/get_valid_interaction.nf new file mode 100644 index 0000000000000000000000000000000000000000..cc148371e52e04e96ca8dc9461a46ce80216b85e --- /dev/null +++ b/modules/local/hicpro/get_valid_interaction.nf @@ -0,0 +1,37 @@ +process GET_VALID_INTERACTION { + tag "$meta.id" + label 'process_low' + + conda "conda-forge::python=3.9 bioconda::pysam=0.19.0 bioconda::bx-python=0.8.13" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' : + 'quay.io/biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0'}" + + input: + tuple val(meta), path(bam) + tuple val(meta2), path(resfrag) + + output: + tuple val(meta), path("*.validPairs"), emit:valid_pairs + tuple val(meta), path("*.DEPairs"), optional:true, emit:de_pairs + tuple val(meta), path("*.SCPairs"), optional: true, emit:sc_pairs + tuple val(meta), path("*.REPairs"), optional: true, emit:re_pairs + tuple val(meta), path("*.FiltPairs"), optional: true, emit:filt_pairs + tuple val(meta), path("*RSstat"), optional: true, emit:stats + path("versions.yml"), emit: versions + + script: + def args = task.ext.args ?: '' + """ + mapped_2hic_fragments.py \\ + -f ${resfrag} \\ + -r ${bam} \\ + --all \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(echo \$(python --version 2>&1) | sed 's/Python //') + END_VERSIONS + """ +} diff --git a/modules/local/hicpro/get_valid_interaction_dnase.nf b/modules/local/hicpro/get_valid_interaction_dnase.nf new file mode 100644 index 0000000000000000000000000000000000000000..142d0cbd4f042fb0106c7e0380515f7b54c79953 --- /dev/null +++ b/modules/local/hicpro/get_valid_interaction_dnase.nf @@ -0,0 +1,30 @@ +process GET_VALID_INTERACTION_DNASE { + tag "$meta.id" + label 'process_low' + + conda "conda-forge::python=3.9 bioconda::pysam=0.19.0 bioconda::bx-python=0.8.13" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' : + 'quay.io/biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0'}" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.validPairs"), emit:valid_pairs + tuple val(meta), path("*RSstat"), optional: true, emit:stats + path("versions.yml"), emit: versions + + script: + def args = task.ext.args ?: '' + """ + mapped_2hic_dnase.py \\ + -r ${bam} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(echo \$(python --version 2>&1) | sed 's/Python //') + END_VERSIONS + """ +} diff --git a/modules/local/hicpro/hicpro2pairs.nf b/modules/local/hicpro/hicpro2pairs.nf new file mode 100644 index 0000000000000000000000000000000000000000..2ba2a901b62791e733900b24da4f244cd2fbaba7 --- /dev/null +++ b/modules/local/hicpro/hicpro2pairs.nf @@ -0,0 +1,31 @@ +process HICPRO2PAIRS { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::pairix=0.3.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pairix:0.3.7--py36h30a8e3e_3' : + 'quay.io/biocontainers/pairix:0.3.7--py36h30a8e3e_3' }" + + input: + tuple val(meta), path(vpairs) + tuple val(meta2), path(chrsize) + + output: + tuple val(meta), path("*.pairs.gz"), path("*.pairs.gz.px2"), emit: pairs + path("versions.yml"), emit: versions + + script: + prefix = "${meta.id}" + """ + ##columns: readID chr1 pos1 chr2 pos2 strand1 strand2 + awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs | bgzip -c > ${prefix}_contacts.pairs.gz + ##sort -k2,2 -k4,4 -k3,3n -k5,5n ${prefix}_contacts.pairs | bgzip -c > ${prefix}_contacts.pairs.gz + pairix -f ${prefix}_contacts.pairs.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pairix: \$(echo \$(pairix 2>&1 | grep Version | sed -e 's/Version: //')) + END_VERSIONS + """ +} diff --git a/modules/local/hicpro/merge_stats.nf b/modules/local/hicpro/merge_stats.nf new file mode 100644 index 0000000000000000000000000000000000000000..b25dc8a13d2f18bc5ae05742c5f15d77fd19627d --- /dev/null +++ b/modules/local/hicpro/merge_stats.nf @@ -0,0 +1,32 @@ +process MERGE_STATS { + tag "${meta.id}" + label 'process_low' + + conda "conda-forge::python=3.9" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' : + 'quay.io/biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0'}" + + input: + tuple val(meta), path(fstat) + + output: + path("${meta.id}/"), emit: mqc + path("*.{mmapstat,mpairstat,mRSstat}"), emit: stats + path("versions.yml"), emit:versions + + script: + if ( (fstat =~ /.mapstat/) ){ ext = "${meta.mates}.mmapstat" } + if ( (fstat =~ /.pairstat/) ){ ext = "mpairstat" } + if ( (fstat =~ /.RSstat/) ){ ext = "mRSstat" } + """ + mkdir -p ${meta.id} + merge_statfiles.py -f ${fstat} > ${meta.id}.${ext} + cp *${ext} ${meta.id}/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(echo \$(python --version 2>&1) | sed 's/Python //') + END_VERSIONS + """ +} diff --git a/modules/local/hicpro/merge_valid_interaction.nf b/modules/local/hicpro/merge_valid_interaction.nf new file mode 100644 index 0000000000000000000000000000000000000000..29e568a20997c0a313a87cf964f1f8c10327228b --- /dev/null +++ b/modules/local/hicpro/merge_valid_interaction.nf @@ -0,0 +1,34 @@ +process MERGE_VALID_INTERACTION { + tag "$prefix" + label 'process_high_memory' + + conda "conda-forge::gawk=5.1.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + + input: + tuple val(meta), path(vpairs) + + output: + tuple val(meta), path("*.allValidPairs"), emit: valid_pairs + path("${meta.id}/"), emit:mqc + path("*mergestat"), emit:stats + path("versions.yml"), emit: versions + + script: + prefix = meta.id + def args = task.ext.args ?: '' + """ + hicpro_merge_validpairs.sh ${args} -p ${prefix} ${vpairs} + + ## For MultiQC + mkdir -p ${prefix} + cp ${prefix}_allValidPairs.mergestat ${prefix}/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sort: \$(echo \$(sort --version 2>&1 | head -1 | awk '{print \$NF}' 2>&1)) + END_VERSIONS + """ +} diff --git a/modules/local/hicpro/run_ice.nf b/modules/local/hicpro/run_ice.nf new file mode 100644 index 0000000000000000000000000000000000000000..0a435edcd0f2a82c93be2a1b9bd832a721c270cf --- /dev/null +++ b/modules/local/hicpro/run_ice.nf @@ -0,0 +1,32 @@ +process ICE_NORMALIZATION{ + tag "$meta.id" + label 'process_high_memory' + + conda "conda-forge::python=3.9 bioconda::iced=0.5.10 conda-forge::numpy=1.22.3" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' : + 'quay.io/biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0'}" + + input: + tuple val(meta), val(res), path(rmaps), path(bed) + + output: + tuple val(meta), val(res), path("*iced.matrix"), path(bed), emit:maps + path ("*.biases"), emit:bias + path("versions.yml"), emit: versions + + script: + prefix = rmaps.toString() - ~/(\.matrix)?$/ + """ + ice --filter_low_counts_perc ${params.ice_filter_low_count_perc} \ + --results_filename ${prefix}_iced.matrix \ + --filter_high_counts_perc ${params.ice_filter_high_count_perc} \ + --max_iter ${params.ice_max_iter} --eps ${params.ice_eps} --remove-all-zeros-loci --output-bias 1 --verbose 1 ${rmaps} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(echo \$(python --version 2>&1) | sed 's/Python //') + iced: \$(python -c "import iced; print(iced.__version__)") + END_VERSIONS + """ +} diff --git a/modules/local/hicpro/trim_reads.nf b/modules/local/hicpro/trim_reads.nf new file mode 100644 index 0000000000000000000000000000000000000000..5a96df412ea89845583fc363eccf2df49ba9458d --- /dev/null +++ b/modules/local/hicpro/trim_reads.nf @@ -0,0 +1,32 @@ +process TRIM_READS { + tag "$meta.id" + label 'process_low' + + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + + input: + tuple val(meta), path(reads) + val(motif) + + output: + tuple val(meta), path("*trimmed.fastq.gz"), emit: fastq + path("versions.yml") , emit: versions + + script: + """ + zcat ${reads} > tmp.fastq + cutsite_trimming --fastq tmp.fastq \\ + --cutsite ${motif[0]} \\ + --out ${reads.simpleName}_trimmed.fastq + gzip ${reads.simpleName}_trimmed.fastq + /bin/rm -f tmp.fastq + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gzip: \$(echo \$(gzip --version 2>&1) | head -1 | cut -d" " -f2) + END_VERSIONS + """ +} diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf new file mode 100644 index 0000000000000000000000000000000000000000..595dc94d0eebd849d217c25349e4bb253bee64b6 --- /dev/null +++ b/modules/local/multiqc.nf @@ -0,0 +1,35 @@ +process MULTIQC { + label 'process_medium' + + conda "bioconda::multiqc=1.13" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + + input: + path multiqc_config + path (mqc_custom_config) + path workflow_summary + path ('fastqc/*') + path ('input_*/*') + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + multiqc -f $args . 
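+    ## Extra MultiQC options are assumed to be supplied through task.ext.args rather than
+    ## hard-coded here; a hedged sketch of such an override in conf/modules.config
+    ## (the title value is purely illustrative):
+    ##   withName: 'MULTIQC' { ext.args = '--title "nf-core/hic report"' }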
+ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf new file mode 100644 index 0000000000000000000000000000000000000000..1624dfafe6425f07bbd15daf3b2735ea1187c0ec --- /dev/null +++ b/modules/local/samplesheet_check.nf @@ -0,0 +1,31 @@ +process SAMPLESHEET_CHECK { + tag "$samplesheet" + label 'process_single' + + conda "conda-forge::python=3.8.3" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/python:3.8.3' : + 'quay.io/biocontainers/python:3.8.3' }" + + input: + path samplesheet + + output: + path '*.csv' , emit: csv + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/hic/bin/ + """ + check_samplesheet.py \\ + $samplesheet \\ + samplesheet.valid.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/split_cooler_dump.nf b/modules/local/split_cooler_dump.nf new file mode 100644 index 0000000000000000000000000000000000000000..8b9e45b31d304545ea62946f4c4e0eacb7860002 --- /dev/null +++ b/modules/local/split_cooler_dump.nf @@ -0,0 +1,32 @@ +process SPLIT_COOLER_DUMP { + tag "$meta.id" + label 'process_low' + + conda "conda-forge::gawk=5.1.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + + input: + tuple val(meta), path(bedpe) + + output: + tuple val(meta), path("*.txt"), emit: matrix + path ("versions.yml"), emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = bedpe.toString() - ~/(\_balanced)?.bedpe$/ + """ + cat ${bedpe} | awk '{OFS="\t"; print \$1,\$2,\$3}' > ${prefix}_raw.txt + cat ${bedpe} | awk '{OFS="\t"; print \$1,\$2,\$4}' > ${prefix}_balanced.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(awk --version | head -1 | cut -f1 -d, | sed -e 's/GNU Awk //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..3d851866a30b4f629881864516f0789f6cae9443 --- /dev/null +++ b/modules/nf-core/bowtie2/align/main.nf @@ -0,0 +1,71 @@ +process BOWTIE2_ALIGN { + tag "$meta.id" + label "process_high" + + conda "bioconda::bowtie2=2.4.4 bioconda::samtools=1.16.1 conda-forge::pigz=2.6" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' : + 'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' }" + + input: + tuple val(meta) , path(reads) + tuple val(meta2), path(index) + val save_unaligned + val sort_bam + + output: + tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*fastq.gz"), emit: fastq, optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: "" + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + + def unaligned = "" + def reads_args = "" + if (meta.single_end) { + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-U ${reads}" + } else { + unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-1 ${reads[0]} -2 ${reads[1]}" + } + + def samtools_command = sort_bam ? 'sort' : 'view' + + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev.1.bt2l\$//"` + [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 + + bowtie2 \\ + -x \$INDEX \\ + $reads_args \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> ${prefix}.bowtie2.log \\ + | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + + if [ -f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/bowtie2/align/meta.yml b/modules/nf-core/bowtie2/align/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..c8e9a001290d0feddaa121424a5a9c65ae568396 --- /dev/null +++ b/modules/nf-core/bowtie2/align/meta.yml @@ -0,0 +1,67 @@ +name: bowtie2_align +description: Align reads to a reference genome using bowtie2 +keywords: + - align + - map + - fasta + - fastq + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test', single_end:false ] + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.ebwt" + - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - log: + type: file + description: Aligment log + pattern: "*.log" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/build/main.nf b/modules/nf-core/bowtie2/build/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..551893af3b3bb1a3955f1cc7c0b50735c6fded8d --- /dev/null +++ b/modules/nf-core/bowtie2/build/main.nf @@ -0,0 +1,30 @@ +process BOWTIE2_BUILD { + tag "$fasta" + label 'process_high' + + conda "bioconda::bowtie2=2.4.4" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.4--py39hbb4e92a_0' : + 'quay.io/biocontainers/bowtie2:2.4.4--py39hbb4e92a_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path('bowtie2') , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir bowtie2 + bowtie2-build $args --threads $task.cpus $fasta bowtie2/${fasta.baseName} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bowtie2/build/meta.yml b/modules/nf-core/bowtie2/build/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..0240224d532af842d762c5c947fff6e84e5be113 --- /dev/null +++ b/modules/nf-core/bowtie2/build/meta.yml @@ -0,0 +1,43 @@ +name: bowtie2_build +description: Builds bowtie index for reference genome +keywords: + - build + - index + - fasta + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file +output: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test', single_end:false ] + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.bt2" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/cooler/balance/main.nf b/modules/nf-core/cooler/balance/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..4173a3c1291df7e25e3529d9ab8f2b6f8d1240b7 --- /dev/null +++ b/modules/nf-core/cooler/balance/main.nf @@ -0,0 +1,39 @@ +process COOLER_BALANCE { + tag "$meta.id" + label 'process_high' + + conda "bioconda::cooler=0.8.11" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0': + 'quay.io/biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + + input: + tuple val(meta), path(cool), val(resolution) + + output: + tuple val(meta), path("${prefix}.${extension}"), emit: cool + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + suffix = resolution ? "::/resolutions/$resolution" : "" + extension = cool.getExtension() + if ("$cool" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + cp ${cool} ${prefix}.${extension} + + cooler balance \\ + $args \\ + -p ${task.cpus} \\ + ${prefix}.${extension}${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/cooler/balance/meta.yml b/modules/nf-core/cooler/balance/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..af1a780002701ad6bbf7d64105e5e8153098f5ec --- /dev/null +++ b/modules/nf-core/cooler/balance/meta.yml @@ -0,0 +1,45 @@ +name: "cooler_balance" +description: Run matrix balancing on a cool file +keywords: + - balance +tools: + - "cooler": + description: Sparse binary format for genomic interaction matrices + homepage: https://open2c.github.io/cooler/ + documentation: https://cooler.readthedocs.io/en/latest/index.html + tool_dev_url: https://github.com/open2c/cooler + doi: "10.1093/bioinformatics/btz540" + licence: ["BSD-3-Clause"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - cool: + type: file + description: Path to COOL file + pattern: "*.{cool,mcool}" + - resolution: + type: value + description: Resolution + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - cool: + type: file + description: Output COOL file balancing weigths + pattern: "*.cool" + +authors: + - "@nservant" + - "@muffato" diff --git a/modules/nf-core/cooler/cload/main.nf b/modules/nf-core/cooler/cload/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..80d61f07336f81233a6bd8ffb116e5a25e57048d --- /dev/null +++ b/modules/nf-core/cooler/cload/main.nf @@ -0,0 +1,39 @@ +process COOLER_CLOAD { + tag "$meta.id" + label 'process_high' + + conda "bioconda::cooler=0.8.11" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : + 'quay.io/biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + + input: + tuple val(meta), path(pairs), path(index), val(cool_bin) + path chromsizes + + output: + tuple val(meta), path("*.cool"), val(cool_bin), emit: cool + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def nproc = args.contains('pairix') || args.contains('tabix')? "--nproc $task.cpus" : '' + + """ + cooler cload \\ + $args \\ + $nproc \\ + ${chromsizes}:${cool_bin} \\ + $pairs \\ + ${prefix}.cool + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/cooler/cload/meta.yml b/modules/nf-core/cooler/cload/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..8513aaec1ed8558f850367f453ee55eb9d378b11 --- /dev/null +++ b/modules/nf-core/cooler/cload/meta.yml @@ -0,0 +1,53 @@ +name: cooler_cload +description: Create a cooler from genomic pairs and bins +keywords: + - cool +tools: + - cooler: + description: Sparse binary format for genomic interaction matrices + homepage: https://open2c.github.io/cooler/ + documentation: https://cooler.readthedocs.io/en/latest/index.html + tool_dev_url: https://github.com/open2c/cooler + doi: "10.1093/bioinformatics/btz540" + licence: ["BSD-3-clause"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - pairs: + type: file + description: Path to contacts (i.e. read pairs) file. + - index: + type: file + description: Path to index file of the contacts. + - cool_bin: + type: value + description: Bins size in bp + - chromsizes: + type: file + description: Path to a chromsizes file. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - version: + type: file + description: File containing software version + pattern: "versions.yml" + - cool: + type: file + description: Output COOL file path + pattern: "*.cool" + - cool_bin: + type: value + description: Bins size in bp + +authors: + - "@jianhong" + - "@muffato" diff --git a/modules/nf-core/cooler/dump/main.nf b/modules/nf-core/cooler/dump/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..b46c78cf35ec6b40276e2d792635f97b9909ab55 --- /dev/null +++ b/modules/nf-core/cooler/dump/main.nf @@ -0,0 +1,35 @@ +process COOLER_DUMP { + tag "$meta.id" + label 'process_high' + + conda "bioconda::cooler=0.8.11" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : + 'quay.io/biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + + input: + tuple val(meta), path(cool), val(resolution) + + output: + tuple val(meta), path("*.bedpe"), emit: bedpe + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = resolution ? 
"::/resolutions/$resolution" : "" + """ + cooler dump \\ + $args \\ + -o ${prefix}.bedpe \\ + $cool$suffix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/cooler/dump/meta.yml b/modules/nf-core/cooler/dump/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..fe60523eb3556a6bb2187be0563c3fd6f2cbf5cf --- /dev/null +++ b/modules/nf-core/cooler/dump/meta.yml @@ -0,0 +1,45 @@ +name: cooler_dump +description: Dump a cooler’s data to a text stream. +keywords: + - dump +tools: + - cooler: + description: Sparse binary format for genomic interaction matrices + homepage: https://open2c.github.io/cooler/ + documentation: https://cooler.readthedocs.io/en/latest/index.html + tool_dev_url: https://github.com/open2c/cooler + doi: "10.1093/bioinformatics/btz540" + licence: ["BSD-3-Clause"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - cool: + type: file + description: Path to COOL file + pattern: "*.{cool,mcool}" + - resolution: + type: value + description: Resolution + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bedpe: + type: file + description: Output text file + pattern: "*.bedpe" + +authors: + - "@jianhong" + - "@muffato" diff --git a/modules/nf-core/cooler/makebins/main.nf b/modules/nf-core/cooler/makebins/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..7f0826197e99e9e79107359a8c65d7d60346dc49 --- /dev/null +++ b/modules/nf-core/cooler/makebins/main.nf @@ -0,0 +1,34 @@ +process COOLER_MAKEBINS { + tag "${meta.id}}" + label 'process_low' + + conda "bioconda::cooler=0.8.11" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0': + 'quay.io/biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + + input: + tuple val(meta), path(chromsizes), val(cool_bin) + + output: + tuple val(meta), path("*.bed"), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cooler makebins \\ + $args \\ + ${chromsizes} \\ + ${cool_bin} > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/cooler/makebins/meta.yml b/modules/nf-core/cooler/makebins/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..33fd8eb63f4a3225d3d03eb028578b2c1eeeaa5d --- /dev/null +++ b/modules/nf-core/cooler/makebins/meta.yml @@ -0,0 +1,34 @@ +name: "cooler_makebins" +description: Generate fixed-width genomic bins +keywords: + - makebins +tools: + - "cooler": + description: Sparse binary format for genomic interaction matrices + homepage: https://open2c.github.io/cooler/ + documentation: https://cooler.readthedocs.io/en/latest/index.html + tool_dev_url: https://github.com/open2c/cooler + doi: "10.1093/bioinformatics/btz540" + licence: ["BSD-3-Clause"] + +input: + - chromsize: + type: file + description: Path to chromosome size file + - cool_bin: + type: value + description: Resolution (bin size) in base pairs + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bed: + type: file + description: Genome segmentation at a fixed resolution as a BED file. + pattern: "*.bed" + +authors: + - "@nservant" + - "@muffato" diff --git a/modules/nf-core/cooler/zoomify/main.nf b/modules/nf-core/cooler/zoomify/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..f1cd8df79d02b8630c31de0620521cef0cdd6df0 --- /dev/null +++ b/modules/nf-core/cooler/zoomify/main.nf @@ -0,0 +1,35 @@ +process COOLER_ZOOMIFY { + tag "$meta.id" + label 'process_high' + + conda "bioconda::cooler=0.8.11" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : + 'quay.io/biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + + input: + tuple val(meta), path(cool) + + output: + tuple val(meta), path("*.mcool"), emit: mcool + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cooler zoomify \\ + $args \\ + -n $task.cpus \\ + -o ${prefix}.mcool \\ + $cool + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/cooler/zoomify/meta.yml b/modules/nf-core/cooler/zoomify/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..57f554861b25d4ca9edac66c73ff306a4d9f9390 --- /dev/null +++ b/modules/nf-core/cooler/zoomify/meta.yml @@ -0,0 +1,41 @@ +name: cooler_zoomify +description: Generate a multi-resolution cooler file by coarsening +keywords: + - mcool +tools: + - cooler: + description: Sparse binary format for genomic interaction matrices + homepage: https://open2c.github.io/cooler/ + documentation: https://cooler.readthedocs.io/en/latest/index.html + tool_dev_url: https://github.com/open2c/cooler + doi: "10.1093/bioinformatics/btz540" + licence: ["BSD-3-clause"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - cool: + type: file + description: Path to COOL file + pattern: "*.{cool,mcool}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - mcool: + type: file + description: Output mcool file + pattern: "*.mcool" + +authors: + - "@jianhong" diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..3df21765b90921413962c3bb5ca44d117d829297 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -0,0 +1,24 @@ +process CUSTOM_DUMPSOFTWAREVERSIONS { + label 'process_single' + + // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container + conda "bioconda::multiqc=1.13" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + + input: + path versions + + output: + path "software_versions.yml" , emit: yml + path "software_versions_mqc.yml", emit: mqc_yml + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + template 'dumpsoftwareversions.py' +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..60b546a012c457b5459490e732e6ac9be6979db1 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -0,0 +1,34 @@ +name: custom_dumpsoftwareversions +description: Custom module used to dump software versions within the nf-core pipeline template +keywords: + - custom + - version +tools: + - custom: + description: Custom module used to dump software versions within the nf-core pipeline template + homepage: https://github.com/nf-core/tools + documentation: https://github.com/nf-core/tools + licence: ["MIT"] +input: + - versions: + type: file + description: YML file containing software versions + pattern: "*.yml" + +output: + - yml: + type: file + description: Standard YML file containing software versions + pattern: "software_versions.yml" + - mqc_yml: + type: file + description: MultiQC custom content YML file containing software versions + pattern: "software_versions_mqc.yml" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py new file mode 100755 index 0000000000000000000000000000000000000000..b83b32c4d4242764a439f4d51e0255711f9cd40f --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python + + +"""Provide functions to merge multiple versions.yml files.""" + +import platform +from textwrap import dedent + +import yaml + + +def _make_versions_html(versions): + """Generate a tabular HTML output of all versions for MultiQC.""" + html = [ + dedent( + """\\ + <style> + #nf-core-versions tbody:nth-child(even) { + background-color: #f2f2f2; + } + </style> + <table class="table" style="width:100%" id="nf-core-versions"> + <thead> + <tr> + <th> Process Name </th> + <th> Software </th> + <th> Version </th> + </tr> + </thead> + """ + ) + ] + for process, tmp_versions in sorted(versions.items()): + html.append("<tbody>") + for i, (tool, version) in enumerate(sorted(tmp_versions.items())): + html.append( + dedent( + f"""\\ + <tr> + <td><samp>{process if (i == 0) else ''}</samp></td> + <td><samp>{tool}</samp></td> + <td><samp>{version}</samp></td> + </tr> + """ + ) + ) + html.append("</tbody>") + html.append("</table>") + return "\\n".join(html) + + +def main(): + """Load all version files and generate merged output.""" + versions_this_module = {} + versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, + } + + with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + + # aggregate versions by the module name (derived from fully-qualified process name) + versions_by_module = {} + for process, process_versions in 
versions_by_process.items(): + module = process.split(":")[-1] + try: + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) + except KeyError: + versions_by_module[module] = process_versions + + versions_by_module["Workflow"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", + } + + versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://github.com/${workflow.manifest.name}", + "plot_type": "html", + "description": "are collected at run time from the software output.", + "data": _make_versions_html(versions_by_module), + } + + with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) + with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + + with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) + + +if __name__ == "__main__": + main() diff --git a/modules/nf-core/custom/getchromsizes/main.nf b/modules/nf-core/custom/getchromsizes/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..580f87feade7280bb8d520551aa61f0021b3b88d --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/main.nf @@ -0,0 +1,44 @@ +process CUSTOM_GETCHROMSIZES { + tag "$fasta" + label 'process_single' + + conda "bioconda::samtools=1.16.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path ("*.sizes"), emit: sizes + tuple val(meta), path ("*.fai") , emit: fai + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools faidx $fasta + cut -f 1,2 ${fasta}.fai > ${fasta}.sizes + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta}.fai + touch ${fasta}.sizes + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/custom/getchromsizes/meta.yml b/modules/nf-core/custom/getchromsizes/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..219ca1d8e07166c23e28089ae5067b5937cc6f8d --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/meta.yml @@ -0,0 +1,53 @@ +name: custom_getchromsizes +description: Generates a FASTA file of chromosome sizes and a fasta index file +keywords: + - fasta + - chromosome + - indexing +tools: + - samtools: + description: Tools for dealing with SAM, BAM and CRAM files + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + tool_dev_url: https://github.com/samtools/samtools + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample 
information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta,fna,fas}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - sizes: + type: file + description: File containing chromosome lengths + pattern: "*.{sizes}" + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@tamara-hodgetts" + - "@chris-cheshire" + - "@muffato" diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..9ae5838158b28d2ae49270133fbbfe0ea673e991 --- /dev/null +++ b/modules/nf-core/fastqc/main.nf @@ -0,0 +1,51 @@ +process FASTQC { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::fastqc=0.11.9" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : + 'quay.io/biocontainers/fastqc:0.11.9--0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.zip") , emit: zip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // Make list of old name and new name pairs to use for renaming in the bash while loop + def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } + def rename_to = old_new_pairs*.join(' ').join(' ') + def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + """ + printf "%s %s\\n" $rename_to | while read old_name new_name; do + [ -f "\${new_name}" ] || ln -s \$old_name \$new_name + done + fastqc $args --threads $task.cpus $renamed_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..4da5bb5a06e32ec5ae0112c3970936ce0c464aa7 --- /dev/null +++ b/modules/nf-core/fastqc/meta.yml @@ -0,0 +1,52 @@ +name: fastqc +description: Run FastQC on sequenced reads +keywords: + - quality control + - qc + - adapters + - fastq +tools: + - fastqc: + description: | + FastQC gives general quality metrics about your reads. + It provides information about the quality score distribution + across your reads, the per base sequence content (%A/C/G/T). + You get information about adapter contamination and other + overrepresented sequences. 
+ homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ + licence: ["GPL-2.0-only"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/nextflow.config b/nextflow.config index 7296cc2af450343641614486f3cc232b25c71243..4513d65e9e615b858a5bc683fa983fb4503e7be2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,200 +1,242 @@ /* - * ------------------------------------------------- - * nf-core/hic Nextflow config file - * ------------------------------------------------- - * Default config options for all environments. - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/hic Nextflow config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Default config options for all compute environments +---------------------------------------------------------------------------------------- +*/ // Global default params, used in configs params { - // Inputs / outputs - genome = false - input = null - input_paths = null - outdir = './results' - genome = false - input_paths = false - chromosome_size = false - restriction_fragments = false - save_reference = false - - // Mapping - split_fastq = false - fastq_chunks_size = 20000000 - save_interaction_bam = false - save_aligned_intermediates = false - bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder' - bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder' - keep_dups = false - keep_multi = false - min_mapq = 10 + // Input options + input = null - // Digestion Hi-C - digestion = false - digest { - 'hindiii'{ - restriction_site='A^AGCTT' - ligation_site='AAGCTAGCTT' - } - 'mboi' { - restriction_site='^GATC' - ligation_site='GATCGATC' - } - 'dpnii' { - restriction_site='^GATC' - ligation_site='GATCGATC' - } - 'arima' { - restriction_site='^GATC,G^ANT' - ligation_site='GATCGATC,GATCGANT,GANTGATC,GANTGANT' + + // References + genome = null + igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_ignore = false + chromosome_size = null + restriction_fragments = null + save_reference = false + + // Mapping + split_fastq = false + fastq_chunks_size = 20000000 + save_interaction_bam = false + save_aligned_intermediates = false + bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder' + bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder' + keep_dups = false + keep_multi = false + min_mapq = 10 + + // Digestion Hi-C + digestion = null + ligation_site = null + restriction_site = null + digest { + 'hindiii'{ + restriction_site='A^AGCTT' + ligation_site='AAGCTAGCTT' + } + 'mboi' { + restriction_site='^GATC' + ligation_site='GATCGATC' + } + 
'dpnii' { + restriction_site='^GATC' + ligation_site='GATCGATC' + } + 'arima' { + restriction_site='^GATC,G^ANTC' + ligation_site='GATCGATC,GATCANTC,GANTGATC,GANTANTC' + } } - } - min_restriction_fragment_size = 0 - max_restriction_fragment_size = 0 - min_insert_size = 0 - max_insert_size = 0 - save_nonvalid_pairs = false + + min_restriction_fragment_size = 0 + max_restriction_fragment_size = 0 + min_insert_size = 0 + max_insert_size = 0 + save_pairs_intermediates = false + + // Dnase Hi-C + dnase = false + min_cis_dist = 0 - // Dnase Hi-C - dnase = false - min_cis_dist = 0 + // Contact maps + save_raw_maps = false + bin_size = '1000000' + res_zoomify = null + hicpro_maps = false + ice_max_iter = 100 + ice_filter_low_count_perc = 0.02 + ice_filter_high_count_perc = 0 + ice_eps = 0.1 - // Contact maps - bin_size = '1000000' - res_zoomify = '5000' - hicpro_maps = false - ice_max_iter = 100 - ice_filter_low_count_perc = 0.02 - ice_filter_high_count_perc = 0 - ice_eps = 0.1 + // Downstream Analysis + res_dist_decay = '250000' + tads_caller = 'insulation' + res_tads = '40000' + res_compartments = '250000' - // Downstream Analysis - res_dist_decay = '250000' - tads_caller = 'insulation' - res_tads = '40000' - res_compartments = '250000' + // Workflow + skip_maps = false + skip_balancing = false + skip_mcool = false + skip_dist_decay = false + skip_compartments = false + skip_tads = false + skip_multiqc = false - // Workflow - skip_maps = false - skip_balancing = false - skip_mcool = false - skip_dist_decay = false - skip_compartments = false - skip_tads = false - skip_multiqc = false - - // Boilerplate options - publish_dir_mode = 'copy' - multiqc_config = false - email = false - email_on_fail = false - max_multiqc_email_size = 25.MB - plaintext_email = false - monochrome_logs = false - help = false - igenomes_base = 's3://ngi-igenomes/igenomes' - tracedir = "${params.outdir}/pipeline_info" - igenomes_ignore = false + // MultiQC options + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null - //Config - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - hostnames = false - config_profile_name = null - config_profile_description = false - config_profile_contact = false - config_profile_url = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes,digest,input_paths' + // Boilerplate options + outdir = './results' + tracedir = "${params.outdir}/pipeline_info" + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false + validate_params = true + show_hidden_params = false + schema_ignore_params = 'genomes,digest' + + // Config options + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_description = null + config_profile_contact = null + config_profile_url = null + config_profile_name = null - // Defaults only, expecting to be overwritten - max_memory = 24.GB - max_cpus = 8 - max_time = 240.h -} -// Container slug. Stable releases should specify release tag! 
-// Developmental code should specify :dev -process.container = 'nfcore/hic:1.3.0' + // Max resource options + // Defaults only, expecting to be overwritten + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' + +} // Load base.config by default for all pipelines includeConfig 'conf/base.config' +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' + // Load nf-core custom profiles from different Institutions try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" + includeConfig "${params.custom_config_base}/nfcore_custom.config" } catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") } -// Create profiles +// Load nf-core/hic custom profiles from different institutions. +// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! +// try { +// includeConfig "${params.custom_config_base}/pipeline/hic.config" +// } catch (Exception e) { +// System.err.println("WARNING: Could not load nf-core/config/hic profiles: ${params.custom_config_base}/pipeline/hic.config") +// } + + profiles { - conda { - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - process.conda = "$projectDir/environment.yml" - } - debug { process.beforeScript = 'echo $HOSTNAME' } - docker { - docker.enabled = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - // Avoid this error: - // WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. - // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351 - // once this is established and works well, nextflow might implement this behavior as new default. 
- docker.runOptions = '-u \$(id -u):\$(id -g)' - } - singularity { - docker.enabled = false - singularity.enabled = true - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - singularity.autoMounts = true - } - podman { - singularity.enabled = false - docker.enabled = false - podman.enabled = true - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - singularity.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = true - charliecloud.enabled = false - } - charliecloud { - singularity.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = true - } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } + debug { process.beforeScript = 'echo $HOSTNAME' } + conda { + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + mamba { + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + docker { + docker.enabled = true + docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } + gitpod { + executor.name = 'local' + executor.cpus = 16 + executor.memory = 60.GB + } + test { includeConfig 'conf/test.config' } + test_full { includeConfig 'conf/test_full.config' } } + // Load igenomes.config if required if (!params.igenomes_ignore) { - includeConfig 'conf/igenomes.config' + includeConfig 'conf/igenomes.config' +} else { + params.genomes = [:] } + // Export these variables to prevent local Python/R libraries from conflicting with those in the container +// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. +// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
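+
+// A hedged usage sketch of how the profiles defined above are combined with pipeline
+// parameters at run time (file names and values below are placeholders only):
+//   nextflow run nf-core/hic -profile docker --input samplesheet.csv --genome GRCh38 --digestion hindiii --outdir ./results
+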
+ env { - PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" } // Capture exit codes from upstream processes when piping @@ -202,61 +244,65 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { - enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + enabled = true + file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" } report { - enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + enabled = true + file = "${params.tracedir}/execution_report_${trace_timestamp}.html" } trace { - enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + enabled = true + file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" } dag { - enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg" + enabled = true + file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" } manifest { - name = 'nf-core/hic' - author = 'Nicolas Servant' - homePage = 'https://github.com/nf-core/hic' - description = 'Analysis of Chromosome Conformation Capture data (Hi-C)' - mainScript = 'main.nf' - nextflowVersion = '>=20.04.0' - version = '1.3.0' + name = 'nf-core/hic' + author = """Nicolas Servant""" + homePage = 'https://github.com/nf-core/hic' + description = """Analysis of Chromosome Conformation Capture data (Hi-C)""" + mainScript = 'main.nf' + nextflowVersion = '!>=22.10.1' + version = '2.0.0' + doi = '' } +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' + // Function to ensure that resource requirements don't go beyond // a maximum limit def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" + return obj + } } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! 
Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj - } - } -} \ No newline at end of file +} diff --git a/nextflow_schema.json b/nextflow_schema.json index 7fe34b7c68bcc906fbaa437aab40a53ae0d41bfc..a0c433072b777fefb3f981f266180cd3c5e8ac3e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,26 +10,22 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", - "fa_icon": "fas fa-dna", - "description": "Input FastQ files.", - "help_text": "Use this to specify the location of your input FastQ files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq'\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The path must have at least one `*` wildcard character\n3. When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs.\n\nIf left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz`" - }, - "input_paths": { - "type": "string", - "hidden": true, - "description": "Input FastQ files for test only", - "default": "undefined" + "format": "file-path", + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", + "schema": "assets/schema_input.json", + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/hic/usage#samplesheet-input).", + "fa_icon": "fas fa-file-csv" }, "outdir": { "type": "string", - "description": "The output directory where the results will be saved.", - "default": "./results", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open" }, "email": { @@ -38,6 +34,11 @@ "fa_icon": "fas fa-envelope", "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + }, + "multiqc_title": { + "type": "string", + "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", + "fa_icon": "fas fa-file-signature" } } }, @@ -45,22 +46,26 @@ "title": "Reference genome options", "type": "object", "fa_icon": "fas fa-dna", - "description": "Options for the reference genome indices used to align reads.", + "description": "Reference genome related files and options required for the workflow.", "properties": { "genome": { "type": "string", "description": "Name of iGenomes reference.", "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. 
This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`.\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." }, "fasta": { "type": "string", - "fa_icon": "fas fa-font", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?s?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", - "help_text": "If you have no genome reference available, the pipeline can build one using a FASTA file. This requires additional time and resources, so it's better to use a pre-build index if possible." + "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", + "fa_icon": "far fa-file-code" }, "igenomes_base": { "type": "string", + "format": "directory-path", "description": "Directory / URL base for iGenomes references.", "default": "s3://ngi-igenomes/igenomes", "fa_icon": "fas fa-cloud-download-alt", @@ -88,27 +93,29 @@ "properties": { "digestion": { "type": "string", - "default": "hindiii", - "description": "Name of restriction enzyme to automatically set the restriction_site and ligation_site options" + "description": "Name of restriction enzyme to automatically set the restriction_site and ligation_site options (hindiii, mboi, dpnii, arima)", + "enum": ["hindiii", "mboi", "dpnii", "arima"] }, "restriction_site": { "type": "string", - "default": "'A^AGCTT'", + "default": null, "description": "Restriction motifs used during digestion. Several motifs (comma separated) can be provided." }, "ligation_site": { "type": "string", - "default": "'AAGCTAGCTT", + "default": null, "description": "Expected motif after DNA ligation. Several motifs (comma separated) can be provided." }, "chromosome_size": { "type": "string", + "format": "file-path", "description": "Full path to file specifying chromosome sizes (tab separated with chromosome name and size)`.", "fa_icon": "far fa-file-alt", "help_text": "If not specified, the pipeline will build this file from the reference genome file" }, "restriction_fragments": { "type": "string", + "format": "file-path", "description": "Full path to restriction fragment (bed) file.", "fa_icon": "far fa-file-alt", "help_text": "This file depends on the Hi-C protocols and digestion strategy. If not provided, the pipeline will build it using the --restriction_site option" @@ -118,12 +125,6 @@ "description": "If generated by the pipeline save the annotation and indexes in the results directory.", "help_text": "Use this parameter to save all annotations to your results folder. 
These can then be used for future pipeline runs, reducing processing times.", "fa_icon": "fas fa-save" - }, - "save_nonvalid_pairs": { - "type": "boolean", - "description": "Save the non valid pairs detected by HiC-Pro.", - "help_text": "Use this parameter to save non valid pairs detected by HiC-Pro (dangling-end, self-circle, re-ligation, filtered).", - "fa_icon": "fas fa-save" } } }, @@ -215,6 +216,10 @@ "save_interaction_bam": { "type": "boolean", "description": "Save a BAM file where all reads are flagged by their interaction classes" + }, + "save_pairs_intermediates": { + "type": "boolean", + "description": "Save all types of non valid read pairs in distinct output files" } } }, @@ -258,6 +263,10 @@ "type": "string", "default": "5000", "description": "Maximum resolution to build mcool file" + }, + "save_raw_maps": { + "type": "boolean", + "description": "Save raw contact maps" } } }, @@ -312,7 +321,7 @@ "description": "Do not run TADs calling" }, "skip_compartments": { - "type": "string", + "type": "boolean", "description": "Do not run compartments calling" }, "skip_balancing": { @@ -329,91 +338,50 @@ } } }, - "generic_options": { - "title": "Generic options", + "institutional_config_options": { + "title": "Institutional config options", "type": "object", - "fa_icon": "fas fa-file-import", - "description": "Less common options for the pipeline, typically set in a config file.", - "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", "properties": { - "help": { - "type": "boolean", - "description": "Display help text.", + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", "hidden": true, - "fa_icon": "fas fa-question-circle" + "fa_icon": "fas fa-users-cog" }, - "publish_dir_mode": { + "custom_config_base": { "type": "string", - "default": "copy", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", "hidden": true, - "description": "Method used to save pipeline results to output directory.", - "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", - "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ] + "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", + "fa_icon": "fas fa-users-cog" }, - "validate_params": { - "type": "boolean", - "description": "Boolean whether to validate parameters against the schema at runtime", - "default": true, - "fa_icon": "fas fa-check-square", + "config_profile_name": { + "type": "string", + "description": "Institutional config name", "hidden": true }, - "email_on_fail": { + "config_profile_description": { "type": "string", - "description": "Email address for completion summary, only when pipeline fails.", - "fa_icon": "fas fa-exclamation-triangle", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", - "hidden": true, - "help_text": "This works exactly as with `--email`, except emails are only sent if the workflow is not successful." - }, - "plaintext_email": { - "type": "boolean", - "description": "Send plain-text email instead of HTML.", - "fa_icon": "fas fa-remove-format", + "description": "Institutional config description.", "hidden": true, - "help_text": "Set to receive plain-text e-mails instead of HTML formatted." + "fa_icon": "fas fa-users-cog" }, - "max_multiqc_email_size": { + "config_profile_contact": { "type": "string", - "description": "File size limit when attaching MultiQC reports to summary emails.", - "default": "25.MB", - "fa_icon": "fas fa-file-upload", - "hidden": true, - "help_text": "If file generated by pipeline exceeds the threshold, it will not be attached." - }, - "monochrome_logs": { - "type": "boolean", - "description": "Do not use coloured log outputs.", - "fa_icon": "fas fa-palette", + "description": "Institutional config contact information.", "hidden": true, - "help_text": "Set to disable colourful command line output and live life in monochrome." - }, - "multiqc_config": { - "type": "string", - "description": "Custom config file to supply to MultiQC.", - "fa_icon": "fas fa-cog", - "hidden": true + "fa_icon": "fas fa-users-cog" }, - "tracedir": { + "config_profile_url": { "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - }, - "show_hidden_params": { - "type": "boolean", - "fa_icon": "far fa-eye-slash", - "description": "Show all params when using `--help`", + "description": "Institutional config URL link.", "hidden": true, - "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + "fa_icon": "fas fa-users-cog" } } }, @@ -426,7 +394,7 @@ "properties": { "max_cpus": { "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job.", + "description": "Maximum number of CPUs that can be requested for any single job.", "default": 16, "fa_icon": "fas fa-microchip", "hidden": true, @@ -452,57 +420,106 @@ } } }, - "institutional_config_options": { - "title": "Institutional config options", + "generic_options": { + "title": "Generic options", "type": "object", - "fa_icon": "fas fa-university", - "description": "Parameters used to describe centralised config profiles. These should not be edited.", - "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. 
You should not need to change these values when you run a pipeline.", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", "properties": { - "custom_config_version": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "publish_dir_mode": { "type": "string", - "description": "Git commit id for Institutional configs.", - "default": "master", - "hidden": true, - "fa_icon": "fas fa-users-cog", - "help_text": "Provide git commit id for custom Institutional configs hosted at `nf-core/configs`. This was implemented for reproducibility purposes. Default: `master`.\n\n```bash\n## Download and use config file with following git commit id\n--custom_config_version d52db660777c4bf36546ddb188ec530c3ada1b96\n```" + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true }, - "custom_config_base": { + "email_on_fail": { "type": "string", - "description": "Base directory for Institutional configs.", - "default": "https://raw.githubusercontent.com/nf-core/configs/master", - "hidden": true, - "help_text": "If you're running offline, nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell nextflow where to find them with the `custom_config_base` option. 
For example:\n\n```bash\n## Download and unzip the config files\ncd /path/to/my/configs\nwget https://github.com/nf-core/configs/archive/master.zip\nunzip master.zip\n\n## Run the pipeline\ncd /path/to/my/data\nnextflow run /path/to/pipeline/ --custom_config_base /path/to/my/configs/configs-master/\n```\n\n> Note that the nf-core/tools helper package has a `download` command to download all required pipeline files + singularity containers + institutional configs in one go for you, to make this process easier.", - "fa_icon": "fas fa-users-cog" + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", + "hidden": true + }, + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format", + "hidden": true }, - "hostnames": { + "max_multiqc_email_size": { "type": "string", - "description": "Institutional configs hostname.", - "hidden": true, - "fa_icon": "fas fa-users-cog" + "description": "File size limit when attaching MultiQC reports to summary emails.", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "default": "25.MB", + "fa_icon": "fas fa-file-upload", + "hidden": true }, - "config_profile_name": { + "monochrome_logs": { + "type": "boolean", + "description": "Do not use coloured log outputs.", + "fa_icon": "fas fa-palette", + "hidden": true + }, + "hook_url": { "type": "string", - "description": "Institutional config name", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", "hidden": true }, - "config_profile_description": { + "multiqc_config": { "type": "string", - "description": "Institutional config description.", - "hidden": true, - "fa_icon": "fas fa-users-cog" + "description": "Custom config file to supply to MultiQC.", + "fa_icon": "fas fa-cog", + "hidden": true }, - "config_profile_contact": { + "multiqc_logo": { "type": "string", - "description": "Institutional config contact information.", - "hidden": true, - "fa_icon": "fas fa-users-cog" + "description": "Custom logo file to supply to MultiQC. 
File name must also be set in the MultiQC config file", + "fa_icon": "fas fa-image", + "hidden": true }, - "config_profile_url": { + "multiqc_methods_description": { "type": "string", - "description": "Institutional config URL link.", + "description": "Custom MultiQC yaml file containing HTML including a methods description.", + "fa_icon": "fas fa-cog" + }, + "tracedir": { + "type": "string", + "description": "Directory to keep pipeline Nextflow logs and reports.", + "default": "${params.outdir}/pipeline_info", + "fa_icon": "fas fa-cogs", + "hidden": true + }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true, + "fa_icon": "fas fa-check-square", + "hidden": true + }, + "show_hidden_params": { + "type": "boolean", + "fa_icon": "far fa-eye-slash", + "description": "Show all params when using `--help`", "hidden": true, - "fa_icon": "fas fa-users-cog" + "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." } } } @@ -538,11 +555,14 @@ { "$ref": "#/definitions/generic_options" }, + { + "$ref": "#/definitions/institutional_config_options" + }, { "$ref": "#/definitions/max_job_request_options" }, { - "$ref": "#/definitions/institutional_config_options" + "$ref": "#/definitions/generic_options" } ] } diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..0d62beb6f970a40843767771cc66ee0df14b21ce --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. 
+[tool.black] +line-length = 120 +target_version = ["py37", "py38", "py39", "py310"] + +[tool.isort] +profile = "black" +known_first_party = ["nf_core"] +multi_line_output = 3 diff --git a/subworkflows/local/compartments.nf b/subworkflows/local/compartments.nf new file mode 100644 index 0000000000000000000000000000000000000000..fee68a5d5d8c3a061d02a6f698aff80186b8d7c3 --- /dev/null +++ b/subworkflows/local/compartments.nf @@ -0,0 +1,23 @@ +include { COOLTOOLS_EIGSCIS } from '../../modules/local/cooltools/eigscis' + +workflow COMPARTMENTS { + + take: + cool + fasta + chrsize + + main: + ch_versions = Channel.empty() + + COOLTOOLS_EIGSCIS( + cool, + fasta.map{it -> it[1]}.collect(), + chrsize.map{it -> it[1]}.collect() + ) + ch_versions = ch_versions.mix(COOLTOOLS_EIGSCIS.out.versions) + + emit: + versions = ch_versions + compartments = COOLTOOLS_EIGSCIS.out.results +} \ No newline at end of file diff --git a/subworkflows/local/cooler.nf b/subworkflows/local/cooler.nf new file mode 100644 index 0000000000000000000000000000000000000000..1299266d61c0e8ee5a7e7b429e8ef205df7ebd8a --- /dev/null +++ b/subworkflows/local/cooler.nf @@ -0,0 +1,97 @@ +/* + * COOLER MAIN WORKFLOW + * INPUT : .pair text file with the list of valid interaction + * OUTPUT : cooler files + */ + +include { COOLER_ZOOMIFY } from '../../modules/nf-core/cooler/zoomify/main' +include { COOLER_DUMP } from '../../modules/nf-core/cooler/dump/main' +include { COOLER_CLOAD } from '../../modules/nf-core/cooler/cload/main' +include { COOLER_BALANCE } from '../../modules/nf-core/cooler/balance/main' +include { COOLER_MAKEBINS } from '../../modules/nf-core/cooler/makebins/main' + +include { SPLIT_COOLER_DUMP } from '../../modules/local/split_cooler_dump' + +// add resolution in meta +def addResolution(row) { + def meta = [:] + meta.id = row[0].id + meta.resolution = row[2] + return [meta, row[1], row[2]] +} + +workflow COOLER { + + take: + pairs // [meta, pairs, index] + chromsize // [meta, chromsize] + cool_bins + + main: + ch_versions = Channel.empty() + + //***************************************** + // EXPORT BINS + + COOLER_MAKEBINS( + chromsize.combine(cool_bins) + ) + ch_versions = ch_versions.mix(COOLER_MAKEBINS.out.versions) + + //***************************************** + // BUILD COOL FILE PER RESOLUTION + // [meta, pairs, resolution] + + COOLER_CLOAD( + pairs.combine(cool_bins), + chromsize.map{it -> it[1]}.collect() + ) + ch_versions = ch_versions.mix(COOLER_CLOAD.out.versions) + + // Add resolution in meta + COOLER_CLOAD.out.cool + .map{ it -> addResolution(it) } + .set{ ch_cool } + + COOLER_BALANCE( + ch_cool.map{[it[0], it[1], ""]} + ) + ch_versions = ch_versions.mix(COOLER_BALANCE.out.versions) + + // Zoomify at minimum bin resolution + if (!params.res_zoomify){ + ch_res_zoomify = cool_bins.min() + }else{ + ch_res_zoomify = Channel.from(params.res_zoomify).splitCsv().flatten().unique().toInteger() + } + + ch_cool + .combine(ch_res_zoomify) + .filter{ it[2] == it[3] } + .map{ it->[it[0], it[1]] } + .set{ ch_cool_zoomify } + + COOLER_ZOOMIFY( + ch_cool_zoomify + ) + ch_versions = ch_versions.mix(COOLER_ZOOMIFY.out.versions) + + //***************************************** + // DUMP DATA + // [meta, cool] / resolution + + COOLER_DUMP( + COOLER_BALANCE.out.cool.map{[it[0], it[1], ""]} + ) + ch_versions = ch_versions.mix(COOLER_DUMP.out.versions) + + SPLIT_COOLER_DUMP( + COOLER_DUMP.out.bedpe + ) + ch_versions = ch_versions.mix(SPLIT_COOLER_DUMP.out.versions) + + emit: + versions = ch_versions + cool = 
COOLER_BALANCE.out.cool + mcool = COOLER_ZOOMIFY.out.mcool +} \ No newline at end of file diff --git a/subworkflows/local/hicpro.nf b/subworkflows/local/hicpro.nf new file mode 100644 index 0000000000000000000000000000000000000000..8b106a0820cf808501add6e5ad886cc726626107 --- /dev/null +++ b/subworkflows/local/hicpro.nf @@ -0,0 +1,132 @@ +/* + * HICPRO + * MAIN WORKFLOW + * From the raw sequencing reads to the list of valid interactions + */ + +include { HICPRO_MAPPING } from './hicpro_mapping' +include { GET_VALID_INTERACTION } from '../../modules/local/hicpro/get_valid_interaction' +include { GET_VALID_INTERACTION_DNASE } from '../../modules/local/hicpro/get_valid_interaction_dnase' +include { MERGE_VALID_INTERACTION } from '../../modules/local/hicpro/merge_valid_interaction' +include { MERGE_STATS } from '../../modules/local/hicpro/merge_stats' +include { HICPRO2PAIRS } from '../../modules/local/hicpro/hicpro2pairs' +include { BUILD_CONTACT_MAPS } from '../../modules/local/hicpro/build_contact_maps' +include { ICE_NORMALIZATION } from '../../modules/local/hicpro/run_ice' + +// Remove meta.chunks +def removeChunks(row){ + meta = row[0].clone() + meta.remove('chunk') + return [meta, row[1]] +} + +workflow HICPRO { + + take: + reads // [meta, read1, read2] + index // path + fragments // path + chrsize // path + ligation_site // value + map_res // values + + main: + ch_versions = Channel.empty() + + // Fastq to paired-end bam + HICPRO_MAPPING( + reads, + index, + ligation_site + ) + ch_versions = ch_versions.mix(HICPRO_MAPPING.out.versions) + + //*************************************** + // DIGESTION PROTOCOLS + + if (!params.dnase){ + GET_VALID_INTERACTION ( + HICPRO_MAPPING.out.bam, + fragments.collect() + ) + ch_versions = ch_versions.mix(GET_VALID_INTERACTION.out.versions) + ch_valid_pairs = GET_VALID_INTERACTION.out.valid_pairs + ch_valid_stats = GET_VALID_INTERACTION.out.stats + + }else{ + + //**************************************** + // DNASE-LIKE PROTOCOLS + + GET_VALID_INTERACTION_DNASE ( + HICPRO_MAPPING.out.bam + ) + ch_versions = ch_versions.mix(GET_VALID_INTERACTION_DNASE.out.versions) + ch_valid_pairs = GET_VALID_INTERACTION_DNASE.out.valid_pairs + ch_valid_stats = GET_VALID_INTERACTION_DNASE.out.stats + } + + + //************************************** + // MERGE AND REMOVE DUPLICATES + + //if (params.split_fastq){ + ch_valid_pairs = ch_valid_pairs.map{ it -> removeChunks(it)}.groupTuple() + ch_hicpro_stats = HICPRO_MAPPING.out.mapstats.map{it->removeChunks(it)}.groupTuple() + .concat(HICPRO_MAPPING.out.pairstats.map{it->removeChunks(it)}.groupTuple(), + ch_valid_stats.map{it->removeChunks(it)}.groupTuple()) + //}else{ + // ch_hicpro_stats = HICPRO_MAPPING.out.mapstats.groupTuple() + // .concat(HICPRO_MAPPING.out.pairstats.groupTuple(), + // ch_valid_stats.groupTuple()) + //} + + MERGE_VALID_INTERACTION ( + ch_valid_pairs + ) + ch_versions = ch_versions.mix(MERGE_VALID_INTERACTION.out.versions) + + MERGE_STATS( + ch_hicpro_stats + ) + ch_versions = ch_versions.mix(MERGE_STATS.out.versions) + + //*************************************** + // CONVERTS TO PAIRS + HICPRO2PAIRS ( + MERGE_VALID_INTERACTION.out.valid_pairs, + chrsize.collect() + ) + ch_versions = ch_versions.mix(HICPRO2PAIRS.out.versions) + + //*************************************** + // CONTACT MAPS + + if (params.hicpro_maps){ + + //build_contact_maps + BUILD_CONTACT_MAPS( + MERGE_VALID_INTERACTION.out.valid_pairs.combine(map_res), + chrsize.collect() + ) + ch_hicpro_raw_maps = BUILD_CONTACT_MAPS.out.maps + + // 
run_ice + ICE_NORMALIZATION( + BUILD_CONTACT_MAPS.out.maps + ) + ch_hicpro_iced_maps = ICE_NORMALIZATION.out.maps + ch_versions = ch_versions.mix(ICE_NORMALIZATION.out.versions) + + }else{ + ch_hicpro_raw_maps = Channel.empty() + ch_hicpro_iced_maps = Channel.empty() + } + + emit: + versions = ch_versions + pairs = HICPRO2PAIRS.out.pairs + mqc = MERGE_VALID_INTERACTION.out.mqc.concat(MERGE_STATS.out.mqc) + raw_maps = ch_hicpro_raw_maps + iced_maps = ch_hicpro_iced_maps +} diff --git a/subworkflows/local/hicpro_mapping.nf b/subworkflows/local/hicpro_mapping.nf new file mode 100644 index 0000000000000000000000000000000000000000..0f889e95f425becd78f6ab097ea035b6bd6e7abf --- /dev/null +++ b/subworkflows/local/hicpro_mapping.nf @@ -0,0 +1,115 @@ +/* + * HiC-Pro mapping + * From the raw sequencing reads to a paired-end bam file + */ + +include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main' +include { TRIM_READS } from '../../modules/local/hicpro/trim_reads' +include { BOWTIE2_ALIGN as BOWTIE2_ALIGN_TRIMMED } from '../../modules/nf-core/bowtie2/align/main' +include { MERGE_BOWTIE2 } from '../../modules/local/hicpro/bowtie2_merge' +include { COMBINE_MATES} from '../../modules/local/hicpro/combine_mates' +include { MAPPING_STATS_DNASE } from '../../modules/local/hicpro/dnase_mapping_stats' + +// Paired-end to Single-end +def pairToSingle(row, mates) { + def meta = row[0].clone() + meta.single_end = true + meta.mates = mates + if (mates == "R1") { + return [meta, [ row[1][0]] ] + }else if (mates == "R2"){ + return [meta, [ row[1][1]] ] + } +} + +// Single-end to Paired-end +def singleToPair(row){ + def meta = row[0].clone() + meta.remove('mates') + meta.single_end = false + return [ meta, row[1] ] +} + + +workflow HICPRO_MAPPING { + + take: + reads // [meta, read1, read2] + index // [meta, path] + ligation_site // value + + main: + ch_versions = Channel.empty() + + // Align each mates separetly and add mates information in [meta] + ch_reads_r1 = reads.map{ it -> pairToSingle(it,"R1") } + ch_reads_r2 = reads.map{ it -> pairToSingle(it,"R2") } + ch_reads = ch_reads_r1.concat(ch_reads_r2) + + // bowtie2 - save_unaligned=true - sort_bam=false + BOWTIE2_ALIGN( + ch_reads, + index.collect(), + true, + false + ) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions) + + if (!params.dnase){ + // trim reads + TRIM_READS( + BOWTIE2_ALIGN.out.fastq, + ligation_site.collect() + ) + ch_versions = ch_versions.mix(TRIM_READS.out.versions) + + // bowtie2 on trimmed reads - save_unaligned=false - sort_bam=false + BOWTIE2_ALIGN_TRIMMED( + TRIM_READS.out.fastq, + index.collect(), + false, + false + ) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN_TRIMMED.out.versions) + + // Merge the two mapping steps + BOWTIE2_ALIGN.out.bam + .combine(BOWTIE2_ALIGN_TRIMMED.out.bam, by:[0]) + .set { ch_bowtie2_align} + + MERGE_BOWTIE2( + ch_bowtie2_align + ) + ch_versions = ch_versions.mix(MERGE_BOWTIE2.out.versions) + ch_mapping_stats = MERGE_BOWTIE2.out.stats + + // Combine mates + MERGE_BOWTIE2.out.bam + .map { singleToPair(it) } + .groupTuple() + .set {ch_bams} + + }else{ + + MAPPING_STATS_DNASE( + BOWTIE2_ALIGN.out.bam + ) + ch_mapping_stats = MAPPING_STATS_DNASE.out.stats + + BOWTIE2_ALIGN.out.bam + .map { singleToPair(it) } + .groupTuple() + .set {ch_bams} + } + + COMBINE_MATES ( + ch_bams + ) + ch_versions = ch_versions.mix(COMBINE_MATES.out.versions) + + emit: + versions = ch_versions + bam = COMBINE_MATES.out.bam + mapstats = ch_mapping_stats + pairstats = COMBINE_MATES.out.stats +} diff --git 
a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf new file mode 100644 index 0000000000000000000000000000000000000000..3f21f1f2c5a6a82fc781647724c6db3b574813e0 --- /dev/null +++ b/subworkflows/local/input_check.nf @@ -0,0 +1,72 @@ +// +// Check input samplesheet and get read channels +// + +include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' + +workflow INPUT_CHECK { + take: + samplesheet // file: /path/to/samplesheet.csv + + main: + if (params.split_fastq){ + + SAMPLESHEET_CHECK ( samplesheet ) + .csv + .splitCsv ( header:true, sep:',' ) + .map { create_fastq_channels(it) } + .splitFastq( by: params.fastq_chunks_size, pe:true, file: true, compress:true) + .map { it -> [it[0], [it[1], it[2]]]} + .groupTuple(by: [0]) + .flatMap { it -> setMetaChunk(it) } + .collate(2) + //.map { it -> + // def meta = it[0].clone() + // meta.chunk = it[1].baseName - ~/.fastq(.gz)?/ + // return [meta, [it[1], it[2]]] + //} + .set { reads } + + }else{ + SAMPLESHEET_CHECK ( samplesheet ) + .csv + .splitCsv ( header:true, sep:',' ) + .map { create_fastq_channels(it) } + .map { it -> [it[0], [it[1], it[2]]]} + .groupTuple(by: [0]) + .flatMap { it -> setMetaChunk(it) } + .collate(2) + .set { reads } + } + + emit: + reads // channel: [ val(meta), [ reads ] ] +} + +// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] +def create_fastq_channels(LinkedHashMap row) { + def meta = [:] + meta.id = row.sample + meta.single_end = false + + def array = [] + if (!file(row.fastq_1).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" + } + if (!file(row.fastq_2).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" + } + array = [ meta, file(row.fastq_1), file(row.fastq_2) ] + return array +} + +// Set the meta.chunk value in case of technical replicates +def setMetaChunk(row){ + def map = [] + row[1].eachWithIndex() { file,i -> + meta = row[0].clone() + meta.chunk = i + map += [meta, file] + } + return map +} \ No newline at end of file diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf new file mode 100644 index 0000000000000000000000000000000000000000..a4a2399303570928d763612bd03d88bd4b485f3b --- /dev/null +++ b/subworkflows/local/prepare_genome.nf @@ -0,0 +1,69 @@ +/* + * Prepare Annotation Genome for Hi-C data analysis + */ + +include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main' +include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main' +include { GET_RESTRICTION_FRAGMENTS } from '../../modules/local/hicpro/get_restriction_fragments' + +workflow PREPARE_GENOME { + + take: + fasta + restriction_site + + main: + ch_versions = Channel.empty() + + //*************************************** + // Bowtie Index + if(!params.bwt2_index){ + BOWTIE2_BUILD ( + fasta + ) + ch_index = BOWTIE2_BUILD.out.index + ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) + }else{ + Channel.fromPath( params.bwt2_index , checkIfExists: true) + .map { it -> [[:], it]} + .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwt2_index}" } + .set { ch_index } + } + + //*************************************** + // Chromosome size + if(!params.chromosome_size){ + CUSTOM_GETCHROMSIZES( + fasta + ) + ch_chromsize = CUSTOM_GETCHROMSIZES.out.sizes + ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) + }else{ + Channel.fromPath( params.chromosome_size , 
checkIfExists: true) + .map { it -> [[:], it]} + .set {ch_chromsize} + } + + //*************************************** + // Restriction fragments + if(!params.restriction_fragments && !params.dnase){ + GET_RESTRICTION_FRAGMENTS( + fasta, + restriction_site + ) + ch_resfrag = GET_RESTRICTION_FRAGMENTS.out.results + ch_versions = ch_versions.mix(GET_RESTRICTION_FRAGMENTS.out.versions) + }else if (!params.dnase){ + Channel.fromPath( params.restriction_fragments, checkIfExists: true ) + .map{ it -> [[:], it] } + .set {ch_resfrag} + }else{ + ch_resfrag = Channel.empty() + } + + emit: + index = ch_index + chromosome_size = ch_chromsize + res_frag = ch_resfrag + versions = ch_versions +} diff --git a/subworkflows/local/tads.nf b/subworkflows/local/tads.nf new file mode 100644 index 0000000000000000000000000000000000000000..31c1e38b03b8f360c25014e92767f3d9705c434f --- /dev/null +++ b/subworkflows/local/tads.nf @@ -0,0 +1,28 @@ +include { COOLTOOLS_INSULATION } from '../../modules/local/cooltools/insulation' +include { HIC_FIND_TADS } from '../../modules/local/hicexplorer/hicFindTADs' + +workflow TADS { + + take: + cool + + main: + ch_versions = Channel.empty() + ch_tads = Channel.empty() + + if (params.tads_caller =~ 'insulation'){ + COOLTOOLS_INSULATION(cool) + ch_versions = ch_versions.mix(COOLTOOLS_INSULATION.out.versions) + ch_tads = ch_tads.mix(COOLTOOLS_INSULATION.out.tsv) + } + + if (params.tads_caller =~ 'hicexplorer'){ + HIC_FIND_TADS(cool) + ch_versions = ch_versions.mix(HIC_FIND_TADS.out.versions) + ch_tads = ch_tads.mix(HIC_FIND_TADS.out.results) + } + + emit: + tads = ch_tads + versions = ch_versions +} \ No newline at end of file diff --git a/workflows/hic.nf b/workflows/hic.nf new file mode 100644 index 0000000000000000000000000000000000000000..fed1114320cbc615259ab66db847887f9eef100d --- /dev/null +++ b/workflows/hic.nf @@ -0,0 +1,296 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) + +// Validate input parameters +WorkflowHic.initialise(params, log) + +// Check input path parameters to see if they exist +def checkPathParamList = [ params.input ] +checkPathParamList = [ + params.input, params.multiqc_config, + params.fasta, params.bwt2_index +] + +for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +// Check mandatory parameters +if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } + +//***************************************** +// Digestion parameters +if (params.digestion){ + restriction_site = params.digestion ? params.digest[ params.digestion ].restriction_site ?: false : false + ch_restriction_site = Channel.value(restriction_site) + ligation_site = params.digestion ? params.digest[ params.digestion ].ligation_site ?: false : false + ch_ligation_site = Channel.value(ligation_site) +}else if (params.dnase){ + ch_restriction_site = Channel.empty() + ch_ligation_site = Channel.empty() +}else{ + exit 1, "Ligation motif not found. Please either use the `--digestion` parameters or specify the `--restriction_site` and `--ligation_site`. 
For DNase Hi-C, please use '--dnase' option" +} + +//**************************************** +// Combine all maps resolution for downstream analysis + +ch_map_res = Channel.from( params.bin_size ).splitCsv().flatten().toInteger() + +if (params.res_zoomify){ + ch_zoom_res = Channel.from( params.res_zoomify ).splitCsv().flatten().toInteger() + ch_map_res = ch_map_res.concat(ch_zoom_res) +} + +if (params.res_tads && !params.skip_tads){ + ch_tads_res = Channel.from( "${params.res_tads}" ).splitCsv().flatten().toInteger() + ch_map_res = ch_map_res.concat(ch_tads_res) +}else{ + ch_tads_res=Channel.empty() + if (!params.skip_tads){ + log.warn "[nf-core/hic] Hi-C resolution for TADs calling not specified. See --res_tads" + } +} + +if (params.res_dist_decay && !params.skip_dist_decay){ + ch_ddecay_res = Channel.from( "${params.res_dist_decay}" ).splitCsv().flatten().toInteger() + ch_map_res = ch_map_res.concat(ch_ddecay_res) +}else{ + ch_ddecay_res = Channel.empty() + if (!params.skip_dist_decay){ + log.warn "[nf-core/hic] Hi-C resolution for distance decay not specified. See --res_dist_decay" + } +} + +if (params.res_compartments && !params.skip_compartments){ + ch_comp_res = Channel.from( "${params.res_compartments}" ).splitCsv().flatten().toInteger() + ch_map_res = ch_map_res.concat(ch_comp_res) +}else{ + ch_comp_res = Channel.empty() + if (!params.skip_compartments){ + log.warn "[nf-core/hic] Hi-C resolution for compartment calling not specified. See --res_compartments" + } +} + +ch_map_res = ch_map_res.unique() +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Local to the pipeline +// +include { HIC_PLOT_DIST_VS_COUNTS } from '../modules/local/hicexplorer/hicPlotDistVsCounts' +include { MULTIQC } from '../modules/local/multiqc' + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' +include { HICPRO } from '../subworkflows/local/hicpro' +include { COOLER } from '../subworkflows/local/cooler' +include { COMPARTMENTS } from '../subworkflows/local/compartments' +include { TADS } from '../subworkflows/local/tads' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CHANNELS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +Channel.fromPath( params.fasta ) + .ifEmpty { exit 1, "Genome index: Fasta file not found: ${params.fasta}" } + .map{it->[[:],it]} + .set { ch_fasta } + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Info required for completion email and summary +def multiqc_report = [] + +workflow HIC { + + ch_versions = Channel.empty() + + // + // SUBWORKFLOW: Read in samplesheet, validate and stage input files + // + INPUT_CHECK ( + ch_input + ) + + // + // SUBWORKFLOW: Prepare genome annotation + // + PREPARE_GENOME( + ch_fasta, + ch_restriction_site + ) + ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) + + // + // MODULE: Run FastQC + // + FASTQC ( + INPUT_CHECK.out.reads + ) + ch_versions = ch_versions.mix(FASTQC.out.versions) + + // + // SUB-WORFLOW: HiC-Pro + // + INPUT_CHECK.out.reads.view() + HICPRO ( + INPUT_CHECK.out.reads, + PREPARE_GENOME.out.index, + PREPARE_GENOME.out.res_frag, + PREPARE_GENOME.out.chromosome_size, + ch_ligation_site, + ch_map_res + ) + ch_versions = ch_versions.mix(HICPRO.out.versions) + + // + // SUB-WORKFLOW: COOLER + // + COOLER ( + HICPRO.out.pairs, + PREPARE_GENOME.out.chromosome_size, + ch_map_res + ) + ch_versions = ch_versions.mix(COOLER.out.versions) + + // + // MODULE: HICEXPLORER/HIC_PLOT_DIST_VS_COUNTS + // + if (!params.skip_dist_decay){ + COOLER.out.cool + .combine(ch_ddecay_res) + .filter{ it[0].resolution == it[2] } + .map { it -> [it[0], it[1]]} + .set{ ch_distdecay } + + HIC_PLOT_DIST_VS_COUNTS( + ch_distdecay + ) + ch_versions = ch_versions.mix(HIC_PLOT_DIST_VS_COUNTS.out.versions) + } + + // + // SUB-WORKFLOW: COMPARTMENT CALLING + // + if (!params.skip_compartments){ + COOLER.out.cool + .combine(ch_comp_res) + .filter{ it[0].resolution == it[2] } + 
.map { it -> [it[0], it[1], it[2]]} + .set{ ch_cool_compartments } + + COMPARTMENTS ( + ch_cool_compartments, + ch_fasta, + PREPARE_GENOME.out.chromosome_size + ) + ch_versions = ch_versions.mix(COMPARTMENTS.out.versions) + } + + // + // SUB-WORKFLOW : TADS CALLING + // + if (!params.skip_tads){ + COOLER.out.cool + .combine(ch_tads_res) + .filter{ it[0].resolution == it[2] } + .map { it -> [it[0], it[1]]} + .set{ ch_cool_tads } + + TADS( + ch_cool_tads + ) + ch_versions = ch_versions.mix(TADS.out.versions) + } + + // + // SOFTWARE VERSION + // + CUSTOM_DUMPSOFTWAREVERSIONS( + ch_versions.unique().collectFile(name: 'collated_versions.yml') + ) + + // + // MODULE: MultiQC + // + workflow_summary = WorkflowHic.paramsSummaryMultiqc(workflow, summary_params) + ch_workflow_summary = Channel.value(workflow_summary) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_config) + ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.map{it->it[1]}) + ch_multiqc_files = ch_multiqc_files.mix(HICPRO.out.mqc) + + MULTIQC ( + ch_multiqc_config, + ch_multiqc_custom_config.collect().ifEmpty([]), + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'), + FASTQC.out.zip.map{it->it[1]}, + HICPRO.out.mqc.collect() + ) + multiqc_report = MULTIQC.out.report.toList() +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + COMPLETION EMAIL AND SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow.onComplete { + if (params.email || params.email_on_fail) { + NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) + } + NfcoreTemplate.summary(workflow, params, log) + if (params.hook_url) { + NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/
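
A minimal, self-contained sketch of the samplesheet handling introduced above. The column names (`sample`, `fastq_1`, `fastq_2`) are taken from `create_fastq_channels()` in `subworkflows/local/input_check.nf`; the sample id and FastQ paths are placeholders, and the file-existence checks and `SAMPLESHEET_CHECK` validation step performed by the real subworkflow are intentionally omitted.

```nextflow
// Sketch of the 3-column samplesheet expected via --input and how
// splitCsv(header:true) turns it into [meta, fastq_1, fastq_2] tuples,
// mirroring subworkflows/local/input_check.nf (placeholder paths, no validation).
workflow {

    // In the pipeline this text comes from the CSV file passed with --input
    def csv_text = [
        'sample,fastq_1,fastq_2',
        'HIC_ES_1,/path/to/HIC_ES_1_R1.fastq.gz,/path/to/HIC_ES_1_R2.fastq.gz'
    ].join('\n')

    Channel.of(csv_text)
        .splitCsv(header: true, sep: ',')
        .map { row -> [ [id: row.sample, single_end: false], row.fastq_1, row.fastq_2 ] }
        .view()
}
```

Running this with `nextflow run <sketch>.nf` prints one tuple per samplesheet row, which is the shape consumed downstream by `HICPRO_MAPPING`.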
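
The `--digestion` handling in `workflows/hic.nf` resolves the enzyme name through a `params.digest` map that lives in `nextflow.config` and is not part of this diff. The sketch below assumes that layout; only the HindIII motifs (`A^AGCTT` / `AAGCTAGCTT`) are taken from the former schema defaults shown above, and motifs for the other enzymes in the schema enum (mboi, dpnii, arima) are deliberately left out rather than guessed.

```nextflow
// Sketch of the --digestion lookup pattern used in workflows/hic.nf.
// The params.digest layout is an assumption; only the HindIII motifs come
// from the former nextflow_schema.json defaults.
params.digestion = 'hindiii'
params.digest = [
    hindiii: [ restriction_site: 'A^AGCTT', ligation_site: 'AAGCTAGCTT' ]
]

workflow {
    // Fall back to false when the enzyme is not in the map
    def restriction_site = params.digest[ params.digestion ]?.restriction_site ?: false
    def ligation_site    = params.digest[ params.digestion ]?.ligation_site    ?: false

    ch_restriction_site = Channel.value(restriction_site)
    ch_ligation_site    = Channel.value(ligation_site)

    ch_ligation_site.view { "ligation motif: ${it}" }
}
```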
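
Several steps above (distance decay, compartments, TADs) reuse the same channel idiom: cool files carry a `meta.resolution` key (added by `addResolution()` in `subworkflows/local/cooler.nf`), are combined with a channel of requested resolutions, and only the matching pairs are kept. A small standalone sketch of that `combine`/`filter` pattern, with made-up sample ids, file names and resolutions:

```nextflow
// Sketch of the combine/filter-by-resolution idiom used in workflows/hic.nf.
// Sample ids, file names and resolutions are placeholders.
workflow {

    // One cool file per (sample, resolution), as produced after addResolution()
    ch_cool = Channel.of(
        [ [id: 'sample1', resolution: 1000], 'sample1.1000.cool' ],
        [ [id: 'sample1', resolution: 5000], 'sample1.5000.cool' ]
    )

    // Resolutions requested for a downstream analysis (e.g. --res_tads 5000)
    ch_analysis_res = Channel.of(5000)

    ch_cool
        .combine(ch_analysis_res)
        .filter { meta, cool, res -> meta.resolution == res }
        .map    { meta, cool, res -> [ meta, cool ] }
        .view()
}
```

Only the 5000 bp cool file survives the filter, which is exactly how `ch_cool_tads`, `ch_cool_compartments` and `ch_distdecay` are derived from `COOLER.out.cool` in the main workflow.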