Lint with new template

b17922a8 · nservant · eb65c8d2 · b17922a8 · b17922a8 · b17922a8
Commit b17922a8 authored Aug 7, 2020 by nservant
--- a/.github/workflows/awsfulltest.yml
+++ b/.github/workflows/awsfulltest.yml
@@ -20,7 +20,6 @@ jobs:
      - name: Install awscli
        run: conda install -c conda-forge awscli
      - name: Start AWS batch job
-        # TODO nf-core: You can customise AWS full pipeline tests as required
        # Add full size test data (but still relatively small datasets for few samples)
        # on the `test_full.config` test runs with only one set of parameters
        # Then specify `-profile test_full` instead of `-profile test` on the AWS batch command

--- a/.github/workflows/awstest.yml
+++ b/.github/workflows/awstest.yml
@@ -21,7 +21,6 @@ jobs:
      - name: Install awscli
        run: conda install -c conda-forge awscli
      - name: Start AWS batch job
-        # TODO nf-core: You can customise CI pipeline run tests as required
        # For example: adding multiple test runs with different parameters
        # Remember that you can parallelise this by using strategy.matrix
        env:

--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -34,13 +34,13 @@ jobs:

      - name: Build new docker image
        if: env.GIT_DIFF
-        run: docker build --no-cache . -t nfcore/hic:dev
+        run: docker build --no-cache . -t nfcore/hic:1.2.2

      - name: Pull docker image
        if: ${{ !env.GIT_DIFF }}
        run: |
          docker pull nfcore/hic:dev
-          docker tag nfcore/hic:dev nfcore/hic:dev
+          docker tag nfcore/hic:dev nfcore/hic:1.2.2

      - name: Install Nextflow
        run: |
@@ -48,8 +48,5 @@ jobs:
          sudo mv nextflow /usr/local/bin/

      - name: Run pipeline with test data
-        # TODO nf-core: You can customise CI pipeline run tests as required
-        # For example: adding multiple test runs with different parameters
-        # Remember that you can parallelise this by using strategy.matrix
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,10 +10,10 @@ COPY environment.yml /
 RUN conda env create --quiet -f /environment.yml && conda clean -a

 # Add conda installation dir to PATH (instead of doing 'conda activate')
-ENV PATH /opt/conda/envs/nf-core-hic-1.2.1/bin:$PATH
+ENV PATH /opt/conda/envs/nf-core-hic-1.2.2/bin:$PATH

 # Dump the details of the installed packages to a file for posterity
-RUN conda env export --name nf-core-hic-1.2.1 > nf-core-hic-1.2.1.yml
+RUN conda env export --name nf-core-hic-1.2.2 > nf-core-hic-1.2.2.yml

 # Instruct R processes to use these empty files instead of clashing with a local version
 RUN touch .Rprofile

--- a/README.md
+++ b/README.md
@@ -104,8 +104,6 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool

 4. Start running your own analysis!

-    <!-- TODO nf-core: Update the example "typical command" below used to run the pipeline -->
-
    ```bash
    nextflow run nf-core/hic -profile <docker/singularity/conda/institute> --input '*_R{1,2}.fastq.gz' --genome GRCh37
    ```

--- a/conf/hicpro.config
+++ b/conf/hicpro.config
@@ -10,7 +10,6 @@
 params {

       // Alignment options
-       splitFastq = false
       bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder'
       bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder'
       min_mapq = 10
@@ -35,9 +34,5 @@ params {
       ice_filer_low_count_perc = 0.02
       ice_filer_high_count_perc =  0
       ice_eps = 0.1
-
-       saveReference = false
-       saveAlignedIntermediates = false
-       saveInteractionBAM = false
 }

--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -12,11 +12,25 @@ params {
  config_profile_description = 'Full test dataset to check pipeline function'

  // Input data for full size test
-  // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
-  // TODO nf-core: Give any required params for the test so that command line flags are not needed
-  single_end = false
  input_paths = [
-    ['Testdata', ['https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R1.tiny.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R2.tiny.fastq.gz']],
-    ['SRR389222', ['https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub2.fastq.gz']]
+    ['SRR4292758_00', ['https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R2.fastq.gz']]
   ]
+
+  // Annotations
+  fasta = 'https://github.com/nf-core/test-datasets/raw/hic/reference/W303_SGD_2015_JRIU00000000.fsa'
+  restriction_site = 'A^AGCTT'
+  ligation_site = 'AAGCTAGCTT'
+  
+  min_mapq = 2
+  rm_dup = true
+  rm_singleton = true
+  rm_multi = true
+
+  min_restriction_fragment_size = 100
+  max_restriction_fragment_size = 100000
+  min_insert_size = 100
+  max_insert_size = 600
+  
+  // Options
+  skip_cool = true
 }
--- a/docs/README.md
+++ b/docs/README.md
@@ -2,8 +2,6 @@

 The nf-core/hic documentation is split into the following pages:

-<!-- TODO nf-core: If you write more documentation pages, add them to the docs index page here -->
-
 * [Usage](usage.md)
  * An overview of how the pipeline works, how to run it and a description of all of the different command-line flags.
 * [Output](output.md)

--- a/environment.yml
+++ b/environment.yml
 # You can use this file to create a conda environment for this pipeline:
 #   conda env create -f environment.yml
-name: nf-core-hic-1.2.1
+name: nf-core-hic-1.2.2
 channels:
  - conda-forge
  - bioconda

--- a/main.nf
+++ b/main.nf
@@ -32,10 +32,10 @@ def helpMessage() {
      --chromosome_size [file]                  Path to chromosome size file
      --restriction_fragments [file]            Path to restriction fragment file (bed)
      --save_reference [bool]                   Save reference genome to output folder. Default: False
-      --save_aligned_intermediates [bool]       Save intermediates alignment files. Default: False

    Alignments
      --split_fastq [bool]                      Size of read chuncks to use to speed up the workflow. Default: None
+      --save_aligned_intermediates [bool]       Save intermediates alignment files. Default: False 
      --bwt2_opts_end2end [str]                 Options for bowtie2 end-to-end mappinf (first mapping step). See hic.config for default.
      --bwt2_opts_trimmed [str]                 Options for bowtie2 mapping after ligation site trimming. See hic.config for default.
      --min_mapq [int]                          Minimum mapping quality values to consider. Default: 10

--- a/nextflow.config
+++ b/nextflow.config
@@ -15,7 +15,8 @@ params {

  outdir = './results'
  genome = false
-  readPaths = false
+  input_paths = false
+  split_fastq = false
  chromosome_size = false
  restriction_fragments = false
  skip_maps = false
@@ -26,15 +27,27 @@ params {
  save_interaction_bam = false
  save_aligned_intermediates = false

-  dnase = false
-  rm_dup = false
-  rm_singleton = false
-  rm_multi = false
+  bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder'
+  bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder'
+  min_mapq = 10
+
+  // Digestion Hi-C
+  restriction_site = 'A^AGCTT'
+  ligation_site = 'AAGCTAGCTT'
  min_restriction_fragment_size = false
  max_restriction_fragment_size = false
  min_insert_size = false
  max_insert_size = false
-  min_cis_dist = false
+  dnase = false
+  rm_dup = false
+  rm_singleton = false
+  rm_multi = false
+  bin_size = '1000000,500000'
+  ice_max_iter = 100
+  ice_filer_low_count_perc = 0.02
+  ice_filer_high_count_perc =  0
+  ice_eps = 0.1
+  
  publish_dir_mode = 'copy'

  // Boilerplate options
@@ -65,7 +78,7 @@ params {

 // Container slug. Stable releases should specify release tag!
 // Developmental code should specify :dev
-process.container = 'nfcore/hic:1.2.1'
+process.container = 'nfcore/hic:1.2.2'

 // Load base.config by default for all pipelines
 includeConfig 'conf/base.config'
@@ -77,9 +90,6 @@ try {
  System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config")
 }

-// Load hic config file
-includeConfig 'conf/hicpro.config'
-
 // Create profiles
 profiles {
  conda { process.conda = "$baseDir/environment.yml" }
@@ -138,7 +148,7 @@ manifest {
  description = 'Analysis of Chromosome Conformation Capture data (Hi-C)'
  mainScript = 'main.nf'
  nextflowVersion = '>=19.10.0'
-  version = '1.2.1'
+  version = '1.2.2'
 }

 // Function to ensure that resource requirements don't go beyond

--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -20,6 +20,17 @@
                    "description": "Input FastQ files.",
                    "help_text": "Use this to specify the location of your input FastQ files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq'\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The path must have at least one `*` wildcard character\n3. When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs.\n\nIf left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz`"
                },
+                "input_paths": {
+                    "type": "string",
+                    "hidden": true,
+                    "description": "Input FastQ files for test only",
+                    "default": "undefined"
+                },
+                "split_fastq": {
+                    "type": "number",
+                    "description": "Split the reads into chunks before running. Specify the number of reads per chunk as --split_fastq 20000000.",
+                    "fa_icon": "fas fa-dna"
+                },
                "single_end": {
                    "type": "boolean",
                    "description": "Specifies that the input is single-end reads.",
@@ -72,6 +83,180 @@
                    "fa_icon": "fas fa-ban",
                    "hidden": true,
                    "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`."
+                },
+                "bwt2_index": {
+                    "type": "string",
+                    "description": "Full path to directory containing Bowtie index including base name. i.e. `/path/to/index/base`.",
+                    "fa_icon": "far fa-file-alt"
+                },
+                "chromosome_size": {
+                    "type": "string",
+                    "description": "Full path to file specifying chromosome sizes (tab separated with chromosome name and size).",
+                    "fa_icon": "far fa-file-alt",
+                    "help_text": "If not specified, the pipeline will build this file from the reference genome file"
+                },
+                "restriction_fragments": {
+                    "type": "string",
+                    "description": "Full path to restriction fragment (bed) file.",
+                    "fa_icon": "far fa-file-alt",
+                    "help_text": "This file depends on the Hi-C protocols and digestion strategy. If not provided, the pipeline will build it using the --restriction_site option"
+                },
+                "save_reference": {
+                    "type": "boolean",
+                    "description": "If generated by the pipeline save the annotation and indexes in the results directory.",
+                    "help_text": "Use this parameter to save all annotations to your results folder. These can then be used for future pipeline runs, reducing processing times.",
+                    "fa_icon": "fas fa-save"
+                }
+            }
+        },
+        "data_processing_options": {
+            "title": "Data processing",
+            "type": "object",
+            "description": "Parameters for Hi-C data processing",
+            "default": "",
+            "fa_icon": "fas fa-bahai",
+            "properties": {
+                "dnase": {
+                    "type": "boolean",
+                    "description": "For Hi-C protocols which are not based on enzyme digestion such as DNase Hi-C"
+                },
+                "restriction_site": {
+                    "type": "string",
+                    "default": "'A^AGCTT'",
+                    "description": "Restriction motifs used during digestion. Several motifs (comma separated) can be provided."
+                },
+                "ligation_site": {
+                    "type": "string",
+                    "default": "'AAGCTAGCTT'",
+                    "description": "Expected motif after DNA ligation.  Several motifs (comma separated) can be provided."
+                },
+                "rm_dup": {
+                    "type": "boolean",
+                    "description": "Remove duplicates"
+                },
+                "rm_multi": {
+                    "type": "boolean",
+                    "description": "Remove multi-mapped reads"
+                },
+                "rm_singleton": {
+                    "type": "boolean",
+                    "description": "Remove singleton"
+                },
+                "min_mapq": {
+                    "type": "integer",
+                    "default": "10",
+                    "description": "Keep aligned reads with a minimum quality value"
+                },
+                "bwt2_opts_end2end": {
+                    "type": "string",
+                    "default": "'--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder'",
+                    "description": "Option for end-to-end bowtie mapping"
+                },
+                "bwt2_opts_trimmed": {
+                    "type": "string",
+                    "default": "'--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder'",
+                    "description": "Option for trimmed reads mapping"
+                },
+                "save_interaction_bam": {
+                    "type": "boolean",
+                    "description": "Save a BAM file where all reads are flagged by their interaction classes"
+                },
+                "save_aligned_intermediates": {
+                    "type": "boolean",
+                    "description": "Save all BAM files during two-steps mapping"
+                }
+            }
+        },
+        "contacts_calling_options": {
+            "title": "Contacts calling",
+            "type": "object",
+            "description": "Options to call significant interactions",
+            "default": "",
+            "fa_icon": "fas fa-signature",
+            "properties": {
+                "min_cis_dist": {
+                    "type": "string",
+                    "default": "undefined",
+                    "description": "Minimum distance between loci to consider. Useful for --dnase mode to remove spurious ligation products"
+                },
+                "max_insert_size": {
+                    "type": "string",
+                    "default": "undefined",
+                    "description": "Maximum fragment size to consider"
+                },
+                "min_insert_size": {
+                    "type": "string",
+                    "default": "undefined",
+                    "description": "Minimum fragment size to consider"
+                },
+                "max_restriction_fragment_size": {
+                    "type": "string",
+                    "default": "undefined",
+                    "description": "Maximum restriction fragment size to consider"
+                },
+                "min_restriction_fragment_size": {
+                    "type": "string",
+                    "default": "undefined",
+                    "description": "Minimum restriction fragment size to consider"
+                }
+            }
+        },
+        "contact_maps_options": {
+            "title": "Contact maps",
+            "type": "object",
+            "description": "Options to build Hi-C contact maps",
+            "default": "",
+            "fa_icon": "fas fa-chess-board",
+            "properties": {
+                "bin_size": {
+                    "type": "string",
+                    "default": "'1000000,500000'",
+                    "description": "Resolution to build the maps (comma separated)"
+                },
+                "ice_filer_low_count_perc": {
+                    "type": "string",
+                    "default": 0.02,
+                    "description": "Filter low counts rows before normalization"
+                },
+                "ice_filer_high_count_perc": {
+                    "type": "integer",
+                    "default": "0",
+                    "description": "Filter high counts rows before normalization"
+                },
+                "ice_eps": {
+                    "type": "string",
+                    "default": "0.1",
+                    "description": "Threshold for ICE convergence"
+                },
+                "ice_max_iter": {
+                    "type": "integer",
+                    "default": "100",
+                    "description": "Maximum number of iterations for ICE normalization"
+                }
+            }
+        },
+        "skip_options": {
+            "title": "Skip options",
+            "type": "object",
+            "description": "Skip some steps of the pipeline",
+            "default": "",
+            "fa_icon": "fas fa-random",
+            "properties": {
+                "skip_maps": {
+                    "type": "boolean",
+                    "description": "Do not build contact maps"
+                },
+                "skip_ice": {
+                    "type": "boolean",
+                    "description": "Do not normalize contact maps"
+                },
+                "skip_cool": {
+                    "type": "boolean",
+                    "description": "Do not generate cooler file"
+                },
+                "skip_multiqc": {
+                    "type": "boolean",
+                    "description": "Do not generate MultiQC report"
                }
            }
        },
@@ -246,6 +431,18 @@
        {
            "$ref": "#/definitions/reference_genome_options"
        },
+        {
+            "$ref": "#/definitions/data_processing_options"
+        },
+        {
+            "$ref": "#/definitions/contacts_calling_options"
+        },
+        {
+            "$ref": "#/definitions/contact_maps_options"
+        },
+        {
+            "$ref": "#/definitions/skip_options"
+        },
        {
            "$ref": "#/definitions/generic_options"
        },