From b17922a875e3e0b14d4f1c9e13f482175719a9eb Mon Sep 17 00:00:00 2001
From: nservant <nicolas.servant@curie.fr>
Date: Fri, 7 Aug 2020 18:27:18 +0200
Subject: [PATCH] Lint with new template

---
 .github/workflows/awsfulltest.yml |   1 -
 .github/workflows/awstest.yml     |   1 -
 .github/workflows/ci.yml          |   7 +-
 Dockerfile                        |   4 +-
 README.md                         |   2 -
 conf/hicpro.config                |   5 -
 conf/test_full.config             |  26 +++-
 docs/README.md                    |   2 -
 environment.yml                   |   2 +-
 main.nf                           |   2 +-
 nextflow.config                   |  34 ++++--
 nextflow_schema.json              | 197 ++++++++++++++++++++++++++++++
 12 files changed, 245 insertions(+), 38 deletions(-)

diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml
index 26d2e7e..a432531 100644
--- a/.github/workflows/awsfulltest.yml
+++ b/.github/workflows/awsfulltest.yml
@@ -20,7 +20,6 @@ jobs:
       - name: Install awscli
         run: conda install -c conda-forge awscli
       - name: Start AWS batch job
-        # TODO nf-core: You can customise AWS full pipeline tests as required
         # Add full size test data (but still relatively small datasets for few samples)
         # on the `test_full.config` test runs with only one set of parameters
         # Then specify `-profile test_full` instead of `-profile test` on the AWS batch command
diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
index 78ee5bc..94bca24 100644
--- a/.github/workflows/awstest.yml
+++ b/.github/workflows/awstest.yml
@@ -21,7 +21,6 @@ jobs:
       - name: Install awscli
         run: conda install -c conda-forge awscli
       - name: Start AWS batch job
-        # TODO nf-core: You can customise CI pipeline run tests as required
         # For example: adding multiple test runs with different parameters
         # Remember that you can parallelise this by using strategy.matrix
         env:
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fe636dd..a8a8ba5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -34,13 +34,13 @@ jobs:
 
       - name: Build new docker image
         if: env.GIT_DIFF
-        run: docker build --no-cache . -t nfcore/hic:dev
+        run: docker build --no-cache . -t nfcore/hic:1.2.2
 
       - name: Pull docker image
         if: ${{ !env.GIT_DIFF }}
         run: |
           docker pull nfcore/hic:dev
-          docker tag nfcore/hic:dev nfcore/hic:dev
+          docker tag nfcore/hic:dev nfcore/hic:1.2.2
 
       - name: Install Nextflow
         run: |
@@ -48,8 +48,5 @@ jobs:
           sudo mv nextflow /usr/local/bin/
 
       - name: Run pipeline with test data
-        # TODO nf-core: You can customise CI pipeline run tests as required
-        # For example: adding multiple test runs with different parameters
-        # Remember that you can parallelise this by using strategy.matrix
         run: |
           nextflow run ${GITHUB_WORKSPACE} -profile test,docker
diff --git a/Dockerfile b/Dockerfile
index 1f8dacf..3c8d019 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,10 +10,10 @@ COPY environment.yml /
 RUN conda env create --quiet -f /environment.yml && conda clean -a
 
 # Add conda installation dir to PATH (instead of doing 'conda activate')
-ENV PATH /opt/conda/envs/nf-core-hic-1.2.1/bin:$PATH
+ENV PATH /opt/conda/envs/nf-core-hic-1.2.2/bin:$PATH
 
 # Dump the details of the installed packages to a file for posterity
-RUN conda env export --name nf-core-hic-1.2.1 > nf-core-hic-1.2.1.yml
+RUN conda env export --name nf-core-hic-1.2.2 > nf-core-hic-1.2.2.yml
 
 # Instruct R processes to use these empty files instead of clashing with a local version
 RUN touch .Rprofile
diff --git a/README.md b/README.md
index cde30f5..da58777 100644
--- a/README.md
+++ b/README.md
@@ -104,8 +104,6 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool
 
 4. Start running your own analysis!
 
-    <!-- TODO nf-core: Update the example "typical command" below used to run the pipeline -->
-
     ```bash
     nextflow run nf-core/hic -profile <docker/singularity/conda/institute> --input '*_R{1,2}.fastq.gz' --genome GRCh37
     ```
diff --git a/conf/hicpro.config b/conf/hicpro.config
index 01b755a..cd0cf0b 100644
--- a/conf/hicpro.config
+++ b/conf/hicpro.config
@@ -10,7 +10,6 @@
 params {
 
        // Alignment options
-       splitFastq = false
        bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder'
        bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder'
        min_mapq = 10
@@ -35,9 +34,5 @@ params {
        ice_filer_low_count_perc = 0.02
        ice_filer_high_count_perc =  0
        ice_eps = 0.1
-
-       saveReference = false
-       saveAlignedIntermediates = false
-       saveInteractionBAM = false
 }
 
diff --git a/conf/test_full.config b/conf/test_full.config
index 921372e..47d3176 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -12,11 +12,25 @@ params {
   config_profile_description = 'Full test dataset to check pipeline function'
 
   // Input data for full size test
-  // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
-  // TODO nf-core: Give any required params for the test so that command line flags are not needed
-  single_end = false
   input_paths = [
-    ['Testdata', ['https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R1.tiny.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R2.tiny.fastq.gz']],
-    ['SRR389222', ['https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub2.fastq.gz']]
-  ]
+    ['SRR4292758_00', ['https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R2.fastq.gz']]
+  ]
+
+  // Reference genome and Hi-C digestion protocol
+  fasta = 'https://github.com/nf-core/test-datasets/raw/hic/reference/W303_SGD_2015_JRIU00000000.fsa'
+  restriction_site = 'A^AGCTT'
+  ligation_site = 'AAGCTAGCTT'
+
+  min_mapq = 2
+  rm_dup = true
+  rm_singleton = true
+  rm_multi = true
+
+  min_restriction_fragment_size = 100
+  max_restriction_fragment_size = 100000
+  min_insert_size = 100
+  max_insert_size = 600
+
+  // Skip options
+  skip_cool = true
 }
diff --git a/docs/README.md b/docs/README.md
index 67df846..a688954 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -2,8 +2,6 @@
 
 The nf-core/hic documentation is split into the following pages:
 
-<!-- TODO nf-core: If you write more documentation pages, add them to the docs index page here -->
-
 * [Usage](usage.md)
   * An overview of how the pipeline works, how to run it and a description of all of the different command-line flags.
 * [Output](output.md)
diff --git a/environment.yml b/environment.yml
index e8944c6..ccca9c3 100644
--- a/environment.yml
+++ b/environment.yml
@@ -1,6 +1,6 @@
 # You can use this file to create a conda environment for this pipeline:
 #   conda env create -f environment.yml
-name: nf-core-hic-1.2.1
+name: nf-core-hic-1.2.2
 channels:
   - conda-forge
   - bioconda
diff --git a/main.nf b/main.nf
index f33f88c..0cfcbc6 100644
--- a/main.nf
+++ b/main.nf
@@ -32,10 +32,10 @@ def helpMessage() {
       --chromosome_size [file]                  Path to chromosome size file
       --restriction_fragments [file]            Path to restriction fragment file (bed)
       --save_reference [bool]                   Save reference genome to output folder. Default: False
-      --save_aligned_intermediates [bool]       Save intermediates alignment files. Default: False
 
     Alignments
       --split_fastq [bool]                      Size of read chuncks to use to speed up the workflow. Default: None
+      --save_aligned_intermediates [bool]       Save intermediate alignment files. Default: False
       --bwt2_opts_end2end [str]                 Options for bowtie2 end-to-end mappinf (first mapping step). See hic.config for default.
       --bwt2_opts_trimmed [str]                 Options for bowtie2 mapping after ligation site trimming. See hic.config for default.
       --min_mapq [int]                          Minimum mapping quality values to consider. Default: 10
diff --git a/nextflow.config b/nextflow.config
index 24ff566..c598df9 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -15,7 +15,8 @@ params {
 
   outdir = './results'
   genome = false
-  readPaths = false
+  input_paths = false
+  split_fastq = false
   chromosome_size = false
   restriction_fragments = false
   skip_maps = false
@@ -25,16 +26,28 @@ params {
   save_reference = false
   save_interaction_bam = false
   save_aligned_intermediates = false
-  
-  dnase = false
-  rm_dup = false
-  rm_singleton = false
-  rm_multi = false
+
+  bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder'
+  bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder'
+  min_mapq = 10
+
+  // Digestion Hi-C
+  restriction_site = 'A^AGCTT'
+  ligation_site = 'AAGCTAGCTT'
   min_restriction_fragment_size = false
   max_restriction_fragment_size = false
   min_insert_size = false
   max_insert_size = false
-  min_cis_dist = false
+  dnase = false
+  min_cis_dist = false  // kept: still declared in nextflow_schema.json
+  rm_dup = false
+  rm_singleton = false
+  rm_multi = false
+  bin_size = '1000000,500000'  // contact map resolutions (comma separated)
+  ice_max_iter = 100
+  ice_filer_low_count_perc = 0.02
+  ice_filer_high_count_perc = 0
+  ice_eps = 0.1
   publish_dir_mode = 'copy'
 
   // Boilerplate options
@@ -65,7 +78,7 @@ params {
 
 // Container slug. Stable releases should specify release tag!
 // Developmental code should specify :dev
-process.container = 'nfcore/hic:1.2.1'
+process.container = 'nfcore/hic:1.2.2'
 
 // Load base.config by default for all pipelines
 includeConfig 'conf/base.config'
@@ -77,9 +90,6 @@ try {
   System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config")
 }
 
-// Load hic config file
-includeConfig 'conf/hicpro.config'
-
 // Create profiles
 profiles {
   conda { process.conda = "$baseDir/environment.yml" }
@@ -138,7 +148,7 @@ manifest {
   description = 'Analysis of Chromosome Conformation Capture data (Hi-C)'
   mainScript = 'main.nf'
   nextflowVersion = '>=19.10.0'
-  version = '1.2.1'
+  version = '1.2.2'
 }
 
 // Function to ensure that resource requirements don't go beyond
diff --git a/nextflow_schema.json b/nextflow_schema.json
index ccbfe4b..bb45295 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -20,6 +20,17 @@
                     "description": "Input FastQ files.",
                     "help_text": "Use this to specify the location of your input FastQ files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq'\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The path must have at least one `*` wildcard character\n3. When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs.\n\nIf left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz`"
                 },
+                "input_paths": {
+                    "type": "string",
+                    "hidden": true,
+                    "description": "Input FastQ files for test only",
+                    "default": "undefined"
+                },
+                "split_fastq": {
+                    "type": "number",
+                    "description": "Split the reads into chunks before running. Specify the number of reads per chunk, e.g. `--split_fastq 20000000`.",
+                    "fa_icon": "fas fa-dna"
+                },
                 "single_end": {
                     "type": "boolean",
                     "description": "Specifies that the input is single-end reads.",
@@ -72,6 +83,180 @@
                     "fa_icon": "fas fa-ban",
                     "hidden": true,
                     "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`."
+                },
+                "bwt2_index": {
+                    "type": "string",
+                    "description": "Full path to directory containing Bowtie index including base name. i.e. `/path/to/index/base`.",
+                    "fa_icon": "far fa-file-alt"
+                },
+                "chromosome_size": {
+                    "type": "string",
+                    "description": "Full path to file specifying chromosome sizes (tab separated with chromosome name and size).",
+                    "fa_icon": "far fa-file-alt",
+                    "help_text": "If not specified, the pipeline will build this file from the reference genome file"
+                },
+                "restriction_fragments": {
+                    "type": "string",
+                    "description": "Full path to restriction fragment (bed) file.",
+                    "fa_icon": "far fa-file-alt",
+                    "help_text": "This file depends on the Hi-C protocols and digestion strategy. If not provided, the pipeline will build it using the --restriction_site option"
+                },
+                "save_reference": {
+                    "type": "boolean",
+                    "description": "If generated by the pipeline save the annotation and indexes in the results directory.",
+                    "help_text": "Use this parameter to save all annotations to your results folder. These can then be used for future pipeline runs, reducing processing times.",
+                    "fa_icon": "fas fa-save"
+                }
+            }
+        },
+        "data_processing_options": {
+            "title": "Data processing",
+            "type": "object",
+            "description": "Parameters for Hi-C data processing",
+            "default": "",
+            "fa_icon": "fas fa-bahai",
+            "properties": {
+                "dnase": {
+                    "type": "boolean",
+                    "description": "For Hi-C protocols which are not based on enzyme digestion such as DNase Hi-C"
+                },
+                "restriction_site": {
+                    "type": "string",
+                    "default": "'A^AGCTT'",
+                    "description": "Restriction motifs used during digestion. Several motifs (comma separated) can be provided."
+                },
+                "ligation_site": {
+                    "type": "string",
+                    "default": "'AAGCTAGCTT'",
+                    "description": "Expected motif after DNA ligation. Several motifs (comma separated) can be provided."
+                },
+                "rm_dup": {
+                    "type": "boolean",
+                    "description": "Remove duplicates"
+                },
+                "rm_multi": {
+                    "type": "boolean",
+                    "description": "Remove multi-mapped reads"
+                },
+                "rm_singleton": {
+                    "type": "boolean",
+                    "description": "Remove singleton"
+                },
+                "min_mapq": {
+                    "type": "integer",
+                    "default": 10,
+                    "description": "Keep aligned reads with a minimum quality value"
+                },
+                "bwt2_opts_end2end": {
+                    "type": "string",
+                    "default": "'--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder'",
+                    "description": "Option for end-to-end bowtie mapping"
+                },
+                "bwt2_opts_trimmed": {
+                    "type": "string",
+                    "default": "'--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder'",
+                    "description": "Option for trimmed reads mapping"
+                },
+                "save_interaction_bam": {
+                    "type": "boolean",
+                    "description": "Save a BAM file where all reads are flagged by their interaction classes"
+                },
+                "save_aligned_intermediates": {
+                    "type": "boolean",
+                    "description": "Save all BAM files during two-steps mapping"
+                }
+            }
+        },
+        "contacts_calling_options": {
+            "title": "Contacts calling",
+            "type": "object",
+            "description": "Options to call significant interactions",
+            "default": "",
+            "fa_icon": "fas fa-signature",
+            "properties": {
+                "min_cis_dist": {
+                    "type": "integer",
+                    "minimum": 0,
+                    "description": "Minimum distance between loci to consider. Useful for --dnase mode to remove spurious ligation products"
+                },
+                "max_insert_size": {
+                    "type": "integer",
+                    "minimum": 0,
+                    "description": "Maximum fragment size to consider"
+                },
+                "min_insert_size": {
+                    "type": "integer",
+                    "minimum": 0,
+                    "description": "Minimum fragment size to consider"
+                },
+                "max_restriction_fragment_size": {
+                    "type": "integer",
+                    "minimum": 0,
+                    "description": "Maximum restriction fragment size to consider"
+                },
+                "min_restriction_fragment_size": {
+                    "type": "integer",
+                    "minimum": 0,
+                    "description": "Minimum restriction fragment size to consider"
+                }
+            }
+        },
+        "contact_maps_options": {
+            "title": "Contact maps",
+            "type": "object",
+            "description": "Options to build Hi-C contact maps",
+            "default": "",
+            "fa_icon": "fas fa-chess-board",
+            "properties": {
+                "bin_size": {
+                    "type": "string",
+                    "default": "'1000000,500000'",
+                    "description": "Resolution to build the maps (comma separated)"
+                },
+                "ice_filer_low_count_perc": {
+                    "type": "number",
+                    "default": 0.02,
+                    "description": "Filter low counts rows before normalization"
+                },
+                "ice_filer_high_count_perc": {
+                    "type": "number",
+                    "default": 0,
+                    "description": "Filter high counts rows before normalization"
+                },
+                "ice_eps": {
+                    "type": "number",
+                    "default": 0.1,
+                    "description": "Threshold for ICE convergence"
+                },
+                "ice_max_iter": {
+                    "type": "integer",
+                    "default": 100,
+                    "description": "Maximum number of iterations for ICE normalization"
+                }
+            }
+        },
+        "skip_options": {
+            "title": "Skip options",
+            "type": "object",
+            "description": "Skip some steps of the pipeline",
+            "default": "",
+            "fa_icon": "fas fa-random",
+            "properties": {
+                "skip_maps": {
+                    "type": "boolean",
+                    "description": "Do not build contact maps"
+                },
+                "skip_ice": {
+                    "type": "boolean",
+                    "description": "Do not normalize contact maps"
+                },
+                "skip_cool": {
+                    "type": "boolean",
+                    "description": "Do not generate cooler file"
+                },
+                "skip_multiqc": {
+                    "type": "boolean",
+                    "description": "Do not generate MultiQC report"
                 }
             }
         },
@@ -246,6 +431,18 @@
         {
             "$ref": "#/definitions/reference_genome_options"
         },
+        {
+            "$ref": "#/definitions/data_processing_options"
+        },
+        {
+            "$ref": "#/definitions/contacts_calling_options"
+        },
+        {
+            "$ref": "#/definitions/contact_maps_options"
+        },
+        {
+            "$ref": "#/definitions/skip_options"
+        },
         {
             "$ref": "#/definitions/generic_options"
         },
-- 
GitLab