diff --git a/CITATIONS.md b/CITATIONS.md new file mode 100644 index 0000000000000000000000000000000000000000..da253d248988442de777df56482ff1333cd7dce5 --- /dev/null +++ b/CITATIONS.md @@ -0,0 +1,32 @@ +# nf-core/hic: Citations + +## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) + +> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. + +## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/) + +> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. + +## Pipeline tools + +* [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + +* [MultiQC](https://www.ncbi.nlm.nih.gov/pubmed/27312411/) + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + +## Software packaging/containerisation tools + +* [Anaconda](https://anaconda.com) + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + +* [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) + > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + +* [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) + > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + +* [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + +* [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/conf/modules.config b/conf/modules.config new file mode 100644 index 0000000000000000000000000000000000000000..0b1bfdec2050c7ba17ab39188d89ced8973ea549 --- /dev/null +++ b/conf/modules.config @@ -0,0 +1,32 @@ +/* +======================================================================================== + Config file for defining DSL2 per module options +======================================================================================== + Available keys to override module options: + args = Additional arguments appended to command in module. + args2 = Second set of arguments appended to command in module (multi-tool modules). + args3 = Third set of arguments appended to command in module (multi-tool modules). + publish_dir = Directory to publish results. 
+ publish_by_meta = Groovy list of keys available in meta map to append as directories to "publish_dir" path + If publish_by_meta = true - Value of ${meta['id']} is appended as a directory to "publish_dir" path + If publish_by_meta = ['id', 'custompath'] - If "id" is in meta map and "custompath" isn't then "${meta['id']}/custompath/" + is appended as a directory to "publish_dir" path + If publish_by_meta = false / null - No directories are appended to "publish_dir" path + publish_files = Groovy map where key = "file_ext" and value = "directory" to publish results for that file extension + The value of "directory" is appended to the standard "publish_dir" path as defined above. + If publish_files = null (unspecified) - All files are published. + If publish_files = false - No files are published. + suffix = File name suffix for output files. +---------------------------------------------------------------------------------------- +*/ + +params { + modules { + 'fastqc' { + args = "--quiet" + } + 'multiqc' { + args = "" + } + } +} diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy index 52ee730432905c5f6dc3e2c89352bbaee6ea145b..8d6920dd645644e70e8bce260022e7e70be97788 100644 --- a/lib/NfcoreSchema.groovy +++ b/lib/NfcoreSchema.groovy @@ -1,6 +1,6 @@ -/* - * This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. - */ +// +// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. +// import org.everit.json.schema.Schema import org.everit.json.schema.loader.SchemaLoader @@ -13,16 +13,23 @@ import groovy.json.JsonBuilder class NfcoreSchema { - /* - * Function to loop over all parameters defined in schema and check - * whether the given paremeters adhere to the specificiations - */ + // + // Resolve Schema path relative to main workflow directory + // + public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { + return "${workflow.projectDir}/${schema_filename}" + } + + // + // Function to loop over all parameters defined in schema and check + // whether the given parameters adhere to the specifications + // /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - private static void validateParameters(params, jsonSchema, log) { + public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { def has_error = false //=====================================================================// // Check for nextflow core params and unexpected params - def json = new File(jsonSchema).text + def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') def nf_params = [ // Options for base `nextflow` command @@ -114,7 +121,8 @@ class NfcoreSchema { def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !expectedParamsLowerCase.contains(specifiedParamLowerCase)) { + def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) + if (!expectedParams.contains(specifiedParam) && 
!params_ignore.contains(specifiedParam) && !isCamelCaseBug) { // Temporarily remove camelCase/camel-case params #1035 def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ @@ -125,36 +133,36 @@ class NfcoreSchema { //=====================================================================// // Validate parameters against the schema - InputStream inputStream = new File(jsonSchema).newInputStream() - JSONObject rawSchema = new JSONObject(new JSONTokener(inputStream)) + InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() + JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) // Remove anything that's in params.schema_ignore_params - rawSchema = removeIgnoredParams(rawSchema, params) + raw_schema = removeIgnoredParams(raw_schema, params) - Schema schema = SchemaLoader.load(rawSchema) + Schema schema = SchemaLoader.load(raw_schema) // Clean the parameters def cleanedParams = cleanParameters(params) // Convert to JSONObject def jsonParams = new JsonBuilder(cleanedParams) - JSONObject paramsJSON = new JSONObject(jsonParams.toString()) + JSONObject params_json = new JSONObject(jsonParams.toString()) // Validate try { - schema.validate(paramsJSON) + schema.validate(params_json) } catch (ValidationException e) { println '' log.error 'ERROR: Validation of pipeline parameters failed!' JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, paramsJSON, log) + printExceptions(exceptionJSON, params_json, log) println '' has_error = true } // Check for unexpected parameters if (unexpectedParams.size() > 0) { - Map colors = log_colours(params.monochrome_logs) + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) println '' def warn_msg = 'Found unexpected parameters:' for (unexpectedParam in unexpectedParams) { @@ -170,266 +178,17 @@ class NfcoreSchema { } } - // Loop over nested exceptions and print the causingException - private static void printExceptions(exJSON, paramsJSON, log) { - def causingExceptions = exJSON['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = exJSON['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (exJSON['pointerToViolation'] == '#') { - log.error "* ${exJSON['message']}" - } - // Error with specific param - else { - def param = exJSON['pointerToViolation'] - ~/^#\// - def param_val = paramsJSON[param].toString() - log.error "* --${param}: ${exJSON['message']} (${param_val})" - } - } - for (ex in causingExceptions) { - printExceptions(ex, paramsJSON, log) - } - } - - // Remove an element from a JSONArray - private static JSONArray removeElement(jsonArray, element){ - def list = [] - int len = jsonArray.length() - for (int i=0;i<len;i++){ - list.add(jsonArray.get(i).toString()) - } - list.remove(element) - JSONArray jsArray = new JSONArray(list) - return jsArray - } - - private static JSONObject removeIgnoredParams(rawSchema, params){ - // Remove anything that's in params.schema_ignore_params - params.schema_ignore_params.split(',').each{ ignore_param -> - if(rawSchema.keySet().contains('definitions')){ - rawSchema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - 
definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(rawSchema.keySet().contains('properties') && rawSchema.get('properties').keySet().contains(ignore_param)) { - rawSchema.get("properties").remove(ignore_param) - } - if(rawSchema.keySet().contains('required') && rawSchema.required.contains(ignore_param)) { - def cleaned_required = removeElement(rawSchema.required, ignore_param) - rawSchema.put("required", cleaned_required) - } - } - return rawSchema - } - - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - /* - * This method tries to read a JSON params file - */ - private static LinkedHashMap params_load(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = params_read(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - private static Map log_colours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? 
'' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - static String dashed_line(monochrome_logs) { - Map colors = log_colours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - /* - Method to actually read in JSON file using Groovy. - Group (as Key), values are all parameters - - Parameter1 as Key, Description as Value - - Parameter2 as Key, Description as Value - .... - Group - - - */ - private static LinkedHashMap params_read(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - /* - * Get maximum number of characters across all parameter names - */ - private static Integer params_max_chars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } - - /* - * Beautify parameters for --help - */ 
- private static String params_help(workflow, params, json_schema, command) { - Map colors = log_colours(params.monochrome_logs) + // + // Beautify parameters for --help + // + public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) Integer num_hidden = 0 String output = '' output += 'Typical pipeline command:\n\n' output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = params_load(json_schema) - Integer max_chars = params_max_chars(params_map) + 1 + Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) + Integer max_chars = paramsMaxChars(params_map) + 1 Integer desc_indent = max_chars + 14 Integer dec_linewidth = 160 - desc_indent for (group in params_map.keySet()) { @@ -469,18 +228,17 @@ class NfcoreSchema { output += group_output } } - output += dashed_line(params.monochrome_logs) if (num_hidden > 0){ - output += colors.dim + "\n Hiding $num_hidden params, use --show_hidden_params to show.\n" + colors.reset - output += dashed_line(params.monochrome_logs) + output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset } + output += NfcoreTemplate.dashedLine(params.monochrome_logs) return output } - /* - * Groovy Map summarising parameters/workflow options used by the pipeline - */ - private static LinkedHashMap params_summary_map(workflow, params, json_schema) { + // + // Groovy Map summarising parameters/workflow options used by the pipeline + // + public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { // Get a selection of core Nextflow workflow options def Map workflow_summary = [:] if (workflow.revision) { @@ -503,7 +261,7 @@ class NfcoreSchema { // Get pipeline parameters defined in JSON Schema def Map params_summary = [:] def blacklist = ['hostnames'] - def params_map = params_load(json_schema) + def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) for (group in params_map.keySet()) { def sub_params = new LinkedHashMap() def group_params = params_map.get(group) // This gets the parameters of that particular group @@ -546,14 +304,14 @@ class NfcoreSchema { return [ 'Core Nextflow options' : workflow_summary ] << params_summary } - /* - * Beautify parameters for summary and return as string - */ - private static String params_summary_log(workflow, params, json_schema) { - Map colors = log_colours(params.monochrome_logs) + // + // Beautify parameters for summary and return as string + // + public static String paramsSummaryLog(workflow, params) { + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) String output = '' - def params_map = params_summary_map(workflow, params, json_schema) - def max_chars = params_max_chars(params_map) + def params_map = paramsSummaryMap(workflow, params) + def max_chars = paramsMaxChars(params_map) for (group in params_map.keySet()) { def group_params = params_map.get(group) // This gets the parameters of that particular group if (group_params) { @@ -564,10 +322,196 @@ class NfcoreSchema { output += '\n' } } - output += dashed_line(params.monochrome_logs) - output += colors.dim + "\n Only displaying parameters that differ from defaults.\n" + colors.reset - output += dashed_line(params.monochrome_logs) + output += "!! 
Only displaying parameters that differ from the pipeline defaults !!\n" + output += NfcoreTemplate.dashedLine(params.monochrome_logs) return output } + // + // Loop over nested exceptions and print the causingException + // + private static void printExceptions(ex_json, params_json, log) { + def causingExceptions = ex_json['causingExceptions'] + if (causingExceptions.length() == 0) { + def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ + // Missing required param + if (m.matches()) { + log.error "* Missing required parameter: --${m[0][1]}" + } + // Other base-level error + else if (ex_json['pointerToViolation'] == '#') { + log.error "* ${ex_json['message']}" + } + // Error with specific param + else { + def param = ex_json['pointerToViolation'] - ~/^#\// + def param_val = params_json[param].toString() + log.error "* --${param}: ${ex_json['message']} (${param_val})" + } + } + for (ex in causingExceptions) { + printExceptions(ex, params_json, log) + } + } + + // + // Remove an element from a JSONArray + // + private static JSONArray removeElement(json_array, element) { + def list = [] + int len = json_array.length() + for (int i=0;i<len;i++){ + list.add(json_array.get(i).toString()) + } + list.remove(element) + JSONArray jsArray = new JSONArray(list) + return jsArray + } + + // + // Remove ignored parameters + // + private static JSONObject removeIgnoredParams(raw_schema, params) { + // Remove anything that's in params.schema_ignore_params + params.schema_ignore_params.split(',').each{ ignore_param -> + if(raw_schema.keySet().contains('definitions')){ + raw_schema.definitions.each { definition -> + for (key in definition.keySet()){ + if (definition[key].get("properties").keySet().contains(ignore_param)){ + // Remove the param to ignore + definition[key].get("properties").remove(ignore_param) + // If the param was required, change this + if (definition[key].has("required")) { + def cleaned_required = removeElement(definition[key].required, ignore_param) + definition[key].put("required", cleaned_required) + } + } + } + } + } + if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { + raw_schema.get("properties").remove(ignore_param) + } + if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { + def cleaned_required = removeElement(raw_schema.required, ignore_param) + raw_schema.put("required", cleaned_required) + } + } + return raw_schema + } + + // + // Clean and check parameters relative to Nextflow native classes + // + private static Map cleanParameters(params) { + def new_params = params.getClass().newInstance(params) + for (p in params) { + // remove anything evaluating to false + if (!p['value']) { + new_params.remove(p.key) + } + // Cast MemoryUnit to String + if (p['value'].getClass() == nextflow.util.MemoryUnit) { + new_params.replace(p.key, p['value'].toString()) + } + // Cast Duration to String + if (p['value'].getClass() == nextflow.util.Duration) { + new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) + } + // Cast LinkedHashMap to String + if (p['value'].getClass() == LinkedHashMap) { + new_params.replace(p.key, p['value'].toString()) + } + } + return new_params + } + + // + // This function tries to read a JSON params file + // + private static LinkedHashMap paramsLoad(String json_schema) { + def params_map = new LinkedHashMap() + try { + params_map = paramsRead(json_schema) + } catch (Exception e) { + println "Could not read parameters 
settings from JSON. $e" + params_map = new LinkedHashMap() + } + return params_map + } + + // + // Method to actually read in JSON file using Groovy. + // Group (as Key), values are all parameters + // - Parameter1 as Key, Description as Value + // - Parameter2 as Key, Description as Value + // .... + // Group + // - + private static LinkedHashMap paramsRead(String json_schema) throws Exception { + def json = new File(json_schema).text + def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') + def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') + /* Tree looks like this in nf-core schema + * definitions <- this is what the first get('definitions') gets us + group 1 + title + description + properties + parameter 1 + type + description + parameter 2 + type + description + group 2 + title + description + properties + parameter 1 + type + description + * properties <- parameters can also be ungrouped, outside of definitions + parameter 1 + type + description + */ + + // Grouped params + def params_map = new LinkedHashMap() + schema_definitions.each { key, val -> + def Map group = schema_definitions."$key".properties // Gets the property object of the group + def title = schema_definitions."$key".title + def sub_params = new LinkedHashMap() + group.each { innerkey, value -> + sub_params.put(innerkey, value) + } + params_map.put(title, sub_params) + } + + // Ungrouped params + def ungrouped_params = new LinkedHashMap() + schema_properties.each { innerkey, value -> + ungrouped_params.put(innerkey, value) + } + params_map.put("Other parameters", ungrouped_params) + + return params_map + } + + // + // Get maximum number of characters across all parameter names + // + private static Integer paramsMaxChars(params_map) { + Integer max_chars = 0 + for (group in params_map.keySet()) { + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (param.size() > max_chars) { + max_chars = param.size() + } + } + } + return max_chars + } } diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy new file mode 100755 index 0000000000000000000000000000000000000000..44551e0a3521cfd9f02501d0a4a1db460dd1ff07 --- /dev/null +++ b/lib/NfcoreTemplate.groovy @@ -0,0 +1,270 @@ +// +// This file holds several functions used within the nf-core pipeline template. +// + +import org.yaml.snakeyaml.Yaml + +class NfcoreTemplate { + + // + // Check AWS Batch related parameters have been specified correctly + // + public static void awsBatch(workflow, params) { + if (workflow.profile.contains('awsbatch')) { + // Check params.awsqueue and params.awsregion have been set if running on AWSBatch + assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" + // Check outdir paths to be S3 buckets if running on AWSBatch + assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" 
+ } + } + + // + // Check params.hostnames + // + public static void hostName(workflow, params, log) { + Map colors = logColours(params.monochrome_logs) + if (params.hostnames) { + try { + def hostname = "hostname".execute().text.trim() + params.hostnames.each { prof, hnames -> + hnames.each { hname -> + if (hostname.contains(hname) && !workflow.profile.contains(prof)) { + log.info "=${colors.yellow}====================================================${colors.reset}=\n" + + "${colors.yellow}WARN: You are running with `-profile $workflow.profile`\n" + + " but your machine hostname is ${colors.white}'$hostname'${colors.reset}.\n" + + " ${colors.byellow}Please use `-profile $prof${colors.reset}`\n" + + "=${colors.yellow}====================================================${colors.reset}=" + } + } + } + } catch (Exception e) { + log.warn "[$workflow.manifest.name] Could not determine 'hostname' - skipping check. Reason: ${e.message}." + } + } + } + + // + // Construct and send completion email + // + public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = workflow.manifest.version + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success, try to attach the MultiQC report + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + + // Check if we are only sending emails on failure + def email_address = params.email + if (!params.email && params.email_on_fail && !workflow.success) { + email_address = params.email_on_fail + } + + // Render the TXT template + def engine = new 
groovy.text.GStringTemplateEngine() + def tf = new File("$projectDir/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("$projectDir/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("$projectDir/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(params.monochrome_logs) + if (email_address) { + try { + if (params.plaintext_email) { throw new GroovyRuntimeException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + mail_cmd += [ '-A', mqc_report ] + } + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_d = new File("${params.outdir}/pipeline_info/") + if (!output_d.exists()) { + output_d.mkdirs() + } + def output_hf = new File(output_d, "pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + def output_tf = new File(output_d, "pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + } + + // + // Print pipeline summary on completion + // + public static void summary(workflow, params, log) { + Map colors = logColours(params.monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + hostName(workflow, params, log) + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } + } + + // + // ANSI Colours used for terminal logging + // + public static Map logColours(Boolean monochrome_logs) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? 
'' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? 
'' : "\033[1;97m" + + return colorcodes + } + + // + // Does what is says on the tin + // + public static String dashedLine(monochrome_logs) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" + } + + // + // nf-core logo + // + public static String logo(workflow, monochrome_logs) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) + } +} diff --git a/lib/Utils.groovy b/lib/Utils.groovy new file mode 100755 index 0000000000000000000000000000000000000000..18173e98503206c71e7cfc1615bfbfb6202c1198 --- /dev/null +++ b/lib/Utils.groovy @@ -0,0 +1,47 @@ +// +// This file holds several Groovy functions that could be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml + +class Utils { + + // + // When running with -profile conda, warn if channels have not been set-up appropriately + // + public static void checkCondaChannels(log) { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + def required_channels = ['conda-forge', 'bioconda', 'defaults'] + def conda_check_failed = !required_channels.every { ch -> ch in channels } + + // Check that they are in the right order + conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) + conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) + + if (conda_check_failed) { + log.warn "=============================================================================\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + + " NB: The order of the channels matters!\n" + + "===================================================================================" + } + } + + // + // Join module args with appropriate spacing + // + public static String joinModuleArgs(args_list) { + return ' ' + args_list.join(' ') + } +} diff --git a/lib/WorkflowHic.groovy b/lib/WorkflowHic.groovy new file mode 100755 index 0000000000000000000000000000000000000000..5381157ff81cd224ecdeebb857b5f68332145fb5 --- /dev/null +++ b/lib/WorkflowHic.groovy @@ -0,0 +1,59 @@ +// +// This file holds several functions specific to the workflow/hic.nf in the nf-core/hic pipeline +// + +class WorkflowHic { + + // + // Check and validate parameters + // + public static void initialise(params, log) { + genomeExistsError(params, log) + + if (!params.fasta) { + log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." 
+ System.exit(1) + } + } + + // + // Get workflow summary for MultiQC + // + public static String paramsSummaryMultiqc(workflow, summary) { + String summary_section = '' + for (group in summary.keySet()) { + def group_params = summary.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += " <p style=\"font-size:110%\"><b>$group</b></p>\n" + summary_section += " <dl class=\"dl-horizontal\">\n" + for (param in group_params.keySet()) { + summary_section += " <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n" + } + summary_section += " </dl>\n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + return yaml_file_text + } + + // + // Exit pipeline if incorrect --genome key provided + // + private static void genomeExistsError(params, log) { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + log.error "=============================================================================\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "===================================================================================" + System.exit(1) + } + } +} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy new file mode 100755 index 0000000000000000000000000000000000000000..92eb1766e1d7763ccc40bcefc92b56a28544a4b1 --- /dev/null +++ b/lib/WorkflowMain.groovy @@ -0,0 +1,94 @@ +// +// This file holds several functions specific to the main.nf workflow in the nf-core/hic pipeline +// + +class WorkflowMain { + + // + // Citation string for pipeline + // + public static String citation(workflow) { + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + // TODO nf-core: Add Zenodo DOI for pipeline after first release + //"* The pipeline\n" + + //" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" + } + + // + // Print help to screen if required + // + public static String help(workflow, params, log) { + def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" + def help_string = '' + help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) + help_string += NfcoreSchema.paramsHelp(workflow, params, command) + help_string += '\n' + citation(workflow) + '\n' + help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) + return help_string + } + + // + // Print parameter summary log to screen + // + public static String paramsSummaryLog(workflow, params, log) { + def summary_log = '' + summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) + summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) + summary_log += '\n' + citation(workflow) + '\n' + summary_log += 
NfcoreTemplate.dashedLine(params.monochrome_logs) + return summary_log + } + + // + // Validate parameters and print summary to screen + // + public static void initialise(workflow, params, log) { + // Print help to screen if required + if (params.help) { + log.info help(workflow, params, log) + System.exit(0) + } + + // Validate workflow parameters via the JSON schema + if (params.validate_params) { + NfcoreSchema.validateParameters(workflow, params, log) + } + + // Print parameter summary log to screen + log.info paramsSummaryLog(workflow, params, log) + + // Check that conda channels are set-up correctly + if (params.enable_conda) { + Utils.checkCondaChannels(log) + } + + // Check AWS batch settings + NfcoreTemplate.awsBatch(workflow, params) + + // Check the hostnames against configured profiles + NfcoreTemplate.hostName(workflow, params, log) + + // Check input has been provided + if (!params.input) { + log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" + System.exit(1) + } + } + + // + // Get attribute from genome config file e.g. fasta + // + public static String getGenomeAttribute(params, attribute) { + def val = '' + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + val = params.genomes[ params.genome ][ attribute ] + } + } + return val + } +} diff --git a/main.nf b/main.nf index a8611d564b4a7d9d029e0549976ceaa69afb93f4..59b966325e9decf9ccb8b91afa014ac82cb8dd86 100644 --- a/main.nf +++ b/main.nf @@ -1,1236 +1,63 @@ #!/usr/bin/env nextflow /* ======================================================================================== - nf-core/hic + nf-core/hic ======================================================================================== - nf-core/hic Analysis Pipeline. - #### Homepage / Documentation - https://github.com/nf-core/hic + Github : https://github.com/nf-core/hic + Website: https://nf-co.re/hic + Slack : https://nfcore.slack.com/channels/hic ---------------------------------------------------------------------------------------- */ -log.info Headers.nf_core(workflow, params.monochrome_logs) - -//////////////////////////////////////////////////// -/* -- PRINT HELP -- */ -////////////////////////////////////////////////////+ -def json_schema = "$projectDir/nextflow_schema.json" -if (params.help) { - def command = "nextflow run nf-core/hic --input '*_R{1,2}.fastq.gz' -profile docker" - log.info NfcoreSchema.params_help(workflow, params, json_schema, command) - exit 0 -} - -//////////////////////////////////////////////////// -/* -- VALIDATE PARAMETERS -- */ -////////////////////////////////////////////////////+ -if (params.validate_params) { - NfcoreSchema.validateParameters(params, json_schema, log) -} - -// Check if genome exists in the config file -if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. Currently the available genomes are ${params.genomes.keySet().join(', ')}" -} - -if (params.digest && params.digestion && !params.digest.containsKey(params.digestion)) { - exit 1, "Unknown digestion protocol. Currently, the available digestion options are ${params.digest.keySet().join(", ")}. Please set manually the '--restriction_site' and '--ligation_site' parameters." -} - -params.restriction_site = params.digestion ? 
params.digest[ params.digestion ].restriction_site ?: false : false -params.ligation_site = params.digestion ? params.digest[ params.digestion ].ligation_site ?: false : false - -// Check Digestion or DNase Hi-C mode -if (!params.dnase && !params.ligation_site) { - exit 1, "Ligation motif not found. Please either use the `--digestion` parameters or specify the `--restriction_site` and `--ligation_site`. For DNase Hi-C, please use '--dnase' option" -} - -// Reference index path configuration -params.bwt2_index = params.genome ? params.genomes[ params.genome ].bowtie2 ?: false : false -params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false - - -//////////////////////////////////////////////////// -/* -- Collect configuration parameters -- */ -//////////////////////////////////////////////////// - -// Check AWS batch settings -if (workflow.profile.contains('awsbatch')) { - // AWSBatch sanity checking - if (!params.awsqueue || !params.awsregion) exit 1, 'Specify correct --awsqueue and --awsregion parameters on AWSBatch!' - // Check outdir paths to be S3 buckets if running on AWSBatch - // related: https://github.com/nextflow-io/nextflow/issues/813 - if (!params.outdir.startsWith('s3:')) exit 1, 'Outdir not on S3 - specify S3 Bucket to run on AWSBatch!' - // Prevent trace files to be stored on S3 since S3 does not support rolling files. - if (params.tracedir.startsWith('s3:')) exit 1, 'Specify a local tracedir or run without trace! S3 cannot be used for tracefiles.' -} - -// Stage config files -ch_multiqc_config = file("$projectDir/assets/multiqc_config.yaml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() -ch_output_docs = file("$projectDir/docs/output.md", checkIfExists: true) -ch_output_docs_images = file("$projectDir/docs/images/", checkIfExists: true) - -/* - * input read files - */ - -if (params.input_paths){ - - raw_reads = Channel.create() - raw_reads_2 = Channel.create() - - Channel - .from( params.input_paths ) - .map { row -> [ row[0], [file(row[1][0]), file(row[1][1])]] } - .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0] + "_R1", a[1][0]), tuple(a[0] + "_R2", a[1][1])] } - -}else{ - raw_reads = Channel.create() - raw_reads_2 = Channel.create() - - if ( params.split_fastq ){ - Channel - .fromFilePairs( params.input, flat:true ) - .splitFastq( by: params.fastq_chunks_size, pe:true, file: true, compress:true) - .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0] + "_R1", a[1]), tuple(a[0] + "_R2", a[2])] } - }else{ - Channel - .fromFilePairs( params.input ) - .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0] + "_R1", a[1][0]), tuple(a[0] + "_R2", a[1][1])] } - } -} - -// Update sample name if splitFastq is used -def updateSampleName(x) { - if ((matcher = x[1] =~ /\s*(\.[\d]+).fastq.gz/)) { - res = matcher[0][1] - } - return [x[0] + res, x[1]] -} - -if (params.split_fastq ){ - raw_reads = raw_reads.concat( raw_reads_2 ).map{it -> updateSampleName(it)}.dump(tag:'input') -}else{ - raw_reads = raw_reads.concat( raw_reads_2 ).dump(tag:'input') -} - -/* - * Other input channels - */ - -// Reference genome -if ( params.bwt2_index ){ - - Channel.fromPath( params.bwt2_index , checkIfExists: true) - .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwt2_index}" } - .into { bwt2_index_end2end; bwt2_index_trim } - -} -else if ( params.fasta ) { - Channel.fromPath( params.fasta ) - .ifEmpty { exit 1, "Genome index: Fasta file not 
found: ${params.fasta}" } - .into { fasta_for_index } -} -else { - exit 1, "No reference genome specified!" -} - -// Chromosome size -if ( params.chromosome_size ){ - Channel.fromPath( params.chromosome_size , checkIfExists: true) - .into {chrsize; chrsize_build; chrsize_raw; chrsize_balance; chrsize_zoom; chrsize_compartments} -} -else if ( params.fasta ){ - Channel.fromPath( params.fasta ) - .ifEmpty { exit 1, "Chromosome sizes: Fasta file not found: ${params.fasta}" } - .set { fasta_for_chromsize } -} -else { - exit 1, "No chromosome size specified!" -} - -// Restriction fragments -if ( params.restriction_fragments ){ - Channel.fromPath( params.restriction_fragments, checkIfExists: true ) - .set {res_frag_file} -} -else if ( params.fasta && params.restriction_site ){ - Channel.fromPath( params.fasta ) - .ifEmpty { exit 1, "Restriction fragments: Fasta file not found: ${params.fasta}" } - .set { fasta_for_resfrag } -} -else if (! params.dnase) { - exit 1, "No restriction fragments file specified!" -} - -// Resolutions for contact maps -map_res = Channel.from( params.bin_size ).splitCsv().flatten() -all_res = params.bin_size -if (params.res_tads && !params.skip_tads){ - Channel.from( "${params.res_tads}" ) - .splitCsv() - .flatten() - .into {tads_bin; tads_res_hicexplorer; tads_res_insulation} - map_res = map_res.concat(tads_bin) - all_res = all_res + ',' + params.res_tads -}else{ - tads_res_hicexplorer=Channel.empty() - tads_res_insulation=Channel.empty() - tads_bin=Channel.empty() - if (!params.skip_tads){ - log.warn "[nf-core/hic] Hi-C resolution for TADs calling not specified. See --res_tads" - } -} - -if (params.res_dist_decay && !params.skip_dist_decay){ - Channel.from( "${params.res_dist_decay}" ) - .splitCsv() - .flatten() - .into {ddecay_res; ddecay_bin } - map_res = map_res.concat(ddecay_bin) - all_res = all_res + ',' + params.res_dist_decay -}else{ - ddecay_res = Channel.create() - ddecay_bin = Channel.create() - if (!params.skip_dist_decay){ - log.warn "[nf-core/hic] Hi-C resolution for distance decay not specified. See --res_dist_decay" - } -} - -if (params.res_compartments && !params.skip_compartments){ - Channel.fromPath( params.fasta ) - .ifEmpty { exit 1, "Compartments calling: Fasta file not found: ${params.fasta}" } - .set { fasta_for_compartments } - Channel.from( "${params.res_compartments}" ) - .splitCsv() - .flatten() - .into {comp_bin; comp_res} - map_res = map_res.concat(comp_bin) - all_res = all_res + ',' + params.res_compartments -}else{ - fasta_for_compartments = Channel.empty() - comp_res = Channel.create() - if (!params.skip_compartments){ - log.warn "[nf-core/hic] Hi-C resolution for compartment calling not specified. 
See --res_compartments" - } -} - -map_res - .unique() - .into { map_res_summary; map_res; map_res_cool; map_comp } - - -//////////////////////////////////////////////////// -/* -- PRINT PARAMETER SUMMARY -- */ -//////////////////////////////////////////////////// -log.info NfcoreSchema.params_summary_log(workflow, params, json_schema) - -// Header log info -def summary = [:] -if (workflow.revision) summary['Pipeline Release'] = workflow.revision -summary['Run Name'] = workflow.runName -summary['Input'] = params.input -summary['splitFastq'] = params.split_fastq -if (params.split_fastq) - summary['Read chunks Size'] = params.fastq_chunks_size -summary['Fasta Ref'] = params.fasta -if (params.restriction_site){ - summary['Digestion'] = params.digestion - summary['Restriction Motif']= params.restriction_site - summary['Ligation Motif'] = params.ligation_site - summary['Min Fragment Size']= params.min_restriction_fragment_size - summary['Max Fragment Size']= params.max_restriction_fragment_size - summary['Min Insert Size'] = params.min_insert_size - summary['Max Insert Size'] = params.max_insert_size -}else{ - summary['DNase Mode'] = params.dnase - summary['Min CIS dist'] = params.min_cis_dist -} -summary['Min MAPQ'] = params.min_mapq -summary['Keep Duplicates'] = params.keep_dups ? 'Yes' : 'No' -summary['Keep Multihits'] = params.keep_multi ? 'Yes' : 'No' -summary['Maps resolution'] = all_res -summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" -if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" -summary['Output dir'] = params.outdir -summary['Launch dir'] = workflow.launchDir -summary['Working dir'] = workflow.workDir -summary['Script dir'] = workflow.projectDir -summary['User'] = workflow.userName -if (workflow.profile.contains('awsbatch')) { - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue - summary['AWS CLI'] = params.awscli -} -summary['Config Profile'] = workflow.profile -if (params.config_profile_description) summary['Config Profile Description'] = params.config_profile_description -if (params.config_profile_contact) summary['Config Profile Contact'] = params.config_profile_contact -if (params.config_profile_url) summary['Config Profile URL'] = params.config_profile_url -summary['Config Files'] = workflow.configFiles.join(', ') -if (params.email || params.email_on_fail) { - summary['E-mail Address'] = params.email - summary['E-mail on failure'] = params.email_on_fail - summary['MultiQC maxsize'] = params.max_multiqc_email_size -} - -// Check the hostnames against configured profiles -checkHostname() - -Channel.from(summary.collect{ [it.key, it.value] }) - .map { k,v -> "<dt>$k</dt><dd><samp>${v ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>" } - .reduce { a, b -> return [a, b].join("\n ") } - .map { x -> """ - id: 'nf-core-hic-summary' - description: " - this information is collected when the pipeline is started." 
- section_name: 'nf-core/hic Workflow Summary' - section_href: 'https://github.com/nf-core/hic' - plot_type: 'html' - data: | - <dl class=\"dl-horizontal\"> - $x - </dl> - """.stripIndent() } - .set { ch_workflow_summary } - -/* - * Parse software version numbers - */ - -process get_software_versions { - publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode, - saveAs: { filename -> if (filename.indexOf('.csv') > 0) filename else null } - - output: - file 'software_versions_mqc.yaml' into ch_software_versions_yaml - file 'software_versions.csv' - - script: - """ - echo $workflow.manifest.version > v_pipeline.txt - echo $workflow.nextflow.version > v_nextflow.txt - bowtie2 --version > v_bowtie2.txt - python --version > v_python.txt 2>&1 - samtools --version > v_samtools.txt - multiqc --version > v_multiqc.txt - scrape_software_versions.py &> software_versions_mqc.yaml - """ -} - -/**************************************************** - * PRE-PROCESSING - */ - -if(!params.bwt2_index && params.fasta){ - process makeBowtie2Index { - tag "$fasta_base" - label 'process_highmem' - publishDir path: { params.save_reference ? "${params.outdir}/reference_genome" : params.outdir }, - saveAs: { params.save_reference ? it : null }, mode: params.publish_dir_mode - - input: - file fasta from fasta_for_index - - output: - file "bowtie2_index" into bwt2_index_end2end - file "bowtie2_index" into bwt2_index_trim - - script: - fasta_base = fasta.toString() - ~/(\.fa)?(\.fasta)?(\.fas)?(\.fsa)?$/ - """ - mkdir bowtie2_index - bowtie2-build ${fasta} bowtie2_index/${fasta_base} - """ - } - } - - -if(!params.chromosome_size && params.fasta){ - process makeChromSize { - tag "$fasta" - label 'process_low' - publishDir path: { params.save_reference ? "${params.outdir}/reference_genome" : params.outdir }, - saveAs: { params.save_reference ? it : null }, mode: params.publish_dir_mode - - input: - file fasta from fasta_for_chromsize - - output: - file "*.size" into chrsize, chrsize_build, chrsize_raw, chrsize_balance, chrsize_zoom, chrsize_compartments - - script: - """ - samtools faidx ${fasta} - cut -f1,2 ${fasta}.fai > chrom.size - """ - } - } - -if(!params.restriction_fragments && params.fasta && !params.dnase){ - process getRestrictionFragments { - tag "$fasta ${params.restriction_site}" - label 'process_low' - publishDir path: { params.save_reference ? "${params.outdir}/reference_genome" : params.outdir }, - saveAs: { params.save_reference ? it : null }, mode: params.publish_dir_mode - - input: - file fasta from fasta_for_resfrag - - output: - file "*.bed" into res_frag_file - - script: - """ - digest_genome.py -r ${params.restriction_site} -o restriction_fragments.bed ${fasta} - """ - } - } - -/**************************************************** - * MAIN WORKFLOW - */ - -/* - * HiC-pro - Two-steps Reads Mapping - */ - -process bowtie2_end_to_end { - tag "$sample" - label 'process_medium' - publishDir path: { params.save_aligned_intermediates ? 
"${params.outdir}/mapping/bwt2_end2end" : params.outdir }, - saveAs: { filename -> if (params.save_aligned_intermediates) filename }, mode: params.publish_dir_mode - - input: - set val(sample), file(reads) from raw_reads - file index from bwt2_index_end2end.collect() - - output: - set val(sample), file("${prefix}_unmap.fastq") into unmapped_end_to_end - set val(sample), file("${prefix}.bam") into end_to_end_bam - - script: - prefix = reads.toString() - ~/(\.fq)?(\.fastq)?(\.gz)?$/ - def bwt2_opts = params.bwt2_opts_end2end - if (!params.dnase){ - """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` - bowtie2 --rg-id BMG --rg SM:${prefix} \\ - ${bwt2_opts} \\ - -p ${task.cpus} \\ - -x \${INDEX} \\ - --un ${prefix}_unmap.fastq \\ - -U ${reads} | samtools view -F 4 -bS - > ${prefix}.bam - """ - }else{ - """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` - bowtie2 --rg-id BMG --rg SM:${prefix} \\ - ${bwt2_opts} \\ - -p ${task.cpus} \\ - -x \${INDEX} \\ - --un ${prefix}_unmap.fastq \\ - -U ${reads} > ${prefix}.bam - """ - } -} - -process trim_reads { - tag "$sample" - label 'process_low' - publishDir path: { params.save_aligned_intermediates ? "${params.outdir}/mapping/bwt2_trimmed" : params.outdir }, - saveAs: { filename -> if (params.save_aligned_intermediates) filename }, mode: params.publish_dir_mode - - when: - !params.dnase - - input: - set val(sample), file(reads) from unmapped_end_to_end - - output: - set val(sample), file("${prefix}_trimmed.fastq") into trimmed_reads - - script: - prefix = reads.toString() - ~/(\.fq)?(\.fastq)?(\.gz)?$/ - """ - cutsite_trimming --fastq $reads \\ - --cutsite ${params.ligation_site} \\ - --out ${prefix}_trimmed.fastq - """ -} - -process bowtie2_on_trimmed_reads { - tag "$sample" - label 'process_medium' - publishDir path: { params.save_aligned_intermediates ? "${params.outdir}/mapping/bwt2_trimmed" : params.outdir }, - saveAs: { filename -> if (params.save_aligned_intermediates) filename }, mode: params.publish_dir_mode - - when: - !params.dnase - - input: - set val(sample), file(reads) from trimmed_reads - file index from bwt2_index_trim.collect() - - output: - set val(sample), file("${prefix}_trimmed.bam") into trimmed_bam - - script: - prefix = reads.toString() - ~/(_trimmed)?(\.fq)?(\.fastq)?(\.gz)?$/ - """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` - bowtie2 --rg-id BMG --rg SM:${prefix} \\ - ${params.bwt2_opts_trimmed} \\ - -p ${task.cpus} \\ - -x \${INDEX} \\ - -U ${reads} | samtools view -bS - > ${prefix}_trimmed.bam - """ -} - -if (!params.dnase){ - process bowtie2_merge_mapping_steps{ - tag "$prefix = $bam1 + $bam2" - label 'process_medium' - publishDir "${params.outdir}/hicpro/mapping", mode: params.publish_dir_mode, - saveAs: { filename -> if (params.save_aligned_intermediates && filename.endsWith("stat")) "stats/$filename" - else if (params.save_aligned_intermediates) filename} - - input: - set val(prefix), file(bam1), file(bam2) from end_to_end_bam.join( trimmed_bam ).dump(tag:'merge') - - output: - set val(sample), file("${prefix}_bwt2merged.bam") into bwt2_merged_bam - set val(oname), file("${prefix}.mapstat") into all_mapstat - - script: - sample = prefix.toString() - ~/(_R1|_R2)/ - tag = prefix.toString() =~/_R1/ ? 
"R1" : "R2" - oname = prefix.toString() - ~/(\.[0-9]+)$/ - """ - samtools merge -@ ${task.cpus} \\ - -f ${prefix}_bwt2merged.bam \\ - ${bam1} ${bam2} - - samtools sort -@ ${task.cpus} -m 800M \\ - -n \\ - -o ${prefix}_bwt2merged.sorted.bam \\ - ${prefix}_bwt2merged.bam - - mv ${prefix}_bwt2merged.sorted.bam ${prefix}_bwt2merged.bam - - echo "## ${prefix}" > ${prefix}.mapstat - echo -n "total_${tag}\t" >> ${prefix}.mapstat - samtools view -c ${prefix}_bwt2merged.bam >> ${prefix}.mapstat - echo -n "mapped_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${prefix}_bwt2merged.bam >> ${prefix}.mapstat - echo -n "global_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${bam1} >> ${prefix}.mapstat - echo -n "local_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${bam2} >> ${prefix}.mapstat - """ - } -}else{ - process dnase_mapping_stats{ - tag "$sample = $bam" - label 'process_medium' - publishDir "${params.outdir}/hicpro/mapping", mode: params.publish_dir_mode, - saveAs: { filename -> if (params.save_aligned_intermediates && filename.endsWith("stat")) "stats/$filename" - else if (params.save_aligned_intermediates) filename} - - input: - set val(prefix), file(bam) from end_to_end_bam - - output: - set val(sample), file(bam) into bwt2_merged_bam - set val(oname), file("${prefix}.mapstat") into all_mapstat - - script: - sample = prefix.toString() - ~/(_R1|_R2)/ - tag = prefix.toString() =~/_R1/ ? "R1" : "R2" - oname = prefix.toString() - ~/(\.[0-9]+)$/ - """ - echo "## ${prefix}" > ${prefix}.mapstat - echo -n "total_${tag}\t" >> ${prefix}.mapstat - samtools view -c ${bam} >> ${prefix}.mapstat - echo -n "mapped_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${bam} >> ${prefix}.mapstat - echo -n "global_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${bam} >> ${prefix}.mapstat - echo -n "local_${tag}\t0" >> ${prefix}.mapstat - """ - } -} - -process combine_mates{ - tag "$sample = $r1_prefix + $r2_prefix" - label 'process_low' - publishDir "${params.outdir}/hicpro/mapping", mode: params.publish_dir_mode, - saveAs: {filename -> filename.endsWith(".pairstat") ? 
"stats/$filename" : "$filename"} - - input: - set val(sample), file(aligned_bam) from bwt2_merged_bam.groupTuple() - - output: - set val(oname), file("${sample}_bwt2pairs.bam") into paired_bam - set val(oname), file("*.pairstat") into all_pairstat - - script: - r1_bam = aligned_bam[0] - r1_prefix = r1_bam.toString() - ~/_bwt2merged.bam$/ - r2_bam = aligned_bam[1] - r2_prefix = r2_bam.toString() - ~/_bwt2merged.bam$/ - oname = sample.toString() - ~/(\.[0-9]+)$/ - - def opts = "-t" - if (params.keep_multi) { - opts="${opts} --multi" - }else if (params.min_mapq){ - opts="${opts} -q ${params.min_mapq}" - } - """ - mergeSAM.py -f ${r1_bam} -r ${r2_bam} -o ${sample}_bwt2pairs.bam ${opts} - """ -} - -/* - * HiC-Pro - detect valid interaction from aligned data - */ - -if (!params.dnase){ - process get_valid_interaction{ - tag "$sample" - label 'process_low' - publishDir "${params.outdir}/hicpro/valid_pairs", mode: params.publish_dir_mode, - saveAs: {filename -> if (filename.endsWith("RSstat")) "stats/$filename" - else if (filename.endsWith(".validPairs")) filename - else if (params.save_nonvalid_pairs) filename} - - input: - set val(sample), file(pe_bam) from paired_bam - file frag_file from res_frag_file.collect() - - output: - set val(sample), file("*.validPairs") into valid_pairs - set val(sample), file("*.validPairs") into valid_pairs_4cool - set val(sample), file("*.DEPairs") into de_pairs - set val(sample), file("*.SCPairs") into sc_pairs - set val(sample), file("*.REPairs") into re_pairs - set val(sample), file("*.FiltPairs") into filt_pairs - set val(sample), file("*RSstat") into all_rsstat - - script: - if (params.split_fastq){ - sample = sample.toString() - ~/(\.[0-9]+)$/ - } - - def opts = "" - opts += params.min_cis_dist > 0 ? " -d ${params.min_cis_dist}" : '' - opts += params.min_insert_size > 0 ? " -s ${params.min_insert_size}" : '' - opts += params.max_insert_size > 0 ? " -l ${params.max_insert_size}" : '' - opts += params.min_restriction_fragment_size > 0 ? " -t ${params.min_restriction_fragment_size}" : '' - opts += params.max_restriction_fragment_size > 0 ? " -m ${params.max_restriction_fragment_size}" : '' - opts += params.save_interaction_bam ? " --sam" : '' - prefix = pe_bam.toString() - ~/.bam/ - """ - mapped_2hic_fragments.py -f ${frag_file} -r ${pe_bam} --all ${opts} - sort -k2,2V -k3,3n -k5,5V -k6,6n -o ${prefix}.validPairs ${prefix}.validPairs - """ - } -} -else{ - process get_valid_interaction_dnase{ - tag "$sample" - label 'process_low' - publishDir "${params.outdir}/hicpro/valid_pairs", mode: params.publish_dir_mode, - saveAs: {filename -> if (filename.endsWith("RSstat")) "stats/$filename" - else filename} - - input: - set val(sample), file(pe_bam) from paired_bam - - output: - set val(sample), file("*.validPairs") into valid_pairs - set val(sample), file("*.validPairs") into valid_pairs_4cool - set val(sample), file("*RSstat") into all_rsstat - - script: - if (params.split_fastq){ - sample = sample.toString() - ~/(\.[0-9]+)$/ - } - - opts = params.min_cis_dist > 0 ? 
" -d ${params.min_cis_dist}" : '' - prefix = pe_bam.toString() - ~/.bam/ - """ - mapped_2hic_dnase.py -r ${pe_bam} ${opts} - sort -k2,2V -k3,3n -k5,5V -k6,6n -o ${prefix}.validPairs ${prefix}.validPairs - """ - } -} - -/* - * Remove duplicates - */ - -process remove_duplicates { - tag "$sample" - label 'process_highmem' - publishDir "${params.outdir}/hicpro/valid_pairs", mode: params.publish_dir_mode, - saveAs: {filename -> if (filename.endsWith("mergestat")) "stats/$filename" - else if (filename.endsWith("allValidPairs")) "$filename"} - input: - set val(sample), file(vpairs) from valid_pairs.groupTuple() - - output: - set val(sample), file("*.allValidPairs") into ch_vpairs, ch_vpairs_cool - file("stats/") into mqc_mergestat - file("*mergestat") into all_mergestat - - script: - if ( ! params.keep_dups ){ - """ - mkdir -p stats/${sample} - - ## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs) - sort -S 50% -k2,2V -k3,3n -k5,5V -k6,6n -m ${vpairs} | \\ - awk -F"\\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=\$2 || c2!=\$5 || s1!=\$3 || s2!=\$6){print;c1=\$2;c2=\$5;s1=\$3;s2=\$6}' > ${sample}.allValidPairs - - echo -n "valid_interaction\t" > ${sample}_allValidPairs.mergestat - cat ${vpairs} | wc -l >> ${sample}_allValidPairs.mergestat - echo -n "valid_interaction_rmdup\t" >> ${sample}_allValidPairs.mergestat - cat ${sample}.allValidPairs | wc -l >> ${sample}_allValidPairs.mergestat - - ## Count short range (<20000) vs long range contacts - awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${sample}.allValidPairs >> ${sample}_allValidPairs.mergestat - - ## For MultiQC - mkdir -p stats/${sample} - cp ${sample}_allValidPairs.mergestat stats/${sample}/ - """ - }else{ - """ - cat ${vpairs} > ${sample}.allValidPairs - echo -n "valid_interaction\t" > ${sample}_allValidPairs.mergestat - cat ${vpairs} | wc -l >> ${sample}_allValidPairs.mergestat - echo -n "valid_interaction_rmdup\t" >> ${sample}_allValidPairs.mergestat - cat ${sample}.allValidPairs | wc -l >> ${sample}_allValidPairs.mergestat - - ## Count short range (<20000) vs long range contacts - awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${sample}.allValidPairs >> ${sample}_allValidPairs.mergestat - - ## For MultiQC - mkdir -p stats/${sample} - cp ${sample}_allValidPairs.mergestat stats/${sample}/ - """ - } -} - -process merge_stats { - tag "$ext" - label 'process_low' - publishDir "${params.outdir}/hicpro/", mode: params.publish_dir_mode, - saveAs: {filename -> if (filename.endsWith("stat")) "stats/$filename"} - - input: - set val(prefix), file(fstat) from all_mapstat.groupTuple().concat(all_pairstat.groupTuple(), all_rsstat.groupTuple()) - - output: - file("stats/") into mqc_mstats - file("*stat") into all_mstats - - script: - sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2|_1|_2)/ - if ( (fstat =~ /.mapstat/) ){ ext = "mmapstat" } - if ( (fstat =~ /.pairstat/) ){ ext = "mpairstat" } - if ( (fstat =~ /.RSstat/) ){ ext = "mRSstat" } - """ - merge_statfiles.py -f ${fstat} > ${prefix}.${ext} - mkdir -p stats/${sample} - cp ${prefix}.${ext} stats/${sample}/ - """ -} - -/* - * HiC-Pro build matrix 
processes - * kept for backward compatibility - */ - - -process build_contact_maps{ - tag "$sample - $mres" - label 'process_highmem' - publishDir "${params.outdir}/hicpro/matrix/raw", mode: params.publish_dir_mode - - when: - !params.skip_maps && params.hicpro_maps - - input: - set val(sample), file(vpairs), val(mres) from ch_vpairs.combine(map_res) - file chrsize from chrsize.collect() - - output: - set val(sample), val(mres), file("*.matrix"), file("*.bed") into raw_maps, raw_maps_4cool - - script: - """ - build_matrix --matrix-format upper --binsize ${mres} --chrsizes ${chrsize} --ifile ${vpairs} --oprefix ${sample}_${mres} - """ -} - -process run_ice{ - tag "$rmaps" - label 'process_highmem' - publishDir "${params.outdir}/hicpro/matrix/iced", mode: params.publish_dir_mode - - when: - !params.skip_maps && !params.skip_balancing && params.hicpro_maps - - input: - set val(sample), val(res), file(rmaps), file(bed) from raw_maps - - output: - set val(sample), val(res), file("*iced.matrix"), file(bed) into hicpro_iced_maps - file ("*.biases") into hicpro_iced_bias - - script: - prefix = rmaps.toString() - ~/(\.matrix)?$/ - """ - ice --filter_low_counts_perc ${params.ice_filter_low_count_perc} \ - --results_filename ${prefix}_iced.matrix \ - --filter_high_counts_perc ${params.ice_filter_high_count_perc} \ - --max_iter ${params.ice_max_iter} --eps ${params.ice_eps} --remove-all-zeros-loci --output-bias 1 --verbose 1 ${rmaps} - """ -} - - -/* - * Cooler - */ - -process convert_to_pairs { - tag "$sample" - label 'process_medium' - - when: - !params.skip_maps - - input: - set val(sample), file(vpairs) from ch_vpairs_cool - file chrsize from chrsize_build.collect() - - output: - set val(sample), file("*.txt.gz") into cool_build, cool_build_zoom - - script: - """ - ## chr/pos/strand/chr/pos/strand - awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs > contacts.txt - gzip contacts.txt - """ -} - - -process cooler_raw { - tag "$sample - ${res}" - label 'process_medium' - - publishDir "${params.outdir}/contact_maps/", mode: 'copy', - saveAs: {filename -> filename.endsWith(".cool") ? "raw/cool/$filename" : "raw/txt/$filename"} - - input: - set val(sample), file(contacts), val(res) from cool_build.combine(map_res_cool) - file chrsize from chrsize_raw.collect() - - output: - set val(sample), val(res), file("*cool") into raw_cool_maps - set file("*.bed"), file("${sample}_${res}.txt") into raw_txt_maps - - script: - """ - cooler makebins ${chrsize} ${res} > ${sample}_${res}.bed - cooler cload pairs -c1 2 -p1 3 -c2 4 -p2 5 ${sample}_${res}.bed ${contacts} ${sample}_${res}.cool - cooler dump ${sample}_${res}.cool | awk '{OFS="\t"; print \$1+1,\$2+1,\$3}' > ${sample}_${res}.txt - """ -} - -process cooler_balance { - tag "$sample - ${res}" - label 'process_medium' - - publishDir "${params.outdir}/contact_maps/", mode: 'copy', - saveAs: {filename -> filename.endsWith(".cool") ? 
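// Bin assignment, as done by `build_matrix` and `cooler makebins`, is plain
// integer division of the genomic coordinate by the bin size; a minimal Groovy
// sketch, runnable once uncommented (coordinates are illustrative):
//   int binsize = 1000000
//   int pos1 = 10245, pos2 = 13456
//   assert pos1.intdiv(binsize) == 0 && pos2.intdiv(binsize) == 0
//   // both ends fall in the first 1 Mb bin, so this pair increments bin (0,0)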
"norm/cool/$filename" : "norm/txt/$filename"} - - when: - !params.skip_balancing - - input: - set val(sample), val(res), file(cool) from raw_cool_maps - file chrsize from chrsize_balance.collect() - - output: - set val(sample), val(res), file("${sample}_${res}_norm.cool") into balanced_cool_maps - file("${sample}_${res}_norm.txt") into norm_txt_maps - - script: - """ - cp ${cool} ${sample}_${res}_norm.cool - cooler balance ${sample}_${res}_norm.cool -p ${task.cpus} --force - cooler dump ${sample}_${res}_norm.cool --balanced --na-rep 0 | awk '{OFS="\t"; print \$1+1,\$2+1,\$4}' > ${sample}_${res}_norm.txt - """ -} - -process cooler_zoomify { - tag "$sample" - label 'process_medium' - publishDir "${params.outdir}/contact_maps/norm/mcool", mode: 'copy' - - when: - !params.skip_mcool - - input: - set val(sample), file(contacts) from cool_build_zoom - file chrsize from chrsize_zoom.collect() - - output: - file("*mcool") into mcool_maps - - script: - """ - cooler makebins ${chrsize} ${params.res_zoomify} > bins.bed - cooler cload pairs -c1 2 -p1 3 -c2 4 -p2 5 bins.bed ${contacts} ${sample}.cool - cooler zoomify --nproc ${task.cpus} --balance ${sample}.cool - """ -} - - -/**************************************************** - * DOWNSTREAM ANALYSIS - */ - -(maps_cool_insulation, maps_cool_comp, maps_hicexplorer_ddecay, maps_hicexplorer_tads) = balanced_cool_maps.into(4) +nextflow.enable.dsl = 2 /* - * Counts vs distance QC - */ - -if (!params.skip_dist_decay){ - chddecay = maps_hicexplorer_ddecay.combine(ddecay_res).filter{ it[1] == it[3] }.dump(tag: "ddecay") -}else{ - chddecay = Channel.empty() -} - -process dist_decay { - tag "$sample" - label 'process_medium' - publishDir "${params.outdir}/dist_decay", mode: 'copy' - - when: - !params.skip_dist_decay - - input: - set val(sample), val(res), file(maps), val(r) from chddecay - - output: - file("*_distcount.txt") - file("*.png") - - - script: - """ - hicPlotDistVsCounts --matrices ${maps} \ - --plotFile ${maps.baseName}_distcount.png \ - --outFileData ${maps.baseName}_distcount.txt - """ -} - -/* - * Compartment calling - */ - -if(!params.skip_compartments){ - chcomp = maps_cool_comp.combine(comp_res).filter{ it[1] == it[3] }.dump(tag: "comp") -}else{ - chcomp = Channel.empty() -} - -process compartment_calling { - tag "$sample - $res" - label 'process_medium' - publishDir "${params.outdir}/compartments", mode: 'copy' - - when: - !params.skip_compartments - - input: - set val(sample), val(res), file(cool), val(r) from chcomp - file(fasta) from fasta_for_compartments.collect() - file(chrsize) from chrsize_compartments.collect() - - output: - file("*compartments*") optional true into out_compartments - - script: - """ - cooltools genome binnify --all-names ${chrsize} ${res} > genome_bins.txt - cooltools genome gc genome_bins.txt ${fasta} > genome_gc.txt - cooltools call-compartments --contact-type cis -o ${sample}_compartments ${cool} - awk -F"\t" 'NR>1{OFS="\t"; if(\$6==""){\$6=0}; print \$1,\$2,\$3,\$6}' ${sample}_compartments.cis.vecs.tsv | sort -k1,1 -k2,2n > ${sample}_compartments.cis.E1.bedgraph - """ -} +======================================================================================== + GENOME PARAMETER VALUES +======================================================================================== +*/ +params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') /* - * TADs calling - */ - -if (!params.skip_tads){ - chtads = maps_hicexplorer_tads.combine(tads_res_hicexplorer).filter{ it[1] == it[3] }.dump(tag: "hicexp") -}else{ - 
chtads = Channel.empty() -} - -process tads_hicexplorer { - tag "$sample - $res" - label 'process_medium' - publishDir "${params.outdir}/tads/hicexplorer", mode: 'copy' - - when: - !params.skip_tads && params.tads_caller =~ 'hicexplorer' - - input: - set val(sample), val(res), file(cool), val(r) from chtads - - output: - file("*.{bed,bedgraph,gff}") into hicexplorer_tads - - script: - """ - hicFindTADs --matrix ${cool} \ - --outPrefix tad \ - --correctForMultipleTesting fdr \ - --numberOfProcessors ${task.cpus} - """ -} - -if (!params.skip_tads){ - chIS = maps_cool_insulation.combine(tads_res_insulation).filter{ it[1] == it[3] }.dump(tag : "ins") -}else{ - chIS = Channel.empty() -} - -process tads_insulation { - tag "$sample - $res" - label 'process_medium' - publishDir "${params.outdir}/tads/insulation", mode: 'copy' - - when: - !params.skip_tads && params.tads_caller =~ 'insulation' - - input: - set val(sample), val(res), file(cool), val(r) from chIS - - output: - file("*tsv") into insulation_tads - - script: - """ - cooltools diamond-insulation --window-pixels ${cool} 15 25 50 > ${sample}_insulation.tsv - """ -} +======================================================================================== + VALIDATE & PRINT PARAMETER SUMMARY +======================================================================================== +*/ +WorkflowMain.initialise(workflow, params, log) /* - * MultiQC - */ - -process multiqc { - label 'process_low' - publishDir "${params.outdir}/MultiQC", mode: params.publish_dir_mode - - when: - !params.skip_multiqc - - input: - file multiqc_config from ch_multiqc_config - file (mqc_custom_config) from ch_multiqc_custom_config.collect().ifEmpty([]) - file ('input_*/*') from mqc_mstats.concat(mqc_mergestat).collect() - file ('software_versions/*') from ch_software_versions_yaml - file workflow_summary from ch_workflow_summary.collect() +======================================================================================== + NAMED WORKFLOW FOR PIPELINE +======================================================================================== +*/ - output: - file "*multiqc_report.html" into multiqc_report - file "*_data" +include { HIC } from './workflows/hic' - script: - rtitle = '' - rfilename = '' - if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) { - rtitle = "--title \"${workflow.runName}\"" - rfilename = "--filename " + workflow.runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" - } - custom_config_file = params.multiqc_config ? "--config $mqc_custom_config" : '' - """ - multiqc -f $rtitle $rfilename $custom_config_file . 
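#   For a custom run name the command above expands to something like the
#   following (file names are illustrative only):
#     multiqc -f --title "my_run" --filename my_run_multiqc_report --config multiqc_config.yaml .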
- """ +// +// WORKFLOW: Run main nf-core/hic analysis pipeline +// +workflow NFCORE_HIC { + HIC () } /* - * Output Description HTML - */ -process output_documentation { - publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode - - input: - file output_docs from ch_output_docs - file images from ch_output_docs_images - - output: - file 'results_description.html' +======================================================================================== + RUN ALL WORKFLOWS +======================================================================================== +*/ - script: - """ - markdown_to_html.py $output_docs -o results_description.html - """ +// +// WORKFLOW: Execute a single named workflow for the pipeline +// See: https://github.com/nf-core/rnaseq/issues/619 +// +workflow { + NFCORE_HIC () } /* - * Completion e-mail notification - */ - -workflow.onComplete { - - // Set up the e-mail variables - def subject = "[nf-core/hic] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[nf-core/hic] FAILED: $workflow.runName" - } - def email_fields = [:] - email_fields['version'] = workflow.manifest.version - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary - email_fields['summary']['Date Started'] = workflow.start - email_fields['summary']['Date Completed'] = workflow.complete - email_fields['summary']['Pipeline script file path'] = workflow.scriptFile - email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision - email_fields['summary']['Nextflow Version'] = workflow.nextflow.version - email_fields['summary']['Nextflow Build'] = workflow.nextflow.build - email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = ch_multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList) { - log.warn "[nf-core/hic] Found multiple reports from process 'multiqc', will use only one" - mqc_report = mqc_report[0] - } - } - } catch (all) { - log.warn "[nf-core/hic] Could not attach MultiQC report to summary email" - } - - // Check if we are only sending emails on failure - email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // 
Render the sendmail template - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: params.max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "[nf-core/hic] Sent summary e-mail to $email_address (sendmail)" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= params.max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "[nf-core/hic] Sent summary e-mail to $email_address (mail)" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - - c_green = params.monochrome_logs ? '' : "\033[0;32m"; - c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_red = params.monochrome_logs ? '' : "\033[0;31m"; - c_reset = params.monochrome_logs ? '' : "\033[0m"; - - if (workflow.stats.ignoredCount > 0 && workflow.success) { - log.info "-${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}-" - log.info "-${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}-" - log.info "-${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}-" - } - - if (workflow.success) { - log.info "-${c_purple}[nf-core/hic]${c_green} Pipeline completed successfully${c_reset}-" - } else { - checkHostname() - log.info "-${c_purple}[nf-core/hic]${c_red} Pipeline completed with errors${c_reset}-" - } -} - -workflow.onError { - // Print unexpected parameters - easiest is to just rerun validation - NfcoreSchema.validateParameters(params, json_schema, log) -} - -def checkHostname() { - def c_reset = params.monochrome_logs ? '' : "\033[0m" - def c_white = params.monochrome_logs ? '' : "\033[0;37m" - def c_red = params.monochrome_logs ? '' : "\033[1;91m" - def c_yellow_bold = params.monochrome_logs ? 
'' : "\033[1;93m" - if (params.hostnames) { - def hostname = 'hostname'.execute().text.trim() - params.hostnames.each { prof, hnames -> - hnames.each { hname -> - if (hostname.contains(hname) && !workflow.profile.contains(prof)) { - log.error "${c_red}====================================================${c_reset}\n" + - " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + - " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + - " ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" + - "${c_red}====================================================${c_reset}\n" - } - } - } - } -} +======================================================================================== + THE END +======================================================================================== +*/ diff --git a/modules.json b/modules.json new file mode 100644 index 0000000000000000000000000000000000000000..a68b1c1a6a7a5f41f235094a0055e1f1751c98de --- /dev/null +++ b/modules.json @@ -0,0 +1,14 @@ +{ + "name": "nf-core/hic", + "homePage": "https://github.com/nf-core/hic", + "repos": { + "nf-core/modules": { + "fastqc": { + "git_sha": "e937c7950af70930d1f34bb961403d9d2aa81c7d" + }, + "multiqc": { + "git_sha": "e937c7950af70930d1f34bb961403d9d2aa81c7d" + } + } + } +} diff --git a/modules/local/bowtie2_end_to_end.nf b/modules/local/bowtie2_end_to_end.nf new file mode 100644 index 0000000000000000000000000000000000000000..8391ac7452a2e152f503aa3f4b18a9c8dd79f92f --- /dev/null +++ b/modules/local/bowtie2_end_to_end.nf @@ -0,0 +1,45 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process bowtie2_end_to_end { + tag "$sample" + label 'process_medium' + publishDir path: { params.save_aligned_intermediates ? 
"${params.outdir}/mapping/bwt2_end2end" : params.outdir }, + saveAs: { filename -> if (params.save_aligned_intermediates) filename }, mode: params.publish_dir_mode + + input: + tuple val(sample), path(reads) + path index + + output: + tuple val(sample), path("${prefix}_unmap.fastq"), emit: unmapped_end_to_end + tuple val(sample), path("${prefix}.bam"), emit: end_to_end_bam + + script: + prefix = reads.toString() - ~/(\.fq)?(\.fastq)?(\.gz)?$/ + def bwt2_opts = params.bwt2_opts_end2end + if (!params.dnase){ + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` + bowtie2 --rg-id BMG --rg SM:${prefix} \\ + ${bwt2_opts} \\ + -p ${task.cpus} \\ + -x \${INDEX} \\ + --un ${prefix}_unmap.fastq \\ + -U ${reads} | samtools view -F 4 -bS - > ${prefix}.bam + """ + }else{ + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` + bowtie2 --rg-id BMG --rg SM:${prefix} \\ + ${bwt2_opts} \\ + -p ${task.cpus} \\ + -x \${INDEX} \\ + --un ${prefix}_unmap.fastq \\ + -U ${reads} > ${prefix}.bam + """ + } +} diff --git a/modules/local/bowtie2_merge_mapping_steps.nf b/modules/local/bowtie2_merge_mapping_steps.nf new file mode 100644 index 0000000000000000000000000000000000000000..148acd01695e30fa9e07365d0a52334ef3c6f63f --- /dev/null +++ b/modules/local/bowtie2_merge_mapping_steps.nf @@ -0,0 +1,47 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process bowtie2_merge_mapping_steps{ + tag "$prefix = $bam1 + $bam2" + label 'process_medium' + publishDir "${params.outdir}/hicpro/mapping", mode: params.publish_dir_mode, + saveAs: { filename -> if (params.save_aligned_intermediates && filename.endsWith("stat")) "stats/$filename" + else if (params.save_aligned_intermediates) filename} + + input: + tuple val(prefix), path(bam1), path(bam2) + + output: + tuple val(sample), path("${prefix}_bwt2merged.bam"), emit:bwt2_merged_bam + tuple val(oname), path("${prefix}.mapstat"), emit:all_mapstat + + script: + sample = prefix.toString() - ~/(_R1|_R2)/ + tag = prefix.toString() =~/_R1/ ? 
"R1" : "R2" + oname = prefix.toString() - ~/(\.[0-9]+)$/ + """ + samtools merge -@ ${task.cpus} \\ + -f ${prefix}_bwt2merged.bam \\ + ${bam1} ${bam2} + + samtools sort -@ ${task.cpus} -m 800M \\ + -n \\ + -o ${prefix}_bwt2merged.sorted.bam \\ + ${prefix}_bwt2merged.bam + + mv ${prefix}_bwt2merged.sorted.bam ${prefix}_bwt2merged.bam + + echo "## ${prefix}" > ${prefix}.mapstat + echo -n "total_${tag}\t" >> ${prefix}.mapstat + samtools view -c ${prefix}_bwt2merged.bam >> ${prefix}.mapstat + echo -n "mapped_${tag}\t" >> ${prefix}.mapstat + samtools view -c -F 4 ${prefix}_bwt2merged.bam >> ${prefix}.mapstat + echo -n "global_${tag}\t" >> ${prefix}.mapstat + samtools view -c -F 4 ${bam1} >> ${prefix}.mapstat + echo -n "local_${tag}\t" >> ${prefix}.mapstat + samtools view -c -F 4 ${bam2} >> ${prefix}.mapstat + """ +} diff --git a/modules/local/bowtie2_on_trimmed_reads.nf b/modules/local/bowtie2_on_trimmed_reads.nf new file mode 100644 index 0000000000000000000000000000000000000000..a7eb45895bb3ff30d181fadce055be39de4aefbd --- /dev/null +++ b/modules/local/bowtie2_on_trimmed_reads.nf @@ -0,0 +1,33 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process bowtie2_on_trimmed_reads { + tag "$sample" + label 'process_medium' + publishDir path: { params.save_aligned_intermediates ? "${params.outdir}/mapping/bwt2_trimmed" : params.outdir }, + saveAs: { filename -> if (params.save_aligned_intermediates) filename }, mode: params.publish_dir_mode + + when: + !params.dnase + + input: + tuple val(sample), path(reads) + path index + + output: + tuple val(sample), path("${prefix}_trimmed.bam"), emit:trimmed_bam + + script: + prefix = reads.toString() - ~/(_trimmed)?(\.fq)?(\.fastq)?(\.gz)?$/ + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` + bowtie2 --rg-id BMG --rg SM:${prefix} \\ + ${params.bwt2_opts_trimmed} \\ + -p ${task.cpus} \\ + -x \${INDEX} \\ + -U ${reads} | samtools view -bS - > ${prefix}_trimmed.bam + """ +} diff --git a/modules/local/build_contact_maps.nf b/modules/local/build_contact_maps.nf new file mode 100644 index 0000000000000000000000000000000000000000..764f48287f36b89def3911bbdab68b556a88bf9e --- /dev/null +++ b/modules/local/build_contact_maps.nf @@ -0,0 +1,26 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process build_contact_maps{ + tag "$sample - $mres" + label 'process_highmem' + publishDir "${params.outdir}/hicpro/matrix/raw", mode: params.publish_dir_mode + + when: + !params.skip_maps && params.hicpro_maps + + input: + tuple val(sample), path(vpairs), val(mres) + path chrsize + + output: + tuple val(sample), val(mres), path("*.matrix"), path("*.bed"), emit: raw_maps_4cool + + script: + """ + build_matrix --matrix-format upper --binsize ${mres} --chrsizes ${chrsize} --ipath ${vpairs} --oprefix ${sample}_${mres} + """ +} diff --git a/modules/local/combine_mates.nf b/modules/local/combine_mates.nf new file mode 100644 index 0000000000000000000000000000000000000000..503911ef9296157d0502e84ea87b1a77a7c7844e --- /dev/null +++ b/modules/local/combine_mates.nf @@ -0,0 +1,36 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process combine_mates{ + tag "$sample = $r1_prefix + $r2_prefix" + 
label 'process_low' + publishDir "${params.outdir}/hicpro/mapping", mode: params.publish_dir_mode, + saveAs: {filename -> filename.endsWith(".pairstat") ? "stats/$filename" : "$filename"} + + input: + tuple val(sample), path(aligned_bam) + + output: + tuple val(oname), path("${sample}_bwt2pairs.bam"), emit:paired_bam + tuple val(oname), path("*.pairstat"), emit:all_pairstat + + script: + r1_bam = aligned_bam[0] + r1_prefix = r1_bam.toString() - ~/_bwt2merged.bam$/ + r2_bam = aligned_bam[1] + r2_prefix = r2_bam.toString() - ~/_bwt2merged.bam$/ + oname = sample.toString() - ~/(\.[0-9]+)$/ + + def opts = "-t" + if (params.keep_multi) { + opts="${opts} --multi" + }else if (params.min_mapq){ + opts="${opts} -q ${params.min_mapq}" + } + """ + mergeSAM.py -f ${r1_bam} -r ${r2_bam} -o ${sample}_bwt2pairs.bam ${opts} + """ +} diff --git a/modules/local/compartment_calling.nf b/modules/local/compartment_calling.nf new file mode 100644 index 0000000000000000000000000000000000000000..51f9788e55ba0dcc068cbcb35c0711d57274ce7f --- /dev/null +++ b/modules/local/compartment_calling.nf @@ -0,0 +1,30 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process compartment_calling { + tag "$sample - $res" + label 'process_medium' + publishDir "${params.outdir}/compartments", mode: 'copy' + + when: + !params.skip_compartments + + input: + tuple val(sample), val(res), path(cool), val(r) + path(fasta) + path(chrsize) + + output: + path("*compartments*") optional true, emit:out_compartments + + script: + """ + cooltools genome binnify --all-names ${chrsize} ${res} > genome_bins.txt + cooltools genome gc genome_bins.txt ${fasta} > genome_gc.txt + cooltools call-compartments --contact-type cis -o ${sample}_compartments ${cool} + awk -F"\t" 'NR>1{OFS="\t"; if(\$6==""){\$6=0}; print \$1,\$2,\$3,\$6}' ${sample}_compartments.cis.vecs.tsv | sort -k1,1 -k2,2n > ${sample}_compartments.cis.E1.bedgraph + """ +} diff --git a/modules/local/converts_to_pairs.nf b/modules/local/converts_to_pairs.nf new file mode 100644 index 0000000000000000000000000000000000000000..60554d4cbe2b3f487f2d8606253595ad95abdfc8 --- /dev/null +++ b/modules/local/converts_to_pairs.nf @@ -0,0 +1,27 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process convert_to_pairs { + tag "$sample" + label 'process_medium' + + when: + !params.skip_maps + + input: + tuple val(sample), path(vpairs) + path chrsize + + output: + tuple val(sample), path("*.txt.gz"), emit: cool_build_zoom + + script: + """ + ## chr/pos/strand/chr/pos/strand + awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs > contacts.txt + gzip contacts.txt + """ +} diff --git a/modules/local/cooler_balance.nf b/modules/local/cooler_balance.nf new file mode 100644 index 0000000000000000000000000000000000000000..201f06424e74c7ce136dac8cc1a6bae9211d9f80 --- /dev/null +++ b/modules/local/cooler_balance.nf @@ -0,0 +1,31 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process cooler_balance { + tag "$sample - ${res}" + label 'process_medium' + + publishDir "${params.outdir}/contact_maps/", mode: 'copy', + saveAs: {filename -> filename.endsWith(".cool") ? 
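// `cooler balance` performs ICE-style iterative correction: per-bin biases are
// estimated so that, at convergence, every row of the corrected matrix sums to
// roughly the same value. One iteration in miniature, runnable Groovy once
// uncommented (toy symmetric 2x2 matrix):
//   def m = [[4.0, 2.0], [2.0, 1.0]]
//   def b = [m[0][0] + m[0][1], m[1][0] + m[1][1]].collect { Math.sqrt(it) }
//   def n = (0..1).collect { i -> (0..1).collect { j -> m[i][j] / (b[i] * b[j]) } }
//   // repeating the update on `n` drives the row sums together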
"norm/cool/$filename" : "norm/txt/$filename"} + + when: + !params.skip_balancing + + input: + tuple val(sample), val(res), path(cool) + path chrsize + + output: + tuple val(sample), val(res), path("${sample}_${res}_norm.cool"), emit:balanced_cool_maps + path("${sample}_${res}_norm.txt"), emit:norm_txt_maps + + script: + """ + cp ${cool} ${sample}_${res}_norm.cool + cooler balance ${sample}_${res}_norm.cool -p ${task.cpus} --force + cooler dump ${sample}_${res}_norm.cool --balanced --na-rep 0 | awk '{OFS="\t"; print \$1+1,\$2+1,\$4}' > ${sample}_${res}_norm.txt + """ +} diff --git a/modules/local/cooler_raw.nf b/modules/local/cooler_raw.nf new file mode 100644 index 0000000000000000000000000000000000000000..9bc45c5dd6aac47ed2ef92a63f42bd092f000d13 --- /dev/null +++ b/modules/local/cooler_raw.nf @@ -0,0 +1,28 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process cooler_raw { + tag "$sample - ${res}" + label 'process_medium' + + publishDir "${params.outdir}/contact_maps/", mode: 'copy', + saveAs: {filename -> filename.endsWith(".cool") ? "raw/cool/$filename" : "raw/txt/$filename"} + + input: + tuple val(sample), path(contacts), val(res) + path chrsize + + output: + tuple val(sample), val(res), path("*cool"), emit:raw_cool_maps + tuple path("*.bed"), path("${sample}_${res}.txt"), emit:raw_txt_maps + + script: + """ + cooler makebins ${chrsize} ${res} > ${sample}_${res}.bed + cooler cload pairs -c1 2 -p1 3 -c2 4 -p2 5 ${sample}_${res}.bed ${contacts} ${sample}_${res}.cool + cooler dump ${sample}_${res}.cool | awk '{OFS="\t"; print \$1+1,\$2+1,\$3}' > ${sample}_${res}.txt + """ +} diff --git a/modules/local/cooler_zoomify.nf b/modules/local/cooler_zoomify.nf new file mode 100644 index 0000000000000000000000000000000000000000..1ce68d0288c64359c2a67f7ec85c55cf0a749931 --- /dev/null +++ b/modules/local/cooler_zoomify.nf @@ -0,0 +1,28 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process cooler_zoomify { + tag "$sample" + label 'process_medium' + publishDir "${params.outdir}/contact_maps/norm/mcool", mode: 'copy' + + when: + !params.skip_mcool + + input: + tuple val(sample), path(contacts) + path chrsize + + output: + path("*mcool"), emit:mcool_maps + + script: + """ + cooler makebins ${chrsize} ${params.res_zoomify} > bins.bed + cooler cload pairs -c1 2 -p1 3 -c2 4 -p2 5 bins.bed ${contacts} ${sample}.cool + cooler zoomify --nproc ${task.cpus} --balance ${sample}.cool + """ +} diff --git a/modules/local/dist_decay.nf b/modules/local/dist_decay.nf new file mode 100644 index 0000000000000000000000000000000000000000..e5bc660e9f8257b4152ee3782ef23b58a6665c06 --- /dev/null +++ b/modules/local/dist_decay.nf @@ -0,0 +1,29 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process dist_decay { + tag "$sample" + label 'process_medium' + publishDir "${params.outdir}/dist_decay", mode: 'copy' + + when: + !params.skip_dist_decay + + input: + tuple val(sample), val(res), path(maps), val(r) + + output: + path("*_distcount.txt") + path("*.png") + + + script: + """ + hicPlotDistVsCounts --matrices ${maps} \ + --plotFile ${maps.baseName}_distcount.png \ + --outFileData ${maps.baseName}_distcount.txt + """ +} diff --git 
a/modules/local/dnase_mapping_stats.nf b/modules/local/dnase_mapping_stats.nf new file mode 100644 index 0000000000000000000000000000000000000000..c11f3434e7de826ae318626441f1d095bd770150 --- /dev/null +++ b/modules/local/dnase_mapping_stats.nf @@ -0,0 +1,35 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process dnase_mapping_stats{ + tag "$sample = $bam" + label 'process_medium' + publishDir "${params.outdir}/hicpro/mapping", mode: params.publish_dir_mode, + saveAs: { filename -> if (params.save_aligned_intermediates && filename.endsWith("stat")) "stats/$filename" + else if (params.save_aligned_intermediates) filename} + + input: + tuple val(prefix), path(bam) + + output: + tuple val(sample), path(bam), emit:bwt2_merged_bam + tuple val(oname), path("${prefix}.mapstat"), emit:all_mapstat + + script: + sample = prefix.toString() - ~/(_R1|_R2)/ + tag = prefix.toString() =~/_R1/ ? "R1" : "R2" + oname = prefix.toString() - ~/(\.[0-9]+)$/ + """ + echo "## ${prefix}" > ${prefix}.mapstat + echo -n "total_${tag}\t" >> ${prefix}.mapstat + samtools view -c ${bam} >> ${prefix}.mapstat + echo -n "mapped_${tag}\t" >> ${prefix}.mapstat + samtools view -c -F 4 ${bam} >> ${prefix}.mapstat + echo -n "global_${tag}\t" >> ${prefix}.mapstat + samtools view -c -F 4 ${bam} >> ${prefix}.mapstat + echo -n "local_${tag}\t0" >> ${prefix}.mapstat + """ +} diff --git a/modules/local/functions.nf b/modules/local/functions.nf new file mode 100644 index 0000000000000000000000000000000000000000..da9da093d3f6025e328759a12adc2c1c9ede0d03 --- /dev/null +++ b/modules/local/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
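// Example of how `saveFiles` resolves a publish path once this file is loaded
// (a hypothetical call; file name and directories are illustrative):
//   saveFiles(filename: 'sample1.mapstat', publish_dir: 'mapping', meta: [:],
//             publish_by_meta: [],
//             options: [publish_dir: 'hicpro/mapping', publish_files: ['stat': 'stats']])
//   // -> 'hicpro/mapping/stats/sample1.mapstat'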
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/local/getRestictionFragments.nf b/modules/local/getRestictionFragments.nf new file mode 100644 index 0000000000000000000000000000000000000000..00c0dda5bda3d068bebece33d3f6f4be3f22c524 --- /dev/null +++ b/modules/local/getRestictionFragments.nf @@ -0,0 +1,23 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process getRestrictionFragments { + tag "$fasta ${params.restriction_site}" + label 'process_low' + publishDir path: { params.save_reference ? "${params.outdir}/reference_genome" : params.outdir }, + saveAs: { params.save_reference ? it : null }, mode: params.publish_dir_mode + + input: + path fasta + + output: + path "*.bed", emit:res_frag_file + + script: + """ + digest_genome.py -r ${params.restriction_site} -o restriction_fragments.bed ${fasta} + """ +} diff --git a/modules/local/get_software_versions.nf b/modules/local/get_software_versions.nf new file mode 100644 index 0000000000000000000000000000000000000000..be0d2b514ba664db2a19122f6ac60bb439b9705e --- /dev/null +++ b/modules/local/get_software_versions.nf @@ -0,0 +1,35 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process GET_SOFTWARE_VERSIONS { + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'pipeline_info', meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? 
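// Per-module options defined in conf/modules.config reach a module through
// `addParams` at include time; a hypothetical include following the nf-core
// DSL2 template of this era (paths are illustrative, not part of this PR):
//   def modules = params.modules.clone()
//   include { FASTQC } from '../modules/nf-core/modules/fastqc/main' addParams( options: modules['fastqc'] )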
"conda-forge::python=3.8.3" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/python:3.8.3" + } else { + container "quay.io/biocontainers/python:3.8.3" + } + + cache false + + input: + path versions + + output: + path "software_versions.tsv" , emit: tsv + path 'software_versions_mqc.yaml', emit: yaml + + script: // This script is bundled with the pipeline, in nf-core/hic/bin/ + """ + echo $workflow.manifest.version > pipeline.version.txt + echo $workflow.nextflow.version > nextflow.version.txt + multiqc --version > v_multiqc.txt + scrape_software_versions.py &> software_versions_mqc.yaml + """ +} diff --git a/modules/local/get_valid_interaction.nf b/modules/local/get_valid_interaction.nf new file mode 100644 index 0000000000000000000000000000000000000000..622c08ee92579a97662764dc9e82a827768dd476 --- /dev/null +++ b/modules/local/get_valid_interaction.nf @@ -0,0 +1,45 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process get_valid_interaction{ + tag "$sample" + label 'process_low' + publishDir "${params.outdir}/hicpro/valid_pairs", mode: params.publish_dir_mode, + saveAs: {filename -> if (filename.endsWith("RSstat")) "stats/$filename" + else if (filename.endsWith(".validPairs")) filename + else if (params.save_nonvalid_pairs) filename} + + input: + tuple val(sample), path(pe_bam) + path frag_path + + output: + tuple val(sample), path("*.validPairs"), emit:valid_pairs + tuple val(sample), path("*.validPairs"), emit:valid_pairs_4cool + tuple val(sample), path("*.DEPairs"), emit:de_pairs + tuple val(sample), path("*.SCPairs"), emit:sc_pairs + tuple val(sample), path("*.REPairs"), emit:re_pairs + tuple val(sample), path("*.FiltPairs"), emit:filt_pairs + tuple val(sample), path("*RSstat"), emit:all_rsstat + + script: + if (params.split_fastq){ + sample = sample.toString() - ~/(\.[0-9]+)$/ + } + + def opts = "" + opts += params.min_cis_dist > 0 ? " -d ${params.min_cis_dist}" : '' + opts += params.min_insert_size > 0 ? " -s ${params.min_insert_size}" : '' + opts += params.max_insert_size > 0 ? " -l ${params.max_insert_size}" : '' + opts += params.min_restriction_fragment_size > 0 ? " -t ${params.min_restriction_fragment_size}" : '' + opts += params.max_restriction_fragment_size > 0 ? " -m ${params.max_restriction_fragment_size}" : '' + opts += params.save_interaction_bam ? 
" --sam" : '' + prefix = pe_bam.toString() - ~/.bam/ + """ + mapped_2hic_fragments.py -f ${frag_file} -r ${pe_bam} --all ${opts} + sort -k2,2V -k3,3n -k5,5V -k6,6n -o ${prefix}.validPairs ${prefix}.validPairs + """ +} diff --git a/modules/local/get_valid_interaction_dnase.nf b/modules/local/get_valid_interaction_dnase.nf new file mode 100644 index 0000000000000000000000000000000000000000..62c3ea4dfd8a1d86881a49d9078e0997151fb9a8 --- /dev/null +++ b/modules/local/get_valid_interaction_dnase.nf @@ -0,0 +1,33 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process get_valid_interaction_dnase{ + tag "$sample" + label 'process_low' + publishDir "${params.outdir}/hicpro/valid_pairs", mode: params.publish_dir_mode, + saveAs: {filename -> if (filename.endsWith("RSstat")) "stats/$filename" + else filename} + + input: + tuple val(sample), path(pe_bam) + + output: + tuple val(sample), path("*.validPairs"), emit:valid_pairs + tuple val(sample), path("*.validPairs"), emit:valid_pairs_4cool + tuple val(sample), path("*RSstat"), emit:all_rsstat + + script: + if (params.split_fastq){ + sample = sample.toString() - ~/(\.[0-9]+)$/ + } + + opts = params.min_cis_dist > 0 ? " -d ${params.min_cis_dist}" : '' + prefix = pe_bam.toString() - ~/.bam/ + """ + mapped_2hic_dnase.py -r ${pe_bam} ${opts} + sort -k2,2V -k3,3n -k5,5V -k6,6n -o ${prefix}.validPairs ${prefix}.validPairs + """ +} diff --git a/modules/local/makeBowtie2Index.nf b/modules/local/makeBowtie2Index.nf new file mode 100644 index 0000000000000000000000000000000000000000..f38d951949173052c967917e11eeeba6e3f2c9f8 --- /dev/null +++ b/modules/local/makeBowtie2Index.nf @@ -0,0 +1,41 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process MAKE_BOWTIE2_INDEX { + tag "$fasta_base" + label 'process_highmem' + + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? 
"bioconda::bowtie2=2.3.5" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/bowtie2:2.3.5--py37he860b03_0" + } else { + container "quay.io/biocontainers/bowtie2:2.3.5--py37he860b03_0" + } + + input: + path fasta + + output: + path "bowtie2_index", emit: bwt2_index_end2end + path "bowtie2_index", emit: bwt2_index_trim + + script: + fasta_base = fasta.toString() - ~/(\.fa)?(\.fasta)?(\.fas)?(\.fsa)?$/ + """ + mkdir bowtie2_index + bowtie2-build ${fasta} bowtie2_index/${fasta_base} + """ +} diff --git a/modules/local/makeChromSize.nf b/modules/local/makeChromSize.nf new file mode 100644 index 0000000000000000000000000000000000000000..ffe198cf48ed51ff4463f850627b10faeb742721 --- /dev/null +++ b/modules/local/makeChromSize.nf @@ -0,0 +1,24 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process makeChromSize { + tag "$fasta" + label 'process_low' + publishDir path: { params.save_reference ? "${params.outdir}/reference_genome" : params.outdir }, + saveAs: { params.save_reference ? it : null }, mode: params.publish_dir_mode + + input: + path fasta + + output: + path "*.size", emit: chrsize_compartments + + script: + """ + samtools faidx ${fasta} + cut -f1,2 ${fasta}.fai > chrom.size + """ +} diff --git a/modules/local/merge_stats.nf b/modules/local/merge_stats.nf new file mode 100644 index 0000000000000000000000000000000000000000..336baa3df0ad3226113682a61fb5f9a73517b067 --- /dev/null +++ b/modules/local/merge_stats.nf @@ -0,0 +1,30 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process merge_stats { + tag "$ext" + label 'process_low' + publishDir "${params.outdir}/hicpro/", mode: params.publish_dir_mode, + saveAs: {filename -> if (filename.endsWith("stat")) "stats/$filename"} + + input: + tuple val(prefix), path(fstat) + + output: + path("stats/"), emit:mqc_mstats + path("*stat"), emit:all_mstats + + script: + sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2|_1|_2)/ + if ( (fstat =~ /.mapstat/) ){ ext = "mmapstat" } + if ( (fstat =~ /.pairstat/) ){ ext = "mpairstat" } + if ( (fstat =~ /.RSstat/) ){ ext = "mRSstat" } + """ + merge_statfiles.py -f ${fstat} > ${prefix}.${ext} + mkdir -p stats/${sample} + cp ${prefix}.${ext} stats/${sample}/ + """ +} diff --git a/modules/local/output_documentation.nf b/modules/local/output_documentation.nf new file mode 100644 index 0000000000000000000000000000000000000000..7e49c6a79cbfec9b348734c0c05d14794f5003f1 --- /dev/null +++ b/modules/local/output_documentation.nf @@ -0,0 +1,21 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process output_documentation { + publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode + + input: + path output_docs + path images + + output: + path 'results_description.html' + + script: + """ + markdown_to_html.py $output_docs -o results_description.html + """ +} diff --git a/modules/local/remove_duplicates.nf b/modules/local/remove_duplicates.nf new file mode 100644 index 0000000000000000000000000000000000000000..a8b10936b9d7c9f8f3465a4c0232ac3afdd2cc81 --- /dev/null +++ b/modules/local/remove_duplicates.nf @@ -0,0 
+1,58 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process remove_duplicates { + tag "$sample" + label 'process_highmem' + publishDir "${params.outdir}/hicpro/valid_pairs", mode: params.publish_dir_mode, + saveAs: {filename -> if (filename.endsWith("mergestat")) "stats/$filename" + else if (filename.endsWith("allValidPairs")) "$filename"} + input: + tuple val(sample), path(vpairs) + + output: + tuple val(sample), path("*.allValidPairs"), emit: ch_vpairs_cool + path("stats/"), emit:mqc_mergestat + path("*mergestat"), emit:all_mergestat + + script: + if ( ! params.keep_dups ){ + """ + mkdir -p stats/${sample} + + ## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs) + sort -S 50% -k2,2V -k3,3n -k5,5V -k6,6n -m ${vpairs} | \\ + awk -F"\\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=\$2 || c2!=\$5 || s1!=\$3 || s2!=\$6){print;c1=\$2;c2=\$5;s1=\$3;s2=\$6}' > ${sample}.allValidPairs + + echo -n "valid_interaction\t" > ${sample}_allValidPairs.mergestat + cat ${vpairs} | wc -l >> ${sample}_allValidPairs.mergestat + echo -n "valid_interaction_rmdup\t" >> ${sample}_allValidPairs.mergestat + cat ${sample}.allValidPairs | wc -l >> ${sample}_allValidPairs.mergestat + + ## Count short range (<20000) vs long range contacts + awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${sample}.allValidPairs >> ${sample}_allValidPairs.mergestat + + ## For MultiQC + mkdir -p stats/${sample} + cp ${sample}_allValidPairs.mergestat stats/${sample}/ + """ + }else{ + """ + cat ${vpairs} > ${sample}.allValidPairs + echo -n "valid_interaction\t" > ${sample}_allValidPairs.mergestat + cat ${vpairs} | wc -l >> ${sample}_allValidPairs.mergestat + echo -n "valid_interaction_rmdup\t" >> ${sample}_allValidPairs.mergestat + cat ${sample}.allValidPairs | wc -l >> ${sample}_allValidPairs.mergestat + + ## Count short range (<20000) vs long range contacts + awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${sample}.allValidPairs >> ${sample}_allValidPairs.mergestat + + ## For MultiQC + mkdir -p stats/${sample} + cp ${sample}_allValidPairs.mergestat stats/${sample}/ + """ + } +} diff --git a/modules/local/run_ice.nf b/modules/local/run_ice.nf new file mode 100644 index 0000000000000000000000000000000000000000..ffdafb5db6f655bddc7ad42a20251fb8e03b0630 --- /dev/null +++ b/modules/local/run_ice.nf @@ -0,0 +1,30 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process run_ice{ + tag "$rmaps" + label 'process_highmem' + publishDir "${params.outdir}/hicpro/matrix/iced", mode: params.publish_dir_mode + + when: + !params.skip_maps && !params.skip_balancing && params.hicpro_maps + + input: + tuple val(sample), val(res), path(rmaps), path(bed) + + output: + tuple val(sample), val(res), path("*iced.matrix"), path(bed), emit:hicpro_iced_maps + path ("*.biases"), emit:hicpro_iced_bias + + script: + prefix = rmaps.toString() - ~/(\.matrix)?$/ + """ + ice 
+// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process RUN_ICE { + tag "$rmaps" + label 'process_highmem' + publishDir "${params.outdir}/hicpro/matrix/iced", mode: params.publish_dir_mode + + when: + !params.skip_maps && !params.skip_balancing && params.hicpro_maps + + input: + tuple val(sample), val(res), path(rmaps), path(bed) + + output: + tuple val(sample), val(res), path("*iced.matrix"), path(bed), emit: hicpro_iced_maps + path ("*.biases"), emit: hicpro_iced_bias + + script: + prefix = rmaps.toString() - ~/(\.matrix)?$/ + """ + ice --filter_low_counts_perc ${params.ice_filter_low_count_perc} \ + --results_filename ${prefix}_iced.matrix \ + --filter_high_counts_perc ${params.ice_filter_high_count_perc} \ + --max_iter ${params.ice_max_iter} --eps ${params.ice_eps} --remove-all-zeros-loci --output-bias 1 --verbose 1 ${rmaps} + """ +}
diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf new file mode 100644 index 0000000000000000000000000000000000000000..96ff67f747ec8144d48fee8bcafe412c5bf5ee95 --- /dev/null +++ b/modules/local/samplesheet_check.nf @@ -0,0 +1,32 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process SAMPLESHEET_CHECK { + tag "$samplesheet" + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'pipeline_info', meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/python:3.8.3" + } else { + container "quay.io/biocontainers/python:3.8.3" + } + + input: + path samplesheet + + output: + path '*.csv' + + script: // This script is bundled with the pipeline, in nf-core/hic/bin/ + """ + check_samplesheet.py \\ + $samplesheet \\ + samplesheet.valid.csv + """ +}
diff --git a/modules/local/tads_hicexplorer.nf b/modules/local/tads_hicexplorer.nf new file mode 100644 index 0000000000000000000000000000000000000000..0bb4088798064ef3937fb8b06473fd74e7b6225b --- /dev/null +++ b/modules/local/tads_hicexplorer.nf @@ -0,0 +1,28 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process TADS_HICEXPLORER { + tag "$sample - $res" + label 'process_medium' + publishDir "${params.outdir}/tads/hicexplorer", mode: params.publish_dir_mode + + when: + !params.skip_tads && params.tads_caller =~ 'hicexplorer' + + input: + tuple val(sample), val(res), path(cool), val(r) + + output: + path("*.{bed,bedgraph,gff}"), emit: hicexplorer_tads + + script: + """ + hicFindTADs --matrix ${cool} \ + --outPrefix tad \ + --correctForMultipleTesting fdr \ + --numberOfProcessors ${task.cpus} + """ +}
diff --git a/modules/local/tads_insulation.nf b/modules/local/tads_insulation.nf new file mode 100644 index 0000000000000000000000000000000000000000..7338cd39ed982e1552ae59049fad244d3c15883b --- /dev/null +++ b/modules/local/tads_insulation.nf @@ -0,0 +1,25 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process TADS_INSULATION { + tag "$sample - $res" + label 'process_medium' + publishDir "${params.outdir}/tads/insulation", mode: params.publish_dir_mode + + when: + !params.skip_tads && params.tads_caller =~ 'insulation' + + input: + tuple val(sample), val(res), path(cool), val(r) + + output: + path("*tsv"), emit: insulation_tads + + script: + """ + cooltools diamond-insulation --window-pixels ${cool} 15 25 50 > ${sample}_insulation.tsv + """ +}
diff --git a/modules/local/trim_reads.nf b/modules/local/trim_reads.nf new file mode 100644 index 0000000000000000000000000000000000000000..3b893cc3d0489c266a5d1b7299e57dee7c16c6fd --- /dev/null +++ b/modules/local/trim_reads.nf @@ -0,0 +1,29 @@
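+// trim_reads: trim reads at the detected ligation site before the second (trimmed) mapping step; cutsite_trimming is assumed to be the HiC-Pro utility shipped with this pipeline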
+// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process trim_reads { + tag "$sample" + label 'process_low' + publishDir path: { params.save_aligned_intermediates ? "${params.outdir}/mapping/bwt2_trimmed" : params.outdir }, + saveAs: { filename -> if (params.save_aligned_intermediates) filename }, mode: params.publish_dir_mode + + when: + !params.dnase + + input: + tuple val(sample), path(reads) + + output: + tuple val(sample), path("${prefix}_trimmed.fastq"), emit:trimmed_reads + + script: + prefix = reads.toString() - ~/(\.fq)?(\.fastq)?(\.gz)?$/ + """ + cutsite_trimming --fastq $reads \\ + --cutsite ${params.ligation_site} \\ + --out ${prefix}_trimmed.fastq + """ +} diff --git a/modules/nf-core/modules/fastqc/functions.nf b/modules/nf-core/modules/fastqc/functions.nf new file mode 100644 index 0000000000000000000000000000000000000000..da9da093d3f6025e328759a12adc2c1c9ede0d03 --- /dev/null +++ b/modules/nf-core/modules/fastqc/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/modules/fastqc/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..39c327b261f7a590f48f64b6dd0eaf24f44ef926 --- /dev/null +++ b/modules/nf-core/modules/fastqc/main.nf @@ -0,0 +1,47 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process FASTQC { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0" + } else { + container "quay.io/biocontainers/fastqc:0.11.9--0" + } + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.zip") , emit: zip + path "*.version.txt" , emit: version + + script: + // Add soft-links to original FastQs for consistent naming in pipeline + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz + fastqc $options.args --threads $task.cpus ${prefix}.fastq.gz + fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt + """ + } else { + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz + fastqc $options.args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz + fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt + """ + } +} diff --git a/modules/nf-core/modules/fastqc/meta.yml b/modules/nf-core/modules/fastqc/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..8eb9953dce50e37c6ead461861bc39a1436308f2 --- /dev/null +++ b/modules/nf-core/modules/fastqc/meta.yml @@ -0,0 +1,51 @@ +name: fastqc +description: Run FastQC on sequenced reads +keywords: + - quality control + - qc + - adapters + - fastq +tools: + - fastqc: + description: | + FastQC gives general quality metrics about your reads. + It provides information about the quality score distribution + across your reads, the per base sequence content (%A/C/G/T). + You get information about adapter contamination and other + overrepresented sequences. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" +authors: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/modules/multiqc/functions.nf b/modules/nf-core/modules/multiqc/functions.nf new file mode 100644 index 0000000000000000000000000000000000000000..da9da093d3f6025e328759a12adc2c1c9ede0d03 --- /dev/null +++ b/modules/nf-core/modules/multiqc/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..da78080024721aee1d944f954150fa9352352aca --- /dev/null +++ b/modules/nf-core/modules/multiqc/main.nf @@ -0,0 +1,35 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process MULTIQC { + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? "bioconda::multiqc=1.10.1" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/multiqc:1.10.1--py_0" + } else { + container "quay.io/biocontainers/multiqc:1.10.1--py_0" + } + + input: + path multiqc_files + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "*.version.txt" , emit: version + + script: + def software = getSoftwareName(task.process) + """ + multiqc -f $options.args . + multiqc --version | sed -e "s/multiqc, version //g" > ${software}.version.txt + """ +} diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/modules/multiqc/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..532a8bb1eff7f38e4f38dc1e36f2ce1f6c6657d1 --- /dev/null +++ b/modules/nf-core/modules/multiqc/meta.yml @@ -0,0 +1,39 @@ +name: MultiQC +description: Aggregate results from bioinformatics analyses across many samples into a single report +keywords: + - QC + - bioinformatics tools + - Beautiful stand-alone HTML report +tools: + - multiqc: + description: | + MultiQC searches a given directory for analysis logs and compiles a HTML report. + It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. 
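+ In this pipeline it is fed the FastQC reports and the collected software versions (see workflows/hic.nf).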
+ homepage: https://multiqc.info/ + documentation: https://multiqc.info/docs/ +input: + - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC +output: + - report: + type: file + description: MultiQC report file + pattern: "multiqc_report.html" + - data: + type: dir + description: MultiQC data dir + pattern: "multiqc_data" + - plots: + type: dir + description: Plots created by MultiQC + pattern: "*_plots" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" +authors: + - "@abhi18av" + - "@bunop" + - "@drpatelh"
diff --git a/subworkflows/local/compartments.nf b/subworkflows/local/compartments.nf new file mode 100644 index 0000000000000000000000000000000000000000..e3089f3eb3cd3359eec2ede37e27aae8273d0527 --- /dev/null +++ b/subworkflows/local/compartments.nf @@ -0,0 +1,15 @@ +params.options = [:] + +include { COMPARTMENT_CALLING } from '../../modules/local/compartment_calling' addParams( options: params.options ) + +workflow COMPARTMENTS { + + take: + + + main: + + + emit: + +} \ No newline at end of file
diff --git a/subworkflows/local/cooler.nf b/subworkflows/local/cooler.nf new file mode 100644 index 0000000000000000000000000000000000000000..e4a908d190e50b2f7afcb3271351e5820c47a56d --- /dev/null +++ b/subworkflows/local/cooler.nf @@ -0,0 +1,17 @@ +params.options = [:] + +include { COOLER_RAW } from '../../modules/local/cooler_raw' addParams( options: params.options ) +include { COOLER_BALANCE } from '../../modules/local/cooler_balance' addParams( options: params.options ) +include { COOLER_ZOOMIFY } from '../../modules/local/cooler_zoomify' addParams( options: params.options ) + +workflow COOLER { + + take: + + + main: + + + emit: + +} \ No newline at end of file
diff --git a/subworkflows/local/hicpro.nf b/subworkflows/local/hicpro.nf new file mode 100644 index 0000000000000000000000000000000000000000..46a418abdee7a1b5c0c568e05715a71781a1b70c --- /dev/null +++ b/subworkflows/local/hicpro.nf @@ -0,0 +1,26 @@ +params.options = [:] + +include { BOWTIE2_END_TO_END } from '../../modules/local/bowtie2_end_to_end' addParams( options: params.options ) +include { BOWTIE2_ON_TRIMMED_READS } from '../../modules/local/bowtie2_on_trimmed_reads' addParams( options: params.options ) +include { BOWTIE2_MERGE_MAPPING_STEPS } from '../../modules/local/bowtie2_merge_mapping_steps' addParams( options: params.options ) +include { DNASE_MAPPING_STATS } from '../../modules/local/dnase_mapping_stats' addParams( options: params.options ) +include { COMBINE_MATES } from '../../modules/local/combine_mates' addParams( options: params.options ) +include { GET_VALID_INTERACTION } from '../../modules/local/get_valid_interaction' addParams( options: params.options ) +include { GET_VALID_INTERACTION_DNASE } from '../../modules/local/get_valid_interaction_dnase' addParams( options: params.options ) +include { REMOVE_DUPLICATES } from '../../modules/local/remove_duplicates' addParams( options: params.options ) +include { MERGE_STATS } from '../../modules/local/merge_stats' addParams( options: params.options ) +include { BUILD_CONTACT_MAPS } from '../../modules/local/build_contact_maps' addParams( options: params.options ) +include { RUN_ICE } from '../../modules/local/run_ice' addParams( options: params.options ) +include { CONVERT_TO_PAIRS } from '../../modules/local/convert_to_pairs' addParams( options: params.options ) + +workflow HIC_PRO { + + take: + + + main: + + + emit: + +}
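+ +// Minimal wiring sketch (illustration only; channel names are assumptions and the stub above is not yet connected): +// take: reads // channel: [ val(meta), [ reads ] ] +// main: BOWTIE2_END_TO_END ( reads ) +// GET_VALID_INTERACTION ( COMBINE_MATES.out ) +// REMOVE_DUPLICATES ( GET_VALID_INTERACTION.out ) +// emit: valid_pairs = REMOVE_DUPLICATES.out.ch_vpairs_cool
diff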
--git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf new file mode 100644 index 0000000000000000000000000000000000000000..b664bc8caf10c1ee8b810f3108e0eedefc6b398b --- /dev/null +++ b/subworkflows/local/input_check.nf @@ -0,0 +1,42 @@ +// +// Check input samplesheet and get read channels +// + +params.options = [:] + +include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' addParams( options: params.options ) + +workflow INPUT_CHECK { + take: + samplesheet // file: /path/to/samplesheet.csv + + main: + SAMPLESHEET_CHECK ( samplesheet ) + .splitCsv ( header:true, sep:',' ) + .map { create_fastq_channels(it) } + .set { reads } + + emit: + reads // channel: [ val(meta), [ reads ] ] +} + +// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] +def create_fastq_channels(LinkedHashMap row) { + def meta = [:] + meta.id = row.sample + meta.single_end = row.single_end.toBoolean() + + def array = [] + if (!file(row.fastq_1).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" + } + if (meta.single_end) { + array = [ meta, [ file(row.fastq_1) ] ] + } else { + if (!file(row.fastq_2).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" + } + array = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] + } + return array +} diff --git a/subworkflows/local/tads.nf b/subworkflows/local/tads.nf new file mode 100644 index 0000000000000000000000000000000000000000..9f0836af8542b012253e82afa63b8ebd45bd5bf4 --- /dev/null +++ b/subworkflows/local/tads.nf @@ -0,0 +1,16 @@ +params.options = [:] + +include { TADS_HICEXPLORER } from '../../modules/local/tads_hicexplorer' addParams( options: params.options ) +include { TADS_INSULATION } from '../../modules/local/tads_insulation' addParams( options: params.options ) + +workflow TADS { + + take: + + + main: + + + emit: + +} \ No newline at end of file diff --git a/workflows/hic.nf b/workflows/hic.nf new file mode 100644 index 0000000000000000000000000000000000000000..991eb0a052d039cdeb198206cd1ab37090857112 --- /dev/null +++ b/workflows/hic.nf @@ -0,0 +1,147 @@ +/* +======================================================================================== + VALIDATE INPUTS +======================================================================================== +*/ + +def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) + +// Validate input parameters +WorkflowHic.initialise(params, log) + +// TODO nf-core: Add all file path parameters for the pipeline to the list below +// Check input path parameters to see if they exist +def checkPathParamList = [ params.input, params.multiqc_config, params.fasta ] +for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +// Check mandatory parameters +if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } + +/* +======================================================================================== + CONFIG FILES +======================================================================================== +*/ + +ch_multiqc_config = file("$projectDir/assets/multiqc_config.yaml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? 
Channel.fromPath(params.multiqc_config) : Channel.empty() + +/* +======================================================================================== + IMPORT LOCAL MODULES/SUBWORKFLOWS +======================================================================================== +*/ + +// Don't overwrite global params.modules, create a copy instead and use that within the main script. +def modules = params.modules.clone() + +// +// MODULE: Local to the pipeline +// +include { GET_SOFTWARE_VERSIONS } from '../modules/local/get_software_versions' addParams( options: [publish_files : ['tsv':'']] ) +include { OUTPUT_DOCUMENTATION } from '../modules/local/output_documentation' addParams( options: [publish_files : ['tsv':'']] ) + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// +include { INPUT_CHECK } from '../subworkflows/local/input_check' addParams( options: [:] ) +include { HIC_PRO } from '../subworkflows/local/hicpro' addParams( options: [:] ) +include { COOLER } from '../subworkflows/local/cooler' addParams( options: [:] ) +include { COMPARTMENTS } from '../subworkflows/local/compartments' addParams( options: [:] ) +include { TADS } from '../subworkflows/local/tads' addParams( options: [:] ) +/* +======================================================================================== + IMPORT NF-CORE MODULES/SUBWORKFLOWS +======================================================================================== +*/ + +def multiqc_options = modules['multiqc'] +multiqc_options.args += params.multiqc_title ? Utils.joinModuleArgs(["--title \"$params.multiqc_title\""]) : '' + +// +// MODULE: Installed directly from nf-core/modules +// +include { FASTQC } from '../modules/nf-core/modules/fastqc/main' addParams( options: modules['fastqc'] ) +include { MULTIQC } from '../modules/nf-core/modules/multiqc/main' addParams( options: multiqc_options ) + +/* +======================================================================================== + RUN MAIN WORKFLOW +======================================================================================== +*/ + +// Info required for completion email and summary +def multiqc_report = [] + +workflow HIC { + + ch_software_versions = Channel.empty() + + // + // SUBWORKFLOW: Read in samplesheet, validate and stage input files + // + INPUT_CHECK ( + ch_input + ) + + // + // MODULE: Run FastQC + // + FASTQC ( + INPUT_CHECK.out.reads + ) + ch_software_versions = ch_software_versions.mix(FASTQC.out.version.first().ifEmpty(null)) + + // + // MODULE: Pipeline reporting + // + ch_software_versions + .map { it -> if (it) [ it.baseName, it ] } + .groupTuple() + .map { it[1][0] } + .flatten() + .collect() + .set { ch_software_versions } + + GET_SOFTWARE_VERSIONS ( + ch_software_versions.map { it }.collect() + ) + + // + // MODULE: MultiQC + // + workflow_summary = WorkflowHic.paramsSummaryMultiqc(workflow, summary_params) + ch_workflow_summary = Channel.value(workflow_summary) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) + ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(GET_SOFTWARE_VERSIONS.out.yaml.collect()) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + + MULTIQC ( + ch_multiqc_files.collect() + ) + multiqc_report = MULTIQC.out.report.toList() + 
ch_software_versions = ch_software_versions.mix(MULTIQC.out.version.ifEmpty(null)) +} + +/* +======================================================================================== + COMPLETION EMAIL AND SUMMARY +======================================================================================== +*/ + +workflow.onComplete { + if (params.email || params.email_on_fail) { + NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) + } + NfcoreTemplate.summary(workflow, params, log) +} + +/* +======================================================================================== + THE END +======================================================================================== +*/