From e92a40852c88caaff1e9d6411464bd0e831282d3 Mon Sep 17 00:00:00 2001 From: Mia Croiset <mia.croiset@ens-lyon.fr> Date: Mon, 12 Jun 2023 16:51:35 +0200 Subject: [PATCH] refactorisation remove gatk module --- .../custom/gatk4/markduplicates/main.nf | 54 -------------- .../custom/gatk4/markduplicates/meta.yml | 72 ------------------- subworkflows/local/hicstuff_sub.nf | 4 -- 3 files changed, 130 deletions(-) delete mode 100644 modules/nf-core/custom/gatk4/markduplicates/main.nf delete mode 100644 modules/nf-core/custom/gatk4/markduplicates/meta.yml diff --git a/modules/nf-core/custom/gatk4/markduplicates/main.nf b/modules/nf-core/custom/gatk4/markduplicates/main.nf deleted file mode 100644 index 223fa7c..0000000 --- a/modules/nf-core/custom/gatk4/markduplicates/main.nf +++ /dev/null @@ -1,54 +0,0 @@ -process GATK4_MARKDUPLICATES { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::gatk4=4.4.0.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" - - input: - tuple val(meta), path(bam) - path fasta - path fasta_fai - - output: - tuple val(meta), path("*cram"), emit: cram, optional: true - tuple val(meta), path("*bam"), emit: bam, optional: true - tuple val(meta), path("*.crai"), emit: crai, optional: true - tuple val(meta), path("*.bai"), emit: bai, optional: true - tuple val(meta), path("*.metrics"), emit: metrics - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def input_list = bam.collect{"--INPUT $it"}.join(' ') - def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" - - def avail_mem = 3072 - if (!task.memory) { - log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() - } - """ - gatk --java-options "-Xmx${avail_mem}M" MarkDuplicates \\ - $input_list \\ - --OUTPUT ${prefix} \\ - --METRICS_FILE ${prefix}.metrics \\ - --TMP_DIR . \\ - ${reference} \\ - $args - if [[ ${prefix} == *.cram ]]&&[[ -f ${prefix}.bai ]]; then - mv ${prefix}.bai ${prefix}.crai - fi - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} \ No newline at end of file diff --git a/modules/nf-core/custom/gatk4/markduplicates/meta.yml b/modules/nf-core/custom/gatk4/markduplicates/meta.yml deleted file mode 100644 index ae7443d..0000000 --- a/modules/nf-core/custom/gatk4/markduplicates/meta.yml +++ /dev/null @@ -1,72 +0,0 @@ -name: gatk4_markduplicates -description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. -keywords: - - markduplicates - - bam - - sort -tools: - - gatk4: - description: - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- - tool_dev_url: https://github.com/broadinstitute/gatk - doi: 10.1158/1538-7445.AM2017-3590 - licence: ["MIT"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: Sorted BAM file - pattern: "*.{bam}" - - fasta: - type: file - description: Fasta file - pattern: "*.{fasta}" - - fasta_fai: - type: file - description: Fasta index file - pattern: "*.{fai}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - bam: - type: file - description: Marked duplicates BAM file - pattern: "*.{bam}" - - cram: - type: file - description: Marked duplicates CRAM file - pattern: "*.{cram}" - - bai: - type: file - description: BAM index file - pattern: "*.{bam.bai}" - - crai: - type: file - description: CRAM index file - pattern: "*.{cram.crai}" - - metrics: - type: file - description: Duplicate metrics file generated by GATK - pattern: "*.{metrics.txt}" - -authors: - - "@ajodeh-juma" - - "@FriederikeHanssen" - - "@maxulysse" \ No newline at end of file diff --git a/subworkflows/local/hicstuff_sub.nf b/subworkflows/local/hicstuff_sub.nf index 9bb3406..221d646 100644 --- a/subworkflows/local/hicstuff_sub.nf +++ b/subworkflows/local/hicstuff_sub.nf @@ -8,7 +8,6 @@ include { BUILD_MATRIX_COOL_ALT } from '../../modules/local/hicstuff/build_matri include { FILTER_EVENT } from '../../modules/local/hicstuff/filter_event' include { DISTANCE_LAW } from '../../modules/local/hicstuff/distance_law' include { FILTER_PCR } from '../../modules/local/hicstuff/filter_pcr' -include { GATK4_MARKDUPLICATES } from '../../modules/nf-core/custom/gatk4/markduplicates/main' include { SAMTOOLS_SORT } from '../../modules/nf-core/custom/samtools/sort/main' include { SAMTOOLS_SORT_N } from '../../modules/nf-core/custom/samtools_n/sort/main' include { FILTER_PAIR } from '../../modules/local/filterbam/main' @@ -137,9 +136,6 @@ workflow HICSTUFF_SUB { FILTER_PCR.out.idx_pairs_pcrfree.set{ ch_idx_pairs } } - //TODO rajouter filtres + distance law + filtres PCR en options - // pour les PCR filter, soit le hicstuff soit PICARD - BUILD_MATRIX( ch_idx_pairs, FRAGMENT_ENZYME.out.fragments_list.collect() -- GitLab