Skip to content
Snippets Groups Projects
Verified Commit b607dc0e authored by Mia Croiset's avatar Mia Croiset
Browse files

TO FIX markduplicates picard

parent 59bd5bf1
No related branches found
No related tags found
No related merge requests found
......@@ -245,6 +245,17 @@ process {
]
}
withName: 'PICARD_MARKDUPLICATES' {
ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" }
ext.args = { [
"--REMOVE_DUPLICATES true"
].join('').trim() }
publishDir = [
path: { "${params.outdir}/picard/bam" },
mode: 'copy'
]
}
withName: 'SAMTOOLS_SORT' {
ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}_sorted" }
ext.args = { [
......@@ -252,6 +263,17 @@ process {
].join('').trim() }
}
withName: 'SAMTOOLS_SORT_N' {
ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}_nsorted" }
ext.args = { [
"-n"
].join('').trim() }
publishDir = [
path: { "${params.outdir}/gatk4/bam" },
mode: 'copy'
]
}
withName: 'SAMTOOLS_INDEX' {
ext.args = { [
""
......
process PICARD_MARKDUPLICATES {
tag "$meta.id"
label 'process_medium'
conda "bioconda::picard=3.0.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' :
'quay.io/biocontainers/picard:3.0.0--hdfd78af_1' }"
input:
tuple val(meta), path(bam)
tuple val(meta2), path(fasta)
tuple val(meta3), path(fai)
output:
tuple val(meta), path("*.bam") , emit: bam
tuple val(meta), path("*.bai") , optional:true, emit: bai
tuple val(meta), path("*.metrics.txt"), emit: metrics
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def avail_mem = 3072
if (!task.memory) {
log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = (task.memory.mega*0.8).intValue()
}
"""
picard \\
-Xmx${avail_mem}M \\
MarkDuplicates \\
$args \\
--INPUT $bam \\
--OUTPUT ${prefix}.bam \\
--REFERENCE_SEQUENCE $fasta \\
--METRICS_FILE ${prefix}.MarkDuplicates.metrics.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:)
END_VERSIONS
"""
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.bam
touch ${prefix}.bam.bai
touch ${prefix}.MarkDuplicates.metrics.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:)
END_VERSIONS
"""
}
\ No newline at end of file
name: picard_markduplicates
description: Locate and tag duplicate reads in a BAM file
keywords:
- markduplicates
- pcr
- duplicates
- bam
- sam
- cram
tools:
- picard:
description: |
A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS)
data and formats such as SAM/BAM/CRAM and VCF.
homepage: https://broadinstitute.github.io/picard/
documentation: https://broadinstitute.github.io/picard/
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: BAM file
pattern: "*.{bam,cram,sam}"
- meta2:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'genome' ]
- fasta:
type: file
description: Reference genome fasta file
pattern: "*.{fasta,fa}"
- meta3:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'genome' ]
- fai:
type: file
description: Reference genome fasta index
pattern: "*.{fai}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: BAM file with duplicate reads marked/removed
pattern: "*.{bam}"
- bai:
type: file
description: An optional BAM index file. If desired, --CREATE_INDEX must be passed as a flag
pattern: "*.{bai}"
- metrics:
type: file
description: Duplicate metrics file generated by picard
pattern: "*.{metrics.txt}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@drpatelh"
- "@projectoriented"
- "@ramprasadn"
\ No newline at end of file
process SAMTOOLS_SORT_N {
tag "$meta.id"
label 'process_high' //medium
conda "bioconda::samtools=1.17"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
'quay.io/biocontainers/samtools:1.17--h00cdaf9_0' }"
input:
tuple val(meta), path(bam)
output:
tuple val(meta), path("*.bam"), emit: bam
tuple val(meta), path("*.csi"), emit: csi, optional: true
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def sort_memory = (task.memory.mega/task.cpus).intValue()
if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
"""
samtools sort \\
$args \\
-@ $task.cpus \\
-m ${sort_memory}M \\
-o ${prefix}.bam \\
-T $prefix \\
$bam
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.bam
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
}
\ No newline at end of file
name: samtools_sort
description: Sort SAM/BAM/CRAM file
keywords:
- sort
- bam
- sam
- cram
tools:
- samtools:
description: |
SAMtools is a set of utilities for interacting with and post-processing
short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
These files are generated as output by short read aligners like BWA.
homepage: http://www.htslib.org/
documentation: http://www.htslib.org/doc/samtools.html
doi: 10.1093/bioinformatics/btp352
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: Sorted BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- csi:
type: file
description: BAM index file (optional)
pattern: "*.csi"
authors:
- "@drpatelh"
- "@ewels"
\ No newline at end of file
......@@ -8,9 +8,11 @@ include { BUILD_MATRIX_COOL_ALT } from '../../modules/local/hicstuff/build_matri
include { FILTER_EVENT } from '../../modules/local/hicstuff/filter_event'
include { DISTANCE_LAW } from '../../modules/local/hicstuff/distance_law'
include { FILTER_PCR } from '../../modules/local/hicstuff/filter_pcr'
include { GATK4_MARKDUPLICATES } from '../../modules/nf-core/custom/markduplicates/main'
include { GATK4_MARKDUPLICATES } from '../../modules/nf-core/custom/gatk4/markduplicates/main'
include { SAMTOOLS_SORT } from '../../modules/nf-core/custom/samtools/sort/main'
include { SAMTOOLS_SORT_N } from '../../modules/nf-core/custom/samtools_n/sort/main'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/custom/samtools/index/main'
include { PICARD_MARKDUPLICATES } from '../../modules/nf-core/custom/picard/markduplicates/main'
// Paired-end to Single-end
def pairToSingle(row, mates) {
......@@ -67,16 +69,36 @@ workflow HICSTUFF_SUB {
BOWTIE2_ALIGNMENT.out.bam
)
/* GATK4_MARKDUPLICATES(
SAMTOOLS_SORT.out.bam,
fasta.map{ it[1] }.collect(),
index.map{ it[1] }.collect()
)
SAMTOOLS_SORT_N(
GATK4_MARKDUPLICATES.out.bam
)
SAMTOOLS_INDEX(
SAMTOOLS_SORT.out.bam
SAMTOOLS_SORT_N.out.bam
)
*/
GATK4_MARKDUPLICATES(
PICARD_MARKDUPLICATES(
SAMTOOLS_SORT.out.bam,
fasta.map{ it[1] }.collect(),
index.map{ it[1] }.collect()
fasta.collect(),
index.collect()
)
SAMTOOLS_SORT_N(
PICARD_MARKDUPLICATES.out.bam
)
GATK4_MARKDUPLICATES.out.bam.set{ ch_bam }
SAMTOOLS_INDEX(
SAMTOOLS_SORT_N.out.bam
)
SAMTOOLS_SORT_N.out.bam.set{ ch_bam }
}
else {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment