Skip to content
Snippets Groups Projects
Unverified Commit 01aba97f authored by Laurent Modolo's avatar Laurent Modolo
Browse files

SNP_calling.nf: split BAM conversion and sort to avoid filling /tmp

parent 2d8f8287
No related branches found
No related tags found
No related merge requests found
...@@ -18,9 +18,15 @@ profiles { ...@@ -18,9 +18,15 @@ profiles {
withName: dedup_sam { withName: dedup_sam {
container = "samblaster:0.1.24" container = "samblaster:0.1.24"
} }
withName: sam_to_bam {
container = "sambamba:0.6.7"
}
withName: sort_bam { withName: sort_bam {
container = "sambamba:0.6.7" container = "sambamba:0.6.7"
} }
withName: name_fasta {
container = "samtools:1.7"
}
withName: index_bam { withName: index_bam {
container = "sambamba:0.6.7" container = "sambamba:0.6.7"
} }
......
...@@ -120,36 +120,71 @@ samblaster --addMateTags -i ${sam} -o ${file_id}_dedup.sam ...@@ -120,36 +120,71 @@ samblaster --addMateTags -i ${sam} -o ${file_id}_dedup.sam
""" """
} }
process sort_bam { process sam_to_bam {
tag "$file_id" tag "$file_id"
cpus 4 cpus 4
publishDir "results/mapping/bam/1_dedup/", mode: 'copy'
input: input:
set file_id, file(sam) from dedup_sam_files set file_id, file(sam) from dedup_sam_files
output:
set file_id, "*.bam" into dedup_bam_files
script:
"""
sambamba view -t ${task.cpus} -S -f bam -l 0 ${sam} -o ${file_id}.bam
"""
}
process sort_bam {
tag "$file_id"
cpus 4
input:
set file_id, file(bam) from dedup_bam_files
output: output:
set file_id, "*_sorted.bam" into sorted_bam_files set file_id, "*_sorted.bam" into sorted_bam_files
script: script:
""" """
sambamba view -t ${task.cpus} -S -f bam -l 0 ${sam} | \ sambamba sort -t ${task.cpus} -o ${file_id}_sorted.bam ${bam}
sambamba sort -t ${task.cpus} -o ${file_id}_sorted.bam /dev/stdin
""" """
} }
sorted_bam_files.into{ process name_bam {
index_sorted_bam_files; tag "$file_id"
haplotypecaller_sorted_bam_files cpus 4
publishDir "results/mapping/bam/", mode: 'copy'
input:
set file_id, file(bam) from sorted_bam_files
output:
set file_id, "*_named.bam" into named_bam_files
script:
"""
samtools view -H ${bam} > header.sam
echo "@RG\tID:${file_id}\tLB:library1\tPL:illumina\tPU:${file_id}\tSM:${file_id}" \
>> header.sam
cp ${bam} ${file_id}_named.bam
samtools reheader header.sam ${file_id}_named.bam
"""
}
named_bam_files.into{
index_named_bam_files;
haplotypecaller_named_bam_files
} }
process index_bam { process index_bam {
tag "$file_id" tag "$file_id"
cpus 4 cpus 4
publishDir "results/mapping/bam/2_realigned/", mode: 'copy' publishDir "results/mapping/bam/", mode: 'copy'
input: input:
set file_id, file(bam) from index_sorted_bam_files set file_id, file(bam) from index_named_bam_files
output: output:
set file_id, "*.bam*" into indexed_bam_files set file_id, "*.bam*" into indexed_bam_files
...@@ -167,8 +202,8 @@ haplotypecaller_fasta_file.into{ ...@@ -167,8 +202,8 @@ haplotypecaller_fasta_file.into{
} }
process index2_fasta { process index2_fasta {
tag "$file_id" tag "$genome_id"
publishDir "results/mapping/bam/2_realigned/", mode: 'copy' publishDir "results/fasta/", mode: 'copy'
input: input:
set genome_id, file(fasta) from index2_fasta_file set genome_id, file(fasta) from index2_fasta_file
...@@ -183,8 +218,8 @@ gatk CreateSequenceDictionary -R ${fasta} &> gatk_output.txt ...@@ -183,8 +218,8 @@ gatk CreateSequenceDictionary -R ${fasta} &> gatk_output.txt
} }
process index3_fasta { process index3_fasta {
tag "$file_id" tag "$genome_id"
publishDir "results/mapping/bam/2_realigned/", mode: 'copy' publishDir "results/fasta/", mode: 'copy'
input: input:
set genome_id, file(fasta) from index3_fasta_file set genome_id, file(fasta) from index3_fasta_file
...@@ -204,7 +239,7 @@ process HaplotypeCaller { ...@@ -204,7 +239,7 @@ process HaplotypeCaller {
publishDir "results/SNP/vcf/", mode: 'copy' publishDir "results/SNP/vcf/", mode: 'copy'
input: input:
set file_id, file(bam) from haplotypecaller_sorted_bam_files.collect() set file_id, file(bam) from haplotypecaller_named_bam_files.collect()
set file_ididx, file(bamidx) from indexed_bam_files.collect() set file_ididx, file(bamidx) from indexed_bam_files.collect()
set genome_id, file(fasta) from haplo_fasta_file.collect() set genome_id, file(fasta) from haplo_fasta_file.collect()
set genome2_idx, file(fasta2idx) from indexed2_fasta_file.collect() set genome2_idx, file(fasta2idx) from indexed2_fasta_file.collect()
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment