Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
nextflow
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
LBMC
nextflow
Commits
36fafd89
Unverified
Commit
36fafd89
authored
6 years ago
by
Laurent Modolo
Browse files
Options
Downloads
Patches
Plain Diff
SNP_calling.nf: add SNP_filter step
parent
58e36e9e
No related branches found
No related tags found
1 merge request
!10
Module for BWA GATK sambamba samblaster and bcftools
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/SNP_calling.config
+15
-3
15 additions, 3 deletions
src/SNP_calling.config
src/SNP_calling.nf
+158
-73
158 additions, 73 deletions
src/SNP_calling.nf
with
173 additions
and
76 deletions
src/SNP_calling.config
+
15
−
3
View file @
36fafd89
...
...
@@ -9,6 +9,9 @@ profiles {
withName
:
trimming
{
container
=
"urqt:d62c1f8"
}
withName
:
filter_fasta
{
container
=
"bioawk:1.0"
}
withName
:
index_fasta
{
container
=
"bwa:0.7.17"
}
...
...
@@ -21,9 +24,6 @@ profiles {
withName
:
sort_bam
{
container
=
"sambamba:0.6.7"
}
withName
:
name_fasta
{
container
=
"samtools:1.7"
}
withName
:
index_bam
{
container
=
"sambamba:0.6.7"
}
...
...
@@ -36,6 +36,18 @@ profiles {
withName
:
HaplotypeCaller
{
container
=
"gatk:4.0.8.1"
}
withName
:
GetPileupSummaries
{
container
=
"gatk:4.0.8.1"
}
withName
:
CalculateContamination
{
container
=
"gatk:4.0.8.1"
}
withName
:
CollectSequencingArtifactMetrics
{
container
=
"gatk:4.0.8.1"
}
withName
:
filter_SNP
{
container
=
"gatk:4.0.8.1"
}
}
}
sge
{
...
...
This diff is collapsed.
Click to expand it.
src/SNP_calling.nf
+
158
−
73
View file @
36fafd89
params
.
fastq
=
"$baseDir/data/*.fastq"
params
.
fasta
=
"$baseDir/data/*.fasta"
params
.
seq_length
=
800000
log
.
info
"fastq files : ${params.fastq}"
log
.
info
"fasta files : ${params.fasta}"
log
.
info
"fasta length to retain : ${params.seq_length}"
def
normal_sample
=
Eval
.
me
(
params
.
normal
)
def
tumor_sample
=
Eval
.
me
(
params
.
tumor
)
log
.
info
"normal : ${normal_sample}"
...
...
@@ -11,11 +13,7 @@ Channel
.
fromPath
(
params
.
fasta
)
.
ifEmpty
{
error
"Cannot find any fasta files matching: ${params.fasta}"
}
.
map
{
it
->
[(
it
.
baseName
=~
/([^\.]*)/
)[
0
][
1
],
it
]}
.
into
{
fasta_file
;
indel_fasta_file
;
recalibration_fasta_file
;
haplotypecaller_fasta_file
}
.
set
{
fasta_file
}
Channel
.
fromFilePairs
(
params
.
fastq
)
.
ifEmpty
{
error
"Cannot find any fastq files matching: ${params.fastq}"
}
...
...
@@ -61,13 +59,39 @@ UrQt --t 20 --m ${task.cpus} --gz \
"""
}
process
filter_fasta
{
tag
"$fasta_id"
cpus
4
publishDir
"results/fasta/"
,
mode:
'copy'
input:
set
fasta_id
,
file
(
fasta
)
from
fasta_file
output:
set
fasta_idf
,
"*_filtered.fasta"
into
filter_fasta_files
script:
fasta_idf
=
"${fasta_id}_filtered"
"""
bioawk -c fastx '{ if(length(\$seq) > $params.seq_length) { print ">"\$name; print \$seq }}' ${fasta} > \
${fasta_id}_filtered.fasta
"""
}
filter_fasta_files
.
into
{
filtered_fasta_files
;
indel_fasta_file
;
recalibration_fasta_file
;
haplotypecaller_fasta_file
}
process
index_fasta
{
tag
"$fasta_id"
cpus
4
publishDir
"results/mapping/index/"
,
mode:
'copy'
input:
set
fasta_id
,
file
(
fasta
)
from
fasta_file
set
fasta_id
,
file
(
fasta
)
from
filtered_
fasta_file
s
output:
set
fasta_id
,
"${fasta.baseName}.*"
into
index_files
...
...
@@ -205,7 +229,7 @@ index_bam_files.into{
}
haplotypecaller_fasta_file
.
into
{
haplo
_fasta_file
;
final
_fasta_file
;
index2_fasta_file
index3_fasta_file
}
...
...
@@ -242,20 +266,49 @@ samtools faidx ${fasta}
"""
}
haplotypecaller
_bam_files_norm
=
haplo_bam_files_norm
final
_bam_files_norm
=
haplo_bam_files_norm
.
filter
{
"normal_sample"
==
it
[
0
]
}
haplotypecaller
_bam_files_tumor
=
haplo_bam_files_tumor
final
_bam_files_tumor
=
haplo_bam_files_tumor
.
filter
{
"tumor_sample"
==
it
[
0
]
}
indexed_bam_files
.
into
{
index_bam_files_norm
;
index_bam_files_tumor
}
indexed_bam_files_norm
=
index_bam_files_norm
final_
indexed_bam_files_norm
=
index_bam_files_norm
.
filter
{
"normal_sample"
==
it
[
0
]
}
indexed_bam_files_tumor
=
index_bam_files_tumor
final_
indexed_bam_files_tumor
=
index_bam_files_tumor
.
filter
{
"tumor_sample"
==
it
[
0
]
}
final_bam_files_norm
.
set
{
haplotypecaller_bam_files_norm
}
final_bam_files_tumor
.
into
{
haplotypecaller_bam_files_tumor
;
artifact_bam_files_tumor
;
pileup_bam_files_tumor
}
final_indexed_bam_files_norm
.
set
{
haplo_index_bam_files_norm
}
final_indexed_bam_files_tumor
.
into
{
haplo_index_bam_files_tumor
;
artifact_index_bam_files_tumor
;
pileup_index_bam_files_tumor
}
final_fasta_file
.
into
{
haplo_fasta_file
artifact_fasta_file
}
indexed2_fasta_file
.
into
{
haplo_indexed2_fasta_file
artifact_indexed2_fasta_file
}
indexed3_fasta_file
.
into
{
haplo_indexed3_fasta_file
;
artifact_indexed3_fasta_file
}
process
HaplotypeCaller
{
tag
"$file_id_norm"
cpus
10
...
...
@@ -263,21 +316,22 @@ process HaplotypeCaller {
input:
set
file_id_norm
,
file
(
bam_norm
)
from
haplotypecaller_bam_files_norm
set
file_ididx_norm
,
file
(
bamidx_norm
)
from
index
ed
_bam_files_norm
set
file_ididx_norm
,
file
(
bamidx_norm
)
from
haplo_
index_bam_files_norm
set
file_id_tumor
,
file
(
bam_tumor
)
from
haplotypecaller_bam_files_tumor
set
file_ididx_tumor
,
file
(
bamidx_tumor
)
from
index
ed
_bam_files_tumor
set
file_ididx_tumor
,
file
(
bamidx_tumor
)
from
haplo_
index_bam_files_tumor
set
genome_id
,
file
(
fasta
)
from
haplo_fasta_file
set
genome2_idx
,
file
(
fasta2idx
)
from
indexed2_fasta_file
set
genome3_idx
,
file
(
fasta3idx
)
from
indexed3_fasta_file
set
genome2_idx
,
file
(
fasta2idx
)
from
haplo_
indexed2_fasta_file
set
genome3_idx
,
file
(
fasta3idx
)
from
haplo_
indexed3_fasta_file
output:
set
file_id_norm
,
"*.vcf"
into
vcf_files
set
file_id_norm
,
"*.vcf.idx"
into
index_vcf_files
set
file_id_norm
,
"*.bam"
into
realigned_bams_files
set
"*_mutect2_report.txt"
into
mutect2_report
file
"*_mutect2_report.txt"
into
mutect2_report
script:
"""
gatk Mutect2 --native-pair-hmm-threads ${task.cpus} -R ${fasta} \
gatk
--java-options "-Xmx32G"
Mutect2 --native-pair-hmm-threads ${task.cpus} -R ${fasta} \
-I ${bam_tumor} -tumor ${file_id_tumor} \
-I ${bam_norm} -normal ${file_id_norm} \
-O ${file_id_norm}_raw_calls.g.vcf \
...
...
@@ -285,75 +339,106 @@ gatk Mutect2 --native-pair-hmm-threads ${task.cpus} -R ${fasta} \
"""
}
vcf_files
.
into
{
pileup_vcf_files
;
filter_vcf_files
}
index_vcf_files
.
into
{
pileup_index_vcf_files
;
filter_index_vcf_files
}
/*
process
filter_SNP
{
tag "$file_id"
cpus
4
process
GetPileupSummaries
{
tag "$file_id
_norm
"
cpus
1
publishDir "results/SNP/vcf/", mode: 'copy'
input:
set file_id_norm, file(vcf) from pileup_vcf_files
set fileidx_id_norm, file(vcfidx) from pileup_index_vcf_files
set file_id_tumor, file(bam_tumor) from pileup_bam_files_tumor
output:
set file_id, "*.vcf" into vcf_files_filtered
set file_id_tumor, "*.table" into pileup_files
file "*_pileup_report.txt" into pileup_report
script:
"""
gatk --java-options "-Xmx2g" Mutect2 \
-R hg38/Homo_sapiens_assembly38.fasta \
-I tumor.bam \
-I normal.bam \
-tumor HCC1143_tumor \
-normal HCC1143_normal \
-pon resources/chr17_pon.vcf.gz \
--germline-resource resources/chr17_af-only-gnomad_grch38.vcf.gz \
--af-of-alleles-not-in-resource 0.0000025 \
--disable-read-filter MateOnSameContigOrNoMappedMateReadFilter \
-L chr17plus.interval_list \
-O 1_somatic_m2.vcf.gz \
-bamout 2_tumor_normal_m2.bam
gatk Mutect2 \
-R ~/Documents/ref/hg38/Homo_sapiens_assembly38.fasta \
-I HG00190.bam \
-tumor HG00190 \
--disable-read-filter MateOnSameContigOrNoMappedMateReadFilter \
-L chr17plus.interval_list \
-O 3_HG00190.vcf.gz
gatk CreateSomaticPanelOfNormals \
-vcfs 3_HG00190.vcf.gz \
-vcfs 4_NA19771.vcf.gz \
-vcfs 5_HG02759.vcf.gz \
-O 6_threesamplepon.vcf.gz
gatk GetPileupSummaries \
-I tumor.bam \
-V resources/chr17_small_exac_common_3_grch38.vcf.gz \
-O 7_tumor_getpileupsummaries.table
gatk CalculateContamination \
-I 7_tumor_getpileupsummaries.table \
-O 8_tumor_calculatecontamination.table
gatk --java-options "-Xmx32G" GetPileupSummaries \
-I ${bam_tumor} \
-V ${vcf} \
-O ${file_id_tumor}_pileup.table \
2> ${file_id_tumor}_pileup_report.txt
"""
}
gatk FilterMutectCalls \
-V somatic_m2.vcf.gz \
--contamination-table tumor_calculatecontamination.table \
-O 9_somatic_oncefiltered.vcf.gz
process CalculateContamination {
tag "$file_id_tumor"
cpus 1
publishDir "results/SNP/vcf/", mode: 'copy'
input:
set file_id_tumor, file(pileup_table) from pileup_files
output:
set file_id_tumor, "*.table" into contamination_files
file "*_contamination_report.txt" into contamination_report
script:
"""
gatk --java-options "-Xmx32G" CalculateContamination \
-I ${pileup_table} \
-O $file_id_tumor}_contamination.table \
2> ${file_id_tumor}_contamination_report.txt
"""
}
*/
process
CollectSequencingArtifactMetrics
{
tag
"$file_id_tumor"
cpus
1
publishDir
"results/SNP/vcf/"
,
mode:
'copy'
input:
set
file_id_tumor
,
file
(
bam_tumor
)
from
artifact_bam_files_tumor
set
genome_id
,
file
(
fasta
)
from
artifact_fasta_file
set
genome2_idx
,
file
(
fasta2idx
)
from
artifact_indexed2_fasta_file
set
genome3_idx
,
file
(
fasta3idx
)
from
artifact_indexed3_fasta_file
output:
set
file_id_tumor
,
"*_artifact.*"
into
artifact_files
file
"*_artifact_report.txt"
into
artifact_report
script:
"""
gatk CollectSequencingArtifactMetrics \
-I tumor
.bam
\
-O
10
_tumor_artifact \
–-FILE_EXTENSION ".txt"
\
-R ~/Documents/ref/hg38/Homo_sapiens_assembly38.fasta
gatk FilterByOrientationBias \
-A G/T \
-A C/T \
-V 9_somatic_oncefiltered.vcf.gz \
-P tumor_artifact.pre_adapter_detail_metrics.txt \
-O 11_somatic_twicefiltered.vcf.gz
-I
${bam_
tumor
}
\
-O
${file_id
_tumor
}
_artifact \
-R ${fasta}
\
2> ${file_id_tumor}_artifact_report.txt
"""
}
process
filter_SNP
{
tag
"$file_id_norm"
cpus
1
publishDir
"results/SNP/vcf/"
,
mode:
'copy'
input:
set
file_id_norm
,
file
(
vcf
)
from
filter_vcf_files
set
fileidx_id_norm
,
file
(
vcfid
)
from
filter_index_vcf_files
output:
set
file_id_norm
,
"*.vcf"
into
vcf_files_filtered
file
"*_filter_report.txt"
into
filter_report
script:
"""
gatk FilterMutectCalls \
-V ${vcf} \
-O ${file_id_norm}_filtered.vcf \
2> ${file_id_norm}_filter_report.txt
"""
}
*/
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment