Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
JU28_59vs17_SNP
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
LBMC
Delattre
JU28_59vs17_SNP
Commits
36fafd89
Unverified
Commit
36fafd89
authored
6 years ago
by
Laurent Modolo
Browse files
Options
Downloads
Patches
Plain Diff
SNP_calling.nf: add SNP_filter step
parent
58e36e9e
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/SNP_calling.config
+15
-3
15 additions, 3 deletions
src/SNP_calling.config
src/SNP_calling.nf
+158
-73
158 additions, 73 deletions
src/SNP_calling.nf
with
173 additions
and
76 deletions
src/SNP_calling.config
+
15
−
3
View file @
36fafd89
...
...
@@ -9,6 +9,9 @@ profiles {
withName
:
trimming
{
container
=
"urqt:d62c1f8"
}
withName
:
filter_fasta
{
container
=
"bioawk:1.0"
}
withName
:
index_fasta
{
container
=
"bwa:0.7.17"
}
...
...
@@ -21,9 +24,6 @@ profiles {
withName
:
sort_bam
{
container
=
"sambamba:0.6.7"
}
withName
:
name_fasta
{
container
=
"samtools:1.7"
}
withName
:
index_bam
{
container
=
"sambamba:0.6.7"
}
...
...
@@ -36,6 +36,18 @@ profiles {
withName
:
HaplotypeCaller
{
container
=
"gatk:4.0.8.1"
}
withName
:
GetPileupSummaries
{
container
=
"gatk:4.0.8.1"
}
withName
:
CalculateContamination
{
container
=
"gatk:4.0.8.1"
}
withName
:
CollectSequencingArtifactMetrics
{
container
=
"gatk:4.0.8.1"
}
withName
:
filter_SNP
{
container
=
"gatk:4.0.8.1"
}
}
}
sge
{
...
...
This diff is collapsed.
Click to expand it.
src/SNP_calling.nf
+
158
−
73
View file @
36fafd89
params.fastq = "$baseDir/data/*.fastq"
params.fasta = "$baseDir/data/*.fasta"
params.seq_length = 800000
log.info "fastq files : ${params.fastq}"
log.info "fasta files : ${params.fasta}"
log.info "fasta length to retain : ${params.seq_length}"
def normal_sample = Eval.me(params.normal)
def tumor_sample = Eval.me(params.tumor)
log.info "normal : ${normal_sample}"
...
...
@@ -11,11 +13,7 @@ Channel
.fromPath( params.fasta )
.ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
.map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
.into { fasta_file;
indel_fasta_file;
recalibration_fasta_file;
haplotypecaller_fasta_file
}
.set { fasta_file }
Channel
.fromFilePairs( params.fastq )
.ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
...
...
@@ -61,13 +59,39 @@ UrQt --t 20 --m ${task.cpus} --gz \
"""
}
process filter_fasta {
tag "$fasta_id"
cpus 4
publishDir "results/fasta/", mode: 'copy'
input:
set fasta_id, file(fasta) from fasta_file
output:
set fasta_idf, "*_filtered.fasta" into filter_fasta_files
script:
fasta_idf = "${fasta_id}_filtered"
"""
bioawk -c fastx '{ if(length(\$seq) > $params.seq_length) { print ">"\$name; print \$seq }}' ${fasta} > \
${fasta_id}_filtered.fasta
"""
}
filter_fasta_files.into{
filtered_fasta_files;
indel_fasta_file;
recalibration_fasta_file;
haplotypecaller_fasta_file
}
process index_fasta {
tag "$fasta_id"
cpus 4
publishDir "results/mapping/index/", mode: 'copy'
input:
set fasta_id, file(fasta) from fasta_file
set fasta_id, file(fasta) from
filtered_
fasta_file
s
output:
set fasta_id, "${fasta.baseName}.*" into index_files
...
...
@@ -205,7 +229,7 @@ index_bam_files.into{
}
haplotypecaller_fasta_file.into{
haplo
_fasta_file;
final
_fasta_file;
index2_fasta_file
index3_fasta_file
}
...
...
@@ -242,20 +266,49 @@ samtools faidx ${fasta}
"""
}
haplotypecaller
_bam_files_norm = haplo_bam_files_norm
final
_bam_files_norm = haplo_bam_files_norm
.filter{ "normal_sample" == it[0] }
haplotypecaller
_bam_files_tumor = haplo_bam_files_tumor
final
_bam_files_tumor = haplo_bam_files_tumor
.filter{ "tumor_sample" == it[0] }
indexed_bam_files.into {
index_bam_files_norm;
index_bam_files_tumor
}
indexed_bam_files_norm = index_bam_files_norm
final_
indexed_bam_files_norm = index_bam_files_norm
.filter{ "normal_sample" == it[0] }
indexed_bam_files_tumor = index_bam_files_tumor
final_
indexed_bam_files_tumor = index_bam_files_tumor
.filter{ "tumor_sample" == it[0] }
final_bam_files_norm.set{
haplotypecaller_bam_files_norm
}
final_bam_files_tumor.into{
haplotypecaller_bam_files_tumor;
artifact_bam_files_tumor;
pileup_bam_files_tumor
}
final_indexed_bam_files_norm.set{
haplo_index_bam_files_norm
}
final_indexed_bam_files_tumor.into{
haplo_index_bam_files_tumor;
artifact_index_bam_files_tumor;
pileup_index_bam_files_tumor
}
final_fasta_file.into{
haplo_fasta_file
artifact_fasta_file
}
indexed2_fasta_file.into{
haplo_indexed2_fasta_file
artifact_indexed2_fasta_file
}
indexed3_fasta_file.into{
haplo_indexed3_fasta_file;
artifact_indexed3_fasta_file
}
process HaplotypeCaller {
tag "$file_id_norm"
cpus 10
...
...
@@ -263,21 +316,22 @@ process HaplotypeCaller {
input:
set file_id_norm, file(bam_norm) from haplotypecaller_bam_files_norm
set file_ididx_norm, file(bamidx_norm) from index
ed
_bam_files_norm
set file_ididx_norm, file(bamidx_norm) from
haplo_
index_bam_files_norm
set file_id_tumor, file(bam_tumor) from haplotypecaller_bam_files_tumor
set file_ididx_tumor, file(bamidx_tumor) from index
ed
_bam_files_tumor
set file_ididx_tumor, file(bamidx_tumor) from
haplo_
index_bam_files_tumor
set genome_id, file(fasta) from haplo_fasta_file
set genome2_idx, file(fasta2idx) from indexed2_fasta_file
set genome3_idx, file(fasta3idx) from indexed3_fasta_file
set genome2_idx, file(fasta2idx) from
haplo_
indexed2_fasta_file
set genome3_idx, file(fasta3idx) from
haplo_
indexed3_fasta_file
output:
set file_id_norm, "*.vcf" into vcf_files
set file_id_norm, "*.vcf.idx" into index_vcf_files
set file_id_norm, "*.bam" into realigned_bams_files
set
"*_mutect2_report.txt" into mutect2_report
file
"*_mutect2_report.txt" into mutect2_report
script:
"""
gatk Mutect2 --native-pair-hmm-threads ${task.cpus} -R ${fasta} \
gatk
--java-options "-Xmx32G"
Mutect2 --native-pair-hmm-threads ${task.cpus} -R ${fasta} \
-I ${bam_tumor} -tumor ${file_id_tumor} \
-I ${bam_norm} -normal ${file_id_norm} \
-O ${file_id_norm}_raw_calls.g.vcf \
...
...
@@ -285,75 +339,106 @@ gatk Mutect2 --native-pair-hmm-threads ${task.cpus} -R ${fasta} \
"""
}
vcf_files.into{
pileup_vcf_files;
filter_vcf_files
}
index_vcf_files.into{
pileup_index_vcf_files;
filter_index_vcf_files
}
/*
process
filter_SNP
{
tag "$file_id"
cpus
4
process
GetPileupSummaries
{
tag "$file_id
_norm
"
cpus
1
publishDir "results/SNP/vcf/", mode: 'copy'
input:
set file_id_norm, file(vcf) from pileup_vcf_files
set fileidx_id_norm, file(vcfidx) from pileup_index_vcf_files
set file_id_tumor, file(bam_tumor) from pileup_bam_files_tumor
output:
set file_id, "*.vcf" into vcf_files_filtered
set file_id_tumor, "*.table" into pileup_files
file "*_pileup_report.txt" into pileup_report
script:
"""
gatk --java-options "-Xmx2g" Mutect2 \
-R hg38/Homo_sapiens_assembly38.fasta \
-I tumor.bam \
-I normal.bam \
-tumor HCC1143_tumor \
-normal HCC1143_normal \
-pon resources/chr17_pon.vcf.gz \
--germline-resource resources/chr17_af-only-gnomad_grch38.vcf.gz \
--af-of-alleles-not-in-resource 0.0000025 \
--disable-read-filter MateOnSameContigOrNoMappedMateReadFilter \
-L chr17plus.interval_list \
-O 1_somatic_m2.vcf.gz \
-bamout 2_tumor_normal_m2.bam
gatk Mutect2 \
-R ~/Documents/ref/hg38/Homo_sapiens_assembly38.fasta \
-I HG00190.bam \
-tumor HG00190 \
--disable-read-filter MateOnSameContigOrNoMappedMateReadFilter \
-L chr17plus.interval_list \
-O 3_HG00190.vcf.gz
gatk CreateSomaticPanelOfNormals \
-vcfs 3_HG00190.vcf.gz \
-vcfs 4_NA19771.vcf.gz \
-vcfs 5_HG02759.vcf.gz \
-O 6_threesamplepon.vcf.gz
gatk GetPileupSummaries \
-I tumor.bam \
-V resources/chr17_small_exac_common_3_grch38.vcf.gz \
-O 7_tumor_getpileupsummaries.table
gatk CalculateContamination \
-I 7_tumor_getpileupsummaries.table \
-O 8_tumor_calculatecontamination.table
gatk --java-options "-Xmx32G" GetPileupSummaries \
-I ${bam_tumor} \
-V ${vcf} \
-O ${file_id_tumor}_pileup.table \
2> ${file_id_tumor}_pileup_report.txt
"""
}
gatk FilterMutectCalls \
-V somatic_m2.vcf.gz \
--contamination-table tumor_calculatecontamination.table \
-O 9_somatic_oncefiltered.vcf.gz
process CalculateContamination {
tag "$file_id_tumor"
cpus 1
publishDir "results/SNP/vcf/", mode: 'copy'
input:
set file_id_tumor, file(pileup_table) from pileup_files
output:
set file_id_tumor, "*.table" into contamination_files
file "*_contamination_report.txt" into contamination_report
script:
"""
gatk --java-options "-Xmx32G" CalculateContamination \
-I ${pileup_table} \
-O $file_id_tumor}_contamination.table \
2> ${file_id_tumor}_contamination_report.txt
"""
}
*/
process CollectSequencingArtifactMetrics {
tag "$file_id_tumor"
cpus 1
publishDir "results/SNP/vcf/", mode: 'copy'
input:
set file_id_tumor, file(bam_tumor) from artifact_bam_files_tumor
set genome_id, file(fasta) from artifact_fasta_file
set genome2_idx, file(fasta2idx) from artifact_indexed2_fasta_file
set genome3_idx, file(fasta3idx) from artifact_indexed3_fasta_file
output:
set file_id_tumor, "*_artifact.*" into artifact_files
file "*_artifact_report.txt" into artifact_report
script:
"""
gatk CollectSequencingArtifactMetrics \
-I tumor
.bam
\
-O
10
_tumor_artifact \
–-FILE_EXTENSION ".txt"
\
-R ~/Documents/ref/hg38/Homo_sapiens_assembly38.fasta
gatk FilterByOrientationBias \
-A G/T \
-A C/T \
-V 9_somatic_oncefiltered.vcf.gz \
-P tumor_artifact.pre_adapter_detail_metrics.txt \
-O 11_somatic_twicefiltered.vcf.gz
-I
${bam_
tumor
}
\
-O
${file_id
_tumor
}
_artifact \
-R ${fasta}
\
2> ${file_id_tumor}_artifact_report.txt
"""
}
process filter_SNP {
tag "$file_id_norm"
cpus 1
publishDir "results/SNP/vcf/", mode: 'copy'
input:
set file_id_norm, file(vcf) from filter_vcf_files
set fileidx_id_norm, file(vcfid) from filter_index_vcf_files
output:
set file_id_norm, "*.vcf" into vcf_files_filtered
file "*_filter_report.txt" into filter_report
script:
"""
gatk FilterMutectCalls \
-V ${vcf} \
-O ${file_id_norm}_filtered.vcf \
2> ${file_id_norm}_filter_report.txt
"""
}
*/
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment