Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
nextflow
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
nfontrod
nextflow
Commits
224bbd69
Unverified
Commit
224bbd69
authored
6 years ago
by
Laurent Modolo
Browse files
Options
Downloads
Patches
Plain Diff
src/SNP_calling.nf: add Mutect2 to call SNP
parent
a2f71be3
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/SNP_calling.nf
+170
-38
170 additions, 38 deletions
src/SNP_calling.nf
with
170 additions
and
38 deletions
src/SNP_calling.nf
+
170
−
38
View file @
224bbd69
params.fastq = "$baseDir/data/*.fastq"
params.fasta = "$baseDir/data/*.fasta"
params.sam = ""
log.info "fastq files : ${params.fastq}"
log.info "fasta files : ${params.fasta}"
Channel
.fromPath( params.fasta )
.ifEmpty { error "Cannot find any
bam
files matching: ${params.fasta}" }
.ifEmpty { error "Cannot find any
fasta
files matching: ${params.fasta}" }
.map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
.set { fasta_file }
.into { fasta_file;
indel_fasta_file;
recalibration_fasta_file;
haplotypecaller_fasta_file
}
Channel
.fromFilePairs( params.fastq )
.ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
.set { fastq_files }
process adaptor_removal {
tag "$pair_id"
publishDir "results/fastq/adaptor_removal/", mode: 'copy'
if (params.sam == "") {
// Runs cutadapt on each paired-end fastq pair read from `fastq_files` and
// emits the adaptor-trimmed pair on `fastq_files_cut`.
process adaptor_removal {
// Label each task with the pair identifier.
tag "$pair_id"
// Declared outputs are copied (not symlinked) into this directory.
publishDir "results/fastq/adaptor_removal/", mode: 'copy'
input:
// `reads` is indexed as reads[0] / reads[1] in the script below.
set pair_id, file(reads) from fastq_files
output:
// Only the two *_cut_R1/R2 fastq.gz files are captured.
// NOTE(review): ${pair_id}_report.txt written by the script is not declared
// here, so it is neither emitted nor published -- confirm this is intended.
set pair_id, "*_cut_R{1,2}.fastq.gz" into fastq_files_cut
script:
"""
cutadapt -a AGATCGGAAGAG -g CTCTTCCGATCT -A AGATCGGAAGAG -G CTCTTCCGATCT \
-o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_cut_R2.fastq.gz \
${reads[0]} ${reads[1]} > ${pair_id}_report.txt
"""
}
// Runs UrQt on each adaptor-trimmed pair from `fastq_files_cut` and emits the
// resulting pair on `fastq_files_trim`.
process trimming {
// Label each task with the staged read file names.
tag "${reads}"
// This value is passed to UrQt through ${task.cpus} in the script.
cpus 4
publishDir "results/fastq/trimming/", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files_cut
output:
// Captures the two *_trim_R1/R2 fastq.gz files written by UrQt.
// NOTE(review): ${pair_id}_trimming_report.txt is not a declared output, so
// it is not published -- confirm this is intended.
set pair_id, "*_trim_R{1,2}.fastq.gz" into fastq_files_trim
script:
"""
UrQt --t 20 --m ${task.cpus} --gz \
--in ${reads[0]} --inpair ${reads[1]} \
--out ${pair_id}_trim_R1.fastq.gz --outpair ${pair_id}_trim_R2.fastq.gz \
> ${pair_id}_trimming_report.txt
"""
}
// Builds a BWA index for each fasta file read from `fasta_file`, using the
// fasta base name as the index prefix.
process index_fasta {
tag "$fasta_id"
// NOTE(review): the `bwa index` command below is not passed ${task.cpus};
// confirm whether reserving 4 cpus is needed.
cpus 4
publishDir "results/mapping/index/", mode: 'copy'
input:
set fasta_id, file(fasta) from fasta_file
output:
// Glob on the index prefix; the *_bwa_report.txt file written by the script
// starts with the same prefix and therefore also matches this pattern.
set fasta_id, "${fasta.baseName}.*" into index_files
file "*_bwa_report.txt" into index_files_report
script:
"""
bwa index -p ${fasta.baseName} ${fasta} \
&> ${fasta.baseName}_bwa_report.txt
"""
}
// Aligns each trimmed read pair from `fastq_files_trim` with `bwa mem` and
// emits the resulting SAM file on `sam_files`.
process mapping_fastq {
tag "$reads"
// This value is passed to bwa through ${task.cpus} in the script.
cpus 4
publishDir "results/mapping/sam/", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files_trim
// collect() gathers everything emitted on `index_files` into one item.
// NOTE(review): confirm the collected list still destructures as
// (index_id, index files) as this `set` declaration expects.
set index_id, file(index) from index_files.collect()
output:
file "${pair_id}.sam" into sam_files
// NOTE(review): "repport" looks like a typo of "report"; it is a channel name,
// so any rename has to be coordinated with its consumers.
file "${pair_id}_bwa_report.txt" into mapping_repport_files
script:
"""
bwa mem -t ${task.cpus} \
${index_id} ${reads[0]} ${reads[1]} \
-o ${pair_id}.sam &> ${pair_id}_bwa_report.txt
"""
}
} else {
Channel
.fromPath( params.sam )
.ifEmpty { error "Cannot find any sam files matching: ${params.sam}" }
.map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
.set { sam_files }
}
process dedup_sam {
tag "$file_id"
cpus 4
input:
set
pair
_id, file(
reads
) from
fastq
_files
set
file
_id, file(
sam
) from
sam
_files
output:
set pair_id, "*_cut_R{1,2}.fastq.gz" into fastq_files_cut
set file_id, "*_dedup.sam*" into dedup_sam_files
script:
"""
cutadapt -a AGATCGGAAGAG -g CTCTTCCGATCT -A AGATCGGAAGAG -G CTCTTCCGATCT \
-o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_cut_R2.fastq.gz \
${reads[0]} ${reads[1]} > ${pair_id}_report.txt
samblaster --addMateTags -i ${sam} -o ${file_id}_dedup.sam
"""
}
process
trimming
{
tag "$
{reads}
"
process
sort_bam
{
tag "$
file_id
"
cpus 4
publishDir "results/
fastq/trimming
/", mode: 'copy'
publishDir "results/
mapping/bam/1_dedup
/", mode: 'copy'
input:
set
pair
_id, file(
reads
) from
fastq
_files
_cut
set
file
_id, file(
sam
) from
dedup_sam
_files
output:
set
pair
_id, "*_
trim_R{1,2}.fastq.gz" into fastq
_files
_trim
set
file
_id, "*_
sorted.bam" into sorted_bam
_files
script:
"""
UrQt --t 20 --m ${task.cpus} --gz \
--in ${reads[0]} --inpair ${reads[1]} \
--out ${pair_id}_trim_R1.fastq.gz --outpair ${pair_id}_trim_R2.fastq.gz \
> ${pair_id}_trimming_report.txt
sambamba view -t ${task.cpus} -S -f bam -l 0 ${sam} | \
sambamba sort -t ${task.cpus} -o ${file_id}_sorted.bam /dev/stdin
"""
}
process index_fasta {
tag "$fasta_id"
sorted_bam_files.into{
index_sorted_bam_files;
haplotypecaller_sorted_bam_files
}
process index_bam {
tag "$file_id"
cpus 4
publishDir "results/mapping/
index
/", mode: 'copy'
publishDir "results/mapping/
bam/2_realigned
/", mode: 'copy'
input:
set f
asta
_id, file(
fasta
) from
fasta
_file
set f
ile
_id, file(
bam
) from
index_sorted_bam
_file
s
output:
set fasta_id, "${fasta.baseName}.*" into index_files
file "*_bwa_report.txt" into index_files_report
set file_id, "*.bam*" into indexed_bam_files
script:
"""
bwa index -p ${fasta.baseName} ${fasta} \
&> ${fasta.baseName}_bwa_report.txt
sambamba index -t ${task.cpus} ${bam}
"""
}
haplotypecaller_fasta_file.into{
haplo_fasta_file;
index2_fasta_file
index3_fasta_file
}
process index2_fasta {
tag "$file_id"
publishDir "results/mapping/bam/2_realigned/", mode: 'copy'
process mapping_fastq {
tag "$reads"
input:
set genome_id, file(fasta) from index2_fasta_file
output:
set genome_id, "*.dict" into indexed2_fasta_file
script:
"""
gatk CreateSequenceDictionary -R ${fasta} &> gatk_output.txt
"""
}
// Creates the samtools `.fai` index for each reference fasta read from
// `index3_fasta_file` and emits it, keyed by genome id, on
// `indexed3_fasta_file`.
process index3_fasta {
  // Tag with the identifier this process actually binds. The previous
  // "$file_id" referenced a variable that is never defined in this process
  // (the only input value is `genome_id`).
  tag "$genome_id"
  publishDir "results/mapping/bam/2_realigned/", mode: 'copy'

  input:
    set genome_id, file(fasta) from index3_fasta_file

  output:
    // Captures the <fasta>.fai file written by the command below.
    set genome_id, "*.fai" into indexed3_fasta_file

  script:
"""
samtools faidx ${fasta}
"""
}
process HaplotypeCaller {
tag "$file_id"
cpus 4
publishDir "results/
mapping/sam
/", mode: 'copy'
publishDir "results/
SNP/vcf
/", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files_trim
set index_id, file(index) from index_files.collect()
set file_id, file(bam) from haplotypecaller_sorted_bam_files.collect()
set file_ididx, file(bamidx) from indexed_bam_files.collect()
set genome_id, file(fasta) from haplo_fasta_file.collect()
set genome2_idx, file(fasta2idx) from indexed2_fasta_file.collect()
set genome3_idx, file(fasta3idx) from indexed3_fasta_file.collect()
output:
file "${pair_id}.sam
" into
sam
_files
file "${pair_id}_bwa_report.txt" into mapping_repport
_files
set file_id, "*.vcf
" into
vcf
_files
set file_id, "*.bam" into realigned_bams
_files
script:
"""
bwa mem -t ${task.cpus} \
${index_id} ${reads[0]} ${reads[1]} \
-o ${pair_id}.sam &> ${pair_id}_bwa_report.txt
gatk Mutect2 --native-pair-hmm-threads ${task.cpus} -R ${fasta} \
-I ${bam} -tumor ${params.tumor} -normal ${params.normal} \
-O ${file_id}_raw_calls.g.vcf \
-bamout ${file_id}_realigned.bam
"""
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment