Skip to content
Snippets Groups Projects
Commit 3505e2de authored by elabaron
Browse files

fusion changes with nextflow git

parents d93c0b55 a83195d8
No related branches found
No related tags found
No related merge requests found
Showing
with 1499 additions and 13 deletions
// fastp container image tag; interpolated into container_url below.
version = "0.20.1"
// Image reference built from the tag above; the fastp processes in this module
// assign it to their `container` directive.
container_url = "lbmc/fastp:${version}"
// Run fastp on one sample. `reads` may be a single file (single-end) or a list
// of two files (paired-end); the matching command is chosen at run time.
// Emits the trimmed fastq.gz file(s), the HTML report (also published to
// results/QC/fastp/) and the JSON report, each keyed by pair_id.
process fastp {
  container = "${container_url}"
  tag "${pair_id}"
  label "big_mem_multi_cpus"
  publishDir "results/QC/fastp/", mode: 'copy', pattern: "*.html"

  input:
    tuple val(pair_id), path(reads)

  output:
    tuple val(pair_id), path("*.fastq.gz"), emit: fastq
    tuple val(pair_id), path("*.html"), emit: html
    tuple val(pair_id), path("*.json"), emit: report

  script:
    // Not a list: treat `reads` as one single-end file.
    if (!(reads instanceof List))
      """
      fastp --thread ${task.cpus} \
        --qualified_quality_phred 20 \
        --disable_length_filtering \
        --detect_adapter_for_pe \
        --in1 ${reads} \
        --out1 ${pair_id}_trim.fastq.gz \
        --html ${pair_id}.html \
        --json ${pair_id}_fastp.json \
        --report_title ${pair_id}
      """
    // A list: first element is R1, second element is R2.
    else
      """
      fastp --thread ${task.cpus} \
        --qualified_quality_phred 20 \
        --disable_length_filtering \
        --detect_adapter_for_pe \
        --in1 ${reads[0]} \
        --in2 ${reads[1]} \
        --out1 ${pair_id}_R1_trim.fastq.gz \
        --out2 ${pair_id}_R2_trim.fastq.gz \
        --html ${pair_id}.html \
        --json ${pair_id}_fastp.json \
        --report_title ${pair_id}
      """
}
// Run fastp on a read pair: reads[0] is passed as --in1 and reads[1] as --in2.
// Emits the two trimmed fastq.gz files, the HTML report (also published to
// results/QC/fastp/) and the JSON report, each keyed by pair_id.
process fastp_pairedend {
  container = "${container_url}"
  tag "${pair_id}"
  label "big_mem_multi_cpus"
  publishDir "results/QC/fastp/", mode: 'copy', pattern: "*.html"

  input:
    tuple val(pair_id), path(reads)

  output:
    tuple val(pair_id), path("*.fastq.gz"), emit: fastq
    tuple val(pair_id), path("*.html"), emit: html
    tuple val(pair_id), path("*.json"), emit: report

  script:
    """
    fastp --thread ${task.cpus} \
      --qualified_quality_phred 20 \
      --disable_length_filtering \
      --detect_adapter_for_pe \
      --in1 ${reads[0]} \
      --in2 ${reads[1]} \
      --out1 ${pair_id}_R1_trim.fastq.gz \
      --out2 ${pair_id}_R2_trim.fastq.gz \
      --html ${pair_id}.html \
      --json ${pair_id}_fastp.json \
      --report_title ${pair_id}
    """
}
// Run fastp on a single-end read file passed as --in1.
// Emits the trimmed fastq.gz file, the HTML report (also published to
// results/QC/fastp/) and the JSON report, each keyed by pair_id.
process fastp_singleend {
  container = "${container_url}"
  tag "${pair_id}"
  label "big_mem_multi_cpus"
  publishDir "results/QC/fastp/", mode: 'copy', pattern: "*.html"

  input:
    tuple val(pair_id), path(reads)

  output:
    tuple val(pair_id), path("*.fastq.gz"), emit: fastq
    tuple val(pair_id), path("*.html"), emit: html
    tuple val(pair_id), path("*.json"), emit: report

  script:
    """
    fastp --thread ${task.cpus} \
      --qualified_quality_phred 20 \
      --disable_length_filtering \
      --detect_adapter_for_pe \
      --in1 ${reads} \
      --out1 ${pair_id}_trim.fastq.gz \
      --html ${pair_id}.html \
      --json ${pair_id}_fastp.json \
      --report_title ${pair_id}
    """
}
// FastQC container image tag; interpolated into container_url below.
version = "0.11.5"
// Image reference assigned to `container` by the fastqc processes in this module.
container_url = "lbmc/fastqc:${version}"
// Run FastQC on one sample whose `reads` is either a single fastq file or a
// list of two fastq files. Emits every produced .zip/.html file as `report`.
process fastqc_fastq {
  container = "${container_url}"
  tag "${pair_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(pair_id), path(reads)

  output:
    path "*.{zip,html}", emit: report

  script:
    // Not a list: single fastq file.
    if (!(reads instanceof List))
      """
      fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
      """
    // A list: both mates are given to the same fastqc call.
    else
      """
      fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
        ${reads[0]} ${reads[1]}
      """
}
// Run FastQC on both files of a read pair (reads[0] and reads[1]).
// Emits every produced .zip/.html file as `report`.
process fastqc_fastq_pairedend {
  container = "${container_url}"
  tag "${pair_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(pair_id), path(reads)

  output:
    path "*.{zip,html}", emit: report

  script:
    """
    fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
      ${reads[0]} ${reads[1]}
    """
}
// Run FastQC on a single fastq file.
// Emits every produced .zip/.html file as `report`.
process fastqc_fastq_singleend {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(file_id), path(reads)

  output:
    path "*.{zip,html}", emit: report

  script:
    """
    fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
    """
}
// g2gtools container image tag; interpolated into container_url below.
version = "0.2.8"
// Image reference assigned to `container` by the g2gtools processes in this module.
container_url = "lbmc/g2gtools:${version}"
// Build a VCI file with `g2gtools vcf2vci` from one or more VCF files and a
// reference fasta. Each VCF is passed with its own `-i` flag. stderr of the
// tool is captured as the report.
process vci_build {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta)

  output:
    tuple val(file_id), path("*.vci.gz"), path("*.vci.gz.tbi"), emit: vci
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
    // One " -i <file>" fragment per VCF, concatenated in input order.
    input_vcf = vcf.collect { vcf_file -> " -i ${vcf_file}" }.join("")
    """
    g2gtools vcf2vci \
      -p ${task.cpus} \
      -f ${fasta} \
      ${input_vcf} \
      -s ${file_id} \
      -o ${file_id}.vci 2> ${file_id}_g2gtools_vcf2vci_report.txt
    """
}
// Run `g2gtools patch` on the reference fasta with the given VCI file, writing
// <file_id>_snp.fa. The VCI and its index are re-emitted alongside the patched
// fasta; stderr of the tool is captured as the report.
process incorporate_snp {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(vci), path(tbi)
    tuple val(ref_id), path(fasta)

  output:
    tuple val(file_id), path("${file_id}_snp.fa"), path("${vci}"), path("${tbi}"), emit: fasta
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
    """
    g2gtools patch \
      -p ${task.cpus} \
      -i ${fasta} \
      -c ${vci} \
      -o ${file_id}_snp.fa 2> ${file_id}_g2gtools_path_report.txt
    """
}
// Run `g2gtools transform` on a fasta with the given VCI file, writing
// <file_id>_snp_indel.fa. The VCI and its index are re-emitted alongside the
// transformed fasta; stderr of the tool is captured as the report.
process incorporate_indel {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(fasta), path(vci), path(tbi)

  output:
    tuple val(file_id), path("${file_id}_snp_indel.fa"), path("${vci}"), path("${tbi}"), emit: fasta
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
    """
    g2gtools transform \
      -p ${task.cpus} \
      -i ${fasta} \
      -c ${vci} \
      -o ${file_id}_snp_indel.fa 2> ${file_id}_g2gtools_transform_report.txt
    """
}
// Run `g2gtools convert` on a GTF annotation with the given VCI file, writing
// <file_id>.gtf. stderr of the tool is captured as the report.
process convert_gtf {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(file_id), path(vci), path(tbi)
    tuple val(annot_id), path(gtf)

  output:
    tuple val(file_id), path("${file_id}.gtf"), emit: gtf
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
    """
    g2gtools convert \
      -i ${gtf} \
      -c ${vci} \
      -o ${file_id}.gtf 2> ${file_id}_g2gtools_convert_report.txt
    """
}
// Run `g2gtools convert` on a BED annotation with the given VCI file, writing
// <file_id>.bed. stderr of the tool is captured as the report.
process convert_bed {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(file_id), path(vci), path(tbi)
    tuple val(annot_id), path(bed)

  output:
    tuple val(file_id), path("${file_id}.bed"), emit: bed
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
    """
    g2gtools convert \
      -i ${bed} \
      -c ${vci} \
      -o ${file_id}.bed 2> ${file_id}_g2gtools_convert_report.txt
    """
}
// Run `g2gtools convert` on a BAM file with the given VCI file, writing
// <file_id>_<bam base name>.bam. stderr of the tool is captured as the report.
process convert_bam {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${bam_id} ${file_id}"

  input:
    tuple val(file_id), path(vci), path(tbi)
    tuple val(bam_id), path(bam)

  output:
    // The declared name must match the file the script writes, which is built
    // from the staged BAM path (`bam`). `bam_id` is a plain value and has no
    // `baseName` property.
    tuple val(file_id), path("${file_id}_${bam.baseName}.bam"), emit: bam
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
    """
    g2gtools convert \
      -i ${bam} \
      -c ${vci} \
      -o ${file_id}_${bam.baseName}.bam 2> ${file_id}_g2gtools_convert_report.txt
    """
}
\ No newline at end of file
// GATK 3 container image tag; interpolated into container_url below.
version = "3.8.0"
// Image reference assigned to `container` by the gatk3 processes in this module.
container_url = "lbmc/gatk:${version}"
// Run GATK3 HaplotypeCaller on an indexed BAM against the reference fasta and
// emit <file_id>.vcf.
process variant_calling {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(bam), path(bai)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.vcf"), emit: vcf

  script:
    """
    gatk3 -T HaplotypeCaller \
      -nct ${task.cpus} \
      -R ${fasta} \
      -I ${bam} \
      -o ${file_id}.vcf
    """
}
// Run GATK3 SelectVariants with `-selectType SNP` on a VCF and emit
// <file_id>_snp.vcf.
process filter_snp {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_snp.vcf"), emit: vcf

  script:
    """
    gatk3 -T SelectVariants \
      -nct ${task.cpus} \
      -R ${fasta} \
      -V ${vcf} \
      -selectType SNP \
      -o ${file_id}_snp.vcf
    """
}
// Run GATK3 SelectVariants with `-selectType INDEL` on a VCF and emit
// <file_id>_indel.vcf.
process filter_indels {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_indel.vcf"), emit: vcf

  script:
    """
    gatk3 -T SelectVariants \
      -nct ${task.cpus} \
      -R ${fasta} \
      -V ${vcf} \
      -selectType INDEL \
      -o ${file_id}_indel.vcf
    """
}
// Expression passed to VariantFiltration --filterExpression by the
// high_confidence_snp process; matching records are tagged "basic_snp_filter".
high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)"
// Run GATK3 VariantFiltration on a SNP VCF using the module-level
// high_confidence_snp_filter expression, under the filter name
// "basic_snp_filter". Emits <file_id>_filtered_snp.vcf.
process high_confidence_snp {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_snp.vcf"), emit: vcf

  script:
    """
    gatk3 -T VariantFiltration \
      -nct ${task.cpus} \
      -R ${fasta} \
      -V ${vcf} \
      --filterExpression "${high_confidence_snp_filter}" \
      --filterName "basic_snp_filter" \
      -o ${file_id}_filtered_snp.vcf
    """
}
// Expression passed to VariantFiltration --filterExpression by the
// high_confidence_indels process; matching records are tagged "basic_indel_filter".
high_confidence_indel_filter = "QD < 3.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0"
// Run GATK3 VariantFiltration on an indel VCF using the module-level
// high_confidence_indel_filter expression, under the filter name
// "basic_indel_filter". Emits <file_id>_filtered_indel.vcf.
process high_confidence_indels {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_indel.vcf"), emit: vcf

  script:
    """
    gatk3 -T VariantFiltration \
      -nct ${task.cpus} \
      -R ${fasta} \
      -V ${vcf} \
      --filterExpression "${high_confidence_indel_filter}" \
      --filterName "basic_indel_filter" \
      -o ${file_id}_filtered_indel.vcf
    """
}
// Run GATK3 BaseRecalibrator on a BAM, using the SNP and indel VCFs as
// -knownSites, and emit the resulting file `recal_data_table`.
process recalibrate_snp_table {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("recal_data_table"), emit: recal_table

  script:
    """
    gatk3 -T BaseRecalibrator \
      -nct ${task.cpus} \
      -R ${fasta} \
      -I ${bam} \
      -knownSites ${snp_file} \
      -knownSites ${indel_file} \
      -o recal_data_table
    """
}
// Run GATK3 PrintReads with a BQSR recalibration table on a BAM and emit
// <file_id>_recal.bam.
process recalibrate_snp {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"

  input:
    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx)
    tuple val(table_id), path(recal_data_table)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.bam"), emit: bam

  script:
    // Use the staged table path rather than a hard-coded file name, so the
    // process works whatever the incoming file is called.
    """
    gatk3 -T PrintReads \
      --use_jdk_deflater \
      --use_jdk_inflater \
      -nct ${task.cpus} \
      -R ${fasta} \
      -I ${bam} \
      -BQSR ${recal_data_table} \
      -o ${file_id}_recal.bam
    """
}
// Run GATK3 HaplotypeCaller with `-ERC GVCF` on a BAM and emit <file_id>.gvcf.
process haplotype_caller {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(bam)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.gvcf"), emit: gvcf

  script:
    """
    gatk3 -T HaplotypeCaller \
      -nct ${task.cpus} \
      -R ${fasta} \
      -I ${bam} \
      -ERC GVCF \
      -variant_index_type LINEAR -variant_index_parameter 128000 \
      -o ${file_id}.gvcf
    """
}
// Run GATK3 GenotypeGVCFs on a gVCF and emit <file_id>_joint.vcf.
process gvcf_genotyping {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(gvcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.vcf"), emit: vcf

  script:
    """
    gatk3 -T GenotypeGVCFs \
      -nct ${task.cpus} \
      -R ${fasta} \
      -V ${gvcf} \
      -o ${file_id}_joint.vcf
    """
}
// Run GATK3 SelectVariants with `-selectType SNP` on a VCF and emit
// <file_id>_joint_snp.vcf.
process select_variants_snp {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_joint_snp.vcf"), emit: vcf

  script:
    """
    gatk3 -T SelectVariants \
      -nct ${task.cpus} \
      -R ${fasta} \
      -V ${vcf} \
      -selectType SNP \
      -o ${file_id}_joint_snp.vcf
    """
}
// Run GATK3 SelectVariants with `-selectType INDEL` on a VCF and emit
// <file_id>_joint_indel.vcf.
process select_variants_indels {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_joint_indel.vcf"), emit: vcf

  script:
    """
    gatk3 -T SelectVariants \
      -nct ${task.cpus} \
      -R ${fasta} \
      -V ${vcf} \
      -selectType INDEL \
      -o ${file_id}_joint_indel.vcf
    """
}
// Run GATK3 FastaAlternateReferenceMaker on the reference fasta with the given
// VCF and emit <file_id>_genome.fasta.
process personalized_genome {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_genome.fasta"), emit: fasta

  script:
    // The reference is the staged `fasta` input. The original referred to
    // `reference`, `pick_library` and `library_list`, none of which is declared
    // by this process, so the output is now named after file_id.
    // NOTE(review): if a project-level pick_library helper exists elsewhere,
    // confirm whether the library name was meant to be used instead.
    """
    gatk3 -T FastaAlternateReferenceMaker \
      -R ${fasta} \
      -V ${vcf} \
      -o ${file_id}_genome.fasta
    """
}
// GATK 4 container image tag; interpolated into container_url below.
version = "4.2.0.0"
// Image reference assigned to `container` by the gatk4 processes in this module.
container_url = "broadinstitute/gatk:${version}"
// Run GATK4 HaplotypeCaller on an indexed BAM against the reference fasta and
// emit <bam simple name>.vcf.
process variant_calling {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(file_id), path(bam), path(bai)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.vcf"), emit: vcf

  script:
    // Strip the " GB" suffix from task.memory so the number can be used in -Xmx.
    def heap_gb = "${task.memory}".replaceFirst(/\s*GB/, "")
    """
    gatk --java-options "-Xmx${heap_gb}G" HaplotypeCaller \
      -R ${fasta} \
      -I ${bam} \
      -O ${bam.simpleName}.vcf
    """
}
// Run GATK4 SelectVariants with `-select-type SNP` on a VCF and emit
// <vcf simple name>_snp.vcf.
process filter_snp {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_snp.vcf"), emit: vcf

  script:
    // Strip the " GB" suffix from task.memory so the number can be used in -Xmx.
    def heap_gb = "${task.memory}".replaceFirst(/\s*GB/, "")
    """
    gatk --java-options "-Xmx${heap_gb}G" SelectVariants \
      -R ${fasta} \
      -V ${vcf} \
      -select-type SNP \
      -O ${vcf.simpleName}_snp.vcf
    """
}
// Run GATK4 SelectVariants with `-select-type INDEL` on a VCF and emit
// <vcf simple name>_indel.vcf.
process filter_indels {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_indel.vcf"), emit: vcf

  script:
    // Strip the " GB" suffix from task.memory so the number can be used in -Xmx.
    def heap_gb = "${task.memory}".replaceFirst(/\s*GB/, "")
    """
    gatk --java-options "-Xmx${heap_gb}G" SelectVariants \
      -R ${fasta} \
      -V ${vcf} \
      -select-type INDEL \
      -O ${vcf.simpleName}_indel.vcf
    """
}
// Expression passed to VariantFiltration --filter-expression by the
// high_confidence_snp process; matching records are tagged "basic_snp_filter".
high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)"
// Run GATK4 VariantFiltration on a SNP VCF using the module-level
// high_confidence_snp_filter expression, under the filter name
// "basic_snp_filter". Emits <vcf simple name>_filtered_snp.vcf.
process high_confidence_snp {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_snp.vcf"), emit: vcf

  script:
    // Strip the " GB" suffix from task.memory so the number can be used in -Xmx.
    def heap_gb = "${task.memory}".replaceFirst(/\s*GB/, "")
    """
    gatk --java-options "-Xmx${heap_gb}G" VariantFiltration \
      -R ${fasta} \
      -V ${vcf} \
      --filter-expression "${high_confidence_snp_filter}" \
      --filter-name "basic_snp_filter" \
      -O ${vcf.simpleName}_filtered_snp.vcf
    """
}
// Expression passed to VariantFiltration --filter-expression by the
// high_confidence_indels process; matching records are tagged "basic_indel_filter".
high_confidence_indel_filter = "QD < 3.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0"
// Run GATK4 VariantFiltration on an indel VCF using the module-level
// high_confidence_indel_filter expression, under the filter name
// "basic_indel_filter". Emits <vcf simple name>_filtered_indel.vcf.
process high_confidence_indels {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_indel.vcf"), emit: vcf

  script:
    // Strip the " GB" suffix from task.memory so the number can be used in -Xmx.
    def heap_gb = "${task.memory}".replaceFirst(/\s*GB/, "")
    """
    gatk --java-options "-Xmx${heap_gb}G" VariantFiltration \
      -R ${fasta} \
      -V ${vcf} \
      --filter-expression "${high_confidence_indel_filter}" \
      --filter-name "basic_indel_filter" \
      -O ${vcf.simpleName}_filtered_indel.vcf
    """
}
// Index the SNP and indel VCFs with GATK4 IndexFeatureFile, then run
// BaseRecalibrator on the BAM with both as --known-sites. Emits the resulting
// file `recal_data_table`.
process recalibrate_snp_table {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("recal_data_table"), emit: recal_table

  script:
    // Strip the " GB" suffix from task.memory so the number can be used in -Xmx.
    def heap_gb = "${task.memory}".replaceFirst(/\s*GB/, "")
    """
    gatk --java-options "-Xmx${heap_gb}G" IndexFeatureFile \
      -I ${snp_file}
    gatk --java-options "-Xmx${heap_gb}G" IndexFeatureFile \
      -I ${indel_file}
    gatk --java-options "-Xmx${heap_gb}G" BaseRecalibrator \
      -R ${fasta} \
      -I ${bam} \
      -known-sites ${snp_file} \
      -known-sites ${indel_file} \
      -O recal_data_table
    """
}
// Run GATK4 ApplyBQSR on a BAM with a recalibration table and emit
// <bam simple name>_recal.bam.
process recalibrate_snp {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"

  input:
    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx), path(recal_table)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.bam"), emit: bam

  script:
    // Strip the " GB" suffix from task.memory so the number can be used in -Xmx.
    xmx_memory = "${task.memory}" - ~/\s*GB/
    // Use the staged `recal_table` input rather than a hard-coded file name, so
    // the process works whatever the incoming table is called.
    """
    gatk --java-options "-Xmx${xmx_memory}G" ApplyBQSR \
      -R ${fasta} \
      -I ${bam} \
      --bqsr-recal-file ${recal_table} \
      -O ${bam.simpleName}_recal.bam
    """
}
// Run GATK4 HaplotypeCaller with `-ERC GVCF` on a BAM and emit
// <bam simple name>.gvcf.
process haplotype_caller {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(file_id), path(bam)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.gvcf"), emit: gvcf

  script:
    // Strip the " GB" suffix from task.memory so the number can be used in -Xmx.
    def heap_gb = "${task.memory}".replaceFirst(/\s*GB/, "")
    """
    gatk --java-options "-Xmx${heap_gb}G" HaplotypeCaller \
      -R ${fasta} \
      -I ${bam} \
      -ERC GVCF \
      -O ${bam.simpleName}.gvcf
    """
}
// Run GATK4 GenotypeGVCFs on a gVCF and emit <gvcf simple name>_joint.vcf.gz.
process gvcf_genotyping {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(file_id), path(gvcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.vcf.gz"), emit: vcf

  script:
    // Strip the " GB" suffix from task.memory so the number can be used in -Xmx.
    def heap_gb = "${task.memory}".replaceFirst(/\s*GB/, "")
    """
    gatk --java-options "-Xmx${heap_gb}G" GenotypeGVCFs \
      -R ${fasta} \
      -V ${gvcf} \
      -O ${gvcf.simpleName}_joint.vcf.gz
    """
}
// Run GATK4 SelectVariants with `-select-type SNP` on a VCF and emit
// <vcf simple name>_joint_snp.vcf.
process select_variants_snp {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_joint_snp.vcf"), emit: vcf

  script:
    // Strip the " GB" suffix from task.memory so the number can be used in -Xmx.
    xmx_memory = "${task.memory}" - ~/\s*GB/
    // Single "G" unit suffix: "-Xmx<n>GG" is not a valid JVM heap size.
    """
    gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \
      -R ${fasta} \
      -V ${vcf} \
      -select-type SNP \
      -O ${vcf.simpleName}_joint_snp.vcf
    """
}
// Run GATK4 SelectVariants with `-select-type INDEL` on a VCF and emit
// <file_id>_joint_indel.vcf.
process select_variants_indels {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_joint_indel.vcf"), emit: vcf

  script:
    // Strip the " GB" suffix from task.memory so the number can be used in -Xmx.
    def heap_gb = "${task.memory}".replaceFirst(/\s*GB/, "")
    """
    gatk --java-options "-Xmx${heap_gb}G" SelectVariants \
      -R ${fasta} \
      -V ${vcf} \
      -select-type INDEL \
      -O ${file_id}_joint_indel.vcf
    """
}
// Run GATK4 FastaAlternateReferenceMaker on the reference fasta with the given
// VCF and emit <vcf simple name>_genome.fasta.
process personalized_genome {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_genome.fasta"), emit: fasta

  script:
    // Strip the " GB" suffix from task.memory so the number can be used in -Xmx.
    xmx_memory = "${task.memory}" - ~/\s*GB/
    // The reference is the staged `fasta` input; `reference` is not declared
    // by this process.
    """
    gatk --java-options "-Xmx${xmx_memory}G" FastaAlternateReferenceMaker \
      -R ${fasta} \
      -V ${vcf} \
      -O ${vcf.simpleName}_genome.fasta
    """
}
......@@ -3,7 +3,7 @@ profiles {
docker.temp = "auto"
docker.enabled = true
process {
withName: index_fasta {
withLabel: kallisto {
container = "lbmc/kallisto:0.44.0"
cpus = 4
}
......@@ -13,7 +13,7 @@ profiles {
singularity.enabled = true
singularity.cacheDir = "./bin/"
process {
withName: index_fasta {
withLabel: kallisto {
container = "lbmc/kallisto:0.44.0"
cpus = 4
}
......@@ -24,7 +24,7 @@ profiles {
singularity.cacheDir = "/.singularity_psmn/"
singularity.runOptions = "--bind /Xnfs,/scratch"
process{
withName: index_fasta {
withLabel: kallisto {
container = "lbmc/kallisto:0.44.0"
executor = "sge"
clusterOptions = "-cwd -V"
......@@ -41,7 +41,7 @@ profiles {
singularity.cacheDir = "$baseDir/.singularity_in2p3/"
singularity.runOptions = "--bind /pbs,/sps,/scratch"
process{
withName: index_fasta {
withLabel: kallisto {
container = "lbmc/kallisto:0.44.0"
scratch = true
stageInMode = "copy"
......
......@@ -9,7 +9,8 @@ Channel
process index_fasta {
tag "$fasta.baseName"
publishDir "results/index/", mode: 'copy'
publishDir "results/mapping/index/", mode: 'copy'
label "kallisto"
input:
file fasta from fasta_file
......
// kallisto container image tag; interpolated into container_url below.
version = "0.44.0"
// Image reference assigned to `container` by the kallisto processes in this module.
container_url = "lbmc/kallisto:${version}"
// Build a kallisto index (k = 31, --make-unique) from a fasta file.
// Emits the index file(s) and the captured stderr as the report.
process index_fasta {
  container = "${container_url}"
  tag "${fasta.baseName}"
  label "big_mem_multi_cpus"

  input:
    path fasta

  output:
    path "*.index*", emit: index
    path "*_report.txt", emit: report

  script:
    """
    kallisto index -k 31 --make-unique -i ${fasta.baseName}.index ${fasta} \
      2> ${fasta.baseName}_kallisto_index_report.txt
    """
}
// Run `kallisto quant` on a read pair with 100 bootstrap samples and --bias.
// Results go to a directory named after pair_id, emitted as `counts`;
// stdout and stderr are captured as the report.
process mapping_fastq_pairedend {
  container = "${container_url}"
  tag "${pair_id}"
  label "big_mem_multi_cpus"

  input:
    path index
    tuple val(pair_id), path(reads)

  output:
    path "${pair_id}", emit: counts
    path "*_report.txt", emit: report

  script:
    """
    mkdir ${pair_id}
    kallisto quant -i ${index} -t ${task.cpus} \
      --bias --bootstrap-samples 100 -o ${pair_id} \
      ${reads[0]} ${reads[1]} &> ${pair_id}_kallisto_mapping_report.txt
    """
}
// Run `kallisto quant --single` on one read file with 100 bootstrap samples and
// --bias. Fragment length mean and sd come from params.mean and params.sd.
// Results go to a directory named after file_id, emitted as `counts`;
// stdout and stderr are captured as the report.
process mapping_fastq_singleend {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"

  input:
    path index
    tuple val(file_id), path(reads)

  output:
    // The script creates a directory named after file_id. This process has no
    // `pair_id` variable, so the output path must use file_id.
    tuple val(file_id), path("${file_id}"), emit: counts
    path "*_report.txt", emit: report

  script:
    """
    mkdir ${file_id}
    kallisto quant -i ${index} -t ${task.cpus} --single \
      --bias --bootstrap-samples 100 -o ${file_id} \
      -l ${params.mean} -s ${params.sd} \
      ${reads} &> ${reads.simpleName}_kallisto_mapping_report.txt
    """
}
......@@ -3,7 +3,7 @@ profiles {
docker.temp = "auto"
docker.enabled = true
process {
withName: mapping_fastq {
withLabel: kallisto {
container = "lbmc/kallisto:0.44.0"
cpus = 4
}
......@@ -13,7 +13,7 @@ profiles {
singularity.enabled = true
singularity.cacheDir = "./bin/"
process {
withName: mapping_fastq {
withLabel: kallisto {
container = "lbmc/kallisto:0.44.0"
cpus = 4
}
......@@ -24,7 +24,7 @@ profiles {
singularity.cacheDir = "/.singularity_psmn/"
singularity.runOptions = "--bind /Xnfs,/scratch"
process{
withName: mapping_fastq {
withLabel: kallisto {
container = "lbmc/kallisto:0.44.0"
executor = "sge"
clusterOptions = "-cwd -V"
......@@ -41,7 +41,7 @@ profiles {
singularity.cacheDir = "$baseDir/.singularity_in2p3/"
singularity.runOptions = "--bind /pbs,/sps,/scratch"
process{
withName: mapping_fastq {
withLabel: kallisto {
container = "lbmc/kallisto:0.44.0"
scratch = true
stageInMode = "copy"
......
......@@ -18,6 +18,7 @@ Channel
process mapping_fastq {
tag "$reads"
publishDir "${params.output}", mode: 'copy'
label "kallisto"
input:
set pair_id, file(reads) from fastq_files
......
......@@ -3,7 +3,7 @@ profiles {
docker.temp = "auto"
docker.enabled = true
process {
withName: mapping_fastq {
withLabel: kallisto {
container = "lbmc/kallisto:0.44.0"
cpus = 4
}
......@@ -13,7 +13,7 @@ profiles {
singularity.enabled = true
singularity.cacheDir = "./bin/"
process {
withName: mapping_fastq {
withLabel: kallisto {
container = "lbmc/kallisto:0.44.0"
cpus = 4
}
......@@ -24,7 +24,7 @@ profiles {
singularity.cacheDir = "/.singularity_psmn/"
singularity.runOptions = "--bind /Xnfs,/scratch"
process{
withName: mapping_fastq {
withLabel: kallisto {
container = "lbmc/kallisto:0.44.0"
executor = "sge"
clusterOptions = "-cwd -V"
......@@ -41,7 +41,7 @@ profiles {
singularity.cacheDir = "$baseDir/.singularity_in2p3/"
singularity.runOptions = "--bind /pbs,/sps,/scratch"
process{
withName: mapping_fastq {
withLabel: kallisto {
container = "lbmc/kallisto:0.44.0"
scratch = true
stageInMode = "copy"
......
......@@ -21,6 +21,7 @@ Channel
process mapping_fastq {
tag "$file_id"
publishDir "results/mapping/quantification/", mode: 'copy'
label "kallisto"
input:
set file_id, file(reads) from fastq_files
......
// MACS2 container image tag; interpolated into container_url below.
version = "2.1.2"
// Image reference assigned to `container` by the macs2 processes in this module.
container_url = "lbmc/macs2:${version}"
// Value passed to `macs2 callpeak --gsize`.
params.macs_gsize=3e9
// Two space-separated values passed to `macs2 callpeak --mfold`.
params.macs_mfold="5 50"
// Run `macs2 callpeak --call-summits` on an IP BAM against a control BAM.
// stderr is captured in a report file; the task fails if that report contains
// the string "ERROR". Emits narrowPeak, summit BED and the xls/report pair.
process peak_calling {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"

  input:
    tuple val(file_id), path(bam_ip), path(bam_control)

  output:
    tuple val(file_id), path("*.narrowPeak"), emit: peak
    tuple val(file_id), path("*.bed"), emit: summits
    tuple val(file_id), path("*_peaks.xls"), path("*_report.txt"), emit: report

  script:
    // macs2 can log errors on stderr, so the report is scanned explicitly.
    // The message names MACS2, the tool this module actually runs.
    """
    macs2 callpeak \
      --treatment ${bam_ip} \
      --call-summits \
      --control ${bam_control} \
      --keep-dup all \
      --name ${bam_ip.simpleName} \
      --mfold ${params.macs_mfold} \
      --gsize ${params.macs_gsize} 2> \
      ${bam_ip.simpleName}_macs2_report.txt

    if grep -q "ERROR" ${bam_ip.simpleName}_macs2_report.txt; then
      echo "MACS2 error"
      exit 1
    fi
    """
}
// Convert two four-column inputs (IP and control) to six-column BED with awk,
// then run `macs2 callpeak --call-summits` on the converted files.
// stderr is captured in a report file; the task fails if that report contains
// the string "ERROR".
process peak_calling_bg {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"

  input:
    tuple val(file_id), path(bg_ip), path(bg_control)

  output:
    tuple val(file_id), path("*.narrowPeak"), emit: peak
    // Restricted to the summits file: a bare "*.bed" would also capture the two
    // intermediate BED files written by awk below.
    tuple val(file_id), path("*_summits.bed"), emit: summits
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
    // awk reorders columns to: $1 $2 $3 "." "+" $4 (tab separated).
    // The error message names MACS2, the tool this module actually runs.
    """
    awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_ip} > \
      ${bg_ip.simpleName}.bed
    awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_control} > \
      ${bg_control.simpleName}.bed
    macs2 callpeak \
      --treatment ${bg_ip.simpleName}.bed \
      --call-summits \
      --control ${bg_control.simpleName}.bed \
      --keep-dup all \
      --name ${bg_ip.simpleName} \
      --mfold ${params.macs_mfold} \
      --gsize ${params.macs_gsize} 2> \
      ${bg_ip.simpleName}_macs2_report.txt

    if grep -q "ERROR" ${bg_ip.simpleName}_macs2_report.txt; then
      echo "MACS2 error"
      exit 1
    fi
    """
}
// MACS3 container image tag; interpolated into container_url below.
version = "3.0.0a6"
// Image reference assigned to `container` by the macs3 processes in this module.
container_url = "lbmc/macs3:${version}"
// Value passed to `macs3 callpeak --gsize`.
params.macs_gsize=3e9
// Two-element list; the processes below reference elements [0] and [1] for --mfold.
params.macs_mfold=[5, 50]
// Run `macs3 callpeak --call-summits` on an IP BAM against a control BAM.
// stderr is captured in a report file; the task fails if that report contains
// the string "ERROR". Every produced file is emitted as `peak`.
process peak_calling {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"

  input:
    tuple val(file_id), path(bam_ip), path(bam_control)

  output:
    path "*", emit: peak
    path "*_report.txt", emit: report

  script:
    // --mfold takes the lower and upper bound from the two-element list
    // params.macs_mfold. Both must be interpolated with ${...}, and the line
    // needs its trailing backslash so the remaining options stay on the same
    // command.
    """
    macs3 callpeak \
      --treatment ${bam_ip} \
      --call-summits \
      --control ${bam_control} \
      --keep-dup all \
      --mfold ${params.macs_mfold[0]} ${params.macs_mfold[1]} \
      --name ${bam_ip.simpleName} \
      --gsize ${params.macs_gsize} 2> \
      ${bam_ip.simpleName}_macs3_report.txt

    if grep -q "ERROR" ${bam_ip.simpleName}_macs3_report.txt; then
      echo "MACS3 error"
      exit 1
    fi
    """
}
// Convert two four-column inputs (IP and control) to six-column BED with awk,
// then run `macs3 callpeak --call-summits` on the converted files.
// stderr is captured in a report file; the task fails if that report contains
// the string "ERROR". Every produced file is emitted as `peak`.
process peak_calling_bg {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"

  input:
    tuple val(file_id), path(bg_ip), path(bg_control)

  output:
    path "*", emit: peak
    path "*_report.txt", emit: report

  script:
    // awk reorders columns to: $1 $2 $3 "." "+" $4 (tab separated).
    // --mfold takes the lower and upper bound from the two-element list
    // params.macs_mfold. Both must be interpolated with ${...}, and the line
    // needs its trailing backslash so the remaining options stay on the same
    // command.
    """
    awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_ip} > \
      ${bg_ip.simpleName}.bed
    awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_control} > \
      ${bg_control.simpleName}.bed
    macs3 callpeak \
      --treatment ${bg_ip.simpleName}.bed \
      --call-summits \
      --control ${bg_control.simpleName}.bed \
      --keep-dup all \
      --mfold ${params.macs_mfold[0]} ${params.macs_mfold[1]} \
      --name ${bg_ip.simpleName} \
      --gsize ${params.macs_gsize} 2> \
      ${bg_ip.simpleName}_macs3_report.txt

    if grep -q "ERROR" ${bg_ip.simpleName}_macs3_report.txt; then
      echo "MACS3 error"
      exit 1
    fi
    """
}
// minimap2 container image tag; interpolated into container_url below.
version = "2.17"
// Image reference assigned to `container` by the minimap2 processes in this module.
container_url = "lbmc/minimap2:${version}"
// Build a minimap2 index (.mmi) from a fasta file. Emits the fasta together
// with the index, plus the captured stderr as the report.
process index_fasta {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$fasta.baseName"

  input:
    path fasta

  output:
    tuple path("${fasta}"), path("*.mmi*"), emit: index
    path "*_report.txt", emit: report

  script:
    // Strip the " GB" suffix from task.memory; the number is passed to -I with
    // a "G" unit.
    memory = "${task.memory}" - ~/\s*GB/
    // stderr is redirected so that the declared "*_report.txt" output exists;
    // without it the task fails on a missing output file.
    """
    minimap2 -t ${task.cpus} -I ${memory}G -d ${fasta.baseName}.mmi ${fasta} \
      2> ${fasta.baseName}_minimap2_index_report.txt
    """
}
// Map reads to a fasta with `minimap2 -ax sr` and convert the SAM stream to BAM
// with `samtools view`. `reads` may be a list of two files or a single file.
// Emits the BAM keyed by pair_id, plus minimap2's stderr as the report.
process mapping_fastq {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$pair_id"

  input:
    tuple path(fasta), path(index)
    tuple val(pair_id), path(reads)

  output:
    tuple val(pair_id), path("*.bam"), emit: bam
    path "*_report.txt", emit: report

  script:
    // task.memory renders as e.g. "8 GB": strip the unit, then convert to a
    // number before dividing. Dividing the String itself raises an error.
    memory = ("${task.memory}" - ~/\s*GB/) as BigDecimal
    memory = memory / (task.cpus + 1.0)
    // NOTE(review): -K receives this bare number with no unit suffix, as in the
    // original expression — confirm the intended unit against the minimap2 manual.
    // minimap2's stderr is redirected so the declared "*_report.txt" output exists.
    if (reads instanceof List)
      """
      minimap2 -ax sr -t ${task.cpus} -K ${memory} ${fasta} ${reads[0]} ${reads[1]} \
        2> ${pair_id}_minimap2_mapping_report.txt |
        samtools view -Sb - > ${pair_id}.bam
      """
    else
      """
      minimap2 -ax sr -t ${task.cpus} -K ${memory} ${fasta} ${reads} \
        2> ${pair_id}_minimap2_mapping_report.txt |
        samtools view -Sb - > ${reads.baseName}.bam
      """
}
\ No newline at end of file
// MultiQC container image tag; interpolated into container_url below.
version = "1.9"
// Image reference assigned to `container` by the multiqc process in this module.
container_url = "lbmc/multiqc:${version}"
// Run `multiqc -f .` over the staged report files and publish every
// "*multiqc_*" result to results/QC/.
process multiqc {
  container = "${container_url}"
  publishDir "results/QC/", mode: 'copy'
  label "big_mem_mono_cpus"

  input:
    path report

  output:
    path "*multiqc_*", emit: report

  script:
    """
    multiqc -f .
    """
}
// Picard container image tag; interpolated into container_url below.
version = "2.18.11"
// Image reference assigned to `container` by the picard processes in this module.
container_url = "lbmc/picard:${version}"
// Run Picard MarkDuplicates with REMOVE_DUPLICATES=true on a BAM.
// Emits <bam base name>_dedup.bam and the metrics file as the report; the
// tool's stdout/stderr go to picard_<bam base name>.log.
process mark_duplicate {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(file_id), path(bam)

  output:
    tuple val(file_id) , path("*.bam"), emit: bam
    path "*_report.txt", emit: report

  script:
    """
    PicardCommandLine MarkDuplicates \
      VALIDATION_STRINGENCY=LENIENT \
      REMOVE_DUPLICATES=true \
      INPUT=${bam} \
      OUTPUT=${bam.baseName}_dedup.bam \
      METRICS_FILE=${bam.baseName}_picard_dedup_report.txt &> \
      picard_${bam.baseName}.log
    """
}
// Run Picard CreateSequenceDictionary on a fasta and emit the .dict file.
process index_fasta {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(file_id), path(fasta)

  output:
    tuple val(file_id), path("*.dict"), emit: index

  script:
    """
    PicardCommandLine CreateSequenceDictionary \
      REFERENCE=${fasta} \
      OUTPUT=${fasta.baseName}.dict
    """
}
// Run Picard BuildBamIndex on a BAM and emit whatever files match "*".
process index_bam {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(file_id), path(bam)

  output:
    tuple val(file_id), path("*"), emit: index

  script:
    """
    PicardCommandLine BuildBamIndex \
      INPUT=${bam}
    """
}
// sambamba container image tag; interpolated into container_url below.
version = "0.6.7"
// Image reference assigned to `container` by the sambamba processes in this module.
container_url = "lbmc/sambamba:${version}"
// Run `sambamba index` on a BAM and emit the files matching "*.bam*".
process index_bam {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(bam)

  output:
    tuple val(file_id), path("*.bam*"), emit: bam

  script:
    """
    sambamba index -t ${task.cpus} ${bam}
    """
}
// Run `sambamba sort` on a BAM, writing <bam base name>_sorted.bam, and emit
// the files matching "*.bam*".
process sort_bam {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(bam)

  output:
    tuple val(file_id), path("*.bam*"), emit: bam

  script:
    """
    sambamba sort -t ${task.cpus} -o ${bam.baseName}_sorted.bam ${bam}
    """
}
// Split a BAM by strand with `sambamba view`: reads with strand '+' go to
// <bam base name>_forward.bam and reads with strand '-' to
// <bam base name>_reverse.bam.
process split_bam {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"

  input:
    tuple val(file_id), path(bam)

  output:
    tuple val(file_id), path("*_forward.bam*"), emit: bam_forward
    tuple val(file_id), path("*_reverse.bam*"), emit: bam_reverse

  script:
    // `sambamba view` writes SAM unless an output format is given. `-f bam`
    // makes the content match the .bam extension downstream tools rely on.
    """
    sambamba view -t ${task.cpus} -h -f bam -F "strand == '+'" ${bam} > \
      ${bam.baseName}_forward.bam
    sambamba view -t ${task.cpus} -h -f bam -F "strand == '-'" ${bam} > \
      ${bam.baseName}_reverse.bam
    """
}
// samtools container image tag; interpolated into container_url below.
version = "1.11"
// Image reference assigned to `container` by the samtools processes in this module.
container_url = "lbmc/samtools:${version}"
// Run `samtools faidx` on a fasta and emit the .fai index.
process index_fasta {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(file_id), path(fasta)

  output:
    tuple val(file_id), path("*.fai"), emit: index

  script:
    """
    samtools faidx ${fasta}
    """
}
// Value passed to `samtools view -q` by the filter_bam_quality process.
filter_bam_quality_threshold = 30
// Run `samtools view -q` with the module-level filter_bam_quality_threshold
// and write <bam simple name>_filtered.bam.
process filter_bam_quality {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(bam)

  output:
    tuple val(file_id), path("*_filtered.bam"), emit: bam

  script:
    """
    samtools view -@ ${task.cpus} -hb ${bam} -q ${filter_bam_quality_threshold} > \
      ${bam.simpleName}_filtered.bam
    """
}
// Run `samtools view -L` with the given BED file and write
// <bam simple name>_filtered.bam.
process filter_bam {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(bam)
    path bed

  output:
    tuple val(file_id), path("*_filtered.bam"), emit: bam

  script:
    """
    samtools view -@ ${task.cpus} -hb ${bam} -L ${bed} > \
      ${bam.simpleName}_filtered.bam
    """
}
// Run `samtools view -F 4` on a BAM and write <bam simple name>_mapped.bam.
process filter_bam_mapped {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(bam)

  output:
    tuple val(file_id), path("*_mapped.bam"), emit: bam

  script:
    """
    samtools view -@ ${task.cpus} -F 4 -hb ${bam} > \
      ${bam.simpleName}_mapped.bam
    """
}
// Run `samtools view -f 4` on a BAM and write <bam simple name>_unmapped.bam.
process filter_bam_unmapped {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(bam)

  output:
    tuple val(file_id), path("*_unmapped.bam"), emit: bam

  script:
    """
    samtools view -@ ${task.cpus} -f 4 -hb ${bam} > ${bam.simpleName}_unmapped.bam
    """
}
// Run `samtools index` on a BAM. Emits the input BAM itself as `bam` and the
// generated .bam.bai as `bam_idx`, both keyed by file_id.
process index_bam {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_mono_cpus"

  input:
    tuple val(file_id), path(bam)

  output:
    tuple val(file_id), path(bam), emit: bam
    tuple val(file_id), path("*.bam.bai"), emit: bam_idx

  script:
    """
    samtools index ${bam}
    """
}
// Run `samtools sort` on a BAM, writing <bam simple name>_sorted.bam, and emit
// the files matching "*.bam*".
process sort_bam {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"

  input:
    tuple val(file_id), path(bam)

  output:
    tuple val(file_id), path("*.bam*"), emit: bam

  script:
    """
    samtools sort -@ ${task.cpus} -O BAM -o ${bam.simpleName}_sorted.bam ${bam}
    """
}
// Split a BAM on flag 0x10 with `samtools view`: reads without the flag go to
// <bam simple name>_forward.bam and reads with it to
// <bam simple name>_reverse.bam. The two extractions run concurrently, each
// with half of the task's CPUs.
process split_bam {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"

  input:
    tuple val(file_id), path(bam)

  output:
    tuple val(file_id), path("*_forward.bam*"), emit: bam_forward
    tuple val(file_id), path("*_reverse.bam*"), emit: bam_reverse

  script:
    // The thread option is "-@" ("--@" is not a samtools option).
    // The forward extraction runs in the background, so its PID is recorded and
    // waited for: otherwise the script can end while the forward BAM is still
    // being written, and a failure of that job would go unnoticed.
    """
    samtools view -@ ${Math.round(task.cpus/2)} \
      -hb -F 0x10 ${bam} > ${bam.simpleName}_forward.bam &
    forward_pid=\$!
    samtools view -@ ${Math.round(task.cpus/2)} \
      -hb -f 0x10 ${bam} > ${bam.simpleName}_reverse.bam
    wait \$forward_pid
    """
}
// Merge two BAM files with `samtools merge` into
// <first simple name>_<second simple name>.bam. The result is emitted under
// the first input's id.
process merge_bam {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  // This process has no `file_id` input; tag and output use the first input's id.
  tag "$first_file_id"
  cpus = 2

  input:
    tuple val(first_file_id), path(first_bam)
    tuple val(second_file_id), path(second_bam)

  output:
    tuple val(first_file_id), path("*.bam*"), emit: bam

  script:
    // `samtools merge` takes the OUTPUT file first, followed by the inputs.
    // With the inputs first, the first input would be treated as the output.
    // The second name comes from `second_bam`; `second_file` does not exist.
    """
    samtools merge \
      ${first_bam.simpleName}_${second_bam.simpleName}.bam \
      ${first_bam} ${second_bam}
    """
}
// Merge a list of BAM files with `samtools merge` into
// <first bam simple name>_merged.bam.
process merge_multi_bam {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"
  cpus = 2

  input:
    tuple val(file_id), path(bams)

  output:
    tuple val(file_id), path("*_merged.bam*"), emit: bam

  script:
    """
    samtools merge -@ ${task.cpus} \
      ${bams[0].simpleName}_merged.bam \
      ${bams}
    """
}
// Run `samtools flagstat -O tsv` on a BAM and write
// <bam simple name>_stats.tsv.
process stats_bam {
  container = "${container_url}"
  tag "${file_id}"
  label "big_mem_multi_cpus"
  cpus = 2

  input:
    tuple val(file_id), path(bam)

  output:
    tuple val(file_id), path("*.tsv"), emit: tsv

  script:
    """
    samtools flagstat -@ ${task.cpus} -O tsv ${bam} > ${bam.simpleName}_stats.tsv
    """
}
// Rename a tsv file to <simple name>.flagstat.txt and emit it as `report`.
process flagstat_2_multiqc {
  tag "${file_id}"

  input:
    tuple val(file_id), path(tsv)

  output:
    path "*.txt" , emit: report

  """
  mv ${tsv} ${tsv.simpleName}.flagstat.txt
  """
}
// Rename a tsv file to <simple name>.idxstats.txt and emit it as `report`.
process idxstat_2_multiqc {
  tag "${file_id}"

  input:
    tuple val(file_id), path(tsv)

  output:
    path "*.txt", emit: report

  """
  mv ${tsv} ${tsv.simpleName}.idxstats.txt
  """
}
\ No newline at end of file
// SRA Toolkit container image tag; interpolated into container_url below.
version = "2.8.2"
// Image reference assigned to `container` by the fastq_dump process in this module.
container_url = "lbmc/sratoolkit:${version}"
// Download an SRA accession with `fastq-dump --split-files --gzip` and rename
// the _1/_2 files to _R1/_R2. Emits the gzipped fastq file(s) keyed by the
// accession.
process fastq_dump {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$sra"

  input:
    val sra

  output:
    // --gzip makes fastq-dump write ".fastq.gz" files, so the glob and the
    // renames below must use that extension. A bare "*.fastq" matches nothing.
    tuple val(sra), path("*.fastq.gz"), emit: fastq

  script:
    """
    fastq-dump --split-files --gzip ${sra}
    if [ -f ${sra}_1.fastq.gz ]
    then
      mv ${sra}_1.fastq.gz ${sra}_R1.fastq.gz
    fi
    if [ -f ${sra}_2.fastq.gz ]
    then
      mv ${sra}_2.fastq.gz ${sra}_R2.fastq.gz
    fi
    """
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment