Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • LBMC/RMI2/rmi2_pipelines
  • LBMC/Palladino/RNAseq_nextflow
  • rseraphi/nextflow
  • elabaron/nextflow
  • pberna01/nextflow
  • jblin/nextflow
  • cginevra/nextflow
  • carpin/nextflow
  • cfournea/nextflow
  • dtorresc/nextflow
  • LBMC/nextflow
  • nlecouvr/nextflow-nathan
  • lpicard/nextflow
  • vvanoost/nextflow
  • fmortreu/nextflow
  • hpolvech/nextflow
  • lanani/nextflow
  • mcariou/nextflow
  • fduveau/nextflow
  • jshapiro/nextflow
  • hregue/nextflow
  • yjia01/nextflow
  • acorbin/nextflow
  • ggirau03/nextflow
  • letien02/nextflow
  • ogandril/nextflow
  • jclaud01/nextflow
  • mshamjal/nextflow
  • mprieux/nextflow
  • z483801/nextflow
  • mparis/nextflow
  • alapendr/nextflow
  • cbourgeo/nextflow
  • jvalat/nextflow
  • z483800/nextflow
  • ecombe01/nextflow
  • dchalopi/nextflow
  • mherbett/nextflow
  • jprobin/nextflow
  • lestrada/nextflow
  • gyvert/nextflow
  • nfontrod/nextflow
  • gbenoit/nextflow
  • aguill09/nextflow
  • LBMC/regards/nextflow
  • mvilcot/nextflow
  • jkleine/nextflow
  • jseimand/nextflow
  • LBMC/Delattre/JU28_59vs17_SNP
  • mdjaffar/nextflow
  • pmarie01/nextflow
  • rhoury/nextflow
  • mlepetit/nextflow
  • lgely/nextflow
54 results
Show changes
Showing
with 2707 additions and 0 deletions
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
// Image tag of the g2gtools container; interpolated into container_url below.
version = "0.2.8"
// Image reference handed to the `container` directive of the processes below.
container_url = "lbmc/g2gtools:${version}"
// Extra command-line options inserted verbatim into `g2gtools vcf2vci`.
params.vci_build = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.vci_build_out = ""

// Runs `g2gtools vcf2vci` on the staged VCF file(s) and the reference fasta.
// Emits:
//   vci    - (file_id, *.vci.gz, *.vci.gz.tbi)
//   report - (file_id, *_report.txt) holding the captured stderr of the tool
process vci_build {
  label "big_mem_multi_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.vci_build_out != "") {
    publishDir "results/${params.vci_build_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta)

  output:
    tuple val(file_id), path("*.vci.gz"), path("*.vci.gz.tbi"), emit: vci
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // A list-valued id contributes only its first element to file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  // One " -i <file>" argument per element yielded by iterating `vcf`.
  input_vcf = vcf.collect { vcf_file -> " -i ${vcf_file}" }.join("")
"""
g2gtools vcf2vci \
${params.vci_build} \
-p ${task.cpus} \
-f ${fasta} \
${input_vcf} \
-s ${file_prefix} \
-o ${file_prefix}.vci 2> ${file_prefix}_g2gtools_vcf2vci_report.txt
"""
}
// Extra command-line options inserted verbatim into `g2gtools patch`.
params.incorporate_snp = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.incorporate_snp_out = ""

// Runs `g2gtools patch` on the reference fasta using the given VCI file.
// Emits:
//   fasta  - (file_id, <prefix>_snp.fa, vci, tbi); vci and tbi are passed through
//   report - (file_id, *_report.txt) holding the captured stderr of the tool
process incorporate_snp {
  label "big_mem_multi_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.incorporate_snp_out != "") {
    publishDir "results/${params.incorporate_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vci), path(tbi)
    tuple val(ref_id), path(fasta)

  output:
    tuple val(file_id), path("${file_prefix}_snp.fa"), path("${vci}"), path("${tbi}"), emit: fasta
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // A list-valued id contributes only its first element to file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
"""
g2gtools patch \
${params.incorporate_snp} \
-p ${task.cpus} \
-i ${fasta} \
-c ${vci} \
-o ${file_prefix}_snp.fa 2> ${file_prefix}_g2gtools_path_report.txt
"""
}
// Extra command-line options inserted verbatim into `g2gtools transform`.
params.incorporate_indel = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.incorporate_indel_out = ""

// Runs `g2gtools transform` on a fasta using the given VCI file.
// Emits:
//   fasta  - (file_id, <prefix>_snp_indel.fa, vci, tbi); vci and tbi are passed through
//   report - (file_id, *_report.txt) holding the captured stderr of the tool
process incorporate_indel {
  label "big_mem_multi_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.incorporate_indel_out != "") {
    publishDir "results/${params.incorporate_indel_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(fasta), path(vci), path(tbi)

  output:
    tuple val(file_id), path("${file_prefix}_snp_indel.fa"), path("${vci}"), path("${tbi}"), emit: fasta
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // A list-valued id contributes only its first element to file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
"""
g2gtools transform \
${params.incorporate_indel} \
-p ${task.cpus} \
-i ${fasta} \
-c ${vci} \
-o ${file_prefix}_snp_indel.fa 2> ${file_prefix}_g2gtools_transform_report.txt
"""
}
// Extra command-line options inserted verbatim into `g2gtools convert`.
params.convert_gtf = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.convert_gtf_out = ""

// Runs `g2gtools convert` on a GTF annotation using the given VCI file.
// Emits:
//   gtf    - (file_id, <prefix>.gtf)
//   report - (file_id, *_report.txt) holding the captured stderr of the tool
process convert_gtf {
  label "big_mem_mono_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.convert_gtf_out != "") {
    publishDir "results/${params.convert_gtf_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vci), path(tbi)
    tuple val(annot_id), path(gtf)

  output:
    tuple val(file_id), path("${file_prefix}.gtf"), emit: gtf
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // A list-valued id contributes only its first element to file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
"""
g2gtools convert \
${params.convert_gtf} \
-i ${gtf} \
-c ${vci} \
-o ${file_prefix}.gtf 2> ${file_prefix}_g2gtools_convert_report.txt
"""
}
// Extra command-line options inserted verbatim into `g2gtools convert`.
params.convert_bed = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.convert_bed_out = ""

// Runs `g2gtools convert` on a BED file using the given VCI file.
// Emits:
//   bed    - (file_id, <prefix>.bed)
//   report - (file_id, *_report.txt) holding the captured stderr of the tool
process convert_bed {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  if (params.convert_bed_out != "") {
    publishDir "results/${params.convert_bed_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vci), path(tbi)
    tuple val(annot_id), path(bed)

  output:
    // Uses file_prefix (not the raw file_id) so that a list-valued id does not
    // render as "[a, b].bed"; for scalar ids the name is unchanged.
    tuple val(file_id), path("${file_prefix}.bed"), emit: bed
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // A list-valued id contributes only its first element to file names.
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
"""
g2gtools convert \
${params.convert_bed} \
-i ${bed} \
-c ${vci} \
-o ${file_prefix}.bed 2> ${file_prefix}_g2gtools_convert_report.txt
"""
}
// Extra command-line options inserted verbatim into `g2gtools convert`.
params.convert_bam = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.convert_bam_out = ""

// Runs `g2gtools convert` on a BAM file using the given VCI file.
// Emits:
//   bam    - (file_id, <prefix>_<bam base name>.bam)
//   report - (file_id, *_report.txt) holding the captured stderr of the tool
process convert_bam {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${bam_id} ${file_id}"
  if (params.convert_bam_out != "") {
    publishDir "results/${params.convert_bam_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vci), path(tbi)
    tuple val(bam_id), path(bam)

  output:
    // Must match the name written by the command below, which is derived from
    // the staged `bam` path (bam_id is a plain value, not a path).
    tuple val(file_id), path("${file_prefix}_${bam.baseName}.bam"), emit: bam
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // A list-valued id contributes only its first element to file names.
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
"""
g2gtools convert \
${params.convert_bam} \
-i ${bam} \
-c ${vci} \
-o ${file_prefix}_${bam.baseName}.bam 2> ${file_prefix}_g2gtools_convert_report.txt
"""
}
\ No newline at end of file
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
// Image tag of the GATK3 container; interpolated into container_url below.
version = "3.8.0"
// Image reference handed to the `container` directive of the processes below.
container_url = "lbmc/gatk:${version}"
// Extra command-line options inserted verbatim into the HaplotypeCaller call.
params.variant_calling = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.variant_calling_out = ""

// Runs `gatk3 -T HaplotypeCaller` on a BAM file against the reference fasta.
// Emits:
//   vcf - (file_id, *.vcf)
process variant_calling {
  label "big_mem_multi_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.variant_calling_out != "") {
    publishDir "results/${params.variant_calling_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam), path(bai)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.vcf"), emit: vcf

  script:
  // A list-valued id contributes only its first element to file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
"""
gatk3 -T HaplotypeCaller \
-nct ${task.cpus} \
${params.variant_calling} \
-R ${fasta} \
-I ${bam} \
-o ${file_prefix}.vcf
"""
}
// Extra command-line options inserted verbatim into the SelectVariants call.
params.filter_snp = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.filter_snp_out = ""

// Runs `gatk3 -T SelectVariants -selectType SNP` on a VCF file.
// Emits:
//   vcf - (file_id, *_snp.vcf)
process filter_snp {
  label "big_mem_multi_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.filter_snp_out != "") {
    publishDir "results/${params.filter_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_snp.vcf"), emit: vcf

  script:
  // A list-valued id contributes only its first element to file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
"""
gatk3 -T SelectVariants \
-nct ${task.cpus} \
${params.filter_snp} \
-R ${fasta} \
-V ${vcf} \
-selectType SNP \
-o ${file_prefix}_snp.vcf
"""
}
// Extra command-line options inserted verbatim into the SelectVariants call.
params.filter_indels = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.filter_indels_out = ""

// Runs `gatk3 -T SelectVariants -selectType INDEL` on a VCF file.
// Emits:
//   vcf - (file_id, *_indel.vcf)
process filter_indels {
  label "big_mem_multi_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.filter_indels_out != "") {
    publishDir "results/${params.filter_indels_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_indel.vcf"), emit: vcf

  script:
  // A list-valued id contributes only its first element to file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
"""
gatk3 -T SelectVariants \
-nct ${task.cpus} \
${params.filter_indels} \
-R ${fasta} \
-V ${vcf} \
-selectType INDEL \
-o ${file_prefix}_indel.vcf
"""
}
// Filter expression passed to VariantFiltration through params.high_confidence_snp.
params.high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)"
// Full option string inserted verbatim into the VariantFiltration call.
params.high_confidence_snp = "--filterExpression \"${params.high_confidence_snp_filter}\" --filterName \"basic_snp_filter\""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.high_confidence_snp_out = ""

// Runs `gatk3 -T VariantFiltration` on a VCF file with the SNP filter options.
// Emits:
//   vcf - (file_id, *_snp.vcf); the command writes <prefix>_filtered_snp.vcf
process high_confidence_snp {
  label "big_mem_multi_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.high_confidence_snp_out != "") {
    publishDir "results/${params.high_confidence_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_snp.vcf"), emit: vcf

  script:
  // A list-valued id contributes only its first element to file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
"""
gatk3 -T VariantFiltration \
-nct ${task.cpus} \
-R ${fasta} \
-V ${vcf} \
${params.high_confidence_snp} \
-o ${file_prefix}_filtered_snp.vcf
"""
}
// Filter expression passed to VariantFiltration through params.high_confidence_indels.
params.high_confidence_indel_filter = "QD < 3.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0"
// Full option string inserted verbatim into the VariantFiltration call.
params.high_confidence_indels = "--filterExpression \"${params.high_confidence_indel_filter}\" --filterName \"basic_indel_filter\""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.high_confidence_indels_out = ""

// Runs `gatk3 -T VariantFiltration` on a VCF file with the indel filter options.
// Emits:
//   vcf - (file_id, *_indel.vcf); the command writes <prefix>_filtered_indel.vcf
process high_confidence_indels {
  label "big_mem_multi_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.high_confidence_indels_out != "") {
    publishDir "results/${params.high_confidence_indels_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_indel.vcf"), emit: vcf

  script:
  // A list-valued id contributes only its first element to file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
"""
gatk3 -T VariantFiltration \
-nct ${task.cpus} \
-R ${fasta} \
-V ${vcf} \
${params.high_confidence_indels} \
-o ${file_prefix}_filtered_indel.vcf
"""
}
// Extra command-line options inserted verbatim into the BaseRecalibrator call.
params.recalibrate_snp_table = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.recalibrate_snp_table_out = ""

// Runs `gatk3 -T BaseRecalibrator` on a BAM file, passing the SNP and indel
// files as -knownSites.
// Emits:
//   recal_table - (file_id, recal_data_table)
process recalibrate_snp_table {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.recalibrate_snp_table_out != "") {
    publishDir "results/${params.recalibrate_snp_table_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("recal_data_table"), emit: recal_table

  script:
  // The user options live under `params`; a bare `recalibrate_snp_table`
  // is not a variable in this scope.
"""
gatk3 -T BaseRecalibrator \
-nct ${task.cpus} \
${params.recalibrate_snp_table} \
-R ${fasta} \
-I ${bam} \
-knownSites ${snp_file} \
-knownSites ${indel_file} \
-o recal_data_table
"""
}
// Extra command-line options inserted verbatim into the PrintReads call.
params.recalibrate_snp = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.recalibrate_snp_out = ""

// Runs `gatk3 -T PrintReads` on a BAM file with the given recalibration table
// passed as -BQSR.
// Emits:
//   bam - (file_id, *.bam); the command writes <prefix>_recal.bam
process recalibrate_snp {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.recalibrate_snp_out != "") {
    publishDir "results/${params.recalibrate_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx)
    tuple val(table_id), path(recal_data_table)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.bam"), emit: bam

  script:
  // A list-valued id contributes only its first element to file names.
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
  // The user options live under `params`, and the table is referenced through
  // its staged input path so any upstream file name works.
"""
gatk3 -T PrintReads \
--use_jdk_deflater \
--use_jdk_inflater \
${params.recalibrate_snp} \
-nct ${task.cpus} \
-R ${fasta} \
-I ${bam} \
-BQSR ${recal_data_table} \
-o ${file_prefix}_recal.bam
"""
}
// Extra command-line options inserted verbatim into the HaplotypeCaller call.
params.haplotype_caller = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.haplotype_caller_out = ""

// Runs `gatk3 -T HaplotypeCaller -ERC GVCF` on a BAM file.
// Emits:
//   gvcf - (file_id, *.gvcf)
process haplotype_caller {
  label "big_mem_multi_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.haplotype_caller_out != "") {
    publishDir "results/${params.haplotype_caller_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.gvcf"), emit: gvcf

  script:
  // A list-valued id contributes only its first element to file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
"""
gatk3 -T HaplotypeCaller \
-nct ${task.cpus} \
${params.haplotype_caller} \
-R ${fasta} \
-I ${bam} \
-ERC GVCF \
-variant_index_type LINEAR -variant_index_parameter 128000 \
-o ${file_prefix}.gvcf
"""
}
// Extra command-line options inserted verbatim into the GenotypeGVCFs call.
params.gvcf_genotyping = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.gvcf_genotyping_out = ""

// Runs `gatk3 -T GenotypeGVCFs` on a GVCF file.
// Emits:
//   vcf - (file_id, *.vcf); the command writes <prefix>_joint.vcf
process gvcf_genotyping {
  label "big_mem_multi_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.gvcf_genotyping_out != "") {
    publishDir "results/${params.gvcf_genotyping_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(gvcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.vcf"), emit: vcf

  script:
  // A list-valued id contributes only its first element to file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
"""
gatk3 -T GenotypeGVCFs \
-nct ${task.cpus} \
${params.gvcf_genotyping} \
-R ${fasta} \
-V ${gvcf} \
-o ${file_prefix}_joint.vcf
"""
}
// Extra command-line options inserted verbatim into the SelectVariants call.
params.select_variants_snp = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.select_variants_snp_out = ""

// Runs `gatk3 -T SelectVariants -selectType SNP` on a VCF file.
// Emits:
//   vcf - (file_id, *_joint_snp.vcf)
process select_variants_snp {
  label "big_mem_multi_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.select_variants_snp_out != "") {
    publishDir "results/${params.select_variants_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_joint_snp.vcf"), emit: vcf

  script:
  // A list-valued id contributes only its first element to file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
"""
gatk3 -T SelectVariants \
-nct ${task.cpus} \
${params.select_variants_snp} \
-R ${fasta} \
-V ${vcf} \
-selectType SNP \
-o ${file_prefix}_joint_snp.vcf
"""
}
// Extra command-line options inserted verbatim into the SelectVariants call.
params.select_variants_indels = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.select_variants_indels_out = ""

// Runs `gatk3 -T SelectVariants -selectType INDEL` on a VCF file.
// Emits:
//   vcf - (file_id, *_joint_indel.vcf)
process select_variants_indels {
  label "big_mem_multi_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.select_variants_indels_out != "") {
    publishDir "results/${params.select_variants_indels_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_joint_indel.vcf"), emit: vcf

  script:
  // A list-valued id contributes only its first element to file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
"""
gatk3 -T SelectVariants \
-nct ${task.cpus} \
${params.select_variants_indels} \
-R ${fasta} \
-V ${vcf} \
-selectType INDEL \
-o ${file_prefix}_joint_indel.vcf
"""
}
// Extra command-line options inserted verbatim into the FastaAlternateReferenceMaker call.
params.personalized_genome = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.personalized_genome_out = ""

// Runs `gatk3 -T FastaAlternateReferenceMaker` with the reference fasta and a VCF file.
// Emits:
//   fasta - (file_id, *_genome.fasta)
process personalized_genome {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  if (params.personalized_genome_out != "") {
    publishDir "results/${params.personalized_genome_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_genome.fasta"), emit: fasta

  script:
  // A list-valued id contributes only its first element to file names.
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
  // -R takes the staged `fasta` input (no `reference` variable exists here).
  // The space before the first continuation backslash keeps the tool name
  // separated from the user options once the lines are joined.
"""
gatk3 -T FastaAlternateReferenceMaker \
${params.personalized_genome} \
-R ${fasta} \
-V ${vcf} \
-o ${file_prefix}_genome.fasta
"""
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
// Image tag of the GATK4 container; interpolated into container_url below.
version = "4.2.0.0"
// Image reference handed to the `container` directive of the processes below.
container_url = "broadinstitute/gatk:${version}"
// Derives the file-name prefix from a sample identifier.
//   - List: the first element is returned.
//   - Map with a 'library' key: the value stored under 'id' is returned.
//   - Map without a 'library' key: the value stored under key 0 is returned
//     (null when there is no such key).
//   - anything else: the identifier itself is returned.
// The function is side-effect free: no undeclared variable is assigned, so
// nothing is written to the script binding.
def get_file_prefix(file_id) {
  if (file_id instanceof List) {
    return file_id[0]
  }
  if (file_id instanceof Map) {
    return file_id.containsKey('library') ? file_id.id : file_id[0]
  }
  return file_id
}
include {
index_fasta as samtools_index_fasta;
index_bam;
} from './../samtools/main.nf'
include {
index_fasta as picard_index_fasta;
index_bam as picard_index_bam;
mark_duplicate;
} from './../picard/main.nf'
// Sub-directory of results/ used by the publishing processes of this module;
// publishing is skipped when empty.
params.variant_calling_out = ""

// Marks duplicates, indexes the BAM and fasta inputs with both samtools and
// picard, then chains per-sample calling into the all-sample calling workflow.
// take:
//   bam   - channel fed to mark_duplicate
//   fasta - channel fed to the two fasta indexing processes
// emit:
//   vcf   - output of call_variants_all_sample
workflow germline_cohort_data_variant_calling {
  take:
    bam
    fasta

  main:
    // data preparation: BAM side
    mark_duplicate(bam)
    index_bam(mark_duplicate.out.bam)
    picard_index_bam(mark_duplicate.out.bam)
    bam_idx = index_bam.out.bam_idx.join(picard_index_bam.out.index)

    // data preparation: reference side
    picard_index_fasta(fasta)
    samtools_index_fasta(fasta)
    fasta_idx = fasta.join(picard_index_fasta.out.index).join(samtools_index_fasta.out.index)

    // variant calling
    call_variants_per_sample(
      bam_idx,
      fasta_idx.collect()
    )
    call_variants_all_sample(
      call_variants_per_sample.out.gvcf,
      fasta_idx
    )

  emit:
    vcf = call_variants_all_sample.out.vcf
}
/*******************************************************************/
// Indexes the VCF input, computes a base recalibration table from it and
// applies that table to the BAM input.
// take:
//   bam_idx   - channel passed to both recalibration processes
//   fasta_idx - channel passed to both recalibration processes
//   vcf       - channel fed to index_vcf
// emit:
//   bam       - output of apply_base_recalibration
workflow base_quality_recalibrator{
  take:
    bam_idx
    fasta_idx
    vcf

  main:
    index_vcf(vcf)
    known_sites = index_vcf.out.vcf_idx

    compute_base_recalibration(bam_idx, fasta_idx, known_sites)
    recalibration_table = compute_base_recalibration.out.table

    apply_base_recalibration(bam_idx, fasta_idx, recalibration_table)

  emit:
    bam = apply_base_recalibration.out.bam
}
// Runs `gatk IndexFeatureFile` on a VCF file.
// Emits:
//   vcf_idx - (file_id, the input vcf, every file matched by "*")
process index_vcf {
  label "big_mem_mono_cpus"
  tag "$file_id"
  container = "${container_url}"

  input:
    tuple val(file_id), path(vcf)

  output:
    tuple val(file_id), path("${vcf}"), path("*"), emit: vcf_idx

  script:
  file_prefix = get_file_prefix(file_id)
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
"""
gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \
-I ${vcf}
"""
}
// Runs `gatk BaseRecalibrator` on a BAM file, passing each VCF as --known-sites.
// Emits:
//   table - (file_id, <bam simple name>.table)
process compute_base_recalibration {
  label "big_mem_mono_cpus"
  tag "$file_id"
  container = "${container_url}"

  input:
    tuple val(file_id), path(bam), path(bam_idx), path(bam_idx_bis)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
    tuple val(vcf_id), path(vcf), path(vcf_idx)

  output:
    tuple val(file_id), path("${bam.simpleName}.table"), emit: table

  script:
  file_prefix = get_file_prefix(file_id)
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
  // One "--known-sites <file> " argument per VCF, whether one or several are staged.
  def vcf_cmd = (vcf instanceof List)
    ? vcf.collect { known -> "--known-sites ${known} " }.join("")
    : "--known-sites ${vcf} "
"""
gatk --java-options "-Xmx${xmx_memory}G" BaseRecalibrator \
-I ${bam} \
-R ${fasta} \
${vcf_cmd} \
-O ${bam.simpleName}.table
"""
}
// Runs `gatk ApplyBQSR` on a BAM file with the given recalibration table.
// Emits:
//   bam - (file_id, <bam simple name>_recalibrate.bam)
process apply_base_recalibration {
  label "big_mem_mono_cpus"
  tag "$file_id"
  container = "${container_url}"

  input:
    tuple val(file_id), path(bam), path(bam_idx), path(bam_idx_bis)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
    tuple val(table_id), path(table)

  output:
    tuple val(file_id), path("${bam.simpleName}_recalibrate.bam"), emit: bam

  script:
  file_prefix = get_file_prefix(file_id)
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
"""
gatk --java-options "-Xmx${xmx_memory}G" ApplyBQSR \
-R ${fasta} \
-I ${bam} \
--bqsr-recal-file ${table} \
-O ${bam.simpleName}_recalibrate.bam
"""
}
/*******************************************************************/
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.variant_calling_gvcf_out = ""

// Runs `gatk HaplotypeCaller -ERC GVCF` on a BAM file.
// Emits:
//   gvcf - (file_id, <bam simple name>.gvcf.gz)
process call_variants_per_sample {
  label "big_mem_mono_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.variant_calling_gvcf_out != "") {
    publishDir "results/${params.variant_calling_gvcf_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam), path(bam_idx), path(bam_idx_bis)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("${bam.simpleName}.gvcf.gz"), emit: gvcf

  script:
  file_prefix = get_file_prefix(file_id)
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
"""
gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \
-R ${fasta} \
-I ${bam} \
-O ${bam.simpleName}.gvcf.gz \
-ERC GVCF
"""
}
/*******************************************************************/
// Indexes and validates each GVCF, groups the validated files by key,
// consolidates each group and genotypes the consolidated GVCF.
// take:
//   gvcf      - channel fed to index_gvcf
//   fasta_idx - channel collected before being passed to each process
// emit:
//   vcf       - output of genomic_db_call
workflow call_variants_all_sample {
  take:
    gvcf
    fasta_idx

  main:
    index_gvcf(gvcf)
    validate_gvcf(index_gvcf.out.gvcf_idx, fasta_idx.collect())

    grouped_gvcf = validate_gvcf.out.gvcf.groupTuple()
    consolidate_gvcf(grouped_gvcf, fasta_idx.collect())

    genomic_db_call(consolidate_gvcf.out.gvcf_idx, fasta_idx.collect())

  emit:
    vcf = genomic_db_call.out.vcf
}
// Runs `gatk IndexFeatureFile` on a GVCF file.
// Emits:
//   gvcf_idx - (file_id, the input gvcf, <gvcf>.tbi)
//   report   - (file_id, <gvcf simple name>_IndexFeatureFile_report.txt)
//              holding the captured stderr of the tool
process index_gvcf {
  label "big_mem_mono_cpus"
  tag "$file_id"
  container = "${container_url}"

  input:
    tuple val(file_id), path(gvcf)

  output:
    tuple val(file_id), path("${gvcf}"), path("${gvcf}.tbi"), emit: gvcf_idx
    tuple val(file_id), path("${gvcf.simpleName}_IndexFeatureFile_report.txt"), emit: report

  script:
  file_prefix = get_file_prefix(file_id)
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
"""
gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \
-I ${gvcf} 2> ${gvcf.simpleName}_IndexFeatureFile_report.txt
"""
}
// Runs `gatk ValidateVariants -gvcf` on a GVCF file; the input files are
// re-emitted unchanged.
// Emits:
//   gvcf - (file_id, the input gvcf, the input index)
process validate_gvcf {
  label "big_mem_mono_cpus"
  tag "$file_id"
  container = "${container_url}"

  input:
    tuple val(file_id), path(gvcf), path(gvcf_idx)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("${gvcf}"), path("${gvcf_idx}"), emit: gvcf

  script:
  file_prefix = get_file_prefix(file_id)
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
"""
gatk --java-options "-Xmx${xmx_memory}G" ValidateVariants \
-V ${gvcf} \
-R ${fasta} -gvcf
"""
}
// Runs `gatk CombineGVCFs` over the staged GVCF file(s), then
// `gatk IndexFeatureFile` on the combined file.
// Emits:
//   gvcf_idx - (file_id, <prefix>.gvcf, <prefix>.gvcf.idx)
//   report   - (file_id, <prefix>_CombineGVCFs_report.txt) holding the
//              captured stderr of CombineGVCFs
process consolidate_gvcf {
  label "big_mem_mono_cpus"
  tag "$file_id"
  container = "${container_url}"

  input:
    tuple val(file_id), path(gvcf), path(gvcf_idx)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("${file_prefix}.gvcf"), path("${file_prefix}.gvcf.idx"), emit: gvcf_idx
    tuple val(file_id), path("${file_prefix}_CombineGVCFs_report.txt"), emit: report

  script:
  file_prefix = get_file_prefix(file_id)
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
  // One "-V <file> " argument per GVCF, whether one or several are staged.
  def gvcf_cmd = (gvcf instanceof List)
    ? gvcf.collect { single -> "-V ${single} " }.join("")
    : "-V ${gvcf} "
"""
mkdir tmp
gatk --java-options "-Xmx${xmx_memory}G" CombineGVCFs \
${gvcf_cmd} \
-R ${fasta} \
-O ${file_prefix}.gvcf 2> ${file_prefix}_CombineGVCFs_report.txt
gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \
-I ${file_prefix}.gvcf 2> ${file_prefix}_IndexFeatureFile_report.txt
"""
}
// Runs `gatk GenotypeGVCFs` on a GVCF file, using ./tmp as the tool's
// temporary directory.
// Emits:
//   vcf - (file_id, <gvcf simple name>.vcf.gz)
process genomic_db_call {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  if (params.variant_calling_out != "") {
    publishDir "results/${params.variant_calling_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(gvcf), path(gvcf_idx)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("${gvcf.simpleName}.vcf.gz"), emit: vcf

  script:
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
  file_prefix = get_file_prefix(file_id)
  // The command passes `gvcf` directly as the single -V argument; the former
  // per-file "--V" argument string was never interpolated and has been dropped.
"""
mkdir tmp
gatk --java-options "-Xmx${xmx_memory}G" GenotypeGVCFs \
-R ${fasta} \
-V ${gvcf} \
-O ${gvcf.simpleName}.vcf.gz \
--tmp-dir ./tmp
"""
}
/*******************************************************************/
// Extra command-line options inserted verbatim into the HaplotypeCaller call.
params.variant_calling = ""

// Runs `gatk HaplotypeCaller` on a BAM file against the reference fasta.
// Emits:
//   vcf - (file_id, *.vcf); the command writes <bam simple name>.vcf
process variant_calling {
  label "big_mem_mono_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.variant_calling_out != "") {
    publishDir "results/${params.variant_calling_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam), path(bai)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.vcf"), emit: vcf

  script:
  file_prefix = get_file_prefix(file_id)
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
"""
gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \
${params.variant_calling} \
-R ${fasta} \
-I ${bam} \
-O ${bam.simpleName}.vcf
"""
}
// Extra command-line options inserted verbatim into the SelectVariants call.
params.filter_snp = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.filter_snp_out = ""

// Runs `gatk SelectVariants -select-type SNP` on a VCF file.
// Emits:
//   vcf - (file_id, *_snp.vcf); the command writes <vcf simple name>_snp.vcf
process filter_snp {
  label "big_mem_mono_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.filter_snp_out != "") {
    publishDir "results/${params.filter_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_snp.vcf"), emit: vcf

  script:
  file_prefix = get_file_prefix(file_id)
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
"""
gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \
${params.filter_snp} \
-R ${fasta} \
-V ${vcf} \
-select-type SNP \
-O ${vcf.simpleName}_snp.vcf
"""
}
// Extra command-line options inserted verbatim into the SelectVariants call.
params.filter_indels = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.filter_indels_out = ""

// Runs `gatk SelectVariants -select-type INDEL` on a VCF file.
// Emits:
//   vcf - (file_id, *_indel.vcf); the command writes <vcf simple name>_indel.vcf
process filter_indels {
  label "big_mem_mono_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.filter_indels_out != "") {
    publishDir "results/${params.filter_indels_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_indel.vcf"), emit: vcf

  script:
  file_prefix = get_file_prefix(file_id)
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
"""
gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \
${params.filter_indels} \
-R ${fasta} \
-V ${vcf} \
-select-type INDEL \
-O ${vcf.simpleName}_indel.vcf
"""
}
// Filter expression passed to VariantFiltration through params.high_confidence_snp.
params.high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)"
// Full option string inserted verbatim into the VariantFiltration call.
params.high_confidence_snp = "--filter-expression \"${params.high_confidence_snp_filter}\" --filter-name \"basic_snp_filter\""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.high_confidence_snp_out = ""

// Runs `gatk VariantFiltration` on a VCF file with the SNP filter options.
// Emits:
//   vcf - (file_id, *_snp.vcf); the command writes <vcf simple name>_filtered_snp.vcf
process high_confidence_snp {
  label "big_mem_mono_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.high_confidence_snp_out != "") {
    publishDir "results/${params.high_confidence_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_snp.vcf"), emit: vcf

  script:
  file_prefix = get_file_prefix(file_id)
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
"""
gatk --java-options "-Xmx${xmx_memory}G" VariantFiltration \
-R ${fasta} \
-V ${vcf} \
${params.high_confidence_snp} \
-O ${vcf.simpleName}_filtered_snp.vcf
"""
}
// Filter expression passed to VariantFiltration through params.high_confidence_indels.
params.high_confidence_indel_filter = "QD < 3.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0"
// Full option string inserted verbatim into the VariantFiltration call.
params.high_confidence_indels = "--filter-expression \"${params.high_confidence_indel_filter}\" --filter-name \"basic_indel_filter\""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.high_confidence_indels_out = ""

// Runs `gatk VariantFiltration` on a VCF file with the indel filter options.
// Emits:
//   vcf - (file_id, *_indel.vcf); the command writes <vcf simple name>_filtered_indel.vcf
process high_confidence_indels {
  label "big_mem_mono_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.high_confidence_indels_out != "") {
    publishDir "results/${params.high_confidence_indels_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_indel.vcf"), emit: vcf

  script:
  file_prefix = get_file_prefix(file_id)
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
"""
gatk --java-options "-Xmx${xmx_memory}G" VariantFiltration \
-R ${fasta} \
-V ${vcf} \
${params.high_confidence_indels} \
-O ${vcf.simpleName}_filtered_indel.vcf
"""
}
// Extra command-line options inserted verbatim into the BaseRecalibrator call.
params.recalibrate_snp_table = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.recalibrate_snp_table_out = ""

// Indexes the SNP and indel files with `gatk IndexFeatureFile`, then runs
// `gatk BaseRecalibrator` on the BAM file with both passed as known sites.
// Emits:
//   recal_table - (file_id, recal_data_table)
process recalibrate_snp_table {
  label "big_mem_mono_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.recalibrate_snp_table_out != "") {
    publishDir "results/${params.recalibrate_snp_table_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx), path(bam_idx_bis)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("recal_data_table"), emit: recal_table

  script:
  file_prefix = get_file_prefix(file_id)
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
"""
gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \
-I ${snp_file}
gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \
-I ${indel_file}
gatk --java-options "-Xmx${xmx_memory}G" BaseRecalibrator \
${params.recalibrate_snp_table} \
-R ${fasta} \
-I ${bam} \
-known-sites ${snp_file} \
-known-sites ${indel_file} \
-O recal_data_table
"""
}
// Extra command-line options inserted verbatim into the ApplyBQSR call.
params.recalibrate_snp = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.recalibrate_snp_out = ""

// Runs `gatk ApplyBQSR` on a BAM file with the staged recalibration table.
// Emits:
//   bam - (file_id, *.bam); the command writes <bam simple name>_recal.bam
process recalibrate_snp {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  if (params.recalibrate_snp_out != "") {
    publishDir "results/${params.recalibrate_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx), path(recal_table)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.bam"), emit: bam

  script:
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
  file_prefix = get_file_prefix(file_id)
  // The table is referenced through its staged input path rather than a
  // hard-coded file name, so any upstream file name works.
"""
gatk --java-options "-Xmx${xmx_memory}G" ApplyBQSR \
${params.recalibrate_snp} \
-R ${fasta} \
-I ${bam} \
--bqsr-recal-file ${recal_table} \
-O ${bam.simpleName}_recal.bam
"""
}
// Extra command-line options inserted verbatim into the HaplotypeCaller call.
params.haplotype_caller = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.haplotype_caller_out = ""

// Runs `gatk HaplotypeCaller -ERC GVCF` on a BAM file.
// Emits:
//   gvcf - (file_id, *.gvcf); the command writes <bam simple name>.gvcf
process haplotype_caller {
  label "big_mem_mono_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.haplotype_caller_out != "") {
    publishDir "results/${params.haplotype_caller_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.gvcf"), emit: gvcf

  script:
  file_prefix = get_file_prefix(file_id)
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
"""
gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \
${params.haplotype_caller} \
-R ${fasta} \
-I ${bam} \
-ERC GVCF \
-O ${bam.simpleName}.gvcf
"""
}
// Extra command-line options inserted verbatim into the GenotypeGVCFs call.
params.gvcf_genotyping = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.gvcf_genotyping_out = ""

// Runs `gatk GenotypeGVCFs` on a GVCF file.
// Emits:
//   vcf - (file_id, *.vcf.gz); the command writes <gvcf simple name>_joint.vcf.gz
process gvcf_genotyping {
  label "big_mem_mono_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.gvcf_genotyping_out != "") {
    publishDir "results/${params.gvcf_genotyping_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(gvcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.vcf.gz"), emit: vcf

  script:
  file_prefix = get_file_prefix(file_id)
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
"""
gatk --java-options "-Xmx${xmx_memory}G" GenotypeGVCFs \
${params.gvcf_genotyping} \
-R ${fasta} \
-V ${gvcf} \
-O ${gvcf.simpleName}_joint.vcf.gz
"""
}
// Extra command-line options inserted verbatim into the SelectVariants call.
params.select_variants_snp = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.select_variants_snp_out = ""

// Runs `gatk SelectVariants -select-type SNP` on a VCF file.
// Emits:
//   vcf - (file_id, *_joint_snp.vcf); the command writes <vcf simple name>_joint_snp.vcf
process select_variants_snp {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  if (params.select_variants_snp_out != "") {
    publishDir "results/${params.select_variants_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_joint_snp.vcf"), emit: vcf

  script:
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends a single "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
  file_prefix = get_file_prefix(file_id)
"""
gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \
${params.select_variants_snp} \
-R ${fasta} \
-V ${vcf} \
-select-type SNP \
-O ${vcf.simpleName}_joint_snp.vcf
"""
}
// Extra command-line options inserted verbatim into the SelectVariants call.
params.select_variants_indels = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.select_variants_indels_out = ""

// Runs `gatk SelectVariants -select-type INDEL` on a VCF file.
// Emits:
//   vcf - (file_id, *_joint_indel.vcf); the command writes <prefix>_joint_indel.vcf
process select_variants_indels {
  label "big_mem_mono_cpus"
  tag "$file_id"
  container = "${container_url}"
  if (params.select_variants_indels_out != "") {
    publishDir "results/${params.select_variants_indels_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_joint_indel.vcf"), emit: vcf

  script:
  // Unlike the sibling processes, the output name here is built from the
  // sample prefix rather than from the VCF's simple name.
  file_prefix = get_file_prefix(file_id)
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
"""
gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \
${params.select_variants_indels} \
-R ${fasta} \
-V ${vcf} \
-select-type INDEL \
-O ${file_prefix}_joint_indel.vcf
"""
}
// Extra command-line options inserted verbatim into the FastaAlternateReferenceMaker call.
params.personalized_genome = ""
// Sub-directory of results/ to publish into; publishing is skipped when empty.
params.personalized_genome_out = ""

// Runs `gatk FastaAlternateReferenceMaker` with the reference fasta and a VCF file.
// Emits:
//   fasta - (file_id, *_genome.fasta); the command writes <vcf simple name>_genome.fasta
process personalized_genome {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  if (params.personalized_genome_out != "") {
    publishDir "results/${params.personalized_genome_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_genome.fasta"), emit: fasta

  script:
  // Removes the first "<optional whitespace>GB" from the rendered task.memory;
  // the command re-appends "G" to build the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
  file_prefix = get_file_prefix(file_id)
  // -R takes the staged `fasta` input (no `reference` variable exists here).
  // The space before the first continuation backslash keeps the tool name
  // separated from the user options once the lines are joined.
"""
gatk --java-options "-Xmx${xmx_memory}G" FastaAlternateReferenceMaker \
${params.personalized_genome} \
-R ${fasta} \
-V ${vcf} \
-O ${vcf.simpleName}_genome.fasta
"""
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "0.12.2"
container_url = "lbmc/gffread:${version}"

// Declared for the gffread process; not referenced by the command below.
params.gffread = ""
// Sub-directory of results/ to publish into; nothing is published when empty.
params.gffread_out = ""

// Extract sequences with `gffread -M -x` from an annotation and a genome, then
// keep one record per distinct sequence line.
//   input : (file_id, gtf) and (fasta_id, fasta)
//   output: fasta -> (fasta_id, <file_prefix>.fasta)
process gffread {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_prefix}"
  if (params.gffread_out != "") {
    publishDir "results/${params.gffread_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(gtf)
    tuple val(fasta_id), path(fasta)

  output:
    tuple val(fasta_id), path("${file_prefix}.fasta"), emit: fasta

  script:
  // A list id contributes its first element as the file prefix.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  // The awk program remembers each sequence line it has printed and restores
  // the previous header when a sequence repeats, so duplicates are dropped;
  // grep then removes empty lines.
  """
  gffread ${gtf} -g ${fasta} -M -x dup_${file_prefix}.fasta
  awk 'BEGIN {i = 1;} { if (\$1 ~ /^>/) { tmp = h[i]; h[i] = \$1; } else if (!a[\$1]) { s[i] = \$1; a[\$1] = "1"; i++; } else { h[i] = tmp; } } END { for (j = 1; j < i; j++) { print h[j]; print s[j]; } }' < dup_${file_prefix}.fasta | grep -v -e "^\$" > ${file_prefix}.fasta
  """
}
// Declared for the spliced_cds process; not referenced by the command below.
params.spliced_cds = ""
// Sub-directory of results/ to publish into; nothing is published when empty.
params.spliced_cds_out = ""

// Decompress a gzipped genome and annotation, then run `gffread -M -x` on them.
//   input : (file_id, gtf) and (fasta_id, fasta), both read through gzip -d
//   output: fasta -> (fasta_id, <file_prefix>.fasta)
process spliced_cds {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_prefix}"
  if (params.spliced_cds_out != "") {
    publishDir "results/${params.spliced_cds_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(gtf)
    tuple val(fasta_id), path(fasta)

  output:
    tuple val(fasta_id), path("${file_prefix}.fasta"), emit: fasta

  script:
  // A list id contributes its first element as the file prefix.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  // Decompression goes to stdout (-c) and keeps the staged inputs (-k).
  """
  gzip -dck ${fasta} > ${fasta.simpleName}_un.fasta
  gzip -dck ${gtf} > ${gtf.simpleName}_un.gtf
  gffread ${gtf.simpleName}_un.gtf -g ${fasta.simpleName}_un.fasta -M \
    -x ${file_prefix}.fasta
  """
}
\ No newline at end of file
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "5.0.11"
container_url = "lbmc/guppy-cpu:${version}"

// Sub-directory of results/ to publish into; also used as guppy's save path.
params.basecalling_out = ""
params.flowcell = "FLO-MIN106"
params.kit = "SQK-PCS109"
params.cpu_threads_per_caller = 4
params.num_callers = 1

// Basecall fast5 input with guppy_basecaller on CPU.
//   input : (file_id, fast5)
//   output: fastq -> (file_id, files matching *.fastq*)
process basecall_fast5 {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.basecalling_out != "") {
    publishDir "results/${params.basecalling_out}", mode: 'copy'
  }
  // The previous code used the undefined names errorFlowcell / errorKit, which
  // raised an error instead of printing the warning.
  if (params.flowcell == "") {
    log.warn "WARNING ! No Flowcell type given..."
  }
  if (params.kit == "") {
    log.warn "WARNING ! No kit type given..."
  }

  input:
    tuple val(file_id), path(fast5)

  output:
    tuple val(file_id), path("*.fastq*"), emit: fastq

  script:
  // With the default empty basecalling_out, `-s` would swallow the next option
  // as its value; fall back to the task directory in that case.
  def save_dir = params.basecalling_out != "" ? params.basecalling_out : "."
  // NOTE(review): the output glob only matches files directly in the task
  // directory; confirm where guppy writes fastq files below the save path.
  // `path()` is an input qualifier, not a script function: use the staged name.
  """
  guppy_basecaller --compress_fastq \
    -i ${fast5} \
    -s ${save_dir} \
    --cpu_threads_per_caller ${params.cpu_threads_per_caller} \
    --num_callers ${params.num_callers} \
    --flowcell ${params.flowcell} \
    --kit ${params.kit}
  """
}
\ No newline at end of file
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "5.0.11"
container_url = "lbmc/guppy-gpu:${version}"

// Sub-directory of results/ to publish into; also used as guppy's save path.
params.basecalling_out = ""
params.flowcell = ""
params.kit = ""
params.gpu_runners_per_device = 16

// Basecall fast5 input with guppy_basecaller on all CUDA devices.
//   input : (file_id, fast5)
//   output: fastq -> (file_id, files matching *.fastq*)
process basecall_fast5 {
  container = "${container_url}"
  // Need to create a profile using GPUs
  label ""
  tag "$file_id"
  if (params.basecalling_out != "") {
    publishDir "results/${params.basecalling_out}", mode: 'copy'
  }
  // The previous code used the undefined names errorFlowcell / errorKit; with
  // the empty defaults above that error was hit on every default run.
  if (params.flowcell == "") {
    log.warn "WARNING ! No Flowcell type given..."
  }
  if (params.kit == "") {
    log.warn "WARNING ! No kit type given..."
  }

  input:
    tuple val(file_id), path(fast5)

  output:
    tuple val(file_id), path("*.fastq*"), emit: fastq

  script:
  // With the default empty basecalling_out, `-s` would swallow the next option
  // as its value; fall back to the task directory in that case.
  def save_dir = params.basecalling_out != "" ? params.basecalling_out : "."
  // NOTE(review): the output glob only matches files directly in the task
  // directory; confirm where guppy writes fastq files below the save path.
  // `path()` is an input qualifier, not a script function: use the staged name.
  """
  guppy_basecaller --compress_fastq -x "cuda:all" --min_qscore 7.0 \
    -i ${fast5} \
    -s ${save_dir} \
    --gpu_runners_per_device ${params.gpu_runners_per_device} \
    --flowcell ${params.flowcell} \
    --kit ${params.kit}
  """
}
\ No newline at end of file
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "2.2.1"
container_url = "lbmc/hisat2:${version}"

// Declared for the index_fasta process; not referenced by the command below.
params.index_fasta = ""
// Sub-directory of results/ to publish into; nothing is published when empty.
params.index_fasta_out = ""

// Build a HISAT2 index from a FASTA file (gzipped or plain).
//   input : (file_id, fasta)
//   output: index  -> (file_id, *.ht2* files)
//           report -> (file_id, *_report.txt)
process index_fasta {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.index_fasta_out != "") {
    publishDir "results/${params.index_fasta_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(fasta)

  output:
    tuple val(file_id), path("*.ht2*"), emit: index
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // Inputs are normally staged as symbolic links, and GNU gzip refuses to
  // decompress a symlink in place unless forced. Decompress to stdout instead,
  // and skip the step when the input is not gzipped.
  def fasta_name = "${fasta}"
  def is_gzipped = fasta_name.endsWith(".gz")
  def plain_fasta = is_gzipped ? "${fasta.baseName}" : fasta_name
  def unzip_cmd = is_gzipped ? "gzip -dc ${fasta} > ${plain_fasta}" : ""
  """
  ${unzip_cmd}
  hisat2-build -p ${task.cpus} \
    ${plain_fasta} \
    ${fasta.simpleName} &> \
    ${fasta.simpleName}_hisat2_index_report.txt

  if grep -q "Error" ${fasta.simpleName}_hisat2_index_report.txt; then
    exit 1
  fi
  """
}
// Extra command-line options forwarded verbatim to `hisat2`.
params.mapping_fastq = ""
// Sub-directory of results/ to publish into; nothing is published when empty.
params.mapping_fastq_out = ""

// Map single- or paired-end reads with HISAT2 and write a sorted BAM.
//   input : (index_id, index files) and (file_id, reads)
//   output: bam    -> (file_id, *.bam)
//           report -> *_report.txt (the captured hisat2 stderr)
process mapping_fastq {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${file_id}"
  if (params.mapping_fastq_out != "") {
    publishDir "results/${params.mapping_fastq_out}", mode: 'copy'
  }

  input:
    tuple val(index_id), path(index)
    tuple val(file_id), path(reads)

  output:
    tuple val(file_id), path("*.bam"), emit: bam
    path "*_report.txt", emit: report

  script:
  // The index base name is whatever precedes ".1.ht2" in the index file names;
  // the first staged file is the fallback when no file matches.
  index_id = index[0]
  for (candidate in index) {
    if (candidate =~ /.*\.1\.ht2.*/) {
      index_id = (candidate =~ /(.*)\.1\.ht2.*/)[0][1]
    }
  }

  // Derive the output prefix from the id: first element of a list, first value
  // of a map, or the id itself.
  if (file_id instanceof List) {
    file_prefix = file_id[0]
  } else if (file_id instanceof Map) {
    file_prefix = file_id.values()[0]
  } else {
    file_prefix = file_id
  }

  if (reads.size() == 2)
  """
  hisat2 ${params.mapping_fastq} \
    -p ${task.cpus} \
    -x ${index_id} \
    -1 ${reads[0]} \
    -2 ${reads[1]} 2> \
    ${file_prefix}_ht2_mapping_report.txt \
    | samtools view -@ ${task.cpus} -bS - \
    | samtools sort -@ ${task.cpus} -o ${file_prefix}.bam

  if grep -q "Error" ${file_prefix}_ht2_mapping_report.txt; then
    exit 1
  fi
  """
  else
  """
  hisat2 ${params.mapping_fastq} \
    -p ${task.cpus} \
    -x ${index_id} \
    -U ${reads} 2> \
    ${file_prefix}_ht2_mapping_report.txt \
    | samtools view -@ ${task.cpus} -bS - \
    | samtools sort -@ ${task.cpus} -o ${file_prefix}.bam

  if grep -q "Error" ${file_prefix}_ht2_mapping_report.txt; then
    exit 1
  fi
  """
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "1.99.2"
container_url = "lbmc/htseq:${version}"

// Sub-directory of results/ used by htseq_count; nothing is published when empty.
params.htseq_out = ""

// Convert a GFF3 annotation to GTF with `gffread -T`.
// Runs in the dceoy/cufflinks image rather than the htseq image.
//   input : (genome_id, gff3_file)
//   output: gtf -> <genome_id>.gtf
process gff3_2_gtf {
  container = "dceoy/cufflinks"
  label "small_mem_mono_cpus"

  input:
    tuple val(genome_id), path(gff3_file)

  output:
    path "${genome_id}.gtf", emit: gtf

  script:
  """
  gffread ${gff3_file} -T -o ${genome_id}.gtf
  """
}
// Count reads per gene with htseq-count on a position-sorted BAM.
//   input : (file_id, bam, bai) and a GTF annotation
//   output: counts -> <file_id>.tsv
// The counting options (-r pos -a 10 -s yes -t exon -i gene_id) are fixed here.
process htseq_count {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "file_id: ${file_id}"
  if (params.htseq_out != "") {
    publishDir "results/${params.htseq_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam), path(bai)
    path (gtf)

  output:
    path "${file_id}.tsv", emit: counts

  script:
  """
  htseq-count -n ${task.cpus} -r pos -a 10 -s yes -t exon -i gene_id ${bam} ${gtf} > ${file_id}.tsv
  """
}
// Convert a GFF3 annotation to GTF, then count reads against it.
//   take: bam_tuple -> (file_id, bam, bai) tuples
//         gff_file  -> (genome_id, gff3) tuples
//   emit: counts    -> htseq_count count tables
workflow htseq_count_with_gff {
  take:
    bam_tuple
    gff_file

  main:
    gff3_2_gtf(gff_file)
    htseq_count(bam_tuple, gff3_2_gtf.out.gtf)

  emit:
    counts = htseq_count.out.counts
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "0.44.0"
container_url = "lbmc/kallisto:${version}"

// Options forwarded verbatim to `kallisto index`.
params.index_fasta = "-k 31 --make-unique"
// Sub-directory of results/ to publish into; nothing is published when empty.
params.index_fasta_out = ""

// Build a kallisto index from a FASTA file.
//   input : (file_id, fasta)
//   output: index  -> (file_id, <fasta baseName>.index)
//           report -> (file_id, *_report.txt holding the captured stderr)
process index_fasta {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${file_id}"
  if (params.index_fasta_out != "") {
    publishDir "results/${params.index_fasta_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(fasta)

  output:
    tuple val(file_id), path("*.index*"), emit: index
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  """
  kallisto index ${params.index_fasta} -i ${fasta.baseName}.index ${fasta} \
    2> ${fasta.baseName}_kallisto_index_report.txt
  """
}
// Options forwarded verbatim to `kallisto quant`.
params.mapping_fastq = "--bias --bootstrap-samples 100"
// Sub-directory of results/ to publish into; nothing is published when empty.
params.mapping_fastq_out = ""

// Quantify single- or paired-end reads with `kallisto quant`.
//   input : (index_id, index) and (file_id, reads)
//   output: counts -> (file_id, <file_prefix>/ output directory)
//           report -> (file_id, *_report.txt holding stdout and stderr)
process mapping_fastq {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  // Fix: the tag referenced `pair_id`, which is not an input of this process.
  tag "$file_id"
  if (params.mapping_fastq_out != "") {
    publishDir "results/${params.mapping_fastq_out}", mode: 'copy'
  }

  input:
    tuple val(index_id), path(index)
    tuple val(file_id), path(reads)

  output:
    tuple val(file_id), path("${file_prefix}"), emit: counts
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // A list id contributes its first element as the output directory name.
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }

  if (reads.size() == 2)
  """
  mkdir ${file_prefix}
  kallisto quant -i ${index} -t ${task.cpus} \
    ${params.mapping_fastq} -o ${file_prefix} \
    ${reads[0]} ${reads[1]} &> ${file_prefix}_kallisto_mapping_report.txt
  """
  else
  // Single-end mode: any further options kallisto needs for --single must
  // come from params.mapping_fastq.
  """
  mkdir ${file_prefix}
  kallisto quant -i ${index} -t ${task.cpus} --single \
    ${params.mapping_fastq} -o ${file_prefix} \
    ${reads[0]} &> ${file_prefix}_kallisto_mapping_report.txt
  """
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "0.26.0"
container_url = "lbmc/kb:${version}"

// Options forwarded verbatim to `kb ref`.
params.index_fasta = ""
// Sub-directory of results/ to publish into; nothing is published when empty.
params.index_fasta_out = ""

// Derive a transcript-to-gene table from the GTF, then build the kb index.
//   take: fasta -> (file_id, fasta) tuples
//         gtf   -> (gtf_id, gtf) tuples
//   emit: index, t2g and report channels of index_default
workflow index_fasta {
  take:
    fasta
    gtf

  main:
    tr2g(gtf)
    index_default(fasta, gtf, tr2g.out.t2g)

  emit:
    index = index_default.out.index
    t2g = index_default.out.t2g
    report = index_default.out.report
}
// Create the transcript-to-gene table from a GTF when no such table is provided.
//   input : (file_id, gtf)
//   output: t2g -> (file_id, t2g.txt)
process tr2g {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.index_fasta_out != "") {
    publishDir "results/${params.index_fasta_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(gtf)

  output:
    tuple val(file_id), path("t2g.txt"), emit: t2g

  script:
  // The sort step reads t2g_dup.txt (expected from t2g.py) and keeps unique
  // entries only.
  """
  t2g.py --gtf ${gtf}
  sort -k1 -u t2g_dup.txt > t2g.txt
  """
}
// Create the gene-to-transcript table from a GTF when no such table is provided.
//   input : (file_id, gtf)
//   output: g2t -> (file_id, g2t.txt)
process g2tr {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.index_fasta_out != "") {
    publishDir "results/${params.index_fasta_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(gtf)

  output:
    tuple val(file_id), path("g2t.txt"), emit: g2t

  script:
  // Same steps as tr2g, followed by an awk pass that swaps the first two
  // columns and writes them tab-separated.
  """
  t2g.py --gtf ${gtf}
  sort -k1 -u t2g_dup.txt > t2g.txt
  awk 'BEGIN{OFS="\\t"}{print \$2, \$1}' t2g.txt > g2t.txt
  """
}
// Build a kallisto|bustools index with `kb ref`.
//   input : (file_id, fasta), (gtf_id, gtf) and (t2g_id, transcript_to_gene)
//   output: index  -> (file_id, *.idx)
//           t2g    -> (t2g_id, the transcript-to-gene file passed to -g)
//           report -> (file_id, *_report.txt holding the captured stdout)
process index_default {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.index_fasta_out != "") {
    publishDir "results/${params.index_fasta_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(fasta)
    tuple val(gtf_id), path(gtf)
    tuple val(t2g_id), path(transcript_to_gene)

  output:
    tuple val(file_id), path("*.idx"), emit: index
    tuple val(t2g_id), path("${transcript_to_gene}"), emit: t2g
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  """
  kb ref \
    -i ${fasta.simpleName}.idx \
    -g ${transcript_to_gene} \
    ${params.index_fasta} \
    -f1 cdna.fa ${fasta} ${gtf} > ${fasta.simpleName}_kb_index_report.txt
  """
}
include { split } from "./../flexi_splitter/main.nf"
// Protocol selector: "marsseq" routes through split + kb_marseq, anything else
// through kb_default.
params.kb_protocol = "10x_v3"
// Options forwarded verbatim to `kb count`.
params.count = ""
// Sub-directory of results/ to publish into; nothing is published when empty.
params.count_out = ""

// Count reads with kb-python, choosing the process from params.kb_protocol.
//   take: index, fastq, transcript_to_gene, whitelist, config
//   emit: counts and report of whichever process ran
workflow count {
  take:
    index
    fastq
    transcript_to_gene
    whitelist
    config

  main:
    // An empty whitelist channel is replaced by a sentinel tuple that the
    // processes recognise by its id.
    whitelist
      .ifEmpty(["NO WHITELIST", 0])
      .set { whitelist_optional }

    if (params.kb_protocol == "marsseq") {
      split(fastq, config.collect())
      kb_marseq(index.collect(), split.out.fastq, transcript_to_gene.collect(), whitelist_optional.collect())
      kb_marseq.out.counts.set { res_counts }
      kb_marseq.out.report.set { res_report }
    } else {
      kb_default(index.collect(), fastq, transcript_to_gene.collect(), whitelist_optional.collect())
      kb_default.out.counts.set { res_counts }
      kb_default.out.report.set { res_report }
    }

  emit:
    counts = res_counts
    report = res_report
}
// Run `kb count` with the 10XV3 technology on paired reads.
//   input : (index_id, index), (file_id, reads), (t2g_id, transcript_to_gene),
//           (whitelist_id, whitelist)
//   output: counts -> (file_id, <file_prefix>/ output directory)
//           report -> (file_id, *_report.txt holding the captured stdout)
process kb_default {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_prefix"
  if (params.count_out != "") {
    publishDir "results/${params.count_out}", mode: 'copy'
  }

  input:
    tuple val(index_id), path(index)
    tuple val(file_id), path(reads)
    tuple val(t2g_id), path(transcript_to_gene)
    tuple val(whitelist_id), path(whitelist)

  output:
    tuple val(file_id), path("${file_prefix}"), emit: counts
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // Task memory string with the "GB" unit removed, passed to `kb count -m`.
  def kb_memory = "${task.memory}" - ~/GB/
  // A list id contributes its first element as the output directory name.
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
  // The sentinel id marks an absent whitelist; no -w option is passed then.
  def whitelist_param = ""
  if (whitelist_id != "NO WHITELIST"){
    whitelist_param = "-w ${whitelist}"
  }

  // Only paired input has a command here. Previously any other input left the
  // process without a script; fail with an explicit message instead.
  if (reads.size() != 2) {
    error "kb_default expects exactly 2 read files for '${file_prefix}', got: ${reads}"
  }
  """
  mkdir ${file_prefix}
  kb count -t ${task.cpus} \
    -m ${kb_memory} \
    -i ${index} \
    -g ${transcript_to_gene} \
    -o ${file_prefix} \
    ${whitelist_param} \
    -x 10XV3 \
    --h5ad \
    ${params.count} \
    ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt

  fix_t2g.py --t2g ${transcript_to_gene}
  cp fix_t2g.txt ${file_prefix}/
  cp ${transcript_to_gene} ${file_prefix}/
  """
}
// Run `kb count` with a custom technology string for MARS-Seq data.
// With the MARS-Seq protocol, we have:
//   on read 1: 4 nt of plate barcode
//   on read 2: 6 nt of cell barcode and 8 nt of UMI
// This process expects the plate barcode to be already removed from read 1.
//   input : (index_id, index), (file_id, reads), (t2g_id, transcript_to_gene),
//           (whitelist_id, whitelist)
//   output: counts -> (file_id, <file_prefix>/ output directory)
//           report -> (file_id, *_report.txt holding the captured stdout)
process kb_marseq {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${file_prefix}"
  if (params.count_out != "") {
    publishDir "results/${params.count_out}", mode: 'copy'
  }

  input:
    tuple val(index_id), path(index)
    tuple val(file_id), path(reads)
    tuple val(t2g_id), path(transcript_to_gene)
    tuple val(whitelist_id), path(whitelist)

  output:
    tuple val(file_id), path("${file_prefix}"), emit: counts
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // Task memory string with the "GB" unit removed, passed to `kb count -m`.
  def kb_memory = "${task.memory}".replaceFirst(/GB/, "")
  // A list id contributes its first element as the output directory name.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  // The sentinel id marks an absent whitelist; no -w option is passed then.
  def whitelist_param = (whitelist_id != "NO WHITELIST") ? "-w ${whitelist}" : ""

  if (reads.size() == 2)
  """
  mkdir ${file_prefix}
  kb count -t ${task.cpus} \
    -m ${kb_memory} \
    -i ${index} \
    -g ${transcript_to_gene} \
    -o ${file_prefix} \
    ${whitelist_param} \
    ${params.count} \
    --h5ad \
    -x 1,0,6:1,6,14:0,0,0 \
    ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt

  fix_t2g.py --t2g ${transcript_to_gene}
  cp fix_t2g.txt ${file_prefix}/
  cp ${transcript_to_gene} ${file_prefix}/
  """
  else
  """
  mkdir ${file_prefix}
  kb count -t ${task.cpus} \
    -m ${kb_memory} \
    -i ${index} \
    -g ${transcript_to_gene} \
    -o ${file_prefix} \
    ${whitelist_param} \
    ${params.count} \
    -x 1,0,6:1,6,14:0,0,0 \
    --h5ad \
    ${reads} > ${file_prefix}_kb_mapping_report.txt

  fix_t2g.py --t2g ${transcript_to_gene}
  cp fix_t2g.txt ${file_prefix}/
  cp ${transcript_to_gene} ${file_prefix}/
  """
}
// ************************** velocity workflow **************************

// Derive a transcript-to-gene table from the GTF, then build the velocity index.
//   take: fasta -> (file_id, fasta) tuples
//         gtf   -> (gtf_id, gtf) tuples
//   emit: index, t2g and report channels of index_fasta_velocity_default
workflow index_fasta_velocity {
  take:
    fasta
    gtf

  main:
    tr2g(gtf)
    index_fasta_velocity_default(fasta, gtf, tr2g.out.t2g)

  emit:
    index = index_fasta_velocity_default.out.index
    t2g = index_fasta_velocity_default.out.t2g
    report = index_fasta_velocity_default.out.report
}
// Build a kb index with `kb ref --workflow lamanno` (cDNA and intron files).
//   input : (file_id, fasta), (gtf_id, gtf) and (t2g_id, transcript_to_gene)
//   output: index  -> (file_id, *.idx)
//           t2g    -> (t2g_id, transcript_to_gene, cdna_t2c.txt, intron_t2c.txt)
//           report -> (file_id, *_report.txt holding the captured stdout)
process index_fasta_velocity_default {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${file_id}"
  if (params.index_fasta_out != "") {
    publishDir "results/${params.index_fasta_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(fasta)
    tuple val(gtf_id), path(gtf)
    tuple val(t2g_id), path(transcript_to_gene)

  output:
    tuple val(file_id), path("*.idx"), emit: index
    tuple val(t2g_id), path("${transcript_to_gene}"), path("cdna_t2c.txt"), path("intron_t2c.txt"), emit: t2g
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  """
  kb ref \
    -i ${fasta.simpleName}.idx \
    -g ${transcript_to_gene} \
    ${params.index_fasta} \
    -f1 cdna.fa -f2 intron.fa -c1 cdna_t2c.txt -c2 intron_t2c.txt --workflow lamanno \
    ${fasta} ${gtf} > ${fasta.simpleName}_kb_index_report.txt
  """
}
// Options declared for the velocity counting processes.
params.count_velocity = ""
// Sub-directory of results/ to publish into; nothing is published when empty.
params.count_velocity_out = ""

// Velocity counting with kb-python, choosing the process from params.kb_protocol.
//   take: index, fastq, transcript_to_gene, whitelist, config
//   emit: counts and report of whichever process ran
workflow count_velocity {
  take:
    index
    fastq
    transcript_to_gene
    whitelist
    config

  main:
    // An empty whitelist channel is replaced by a sentinel tuple that the
    // processes recognise by its id.
    whitelist
      .ifEmpty(["NO WHITELIST", 0])
      .set { whitelist_optional }

    if (params.kb_protocol == "marsseq") {
      split(fastq, config.collect())
      velocity_marseq(index.collect(), split.out.fastq, transcript_to_gene.collect(), whitelist_optional.collect())
      velocity_marseq.out.counts.set { res_counts }
      velocity_marseq.out.report.set { res_report }
    } else {
      velocity_default(index.collect(), fastq, transcript_to_gene.collect(), whitelist_optional.collect())
      velocity_default.out.counts.set { res_counts }
      velocity_default.out.report.set { res_report }
    }

  emit:
    counts = res_counts
    report = res_report
}
// Run `kb count --workflow lamanno` with the 10XV3 technology on paired reads.
//   input : (index_id, index), (file_id, reads),
//           (t2g_id, transcript_to_gene, cdna_t2g, intron_t2g),
//           (whitelist_id, whitelist)
//   output: counts -> (file_id, <file_prefix>/ output directory)
//           report -> (file_id, *_report.txt holding the captured stdout)
process velocity_default {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_prefix"
  if (params.count_velocity_out != "") {
    publishDir "results/${params.count_velocity_out}", mode: 'copy'
  }

  input:
    tuple val(index_id), path(index)
    tuple val(file_id), path(reads)
    tuple val(t2g_id), path(transcript_to_gene), path(cdna_t2g), path(intron_t2g)
    tuple val(whitelist_id), path(whitelist)

  output:
    tuple val(file_id), path("${file_prefix}"), emit: counts
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // Task memory string with the "GB" unit removed, passed to `kb count -m`.
  def kb_memory = "${task.memory}" - ~/GB/
  // A list id contributes its first element as the output directory name.
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
  // The sentinel id marks an absent whitelist; no -w option is passed then.
  def whitelist_param = ""
  if (whitelist_id != "NO WHITELIST"){
    whitelist_param = "-w ${whitelist}"
  }

  // Only paired input has a command here. Previously any other input left the
  // process without a script; fail with an explicit message instead.
  if (reads.size() != 2) {
    error "velocity_default expects exactly 2 read files for '${file_prefix}', got: ${reads}"
  }
  // params.count_velocity was declared for this workflow but never applied.
  // It is now appended after params.count, which is kept so existing callers
  // see the same command while count_velocity stays empty.
  """
  mkdir ${file_prefix}
  kb count -t ${task.cpus} \
    -m ${kb_memory} \
    -i ${index} \
    -g ${transcript_to_gene} \
    -o ${file_prefix} \
    -c1 ${cdna_t2g} \
    -c2 ${intron_t2g} \
    --workflow lamanno \
    ${whitelist_param} \
    -x 10XV3 \
    --h5ad \
    ${params.count} \
    ${params.count_velocity} \
    ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt

  fix_t2g.py --t2g ${transcript_to_gene}
  cp fix_t2g.txt ${file_prefix}/
  cp ${transcript_to_gene} ${file_prefix}/
  cp ${cdna_t2g} ${file_prefix}/
  cp ${intron_t2g} ${file_prefix}/
  """
}
// Run `kb count --workflow lamanno` with a custom technology string for
// MARS-Seq data.
// With the MARS-Seq protocol, we have:
//   on read 1: 4 nt of plate barcode
//   on read 2: 6 nt of cell barcode and 8 nt of UMI
// This process expects the plate barcode to be already removed from read 1.
//   input : (index_id, index), (file_id, reads),
//           (t2g_id, transcript_to_gene, cdna_t2g, intron_t2g),
//           (whitelist_id, whitelist)
//   output: counts -> (file_id, <file_prefix>/ output directory)
//           report -> (file_id, *_report.txt holding the captured stdout)
process velocity_marseq {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_prefix"
  if (params.count_velocity_out != "") {
    publishDir "results/${params.count_velocity_out}", mode: 'copy'
  }

  input:
    tuple val(index_id), path(index)
    tuple val(file_id), path(reads)
    tuple val(t2g_id), path(transcript_to_gene), path(cdna_t2g), path(intron_t2g)
    tuple val(whitelist_id), path(whitelist)

  output:
    tuple val(file_id), path("${file_prefix}"), emit: counts
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // Task memory string with the "GB" unit removed, passed to `kb count -m`.
  def kb_memory = "${task.memory}" - ~/GB/
  // A list id contributes its first element as the output directory name.
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
  // The sentinel id marks an absent whitelist; no -w option is passed then.
  def whitelist_param = ""
  if (whitelist_id != "NO WHITELIST"){
    whitelist_param = "-w ${whitelist}"
  }

  // params.count_velocity was declared for this workflow but never applied.
  // It is now appended after params.count, which is kept so existing callers
  // see the same command while count_velocity stays empty.
  if (reads.size() == 2)
  """
  mkdir ${file_prefix}
  kb count -t ${task.cpus} \
    -m ${kb_memory} \
    -i ${index} \
    -g ${transcript_to_gene} \
    -o ${file_prefix} \
    -c1 ${cdna_t2g} \
    -c2 ${intron_t2g} \
    --workflow lamanno \
    --h5ad \
    ${whitelist_param} \
    ${params.count} \
    ${params.count_velocity} \
    -x 1,0,6:1,6,14:0,0,0 \
    ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt

  fix_t2g.py --t2g ${transcript_to_gene}
  cp fix_t2g.txt ${file_prefix}/
  cp ${transcript_to_gene} ${file_prefix}/
  cp ${cdna_t2g} ${file_prefix}/
  cp ${intron_t2g} ${file_prefix}/
  """
  else
  // --h5ad was missing from this branch only; it is added so single-file runs
  // produce the same kind of output as the paired branch and kb_marseq.
  """
  mkdir ${file_prefix}
  kb count -t ${task.cpus} \
    -m ${kb_memory} \
    -i ${index} \
    -g ${transcript_to_gene} \
    -o ${file_prefix} \
    -c1 ${cdna_t2g} \
    -c2 ${intron_t2g} \
    --workflow lamanno \
    --h5ad \
    ${whitelist_param} \
    ${params.count} \
    ${params.count_velocity} \
    -x 1,0,6:1,6,14:0,0,0 \
    ${reads} > ${file_prefix}_kb_mapping_report.txt

  fix_t2g.py --t2g ${transcript_to_gene}
  cp fix_t2g.txt ${file_prefix}/
  cp ${transcript_to_gene} ${file_prefix}/
  cp ${cdna_t2g} ${file_prefix}/
  cp ${intron_t2g} ${file_prefix}/
  """
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "2.1.2"
container_url = "lbmc/macs2:${version}"

// Genome size and mfold range used to build the default option strings.
params.macs_gsize=3e9
params.macs_mfold="5 50"
// Options forwarded verbatim to `macs2 callpeak`.
params.peak_calling = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}"
// Sub-directory of results/ to publish into; nothing is published when empty.
params.peak_calling_out = ""

// Call peaks with MACS2 on an IP BAM against a control BAM.
//   input : (file_id, bam_ip, bam_control)
//   output: peak    -> (file_id, *.narrowPeak)
//           summits -> (file_id, *.bed)
//           report  -> (file_id, *_peaks.xls, *_report.txt)
process peak_calling {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.peak_calling_out != "") {
    publishDir "results/${params.peak_calling_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam_ip), path(bam_control)

  output:
    tuple val(file_id), path("*.narrowPeak"), emit: peak
    tuple val(file_id), path("*.bed"), emit: summits
    tuple val(file_id), path("*_peaks.xls"), path("*_report.txt"), emit: report

  script:
  // stderr is captured in the report file, which is then scanned for "ERROR"
  // so a logged failure aborts the task. The message now names MACS2; it
  // previously said "MACS3" in this MACS2 module.
  """
  macs2 callpeak \
    ${params.peak_calling} \
    --treatment ${bam_ip} \
    --call-summits \
    --control ${bam_control} \
    --keep-dup all \
    --qvalue 0.99 \
    --name ${bam_ip.simpleName} 2> \
    ${bam_ip.simpleName}_macs2_report.txt

  if grep -q "ERROR" ${bam_ip.simpleName}_macs2_report.txt; then
    echo "MACS2 error"
    exit 1
  fi
  """
}
// Options forwarded verbatim to `macs2 callpeak` for bedgraph-derived input.
params.peak_calling_bg = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}"
// Sub-directory of results/ to publish into; nothing is published when empty.
params.peak_calling_bg_out = ""

// Call peaks with MACS2 from two 4-column interval files (IP and control).
//   input : (file_id, bg_ip, bg_control)
//   output: peak    -> (file_id, *.narrowPeak)
//           summits -> (file_id, *.bed)
//           report  -> (file_id, *_report.txt)
process peak_calling_bg {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.peak_calling_bg_out != "") {
    publishDir "results/${params.peak_calling_bg_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bg_ip), path(bg_control)

  output:
    tuple val(file_id), path("*.narrowPeak"), emit: peak
    tuple val(file_id), path("*.bed"), emit: summits
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // Each input is rewritten as a 6-column BED: columns 1-3 are kept, "." and
  // "+" are inserted, and the original column 4 becomes the last column.
  // stderr is captured in the report file, which is then scanned for "ERROR".
  // The message now names MACS2; it previously said "MACS3" in this module.
  """
  awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_ip} > \
    ${bg_ip.simpleName}.bed
  awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_control} > \
    ${bg_control.simpleName}.bed
  macs2 callpeak \
    ${params.peak_calling_bg} \
    --treatment ${bg_ip.simpleName}.bed \
    --qvalue 0.99 \
    --call-summits \
    --control ${bg_control.simpleName}.bed \
    --keep-dup all \
    --name ${bg_ip.simpleName} 2> \
    ${bg_ip.simpleName}_macs2_report.txt

  if grep -q "ERROR" ${bg_ip.simpleName}_macs2_report.txt; then
    echo "MACS2 error"
    exit 1
  fi
  """
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "3.0.0a6"
container_url = "lbmc/macs3:${version}"

// Genome size and mfold range used to build the default option strings.
params.macs_gsize=3e9
params.macs_mfold="5 50"
// Options forwarded verbatim to `macs3 callpeak`.
params.peak_calling = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}"
// Sub-directory of results/ to publish into; nothing is published when empty.
params.peak_calling_out = ""

// Call peaks with MACS3 on an IP BAM against a control BAM.
//   input : (file_id, bam_ip, bam_control)
//   output: peak   -> every file produced in the task directory ("*")
//           report -> *_report.txt (the captured stderr)
// --gsize is passed explicitly after params.peak_calling, which carries it
// too by default.
process peak_calling {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.peak_calling_out != "") {
    publishDir "results/${params.peak_calling_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam_ip), path(bam_control)

  output:
    path "*", emit: peak
    path "*_report.txt", emit: report

  script:
  // The report is scanned for "ERROR" so a logged failure aborts the task.
  """
  macs3 callpeak \
    --treatment ${bam_ip} \
    --call-summits \
    --control ${bam_control} \
    --keep-dup all \
    ${params.peak_calling} \
    --name ${bam_ip.simpleName} \
    --gsize ${params.macs_gsize} 2> \
    ${bam_ip.simpleName}_macs3_report.txt

  if grep -q "ERROR" ${bam_ip.simpleName}_macs3_report.txt; then
    echo "MACS3 error"
    exit 1
  fi
  """
}
// Options forwarded verbatim to `macs3 callpeak` for bedgraph-derived input.
params.peak_calling_bg = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}"
// Sub-directory of results/ to publish into; nothing is published when empty.
params.peak_calling_bg_out = ""

// Call peaks with MACS3 from two 4-column interval files (IP and control).
//   input : (file_id, bg_ip, bg_control)
//   output: peak   -> every file produced in the task directory ("*")
//           report -> *_report.txt (the captured stderr)
process peak_calling_bg {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.peak_calling_bg_out != "") {
    publishDir "results/${params.peak_calling_bg_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bg_ip), path(bg_control)

  output:
    path "*", emit: peak
    path "*_report.txt", emit: report

  script:
  // Each input is rewritten as a 6-column BED: columns 1-3 are kept, "." and
  // "+" are inserted, and the original column 4 becomes the last column.
  //
  // Fix: the --mfold line was plain text (`params.macs_mfold[0] ...`) with no
  // trailing backslash, so macs3 received literal garbage and --name/--gsize
  // ran as a separate command. params.macs_mfold already holds both bounds
  // ("5 50" by default), so it is interpolated as a whole.
  """
  awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_ip} > \
    ${bg_ip.simpleName}.bed
  awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_control} > \
    ${bg_control.simpleName}.bed
  macs3 callpeak \
    ${params.peak_calling_bg} \
    --treatment ${bg_ip.simpleName}.bed \
    --call-summits \
    --control ${bg_control.simpleName}.bed \
    --keep-dup all \
    --mfold ${params.macs_mfold} \
    --name ${bg_ip.simpleName} \
    --gsize ${params.macs_gsize} 2> \
    ${bg_ip.simpleName}_macs3_report.txt

  if grep -q "ERROR" ${bg_ip.simpleName}_macs3_report.txt; then
    echo "MACS3 error"
    exit 1
  fi
  """
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "2.17"
container_url = "lbmc/minimap2:${version}"

// Options forwarded verbatim to `minimap2` when indexing.
params.index_fasta = ""
// Sub-directory of results/ to publish into; nothing is published when empty.
params.index_fasta_out = ""

// Build a minimap2 index (.mmi) from a FASTA file.
//   input : (file_id, fasta)
//   output: index -> (file_id, the input fasta, *.mmi*)
process index_fasta {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${file_id}"
  if (params.index_fasta_out != "") {
    publishDir "results/${params.index_fasta_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(fasta)

  output:
    tuple val(file_id), path("${fasta}"), path("*.mmi*"), emit: index

  script:
  // Drop the " GB" unit from the task memory string; "G" is re-added for -I.
  memory = "${task.memory}".replaceFirst(/\s*GB/, "")
  """
  minimap2 ${params.index_fasta} -t ${task.cpus} -I ${memory}G -d ${fasta.baseName}.mmi ${fasta}
  """
}
// Options forwarded verbatim to `minimap2` when mapping.
params.mapping_fastq = "-ax sr"
// Sub-directory of results/ to publish into; nothing is published when empty.
params.mapping_fastq_out = ""

// Map single- or paired-end reads with minimap2 and convert the output to BAM.
//   input : (fasta_id, fasta, index) and (file_id, reads)
//   output: bam -> (file_id, <file_prefix>.bam)
process mapping_fastq {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.mapping_fastq_out != "") {
    publishDir "results/${params.mapping_fastq_out}", mode: 'copy'
  }

  input:
    tuple val(fasta_id), path(fasta), path(index)
    tuple val(file_id), path(reads)

  output:
    tuple val(file_id), path("*.bam"), emit: bam

  script:
  // A list id contributes its first element as the output file prefix.
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
  // Task memory (unit stripped) divided by cpus + 1, passed to -K.
  // NOTE(review): the value reaches -K without a unit suffix; confirm whether
  // a "G" suffix was intended, as for -I in index_fasta.
  memory = "${task.memory}" - ~/\s*GB/
  memory = memory.toInteger() / (task.cpus + 1.0)

  // Fix: the BAM was named after `pair_id`, which is not defined in this
  // process; the prefix computed above is used instead.
  if (reads.size() == 2)
  """
  minimap2 ${params.mapping_fastq} -t ${task.cpus} -K ${memory} ${fasta} ${reads[0]} ${reads[1]} |
    samtools view -Sb - > ${file_prefix}.bam
  """
  else
  """
  minimap2 ${params.mapping_fastq} -t ${task.cpus} -K ${memory} ${fasta} ${reads} |
    samtools view -Sb - > ${file_prefix}.bam
  """
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
// MultiQC generates an HTML report that combines the reports of many different
// bioinformatics tools.
//
// EXAMPLE:
/*
include { multiqc }
from './nf_modules/multiqc/main'
addParams(
multiqc_out: "QC/"
)
multiqc(
report_a
.mix(
report_b,
report_c,
report_d
)
)
*/
version = "1.11"
container_url = "lbmc/multiqc:${version}"

// Options forwarded verbatim to `multiqc`.
params.multiqc = ""
// Sub-directory of results/ to publish into; nothing is published when empty.
params.multiqc_out = "QC/"

// Reduce a mixed channel of reports to plain files and run MultiQC once on them.
//   take: report -> files, or lists such as (id, file)
//   emit: report -> multiqc_default output
workflow multiqc {
  take:
    report

  main:
    // For a list keep its second element (or its only element); anything else
    // is passed through. Duplicates are dropped and nested lists flattened.
    report
      .map { item ->
        (item instanceof List) ? (item.size() > 1 ? item[1] : item[0]) : item
      }
      .unique()
      .flatten()
      .set { report_cleaned }

    multiqc_default(report_cleaned.collect())

  emit:
    report = multiqc_default.out.report
}
// Run MultiQC on every report staged in the task directory.
//   input : report -> the collected report files
//   output: report -> entries matching *multiqc_*
process multiqc_default {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  if (params.multiqc_out != "") {
    publishDir "results/${params.multiqc_out}", mode: 'copy'
  }

  input:
    path report

  output:
    path "*multiqc_*", emit: report

  script:
  // MultiQC scans the current directory rather than being given file names.
  """
  multiqc ${params.multiqc} -f .
  """
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "2.18.11"
container_url = "lbmc/picard:${version}"

// Options forwarded verbatim to Picard MarkDuplicates.
params.mark_duplicate = "VALIDATION_STRINGENCY=LENIENT REMOVE_DUPLICATES=true"
// Sub-directory of results/ to publish into; nothing is published when empty.
params.mark_duplicate_out = ""

// Run Picard MarkDuplicates on a BAM file.
//   input : (file_id, bam)
//   output: bam    -> (file_id, *.bam)
//           report -> *_report.dupinfo.txt (the metrics file)
process mark_duplicate {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.mark_duplicate_out != "") {
    publishDir "results/${params.mark_duplicate_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam)

  output:
    tuple val(file_id) , path("*.bam"), emit: bam
    path "*_report.dupinfo.txt", emit: report

  script:
  // Tool stdout and stderr go to picard_<bam baseName>.log.
  """
  PicardCommandLine MarkDuplicates \
    ${params.mark_duplicate} \
    INPUT=${bam} \
    OUTPUT=${bam.baseName}_dedup.bam \
    METRICS_FILE=${bam.baseName}_picard_dedup_report.dupinfo.txt &> \
    picard_${bam.baseName}.log
  """
}
// Declared for the normalize_fasta process; not referenced by the command below.
params.normalize_fasta = ""
// Sub-directory of results/ to publish into; nothing is published when empty.
params.normalize_fasta_out = ""

// Run Picard NormalizeFasta and gzip the result.
//   input : (file_id, fasta)
//   output: fasta -> (file_id, results/<fasta simpleName>.fasta.gz)
process normalize_fasta {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.normalize_fasta_out != "") {
    publishDir "results/${params.normalize_fasta_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(fasta)

  output:
    tuple val(file_id), path("results/*.fasta.gz"), emit: fasta

  script:
  // The output is written under results/ inside the task directory.
  """
  mkdir -p results
  PicardCommandLine NormalizeFasta \
    I=${fasta} \
    O=results/${fasta.simpleName}.fasta
  gzip results/${fasta.simpleName}.fasta
  """
}
// Options forwarded verbatim to Picard CreateSequenceDictionary.
params.index_fasta = ""
// Sub-directory of results/ to publish into; nothing is published when empty.
params.index_fasta_out = ""

// Create a sequence dictionary (.dict) for a FASTA file with Picard.
//   input : (file_id, fasta)
//   output: index -> (file_id, <fasta baseName>.dict)
process index_fasta {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.index_fasta_out != "") {
    publishDir "results/${params.index_fasta_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(fasta)

  output:
    tuple val(file_id), path("*.dict"), emit: index

  script:
  """
  PicardCommandLine CreateSequenceDictionary \
    ${params.index_fasta} \
    REFERENCE=${fasta} \
    OUTPUT=${fasta.baseName}.dict
  """
}
// Options forwarded verbatim to Picard BuildBamIndex.
params.index_bam = ""
// Sub-directory of results/ to publish into; nothing is published when empty.
params.index_bam_out = ""

// Build a BAM index with Picard BuildBamIndex.
//   input : (file_id, bam)
//   output: index -> (file_id, everything matching "*" in the task directory)
process index_bam {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.index_bam_out != "") {
    publishDir "results/${params.index_bam_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam)

  output:
    tuple val(file_id), path("*"), emit: index

  script:
  // No OUTPUT is given, so the index location is left to Picard's default.
  """
  PicardCommandLine BuildBamIndex \
    ${params.index_bam} \
    INPUT=${bam}
  """
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "0.2.4"
container_url = "lbmc/porechop:${version}"

// Sub-directory of results/ to publish into; nothing is published when empty.
// This default was missing, so an unset value published into "results/null".
params.porechop_out = ""

// Run Porechop on a fastq file.
//   input : (file_id, fastq)
//   output: porechoped_fastq -> (file_id, <file_id>_porechoped.fastq)
process porechop {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.porechop_out != "") {
    publishDir "results/${params.porechop_out}", mode: 'copy'
  }

  input:
    // Fix: this was declared as `fatsq`, leaving `${fastq}` in the script
    // undefined.
    tuple val(file_id), path(fastq)

  output:
    tuple val(file_id), path("*_porechoped.fastq"), emit: porechoped_fastq

  script:
  """
  porechop -i ${fastq} -o ${file_id}_porechoped.fastq --threads 4
  """
}
\ No newline at end of file
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "0.6.0"
container_url = "lbmc/rasusa:${version}"
include { index_fasta } from "./../samtools/main.nf"

// Declared for the sub-sampling step; not referenced in this module's commands.
params.sample_fastq = ""
// Target coverage (-c) and target size (-b); sub-sampling is skipped when
// both are empty.
params.sample_fastq_coverage = ""
params.sample_fastq_size = ""
// Sub-directory of results/ to publish into.
params.sample_fastq_out = ""

// Optionally sub-sample fastq files with rasusa.
//   take: fastq -> (file_id, reads) tuples
//         fasta -> (file_id, fasta) tuples, indexed to provide the genome size
//   emit: fastq -> the input channel unchanged, or the sub-sampled reads
workflow sample_fastq {
  take:
    fastq
    fasta

  main:
    def no_target = params.sample_fastq_coverage == "" && params.sample_fastq_size == ""
    if (no_target) {
      fastq.set { final_fastq }
    } else {
      index_fasta(fasta)
      sub_sample_fastq(fastq, index_fasta.out.index)
      sub_sample_fastq.out.fastq.set { final_fastq }
    }

  emit:
    fastq = final_fastq
}
// Sub-sample one or two fastq files with rasusa.
//   input : (file_id, fastq) and (index_id, idx), idx being passed to -g
//   output: fastq -> (file_id, sub_*.fastq.gz)
process sub_sample_fastq {
  container = "${container_url}"
  label "small_mem_mono_cpus"
  tag "$file_id"
  // Fix: the guard tested params.index_fasta_out, which this module never
  // defines, while publishing to sample_fastq_out. The guard now tests the
  // parameter that is actually used.
  if (params.sample_fastq_out != "") {
    publishDir "results/${params.sample_fastq_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(fastq)
    tuple val(index_id), path(idx)

  output:
    tuple val(file_id), path("sub_*.fastq.gz"), emit: fastq

  script:
  // Derive a prefix from the id: first element of a list, first value of a
  // map, or the id itself.
  switch(file_id) {
    case {it instanceof List}:
      file_prefix = file_id[0]
    break
    case {it instanceof Map}:
      file_prefix = file_id.values()[0]
    break
    default:
      file_prefix = file_id
    break
  }

  // A target size (-b) takes precedence over a target coverage (-c).
  sample_option = "-c " + params.sample_fastq_coverage
  if (params.sample_fastq_size != ""){
    sample_option = "-b " + params.sample_fastq_size
  }

  if (fastq.size() == 2)
  """
  rasusa \
    -i ${fastq[0]} ${fastq[1]} \
    -g ${idx} \
    ${sample_option} \
    -o sub_${fastq[0].simpleName}.fastq.gz sub_${fastq[1].simpleName}.fastq.gz
  """
  else
  """
  rasusa \
    -i ${fastq} \
    -g ${idx} \
    ${sample_option} \
    -o sub_${fastq.simpleName}.fastq.gz
  """
}
\ No newline at end of file
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
nextflow.enable.dsl=2

// Test pipeline for the rasusa module. Example invocations:
/*
./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq"
./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" --coverage 1.0
./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq" --size "1Mb"
*/

// Input globs and sub-sampling targets (both targets empty: no sub-sampling).
params.fastq = "data/fastq/*R{1,2}*"
params.fasta = "data/fasta/*.fasta"
params.coverage = ""
params.size = ""

include { sample_fastq } from "./main.nf" addParams(sample_fastq_coverage: params.coverage, sample_fastq_size: params.size, sample_fastq_out: "sample/")

// Reads grouped by name; size: -1 accepts groups of any size.
fastq_files = channel.fromFilePairs(params.fastq, size: -1)

// Reference files as (simpleName, file) tuples.
fasta_files = channel
  .fromPath(params.fasta)
  .map { fasta_file -> [fasta_file.simpleName, fasta_file] }

workflow {
  sample_fastq(fastq_files, fasta_files.collect())
}
\ No newline at end of file
#! /bin/sh
# SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
#
# SPDX-License-Identifier: AGPL-3.0-or-later
# Shared launcher and inputs for the three rasusa test runs.
run_test="./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker"
fasta="data/tiny_dataset/fasta/tiny_v2.fasta"
single_fastq="data/tiny_dataset/fastq/tiny_R1.fastq"
paired_fastq="data/tiny_dataset/fastq/tiny_R{1,2}.fastq"

# 1. single-end input, no sub-sampling target
${run_test} --fasta "${fasta}" --fastq "${single_fastq}"
# 2. paired-end input, coverage target
${run_test} --fasta "${fasta}" --fastq "${paired_fastq}" --coverage 1.0
# 3. single-end input, size target
${run_test} --fasta "${fasta}" --fastq "${single_fastq}" --size "1Mb"
\ No newline at end of file
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
// Salmon module: alignment-based quantification from a BAM file.
version = "1.8.0"
container_url = "lbmc/salmon:${version}"
// Run `salmon quant` in alignment mode (-a) on a BAM file.
//   input : (file_id, bam)
//   output: quant -> (file_id, files matching *.sf)
// NOTE(review): `-t XXXXXXXXXX` looks like a placeholder for the transcriptome
// file; the process has no input providing one. TODO confirm and wire it in.
// NOTE(review): params.salmon_out has no default in this module, and it is
// used both as the publish sub-directory and as the `-o` output directory.
// NOTE(review): the output glob "*.sf" is relative to the task directory while
// results are written under `-o ${params.salmon_out}`; verify it matches.
// NOTE(review): the thread count is fixed at 4 (-p 4) although the label asks
// for a multi-cpu configuration.
process quantify {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$file_id"
if (params.salmon_out != "") {
publishDir "results/${params.salmon_out}", mode: 'copy'
}
input:
tuple val(file_id), path(bam)
output:
tuple val(file_id), path("*.sf"), emit: quant
script:
"""
salmon quant -l A --noErrorModel -t XXXXXXXXXX -a ${bam} -p 4 -o ${params.salmon_out}
"""
}
\ No newline at end of file