Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • LBMC/RMI2/rmi2_pipelines
  • LBMC/Palladino/RNAseq_nextflow
  • rseraphi/nextflow
  • elabaron/nextflow
  • pberna01/nextflow
  • jblin/nextflow
  • cginevra/nextflow
  • carpin/nextflow
  • cfournea/nextflow
  • dtorresc/nextflow
  • LBMC/nextflow
  • nlecouvr/nextflow-nathan
  • lpicard/nextflow
  • vvanoost/nextflow
  • fmortreu/nextflow
  • hpolvech/nextflow
  • lanani/nextflow
  • mcariou/nextflow
  • fduveau/nextflow
  • jshapiro/nextflow
  • hregue/nextflow
  • yjia01/nextflow
  • acorbin/nextflow
  • ggirau03/nextflow
  • letien02/nextflow
  • ogandril/nextflow
  • jclaud01/nextflow
  • mshamjal/nextflow
  • mprieux/nextflow
  • z483801/nextflow
  • mparis/nextflow
  • alapendr/nextflow
  • cbourgeo/nextflow
  • jvalat/nextflow
  • z483800/nextflow
  • ecombe01/nextflow
  • dchalopi/nextflow
  • mherbett/nextflow
  • jprobin/nextflow
  • lestrada/nextflow
  • gyvert/nextflow
  • nfontrod/nextflow
  • gbenoit/nextflow
  • aguill09/nextflow
  • LBMC/regards/nextflow
  • mvilcot/nextflow
  • jkleine/nextflow
  • jseimand/nextflow
  • LBMC/Delattre/JU28_59vs17_SNP
  • mdjaffar/nextflow
  • pmarie01/nextflow
  • rhoury/nextflow
  • mlepetit/nextflow
  • lgely/nextflow
54 results
Show changes
Showing
with 2852 additions and 0 deletions
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
// Module-level settings shared by the CellPhoneDB processes below.
// NOTE(review): `version` is declared but the container tag is pinned to
// `latest` rather than `${version}` — confirm this is intentional.
version = "3.0.0"
container_url = "mlepetit/cellphonedb:latest"
// User-tunable parameters. All default to the empty string.
// NOTE(review): pval, thres, iterations and gene_id are interpolated directly
// after command-line flags by the processes, so they presumably must be set
// by the caller — confirm.
params.cellphonedb = ""
params.cellphonedb_out = ""
params.pval=""
params.thres=""
params.iterations=""
params.gene_id=""
// Runs the CellPhoneDB statistical analysis, then draws the dot plot and the
// heatmap from its results.
workflow cellphone_statistical_analysis {
take:
// channel handed to cpdb_methods_stats as its first input
meta
// channel handed to cpdb_methods_stats as its second input
counts
main:
cpdb_methods_stats(meta,counts)
// Both plotting steps consume tables produced by the statistical analysis.
cpdb_plot_dot_plot(cpdb_methods_stats.out.means,cpdb_methods_stats.out.pvalues)
cpdb_plot_heatmap(cpdb_methods_stats.out.pvalues)
emit:
// Tables from cpdb_methods_stats
means = cpdb_methods_stats.out.means
pvalues = cpdb_methods_stats.out.pvalues
deconvoluted = cpdb_methods_stats.out.deconvoluted
significant_means = cpdb_methods_stats.out.significant_means
// Figure from cpdb_plot_dot_plot
dot_plot = cpdb_plot_dot_plot.out.dot_plot
// Figures and tables from cpdb_plot_heatmap
heatmap = cpdb_plot_heatmap.out.heatmap
heatmap_log = cpdb_plot_heatmap.out.heatmap_log
count_network = cpdb_plot_heatmap.out.count_network
interactions_count = cpdb_plot_heatmap.out.interactions_count
}
// Runs `cellphonedb method statistical_analysis` on a metadata file and a
// counts file and exposes the four result tables written under out/.
//
// Inputs:
//   (id_mtx, meta)   - identifier and metadata file
//   (id_mtx, counts) - identifier and counts file
// Outputs (each paired with id_mtx): means, pvalues, deconvoluted,
// significant_means.
process cpdb_methods_stats {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  // Publish only when an output folder name was configured.
  if (params.cellphonedb_out != "") {
    publishDir "results/${params.cellphonedb_out}", mode: 'copy'
  }

  input:
    // NOTE(review): both tuples bind the same name `id_mtx`; the outputs
    // carry whichever binding wins — confirm both channels share the same id.
    tuple val(id_mtx), path(meta)
    tuple val(id_mtx), path(counts)

  output:
    tuple val(id_mtx), path("out/means.txt"), emit: means
    tuple val(id_mtx), path("out/pvalues.txt"), emit: pvalues
    tuple val(id_mtx), path("out/deconvoluted.txt"), emit: deconvoluted
    tuple val(id_mtx), path("out/significant_means.txt"), emit: significant_means

  script:
  // Use the files staged through the `meta` and `counts` inputs. The previous
  // command read `params.meta` / `params.counts`, which this module never
  // declares, so the staged inputs were ignored.
"""
cellphonedb method statistical_analysis ${meta} ${counts} --counts-data ${params.gene_id} --threads ${task.cpus} --iterations ${params.iterations} --pvalue ${params.pval} --threshold ${params.thres}
"""
}
// Draws the CellPhoneDB dot plot from a means table and a p-values table.
process cpdb_plot_dot_plot {
container = "${container_url}"
label "big_mem_mono_cpus"
// Publish only when an output folder name was configured.
if (params.cellphonedb_out != "") {
publishDir "results/${params.cellphonedb_out}", mode: 'copy'
}
input:
// NOTE(review): both tuples bind the same name `id_mtx` — confirm the two
// channels always carry the same identifier.
tuple val(id_mtx), path(means)
tuple val(id_mtx), path(pvalues)
output:
// NOTE(review): assumes cellphonedb writes plot.pdf into ./out by default — confirm.
tuple val(id_mtx), path("out/plot.pdf"), emit: dot_plot
script:
"""
mkdir ./out
cellphonedb plot dot_plot --means-path ${means} --pvalues-path ${pvalues}
"""
}
// Draws the CellPhoneDB interaction heatmaps and exports the interaction
// count tables from a p-values table.
process cpdb_plot_heatmap {
container = "${container_url}"
label "big_mem_multi_cpus"
// Publish only when an output folder name was configured.
if (params.cellphonedb_out != "") {
publishDir "results/${params.cellphonedb_out}", mode: 'copy'
}
input:
tuple val(id_mtx), path(pvalues)
output:
// NOTE(review): assumes cellphonedb writes these files into ./out by default — confirm.
tuple val(id_mtx), path("out/heatmap_count.pdf"), emit: heatmap
tuple val(id_mtx), path("out/heatmap_log_count.pdf"), emit: heatmap_log
tuple val(id_mtx), path("out/count_network.txt"), emit: count_network
tuple val(id_mtx), path("out/interaction_count.txt"), emit: interactions_count
script:
// NOTE(review): the command reads `params.meta`, which is not declared in this
// module and is not a staged input of this process; the metadata file
// presumably needs to be added as a `path` input — confirm with callers.
"""
mkdir ./out
cellphonedb plot heatmap_plot --pvalues-path ${pvalues} --pvalue ${params.pval} ${params.meta}
"""
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "2.1"
container_url = "lbmc/cutadapt:${version}"

// Default adapter sequences and the cutadapt options built from them.
params.adapter_3_prim = "AGATCGGAAGAG"
params.adapter_5_prim = "CTCTTCCGATCT"
// NOTE(review): the default includes -A/-G, which cutadapt documents as
// paired-end options; single-end callers presumably need to override
// params.adaptor_removal — confirm.
params.adaptor_removal = "-a ${params.adapter_3_prim} -g ${params.adapter_5_prim} -A ${params.adapter_3_prim} -G ${params.adapter_5_prim}"
params.adaptor_removal_out = ""

// Removes adapters from single-end or paired-end reads with cutadapt.
//
// Input:  (file_id, reads) - reads is one file or a pair of files.
// Output: fastq  - (file_id, trimmed fastq file(s))
//         report - cutadapt standard output saved as <prefix>_report.txt
process adaptor_removal {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  // Publish only when an output folder name was configured.
  if (params.adaptor_removal_out != "") {
    publishDir "results/${params.adaptor_removal_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(reads)

  output:
    // The glob must match both `<prefix>_cut_R1/R2.fastq.gz` (paired-end) and
    // `<prefix>_cut.fastq.gz` (single-end). The previous `*_cut_*` pattern
    // never matched the single-end file name.
    tuple val(file_id), path("*_cut*.fastq.gz"), emit: fastq
    path "*_report.txt", emit: report

  script:
  // When the id is a list, its first element names the output files.
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
  if (reads.size() == 2)
"""
cutadapt ${params.adaptor_removal} \
-o ${file_prefix}_cut_R1.fastq.gz -p ${file_prefix}_cut_R2.fastq.gz \
${reads[0]} ${reads[1]} > ${file_prefix}_report.txt
"""
  else
"""
cutadapt ${params.adaptor_removal} \
-o ${file_prefix}_cut.fastq.gz \
${reads} > ${file_prefix}_report.txt
"""
}
// Quality threshold and the cutadapt options built from it (same value for
// both read ends).
params.trim_quality = "20"
params.trimming = "-q ${params.trim_quality},${params.trim_quality}"
params.trimming_out = ""

// Quality-trims single-end or paired-end reads with cutadapt.
//
// Input:  (file_id, reads) - reads is one file or a pair of files.
// Output: fastq  - (file_id, trimmed fastq file(s))
//         report - cutadapt standard output saved as <prefix>_report.txt
process trimming {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  // Publish only when an output folder name was configured.
  if (params.trimming_out != "") {
    publishDir "results/${params.trimming_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(reads)

  output:
    // The glob must match both `<prefix>_trim_R1/R2.fastq.gz` (paired-end) and
    // `<prefix>_trim.fastq.gz` (single-end). The previous `*_trim_*` pattern
    // never matched the single-end file name.
    tuple val(file_id), path("*_trim*.fastq.gz"), emit: fastq
    path "*_report.txt", emit: report

  script:
  // When the id is a list, its first element names the output files.
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
  if (reads.size() == 2)
"""
cutadapt ${params.trimming} \
-o ${file_prefix}_trim_R1.fastq.gz -p ${file_prefix}_trim_R2.fastq.gz \
${reads[0]} ${reads[1]} > ${file_prefix}_report.txt
"""
  else
"""
cutadapt ${params.trimming} \
-o ${file_prefix}_trim.fastq.gz \
${reads} > ${file_prefix}_report.txt
"""
}
// Publish folder for the 5' RACE trimming step (empty string disables
// publishing). It was referenced by the process but never declared.
params.cutadapt_out = ""

// Trims the two 5' RACE adapter sequences (-g) and their reverse complements
// (-a) from a fastq file with cutadapt, allowing a 0.2 error rate.
//
// The process was originally named `5pRACE`, which is not a valid identifier
// (it starts with a digit) and could not be parsed or called; it is renamed
// `race_5p`.
//
// Input:  (file_id, fastq)
// Output: fastq - (file_id, <prefix>_cut_5pRACE.fastq.gz)
process race_5p {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  // Publish only when an output folder name was configured.
  if (params.cutadapt_out != "") {
    publishDir "results/${params.cutadapt_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(fastq)

  output:
    tuple val(file_id), path("*_cut_*"), emit: fastq

  script:
  // When the id is a list, its first element names the output file.
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
  // Each option line ends with a continuation so the shell sees one cutadapt
  // command; the output name replaces the former empty `${}` placeholder.
"""
cutadapt -e 0.2 \
-g CGACTGGAGCACGAGGACACTGACATGGACTGAAGGAGTAGAAA -g TTAGGCAGAGGTGAAAAAGTTG \
-a TTTCTACTCCTTCAGTCCATGTCAGTGTCCTCGTGCTCCAGTCG -a CAACTTTTTCACCTCTGCCTAA \
-o ${file_prefix}_cut_5pRACE.fastq.gz \
${fastq}
"""
}
\ No newline at end of file
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
// Container image tag and URL used by every DANPOS process in this module.
version = "v2.2.2_cv3"
container_url = "biocontainers/danpos:${version}"
include {
bigwig2_to_wig2;
bigwig_to_wig;
wig_to_bedgraph;
wig2_to_bedgraph2
} from "./../ucsc/main.nf"
// Extra options appended to every `danpos.py dpos` call, and the publish
// folder name (empty string disables publishing).
params.dpos = "--smooth_width 0 -n N "
params.dpos_out = ""
// Runs `danpos.py dpos` on BAM files and converts the resulting wig files to
// bedgraph.
workflow dpos_bam_bg {
take:
// passed as the first argument of wig2_to_bedgraph2
fasta
// passed to dpos_bam, which inspects fastq[1] to choose the -m value
fastq
// (file_id, bam_ip, bam_wce) tuples consumed by dpos_bam
bam
main:
dpos_bam(fastq, bam)
wig2_to_bedgraph2(fasta, dpos_bam.out.wig)
emit:
bg = wig2_to_bedgraph2.out.bg
wig = dpos_bam.out.wig
bed = dpos_bam.out.bed
}
// Runs `danpos.py dpos` on an IP BAM with a WCE BAM as background (-b),
// flattens the `pooled` sub-folder into the output folder and derives a BED
// file from the positions table.
//
// Inputs:  fastq                      - value whose second element is inspected
//                                       to choose the -m option
//          (file_id, bam_ip, bam_wce) - sample id, IP BAM and WCE BAM
// Outputs: wig - (file_id, IP wig, WCE wig)
//          bed - (file_id, positions BED)
process dpos_bam {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  // Publish only when an output folder name was configured.
  if (params.dpos_out != "") {
    publishDir "results/${params.dpos_out}", mode: 'copy', overwrite: true
  }

  input:
    val fastq
    tuple val(file_id), path(bam_ip), path(bam_wce)

  output:
    tuple val(file_id), path("${file_prefix}/${bam_ip.simpleName}*.wig"), path("${file_prefix}/${bam_wce.simpleName}*.wig"), emit: wig
    tuple val(file_id), path("${file_prefix}/*.positions.bed"), emit: bed

  script:
  // The output folder is named after the first element of a list id, the
  // first value of a map id, or the id itself.
  if (file_id instanceof List) {
    file_prefix = file_id[0]
  } else if (file_id instanceof Map) {
    file_prefix = file_id.values()[0]
  } else {
    file_prefix = file_id
  }
  // -m 1 when the second element of `fastq` has two entries, -m 0 otherwise.
  m = (fastq[1].size() == 2) ? 1 : 0
"""
danpos.py dpos -m ${m} \
${params.dpos} \
-b ${bam_wce} \
-o ${file_prefix} \
${bam_ip}
mv ${file_prefix}/pooled/* ${file_prefix}/
rm -R ${file_prefix}/pooled
awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2-1, \$3, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${bam_ip.simpleName}.bgsub.positions.xls > ${file_prefix}/${bam_ip.simpleName}.bgsub.positions.bed
"""
}
// Converts bigwig pairs to wig, runs `danpos.py dpos` on them and converts the
// wig files to bedgraph.
workflow dpos_bw {
take:
// passed as the first argument of wig_to_bedgraph
fasta
// passed to dpos_wig, which inspects fastq[1] to choose the -m value
fastq
// bigwig channel consumed by bigwig2_to_wig2
bw
main:
bigwig2_to_wig2(bw)
dpos_wig(fastq, bigwig2_to_wig2.out.wig)
// NOTE(review): the other workflows that start from bigwig2_to_wig2 feed
// wig2_to_bedgraph2, while this one feeds wig_to_bedgraph — confirm the
// converter matches the tuple shape emitted by bigwig2_to_wig2.
wig_to_bedgraph(fasta, bigwig2_to_wig2.out.wig)
emit:
bg = wig_to_bedgraph.out.bg
wig = bigwig2_to_wig2.out.wig
bed = dpos_wig.out.bed
}
// Runs `danpos.py dpos` on an IP wig with a WCE wig as background (-b),
// flattens the `pooled` sub-folder into the output folder and derives a BED
// file from the positions table.
//
// Inputs:  fastq                      - value whose second element is inspected
//                                       to choose the -m option
//          (file_id, wig_ip, wig_wce) - sample id, IP wig and WCE wig
// Outputs: bed - (file_id, positions BED)
//          wig - (file_id, IP wig, WCE wig)
process dpos_wig {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  // Publish only when an output folder name was configured.
  if (params.dpos_out != "") {
    publishDir "results/${params.dpos_out}", mode: 'copy', overwrite: true
  }

  input:
    val fastq
    tuple val(file_id), path(wig_ip), path(wig_wce)

  output:
    tuple val(file_id), path("${file_prefix}/*.positions.bed"), emit: bed
    // Built from this process's own inputs. The previous declaration used
    // `bam_ip` / `bam_wce`, which do not exist here.
    tuple val(file_id), path("${file_prefix}/${wig_ip.simpleName}*.wig"), path("${file_prefix}/${wig_wce.simpleName}*.wig"), emit: wig

  script:
  // The output folder is named after the first element of a list id, the
  // first value of a map id, or the id itself.
  switch(file_id) {
    case {it instanceof List}:
      file_prefix = file_id[0]
    break
    case {it instanceof Map}:
      file_prefix = file_id.values()[0]
    break
    default:
      file_prefix = file_id
    break
  }
  // -m 1 when the second element of `fastq` has two entries, -m 0 otherwise.
  m = 0
  if (fastq[1].size() == 2){
    m = 1
  }
"""
danpos.py dpos -m ${m} \
${params.dpos} \
-b ${wig_wce} \
-o ${file_prefix} \
${wig_ip}
mv ${file_prefix}/pooled/* ${file_prefix}/
rm -R ${file_prefix}/pooled
awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2-1, \$3, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${wig_ip.simpleName}.positions.xls > ${file_prefix}/${wig_ip.simpleName}.positions.bed
"""
}
// Converts a bigwig to wig, runs `danpos.py dpos` on it without a background
// track and converts the wig to bedgraph.
workflow dpos_bw_no_b {
take:
// passed as the first argument of wig_to_bedgraph
fasta
// passed to dpos_wig_no_b, which inspects fastq[1] to choose the -m value
fastq
// bigwig channel consumed by bigwig_to_wig
bw
main:
bigwig_to_wig(bw)
dpos_wig_no_b(fastq, bigwig_to_wig.out.wig)
wig_to_bedgraph(fasta, bigwig_to_wig.out.wig)
emit:
bg = wig_to_bedgraph.out.bg
wig = bigwig_to_wig.out.wig
bed = dpos_wig_no_b.out.bed
}
// Runs `danpos.py dpos` on a single wig file (no -b background), flattens the
// `pooled` sub-folder into the output folder and derives a BED file from the
// positions table.
//
// Inputs:  fastq             - value whose second element is inspected to
//                              choose the -m option
//          (file_id, wig_ip) - sample id and wig file
// Output:  bed - (file_id, positions BED)
process dpos_wig_no_b {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  // Publish only when an output folder name was configured.
  if (params.dpos_out != "") {
    publishDir "results/${params.dpos_out}", mode: 'copy', overwrite: true
  }

  input:
    val fastq
    tuple val(file_id), path(wig_ip)

  output:
    tuple val(file_id), path("${file_prefix}/*.positions.bed"), emit: bed

  script:
  // The output folder is named after the first element of a list id, the
  // first value of a map id, or the id itself.
  if (file_id instanceof List) {
    file_prefix = file_id[0]
  } else if (file_id instanceof Map) {
    file_prefix = file_id.values()[0]
  } else {
    file_prefix = file_id
  }
  // -m 1 when the second element of `fastq` has two entries, -m 0 otherwise.
  m = (fastq[1].size() == 2) ? 1 : 0
"""
danpos.py dpos -m ${m} \
${params.dpos} \
-o ${file_prefix} \
${wig_ip}
mv ${file_prefix}/pooled/* ${file_prefix}/
rm -R ${file_prefix}/pooled
awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2-1, \$3, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${wig_ip.simpleName}.positions.xls > ${file_prefix}/${wig_ip.simpleName}.positions.bed
"""
}
// Compares two bigwig tracks with `danpos.py dpos` (dpos_wigvswig) and
// converts the resulting wig to bedgraph.
workflow dwig_bwvsbw {
  take:
    // passed as the first argument of wig_to_bedgraph
    fasta
    // passed to dpos_wigvswig, which inspects fastq[1] to choose the -m value
    fastq
    // the two bigwig channels to compare
    bw_a
    bw_b

  main:
    // NOTE(review): bigwig2_to_wig2 is invoked twice in this workflow, and
    // dpos_wigvswig declares (id, wig) inputs; this presumably needs two
    // aliased includes of a single-wig converter — confirm against ucsc/main.nf.
    dpos_wigvswig(
      fastq,
      bigwig2_to_wig2(bw_a),
      bigwig2_to_wig2(bw_b)
    )
    wig_to_bedgraph(fasta, dpos_wigvswig.out.wig)

  emit:
    bg = wig_to_bedgraph.out.bg
    // Emit the outputs of the process this workflow actually runs. The
    // previous code referenced dpeak_wig, which is never invoked here.
    wig = dpos_wigvswig.out.wig
    bed = dpos_wigvswig.out.bed
}
// Runs `danpos.py dpos` comparing wig A against wig B, flattens the `pooled`
// sub-folder into the output folder and derives a BED file from the positions
// table.
//
// Inputs:  fastq                 - value whose second element is inspected to
//                                  choose the -m option
//          (file_id_a, wig_ip_a) - id and wig of sample A
//          (file_id_b, wig_ip_b) - id and wig of sample B
// Outputs (keyed by file_id_a): wig, bed
process dpos_wigvswig {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  // The tag, outputs and file names below use sample A's id and wig. The
  // previous code used `file_id` and `bam_ip`, neither of which is declared
  // in this process.
  tag "$file_id_a"
  // Publish only when an output folder name was configured.
  if (params.dpos_out != "") {
    publishDir "results/${params.dpos_out}", mode: 'copy', overwrite: true
  }

  input:
    val fastq
    tuple val(file_id_a), path(wig_ip_a)
    tuple val(file_id_b), path(wig_ip_b)

  output:
    tuple val(file_id_a), path("${file_prefix}/${wig_ip_a.simpleName}*.wig"), emit: wig
    tuple val(file_id_a), path("${file_prefix}/*.positions.bed"), emit: bed

  script:
  // The output folder is named after the first element of a list id, the
  // first value of a map id, or the id itself.
  switch(file_id_a) {
    case {it instanceof List}:
      file_prefix = file_id_a[0]
    break
    case {it instanceof Map}:
      file_prefix = file_id_a.values()[0]
    break
    default:
      file_prefix = file_id_a
    break
  }
  // -m 1 when the second element of `fastq` has two entries, -m 0 otherwise.
  m = 0
  if (fastq[1].size() == 2){
    m = 1
  }
  // NOTE(review): the positions table is assumed to be named after wig A, in
  // line with the wig output glob — confirm against DANPOS output naming for
  // A:B comparisons.
"""
danpos.py dpos -m ${m} \
${params.dpos} \
-b ${wig_ip_a},${wig_ip_b} \
-o ${file_prefix} \
${wig_ip_a}:${wig_ip_b}
mv ${file_prefix}/pooled/* ${file_prefix}/
rm -R ${file_prefix}/pooled
awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2-1, \$3, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${wig_ip_a.simpleName}.positions.xls > ${file_prefix}/${wig_ip_a.simpleName}.positions.bed
"""
}
// Extra options appended to every `danpos.py dpeak` call, and the publish
// folder name (empty string disables publishing).
params.dpeak = "--smooth_width 0 -n N "
params.dpeak_out = ""

// Runs `danpos.py dpeak` on an IP BAM with a WCE BAM as background (-b),
// flattens the `pooled` sub-folder into the output folder and derives two BED
// files from the peaks table: peak intervals and one-base summits.
//
// Inputs:  fastq                      - value whose second element is inspected
//                                       to choose the -m option
//          (file_id, bam_ip, bam_wce) - sample id, IP BAM and WCE BAM
// Outputs: wig     - (file_id, IP wig, WCE wig)
//          bed     - (file_id, positions BED, summit BED)
//          all_bed - (file_id, every BED file in the output folder)
process dpeak_bam {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  // Publish only when an output folder name was configured.
  if (params.dpeak_out != "") {
    publishDir "results/${params.dpeak_out}", mode: 'copy', overwrite: true
  }

  input:
    val fastq
    tuple val(file_id), path(bam_ip), path(bam_wce)

  output:
    tuple val(file_id), path("${file_prefix}/${bam_ip.simpleName}*.wig"), path("${file_prefix}/${bam_wce.simpleName}*.wig"), emit: wig
    tuple val(file_id), path("${file_prefix}/*.positions.bed"), path("${file_prefix}/*.summit.bed"), emit: bed
    // Emit names must be unique within a process. This catch-all output
    // previously reused `bed`; it keeps its position but is now `all_bed`.
    tuple val(file_id), path("${file_prefix}/*.bed"), emit: all_bed

  script:
  // The output folder is named after the first element of a list id, the
  // first value of a map id, or the id itself.
  switch(file_id) {
    case {it instanceof List}:
      file_prefix = file_id[0]
    break
    case {it instanceof Map}:
      file_prefix = file_id.values()[0]
    break
    default:
      file_prefix = file_id
    break
  }
  // -m 1 when the second element of `fastq` has two entries, -m 0 otherwise.
  m = 0
  if (fastq[1].size() == 2){
    m = 1
  }
"""
danpos.py dpeak -m ${m} \
${params.dpeak} \
-b ${bam_wce} \
-o ${file_prefix} \
${bam_ip}
mv ${file_prefix}/pooled/* ${file_prefix}/
rm -R ${file_prefix}/pooled
awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2-1, \$3, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${bam_ip.simpleName}.bgsub.peaks.xls > ${file_prefix}/${bam_ip.simpleName}.bgsub.positions.bed
awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$4-1, \$4, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${bam_ip.simpleName}.bgsub.peaks.xls > ${file_prefix}/${bam_ip.simpleName}.bgsub.positions.summit.bed
"""
}
// Converts bigwig pairs to wig, runs `danpos.py dpeak` on them and converts
// the resulting wig files to bedgraph.
workflow dpeak_bw {
take:
// passed as the first argument of wig2_to_bedgraph2
fasta
// passed to dpeak_wig, which inspects fastq[1] to choose the -m value
fastq
// bigwig channel consumed by bigwig2_to_wig2
bw
main:
dpeak_wig(fastq, bigwig2_to_wig2(bw))
wig2_to_bedgraph2(fasta, dpeak_wig.out.wig)
emit:
bg = wig2_to_bedgraph2.out.bg
wig = dpeak_wig.out.wig
bed = dpeak_wig.out.bed
}
// Runs `danpos.py dpeak` on an IP wig with a WCE wig as background (-b),
// flattens the `pooled` sub-folder into the output folder and derives two BED
// files from the peaks table: peak intervals and one-base summits.
//
// Inputs:  fastq                      - value whose second element is inspected
//                                       to choose the -m option
//          (file_id, wig_ip, wig_wce) - sample id, IP wig and WCE wig
// Outputs: wig - (file_id, background-subtracted IP wig, WCE wig)
//          bed - (file_id, positions BED, summit BED)
process dpeak_wig {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  // Publish only when an output folder name was configured.
  if (params.dpeak_out != "") {
    publishDir "results/${params.dpeak_out}", mode: 'copy', overwrite: true
  }

  input:
    val fastq
    tuple val(file_id), path(wig_ip), path(wig_wce)

  output:
    tuple val(file_id), path("${file_prefix}/${wig_ip.simpleName}.bgsub.wig"), path("${file_prefix}/${wig_wce.simpleName}.wig"), emit: wig
    tuple val(file_id), path("${file_prefix}/*.positions.bed"), path("${file_prefix}/*.summit.bed"), emit: bed

  script:
  // The output folder is named after the first element of a list id, the
  // first value of a map id, or the id itself.
  if (file_id instanceof List) {
    file_prefix = file_id[0]
  } else if (file_id instanceof Map) {
    file_prefix = file_id.values()[0]
  } else {
    file_prefix = file_id
  }
  // -m 1 when the second element of `fastq` has two entries, -m 0 otherwise.
  m = (fastq[1].size() == 2) ? 1 : 0
"""
danpos.py dpeak -m ${m} \
${params.dpeak} \
-b ${wig_wce} \
-o ${file_prefix} \
${wig_ip}
mv ${file_prefix}/pooled/* ${file_prefix}/
rm -R ${file_prefix}/pooled
awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2-1, \$3, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${wig_ip.simpleName}.bgsub.peaks.xls > ${file_prefix}/${wig_ip.simpleName}.bgsub.positions.bed
awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$4-1, \$4, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${wig_ip.simpleName}.bgsub.peaks.xls > ${file_prefix}/${wig_ip.simpleName}.bgsub.positions.summit.bed
"""
}
// Compares two bigwig pairs with `danpos.py dpeak` (dpeak_wigvswig) and
// converts the resulting wig files to bedgraph.
workflow dpeak_bwvsbw {
  take:
    // passed as the first argument of wig2_to_bedgraph2
    fasta
    // passed to dpeak_wigvswig, which inspects fastq[1] to choose the -m value
    fastq
    // the two bigwig channels to compare
    bw_a
    bw_b

  main:
    // NOTE(review): bigwig2_to_wig2 is invoked twice in this workflow; this
    // presumably needs two aliased includes — confirm against the Nextflow
    // version in use.
    dpeak_wigvswig(
      fastq,
      bigwig2_to_wig2(bw_a),
      bigwig2_to_wig2(bw_b)
    )
    wig2_to_bedgraph2(fasta, dpeak_wigvswig.out.wig)

  emit:
    bg = wig2_to_bedgraph2.out.bg
    // Emit the outputs of the process this workflow actually runs. The
    // previous code referenced dpeak_wig, which is never invoked here.
    wig = dpeak_wigvswig.out.wig
    bed = dpeak_wigvswig.out.bed
}
// Runs `danpos.py dpeak` comparing sample A against sample B (each with its
// own WCE background), flattens the `pooled` sub-folder into the output folder
// and derives two BED files from the peaks table: peak intervals and one-base
// summits.
//
// Inputs:  fastq                            - value whose second element is
//                                             inspected to choose the -m option
//          (file_id_a, wig_ip_a, wig_wce_a) - id, IP wig and WCE wig of sample A
//          (file_id_b, wig_ip_b, wig_wce_b) - id, IP wig and WCE wig of sample B
// Outputs (keyed by file_id_a): wig, bed
process dpeak_wigvswig {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  // The tag, outputs and file names below use sample A's id and wig. The
  // previous code used `file_id` and `bam_ip`, neither of which is declared
  // in this process.
  tag "$file_id_a"
  // Publish only when an output folder name was configured.
  if (params.dpeak_out != "") {
    publishDir "results/${params.dpeak_out}", mode: 'copy', overwrite: true
  }

  input:
    val fastq
    tuple val(file_id_a), path(wig_ip_a), path(wig_wce_a)
    tuple val(file_id_b), path(wig_ip_b), path(wig_wce_b)

  output:
    tuple val(file_id_a), path("${file_prefix}/${wig_ip_a.simpleName}.bgsub.wig"), path("${file_prefix}/${wig_wce_a.simpleName}.wig"), emit: wig
    tuple val(file_id_a), path("${file_prefix}/*.positions.bed"), path("${file_prefix}/*.summit.bed"), emit: bed

  script:
  // The output folder is named after the first element of a list id, the
  // first value of a map id, or the id itself.
  switch(file_id_a) {
    case {it instanceof List}:
      file_prefix = file_id_a[0]
    break
    case {it instanceof Map}:
      file_prefix = file_id_a.values()[0]
    break
    default:
      file_prefix = file_id_a
    break
  }
  // -m 1 when the second element of `fastq` has two entries, -m 0 otherwise.
  m = 0
  if (fastq[1].size() == 2){
    m = 1
  }
  // NOTE(review): the peaks table is assumed to be named after wig A, in line
  // with the wig output declaration — confirm against DANPOS output naming
  // for A:B comparisons.
"""
danpos.py dpeak -m ${m} \
${params.dpeak} \
-b ${wig_ip_a}:${wig_wce_a},${wig_ip_b}:${wig_wce_b} \
-o ${file_prefix} \
${wig_ip_a}:${wig_ip_b}
mv ${file_prefix}/pooled/* ${file_prefix}/
rm -R ${file_prefix}/pooled
awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2-1, \$3, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${wig_ip_a.simpleName}.bgsub.peaks.xls > ${file_prefix}/${wig_ip_a.simpleName}.bgsub.positions.bed
awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$4-1, \$4, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${wig_ip_a.simpleName}.bgsub.peaks.xls > ${file_prefix}/${wig_ip_a.simpleName}.bgsub.positions.summit.bed
"""
}
\ No newline at end of file
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "3.5.1"
container_url = "lbmc/deeptools:${version}"

// Extra `sambamba index` options and the publish folder name (empty string
// disables publishing).
params.index_bam = ""
params.index_bam_out = ""

// Indexes a BAM file with sambamba.
//
// Input:  (file_id, bam)
// Output: bam_idx - (file_id, the BAM itself, files matching *.bam*)
process index_bam {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  // Publish only when an output folder name was configured.
  if (params.index_bam_out != "") {
    publishDir "results/${params.index_bam_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam)

  output:
    tuple val(file_id), path("${bam}"), path("*.bam*"), emit: bam_idx

  script:
  // params.index_bam is passed to the command, as the other modules do with
  // their option parameter; it was declared but never used. The empty default
  // leaves the command unchanged.
"""
sambamba index -t ${task.cpus} ${params.index_bam} ${bam}
"""
}
// Extra `bamCoverage` options and the publish folder name (empty string
// disables publishing).
params.bam_to_bigwig = ""
params.bam_to_bigwig_out = ""

// Computes a bigwig coverage track from an indexed BAM with deepTools
// bamCoverage, ignoring duplicate reads.
//
// Input:  (file_id, bam, idx) - the index is staged next to the BAM
// Output: bw - (file_id, <bam simpleName>.bw)
process bam_to_bigwig {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  // Publish only when an output folder name was configured.
  if (params.bam_to_bigwig_out != "") {
    publishDir "results/${params.bam_to_bigwig_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam), path(idx)

  output:
    tuple val(file_id), path("*.bw"), emit: bw

  script:
  // params.bam_to_bigwig is passed to the command; it was declared but never
  // used. The empty default leaves the command unchanged.
"""
bamCoverage -p ${task.cpus} --ignoreDuplicates -b ${bam} \
${params.bam_to_bigwig} \
-o ${bam.simpleName}.bw
"""
}
// Extra `computeMatrix` options and the publish folder name (empty string
// disables publishing).
params.compute_matrix = ""
params.compute_matrix_out = ""

// Builds a deepTools score matrix (scale-regions mode) of bigwig signal over
// the regions of a BED file, with 100 bp flanks on each side.
//
// Inputs:  (file_id, bw)      - bigwig file(s)
//          (bed_file_id, bed) - regions
// Output:  matrix - (bed_file_id, <bed simpleName>.mat.gz)
process compute_matrix {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${bed_file_id}"
  // Publish only when an output folder name was configured.
  if (params.compute_matrix_out != "") {
    publishDir "results/${params.compute_matrix_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bw)
    tuple val(bed_file_id), path(bed)

  output:
    tuple val(bed_file_id), path("*.mat.gz"), emit: matrix

  script:
  // params.compute_matrix is passed to the command; it was declared but never
  // used. The empty default leaves the command unchanged.
"""
computeMatrix scale-regions -S ${bw} \
-p ${task.cpus} \
-R ${bed} \
--beforeRegionStartLength 100 \
--afterRegionStartLength 100 \
${params.compute_matrix} \
-o ${bed.simpleName}.mat.gz
"""
}
// Extra `plotProfile` options, the publish folder name (empty string disables
// publishing) and the plot title.
params.plot_profile = ""
params.plot_profile_out = ""
// Declared here so an unset title renders as an empty title rather than the
// string "null".
params.title = ""

// Draws a deepTools profile plot (PDF) from a computeMatrix matrix.
//
// Input:  (file_id, matrix)
// Output: pdf - (file_id, <matrix simpleName>.pdf)
process plot_profile {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  // Publishing is driven by this process's own parameter. The guard
  // previously read params.compute_matrix_out, so params.plot_profile_out
  // had no effect.
  if (params.plot_profile_out != "") {
    publishDir "results/${params.plot_profile_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(matrix)

  output:
    tuple val(file_id), path("*.pdf"), emit: pdf

  script:
  /*
  see more option at
  https://deeptools.readthedocs.io/en/develop/content/tools/plotProfile.html
  */
  // params.plot_profile is passed to the command; it was declared but never
  // used. The empty default leaves the command unchanged.
"""
plotProfile -m ${matrix} \
--plotFileFormat=pdf \
-out ${matrix.simpleName}.pdf \
--plotType=fill \
--perGroup \
${params.plot_profile} \
--plotTitle "${params.title}"
"""
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
// Container image tag and URL used by the emase-zero process in this module.
version = "0.3.1"
container_url = "lbmc/emase-zero:${version}"
include { g2tr } from "./../kb/main.nf"
include { bam2ec } from "./../alntools/main.nf"
include { fasta_to_transcripts_lengths } from "./../bioawk/main.nf"
// Extra `emase-zero` options and the publish folder name (empty string
// disables publishing).
params.count = "-m 2"
params.count_out = ""
// Quantifies alignments with emase-zero: derives the gene-to-transcript map
// and the transcript lengths, converts the BAM files with bam2ec, then runs
// emase.
workflow count {
take:
// channel handed to bam2ec
bam_idx
// channel handed to fasta_to_transcripts_lengths and to emase
fasta
// channel handed to g2tr
gtf
main:
g2tr(gtf)
fasta_to_transcripts_lengths(fasta)
// .collect() gathers a channel into a single emission; presumably so the
// reference files are reused for every sample — confirm.
bam2ec(bam_idx, fasta_to_transcripts_lengths.out.tsv.collect())
emase(bam2ec.out.bin, fasta.collect(), bam2ec.out.tsv, g2tr.out.g2t.collect())
emit:
count = emase.out.count
}
// Runs emase-zero on a binary alignment file and captures its console output
// as a report.
//
// Inputs:  (file_id, bin)                                - alignment file
//          (fasta_id, fasta)                             - transcript fasta
//          (transcript_length_id, transcript_length)     - passed to -l
//          (gene_to_transcript_id, gene_to_transcript)   - passed to -g
// Outputs: count  - (file_id, <bin simpleName>.quantified* files)
//          report - <prefix>_emase-zero_report.txt
process emase {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  // Publish only when an output folder name was configured.
  if (params.count_out != "") {
    publishDir "results/${params.count_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bin)
    tuple val(fasta_id), path(fasta)
    tuple val(transcript_length_id), path(transcript_length)
    tuple val(gene_to_transcript_id), path(gene_to_transcript)

  output:
    tuple val(file_id), path("${bin.simpleName}.quantified*"), emit: count
    path "*_report.txt", emit: report

  script:
  // As in the other modules, a list id contributes only its first element to
  // file names. The report name previously interpolated the raw id, which
  // renders as "[a, b]" for a list. Plain ids produce the same name as before.
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
  // NOTE(review): tr_list.txt is written but not passed to emase-zero and is
  // not a declared output — confirm whether it is still needed.
"""
grep ">" ${fasta} | sed 's/>//' > tr_list.txt
emase-zero ${params.count} \
-o ${bin.simpleName}.quantified \
-l ${transcript_length} \
-g ${gene_to_transcript} \
${bin} &> ${file_prefix}_emase-zero_report.txt
"""
}
\ No newline at end of file
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "0.10.16"
container_url = "lbmc/emase:${version}"

// Extra `prepare-emase` options.
params.diploid_genome = "-x"
// Publish folder name; the empty string disables publishing, as in the other
// modules. It previously defaulted to "-x" (copied from the options
// parameter), which published into results/-x.
params.diploid_genome_out = ""

// Builds a pooled diploid reference from two genome fasta files with
// prepare-emase.
//
// Input:  (genome_a, fasta_a, genome_b, fasta_b) - the genome names are used
//         as the -s suffixes
// Output: fasta - ("<genome_a>_<genome_b>", fasta file(s) written by the tool)
process diploid_genome {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${genome_a}-${genome_b}"
  // Publish only when an output folder name was configured.
  if (params.diploid_genome_out != "") {
    publishDir "results/${params.diploid_genome_out}", mode: 'copy'
  }

  input:
    tuple val(genome_a), path(fasta_a), val(genome_b), path(fasta_b)

  output:
    // Glob over the fasta files produced in the work directory. The previous
    // pattern was the literal file name ".fa".
    // NOTE(review): confirm the exact file name prepare-emase writes.
    tuple val("${genome_a}_${genome_b}"), path("*.fa"), emit: fasta

  script:
"""
prepare-emase -G ${fasta_a},${fasta_b} -s ${genome_a},${genome_b} ${params.diploid_genome}
"""
}
\ No newline at end of file
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
// Container image tag and URL used by every fastp process in this module.
version = "0.23.2"
container_url = "lbmc/fastp:${version}"
// fastp_protocol selects the process variant; fastp holds extra command-line
// options; fastp_out is the publish folder name (empty string disables
// publishing).
params.fastp_protocol = ""
params.fastp = ""
params.fastp_out = ""
// Dispatches reads to the fastp variant selected by params.fastp_protocol and
// exposes its trimmed reads and JSON report under common names.
workflow fastp {
take:
// (file_id, reads) tuples
fastq
main:
switch(params.fastp_protocol) {
// "accel_1splus" selects the two-pass variant
case "accel_1splus":
fastp_accel_1splus(fastq)
fastp_accel_1splus.out.fastq.set{res_fastq}
fastp_accel_1splus.out.report.set{res_report}
break;
// any other value uses the default single-pass variant
default:
fastp_default(fastq)
fastp_default.out.fastq.set{res_fastq}
fastp_default.out.report.set{res_report}
break;
}
emit:
fastq = res_fastq
report = res_report
}
// Single-pass fastp run on single-end or paired-end reads.
//
// Input:  (file_id, reads) - one file or a pair of files
// Outputs: fastq  - (file_id, *_trim.fastq.gz file(s))
//          html   - (file_id, <prefix>.html)
//          report - (file_id, <prefix>_fastp.json)
process fastp_default {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_prefix"
  // Publish only when an output folder name was configured.
  if (params.fastp_out != "") {
    publishDir "results/${params.fastp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(reads)

  output:
    tuple val(file_id), path("*_trim.fastq.gz"), emit: fastq
    tuple val(file_id), path("${file_prefix}.html"), emit: html
    tuple val(file_id), path("${file_prefix}_fastp.json"), emit: report

  script:
  // A list id contributes only its first element to the file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  // Two read files: paired-end command; otherwise the single-end command.
  if (reads.size() == 2)
"""
fastp --thread ${task.cpus} \
--qualified_quality_phred 20 \
--disable_length_filtering \
--detect_adapter_for_pe \
${params.fastp} \
--in1 ${reads[0]} \
--in2 ${reads[1]} \
--out1 ${file_prefix}_R1_trim.fastq.gz \
--out2 ${file_prefix}_R2_trim.fastq.gz \
--html ${file_prefix}.html \
--json ${file_prefix}_fastp.json \
--report_title ${file_prefix}
"""
  else
"""
fastp --thread ${task.cpus} \
--qualified_quality_phred 20 \
--disable_length_filtering \
--detect_adapter_for_pe \
${params.fastp} \
--in1 ${reads[0]} \
--out1 ${file_prefix}_trim.fastq.gz \
--html ${file_prefix}.html \
--json ${file_prefix}_fastp.json \
--report_title ${file_prefix}
"""
}
// Two-pass fastp run: the first pass streams its result to stdout with
// quality, length and poly-G processing disabled; the second pass reads that
// stream, removes the first 10 bases of each read with adapter trimming
// disabled and writes the final files and reports.
//
// Input:  (file_id, reads) - one file or a pair of files
// Outputs: fastq  - (file_id, *_trim.fastq.gz file(s))
//          html   - (file_id, <prefix>.html)
//          report - (file_id, <prefix>_fastp.json)
process fastp_accel_1splus {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_prefix"
  // Publish only when an output folder name was configured.
  if (params.fastp_out != "") {
    publishDir "results/${params.fastp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(reads)

  output:
    tuple val(file_id), path("*_trim.fastq.gz"), emit: fastq
    tuple val(file_id), path("${file_prefix}.html"), emit: html
    tuple val(file_id), path("${file_prefix}_fastp.json"), emit: report

  script:
  // A list id contributes only its first element to the file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  // Two read files: paired-end (interleaved stream between the two passes);
  // otherwise the single-end command.
  if (reads.size() == 2)
"""
fastp --thread ${task.cpus} \
--disable_quality_filtering \
--disable_length_filtering \
--disable_trim_poly_g \
--detect_adapter_for_pe \
--stdout \
--in1 ${reads[0]} \
--in2 ${reads[1]} 2> /dev/null | \
fastp --thread ${task.cpus} \
--stdin \
--interleaved_in \
--trim_front1=10 \
--trim_front2=10 \
--disable_adapter_trimming \
--qualified_quality_phred 20 \
--disable_length_filtering \
--detect_adapter_for_pe \
${params.fastp} \
--out1 ${file_prefix}_R1_trim.fastq.gz \
--out2 ${file_prefix}_R2_trim.fastq.gz \
--html ${file_prefix}.html \
--json ${file_prefix}_fastp.json \
--report_title ${file_prefix}
"""
  else
"""
fastp --thread ${task.cpus} \
--disable_quality_filtering \
--disable_length_filtering \
--disable_trim_poly_g \
--detect_adapter_for_pe \
--stdout \
--in1 ${reads[0]} 2> /dev/null | \
fastp --thread ${task.cpus} \
--disable_adapter_trimming \
--stdin \
--trim_front1=10 \
--qualified_quality_phred 20 \
--disable_length_filtering \
--detect_adapter_for_pe \
${params.fastp} \
--out1 ${file_prefix}_trim.fastq.gz \
--html ${file_prefix}.html \
--json ${file_prefix}_fastp.json \
--report_title ${file_prefix}
"""
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "0.11.3--pl5.22.0_0"
container_url = "quay.io/biocontainers/fastq-screen:${version}"

// Extra `fastq_screen` options and the publish folder name (empty string
// disables publishing).
params.fastq_screen = ""
params.fastq_screen_out = ""

// Screens fastq files against the fastq_screen reference genomes.
//
// Input:  (file_id, fastq) - one or more fastq files
// Output: output - (file_id, every file created in the work directory)
process fastq_screen {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  // Publishing is driven by this process's own parameter. The guard
  // previously read params.index_fasta_out, which this module does not
  // declare.
  if (params.fastq_screen_out != "") {
    publishDir "results/${params.fastq_screen_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(fastq)

  output:
    tuple val(file_id), path("*"), emit: output

  script:
  // Screen the staged input files. The command previously named the
  // placeholder files sample1.fastq / sample2.fastq, which are never staged.
  // params.fastq_screen is passed through; its empty default adds nothing.
  // NOTE(review): `--get_genomes` downloads the reference set on every task
  // and the screening step may need `--conf` pointing at the downloaded
  // configuration — confirm.
"""
fastq_screen --get_genomes
fastq_screen --threads ${task.cpus} ${params.fastq_screen} ${fastq}
"""
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
// Container image tag and URL used by the FastQC process.
version = "0.11.5"
container_url = "lbmc/fastqc:${version}"

// Extra `fastqc` options and the publish folder name (empty string disables
// publishing).
params.fastqc_fastq = ""
params.fastqc_fastq_out = ""

// Runs FastQC on single-end or paired-end fastq files.
//
// Input:  (file_id, reads) - one file or a pair of files
// Output: report - (file_id, the .zip and .html files written by fastqc)
process fastqc_fastq {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  // Publish only when an output folder name was configured.
  if (params.fastqc_fastq_out != "") {
    publishDir "results/${params.fastqc_fastq_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(reads)

  output:
    tuple val(file_id), path("*.{zip,html}"), emit: report

  script:
  // Two read files select the paired command; anything else the single one.
  def is_paired = reads.size() == 2
  if (is_paired)
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
${params.fastqc_fastq} \
${reads[0]} ${reads[1]}
"""
  else
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${params.fastqc_fastq} ${reads[0]}
"""
}
\ No newline at end of file
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
// Container image tag and URL used by the flexi_splitter processes.
version = "1.0.2"
container_url = "lbmc/flexi_splitter:${version}"
// Extra `flexi_splitter` options and the publish folder name (empty string
// disables publishing).
params.split = ""
params.split_out = ""
// Splits reads with flexi_splitter, flattens the per-sample folders into
// files and regroups those files two by two under a new identifier.
workflow split {
take:
// (file_id, reads) tuples
reads
// (config_id, config) tuples
config
main:
split_fastq(reads, config)
group_fastq(split_fastq.out.fastq_folder)
// Keep only the file list, flatten it, regroup the files in chunks of two and
// key each chunk by the first file's simpleName with an optional "_" followed
// by R1/R2 removed.
// NOTE(review): collate(2) assumes the flattened files come out as adjacent
// R1/R2 pairs — confirm for single-end data.
group_fastq.out.fastq
.map{ it -> it[1] }
.flatten()
.collate(2)
.map{ it -> [it[0].simpleName - ~/_{0,1}R[12]/, it]}
.set{ splited_fastq }
emit:
fastq = splited_fastq
}
// Splits single-end or paired-end reads with flexi_splitter according to a
// YAML configuration, writing the result under the `split` folder.
//
// Inputs:  (file_id, reads)    - one file or a pair of files
//          (config_id, config) - flexi_splitter configuration
// Output:  fastq_folder - (file_id, the `split` folder)
process split_fastq {
  // You can get an example of config file here:
  // src/nf_modules/flexi_splitter/marseq_flexi_splitter.yaml
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  // Publish only when an output folder name was configured.
  if (params.split_out != "") {
    publishDir "results/${params.split_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(reads)
    tuple val(config_id), path(config)

  output:
    tuple val(file_id), path("split"), emit: fastq_folder

  script:
  // A list id contributes only its first element to the prefix.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  // Two read files: -n 2 with a comma-separated pair; otherwise -n 1.
  if (reads.size() == 2)
"""
flexi_splitter ${params.split} -n 2 -f ${reads[0]},${reads[1]} -o split -c ${config}
"""
  else
"""
flexi_splitter ${params.split} -n 1 -f ${reads[0]} -o split -c ${config}
"""
}
// Flattens the per-sample sub-folders of a `split` folder into a single
// `results` folder, prefixing every file name with its sub-folder path.
process group_fastq {
container = "${container_url}"
label "big_mem_mono_cpus"
tag "$file_id"
// Publish only when an output folder name was configured.
if (params.split_out != "") {
publishDir "results/${params.split_out}", mode: 'copy'
}
input:
// NOTE(review): the script hard-codes `split/` rather than using
// ${reads_folder}, so the staged folder must be named `split`.
tuple val(file_id), path(reads_folder)
output:
tuple val(file_id), path("results/*"), emit: fastq
script:
// Pipeline: list every file under split/, drop paths containing "unassigned",
// rewrite each path as "<path> <subfolder>_<file>" and move it to
// results/<subfolder>_<file>.
"""
mkdir -p results/
find split/ -type "f" | \
grep -v "unassigned" | \
sed -E "s|(split/(.*)/(.*))|\\1 \\2_\\3|g" |
awk '{system("mv "\$1" results/"\$2)}'
"""
}
\ No newline at end of file
# SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
#
# SPDX-License-Identifier: AGPL-3.0-or-later
# Barcode definition with one adaptor (PLATE), eight named sequences and one
# condition per plate.
# NOTE(review): the nesting indentation is missing in this copy of the file;
# restore it before use — presumably `coords` and `samples` are children of
# `PLATE`.
PLATE:
# Location of the barcode (read index, start and stop positions, and whether
# it sits in the header).
coords:
reads: 0
start: 1
stop: 4
header: False
# Named 4-base barcode sequences.
samples:
- name : Plate1
seq: GACT
- name : Plate2
seq: CATG
- name : Plate3
seq: CCAA
- name : Plate4
seq: CTGT
- name : Plate5
seq: GTAG
- name : Plate6
seq: TGAT
- name : Plate7
seq: ATCA
- name : Plate8
seq: TAGA
# Each condition is named after, and refers to, a single plate barcode.
conditions:
- Plate1 :
Plate1
- Plate2 :
Plate2
- Plate3 :
Plate3
- Plate4 :
Plate4
- Plate5 :
Plate5
- Plate6 :
Plate6
- Plate7 :
Plate7
- Plate8 :
Plate8
# SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
#
# SPDX-License-Identifier: AGPL-3.0-or-later
# Barcode definition with three adaptors (PCR, RT, UMI) and three conditions,
# each combining one RT and one PCR barcode.
# NOTE(review): the nesting indentation is missing in this copy of the file;
# restore it before use — presumably `coords` and `samples` are children of
# each adaptor.
PCR:
# Location of the PCR barcode (read index, start and stop positions, and
# whether it sits in the header).
coords:
reads: 3
start: 1
stop: 6
header: False
# Named 6-base PCR barcode sequences.
samples:
- name : PCR1
seq: NCAGTG
- name : PCR2
seq : CGATGT
- name : PCR3
seq: TTAGGC
- name : PCR4
seq : TGACCA
- name: PCR5
seq: NGAACG
- name: PCR6
seq: NCAACA
RT:
# Location of the RT barcode.
coords:
reads: 1
start: 6
stop: 13
header: False
# Named RT barcode sequences.
samples:
- name : RT1
seq: TAGTGCC
- name : RT2
seq: GCTACCC
- name: RT3
seq: ATCGACC
- name: RT4
seq: CGACTCC
UMI:
# Location of the UMI; no sample list is attached to it.
coords:
reads: 1
start: 1
stop: 6
header: False
# Each condition lists the barcode names that identify it.
conditions:
wt:
- RT1
- PCR1
ko:
- RT2
- PCR2
sample_paired:
- RT2
- PCR6
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
// Container image tag and URL used by every g2gtools process in this module.
version = "0.2.8"
container_url = "lbmc/g2gtools:${version}"

// Extra `g2gtools vcf2vci` options and the publish folder name (empty string
// disables publishing).
params.vci_build = ""
params.vci_build_out = ""

// Builds a VCI file from one or more VCF files with `g2gtools vcf2vci`.
//
// Inputs:  (file_id, vcf)   - VCF file(s); each is passed with its own -i
//          (ref_id, fasta)  - reference fasta
// Outputs: vci    - (file_id, *.vci.gz, *.vci.gz.tbi)
//          report - (file_id, standard error saved as *_report.txt)
process vci_build {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  // Publish only when an output folder name was configured.
  if (params.vci_build_out != "") {
    publishDir "results/${params.vci_build_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta)

  output:
    tuple val(file_id), path("*.vci.gz"), path("*.vci.gz.tbi"), emit: vci
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // A list id contributes only its first element to the file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  // Accumulate one " -i <file>" option per VCF.
  input_vcf = ""
  for (vcf_file in vcf) {
    input_vcf = input_vcf + " -i ${vcf_file}"
  }
"""
g2gtools vcf2vci \
${params.vci_build} \
-p ${task.cpus} \
-f ${fasta} \
${input_vcf} \
-s ${file_prefix} \
-o ${file_prefix}.vci 2> ${file_prefix}_g2gtools_vcf2vci_report.txt
"""
}
// Extra `g2gtools patch` options and the publish folder name (empty string
// disables publishing).
params.incorporate_snp = ""
params.incorporate_snp_out = ""

// Patches a reference fasta with the variants of a VCI file using
// `g2gtools patch`.
//
// Inputs:  (file_id, vci, tbi) - VCI file and its index
//          (ref_id, fasta)     - reference fasta
// Outputs: fasta  - (file_id, <prefix>_snp.fa, vci, tbi)
//          report - (file_id, standard error saved as *_report.txt)
process incorporate_snp {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  // Publish only when an output folder name was configured.
  if (params.incorporate_snp_out != "") {
    publishDir "results/${params.incorporate_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vci), path(tbi)
    tuple val(ref_id), path(fasta)

  output:
    tuple val(file_id), path("${file_prefix}_snp.fa"), path("${vci}"), path("${tbi}"), emit: fasta
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // A list id contributes only its first element to the file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
"""
g2gtools patch \
${params.incorporate_snp} \
-p ${task.cpus} \
-i ${fasta} \
-c ${vci} \
-o ${file_prefix}_snp.fa 2> ${file_prefix}_g2gtools_path_report.txt
"""
}
// Extra `g2gtools transform` options and the publish folder name (empty
// string disables publishing).
params.incorporate_indel = ""
params.incorporate_indel_out = ""

// Applies the variants of a VCI file to a fasta with `g2gtools transform`.
//
// Input:   (file_id, fasta, vci, tbi)
// Outputs: fasta  - (file_id, <prefix>_snp_indel.fa, vci, tbi)
//          report - (file_id, standard error saved as *_report.txt)
process incorporate_indel {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  // Publish only when an output folder name was configured.
  if (params.incorporate_indel_out != "") {
    publishDir "results/${params.incorporate_indel_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(fasta), path(vci), path(tbi)

  output:
    tuple val(file_id), path("${file_prefix}_snp_indel.fa"), path("${vci}"), path("${tbi}"), emit: fasta
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // A list id contributes only its first element to the file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
"""
g2gtools transform \
${params.incorporate_indel} \
-p ${task.cpus} \
-i ${fasta} \
-c ${vci} \
-o ${file_prefix}_snp_indel.fa 2> ${file_prefix}_g2gtools_transform_report.txt
"""
}
// Extra `g2gtools convert` options for GTF files and the publish folder name
// (empty string disables publishing).
params.convert_gtf = ""
params.convert_gtf_out = ""

// Converts a GTF annotation with `g2gtools convert` and a VCI file.
//
// Inputs:  (file_id, vci, tbi) - VCI file and its index
//          (annot_id, gtf)     - annotation to convert
// Outputs: gtf    - (file_id, <prefix>.gtf)
//          report - (file_id, standard error saved as *_report.txt)
process convert_gtf {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  // Publish only when an output folder name was configured.
  if (params.convert_gtf_out != "") {
    publishDir "results/${params.convert_gtf_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vci), path(tbi)
    tuple val(annot_id), path(gtf)

  output:
    tuple val(file_id), path("${file_prefix}.gtf"), emit: gtf
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // A list id contributes only its first element to the file names.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
"""
g2gtools convert \
${params.convert_gtf} \
-i ${gtf} \
-c ${vci} \
-o ${file_prefix}.gtf 2> ${file_prefix}_g2gtools_convert_report.txt
"""
}
// Extra `g2gtools convert` options for BED files and the publish folder name
// (empty string disables publishing).
params.convert_bed = ""
params.convert_bed_out = ""

// Converts a BED file with `g2gtools convert` and a VCI file.
//
// Inputs:  (file_id, vci, tbi) - VCI file and its index
//          (annot_id, bed)     - BED file to convert
// Outputs: bed    - (file_id, <prefix>.bed)
//          report - (file_id, standard error saved as *_report.txt)
process convert_bed {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  // Publish only when an output folder name was configured.
  if (params.convert_bed_out != "") {
    publishDir "results/${params.convert_bed_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vci), path(tbi)
    tuple val(annot_id), path(bed)

  output:
    // File names use the computed prefix, like the sibling convert_gtf
    // process. The raw id was used before, which renders as "[a, b]" when the
    // id is a list. Plain ids produce the same names as before.
    tuple val(file_id), path("${file_prefix}.bed"), emit: bed
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // A list id contributes only its first element to the file names.
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
"""
g2gtools convert \
${params.convert_bed} \
-i ${bed} \
-c ${vci} \
-o ${file_prefix}.bed 2> ${file_prefix}_g2gtools_convert_report.txt
"""
}
// Extra `g2gtools convert` options for BAM files and the publish folder name
// (empty string disables publishing).
params.convert_bam = ""
params.convert_bam_out = ""

// Converts a BAM file with `g2gtools convert` and a VCI file.
//
// Inputs:  (file_id, vci, tbi) - VCI file and its index
//          (bam_id, bam)       - BAM file to convert
// Outputs: bam    - (file_id, <prefix>_<bam baseName>.bam)
//          report - (file_id, standard error saved as *_report.txt)
process convert_bam {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${bam_id} ${file_id}"
  // Publish only when an output folder name was configured.
  if (params.convert_bam_out != "") {
    publishDir "results/${params.convert_bam_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vci), path(tbi)
    tuple val(bam_id), path(bam)

  output:
    // The declared name matches the file the script writes: it is built from
    // the BAM file's baseName (the declaration previously used
    // `bam_id.baseName`) and from the computed prefix rather than the raw id.
    tuple val(file_id), path("${file_prefix}_${bam.baseName}.bam"), emit: bam
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // A list id contributes only its first element to the file names.
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
"""
g2gtools convert \
${params.convert_bam} \
-i ${bam} \
-c ${vci} \
-o ${file_prefix}_${bam.baseName}.bam 2> ${file_prefix}_g2gtools_convert_report.txt
"""
}
\ No newline at end of file
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "3.8.0"
container_url = "lbmc/gatk:${version}"
// Extra options appended to the GATK3 HaplotypeCaller command line.
params.variant_calling = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.variant_calling_out = ""

// Calls variants on an indexed BAM with GATK3 HaplotypeCaller.
//
// input:
//   (file_id, bam, bai)          - alignment and its index
//   (ref_id, fasta, fai, dict)   - reference with its indexes
// output:
//   vcf - (file_id, *.vcf)
process variant_calling {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${file_id}"
  if (params.variant_calling_out != "") {
    publishDir "results/${params.variant_calling_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam), path(bai)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.vcf"), emit: vcf

  script:
  // A List id contributes only its first element to the output name.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  """
  gatk3 -T HaplotypeCaller \
    -nct ${task.cpus} \
    ${params.variant_calling} \
    -R ${fasta} \
    -I ${bam} \
    -o ${file_prefix}.vcf
  """
}
// Extra options appended to the GATK3 SelectVariants (SNP) command line.
params.filter_snp = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.filter_snp_out = ""

// Extracts SNP records from a VCF with GATK3 SelectVariants.
//
// input:
//   (file_id, vcf)              - variants to select from
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   vcf - (file_id, *_snp.vcf)
process filter_snp {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${file_id}"
  if (params.filter_snp_out != "") {
    publishDir "results/${params.filter_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_snp.vcf"), emit: vcf

  script:
  // A List id contributes only its first element to the output name.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  """
  gatk3 -T SelectVariants \
    -nct ${task.cpus} \
    ${params.filter_snp} \
    -R ${fasta} \
    -V ${vcf} \
    -selectType SNP \
    -o ${file_prefix}_snp.vcf
  """
}
// Extra options appended to the GATK3 SelectVariants (INDEL) command line.
params.filter_indels = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.filter_indels_out = ""

// Extracts INDEL records from a VCF with GATK3 SelectVariants.
//
// input:
//   (file_id, vcf)              - variants to select from
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   vcf - (file_id, *_indel.vcf)
process filter_indels {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${file_id}"
  if (params.filter_indels_out != "") {
    publishDir "results/${params.filter_indels_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_indel.vcf"), emit: vcf

  script:
  // A List id contributes only its first element to the output name.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  """
  gatk3 -T SelectVariants \
    -nct ${task.cpus} \
    ${params.filter_indels} \
    -R ${fasta} \
    -V ${vcf} \
    -selectType INDEL \
    -o ${file_prefix}_indel.vcf
  """
}
// Filter expression handed to VariantFiltration for SNPs.
params.high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)"
// Full option string built from the expression above; can be overridden as a whole.
params.high_confidence_snp = "--filterExpression \"${params.high_confidence_snp_filter}\" --filterName \"basic_snp_filter\""
// Sub-directory of `results/` for published output; empty disables publishing.
params.high_confidence_snp_out = ""

// Applies GATK3 VariantFiltration to a SNP VCF using params.high_confidence_snp.
//
// input:
//   (file_id, vcf)              - SNP variants to filter
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   vcf - (file_id, *_snp.vcf); the command writes <file_prefix>_filtered_snp.vcf
process high_confidence_snp {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${file_id}"
  if (params.high_confidence_snp_out != "") {
    publishDir "results/${params.high_confidence_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_snp.vcf"), emit: vcf

  script:
  // A List id contributes only its first element to the output name.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  """
  gatk3 -T VariantFiltration \
    -nct ${task.cpus} \
    -R ${fasta} \
    -V ${vcf} \
    ${params.high_confidence_snp} \
    -o ${file_prefix}_filtered_snp.vcf
  """
}
// Filter expression handed to VariantFiltration for INDELs.
params.high_confidence_indel_filter = "QD < 3.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0"
// Full option string built from the expression above; can be overridden as a whole.
params.high_confidence_indels = "--filterExpression \"${params.high_confidence_indel_filter}\" --filterName \"basic_indel_filter\""
// Sub-directory of `results/` for published output; empty disables publishing.
params.high_confidence_indels_out = ""

// Applies GATK3 VariantFiltration to an INDEL VCF using params.high_confidence_indels.
//
// input:
//   (file_id, vcf)              - INDEL variants to filter
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   vcf - (file_id, *_indel.vcf); the command writes <file_prefix>_filtered_indel.vcf
process high_confidence_indels {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${file_id}"
  if (params.high_confidence_indels_out != "") {
    publishDir "results/${params.high_confidence_indels_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_indel.vcf"), emit: vcf

  script:
  // A List id contributes only its first element to the output name.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  """
  gatk3 -T VariantFiltration \
    -nct ${task.cpus} \
    -R ${fasta} \
    -V ${vcf} \
    ${params.high_confidence_indels} \
    -o ${file_prefix}_filtered_indel.vcf
  """
}
// Extra options appended to the GATK3 BaseRecalibrator command line.
params.recalibrate_snp_table = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.recalibrate_snp_table_out = ""

// Builds a base-quality recalibration table with GATK3 BaseRecalibrator,
// using the SNP and INDEL files as known sites.
//
// input:
//   (file_id, snp_file, indel_file, bam, bam_idx) - known sites and alignment
//   (ref_id, fasta, fai, dict)                    - reference with its indexes
// output:
//   recal_table - (file_id, recal_data_table)
process recalibrate_snp_table {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.recalibrate_snp_table_out != "") {
    publishDir "results/${params.recalibrate_snp_table_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("recal_data_table"), emit: recal_table

  script:
  // User options live in the params namespace; a bare `recalibrate_snp_table`
  // is not a variable of this task.
  """
  gatk3 -T BaseRecalibrator \
    -nct ${task.cpus} \
    ${params.recalibrate_snp_table} \
    -R ${fasta} \
    -I ${bam} \
    -knownSites ${snp_file} \
    -knownSites ${indel_file} \
    -o recal_data_table
  """
}
// Extra options appended to the GATK3 PrintReads command line.
params.recalibrate_snp = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.recalibrate_snp_out = ""

// Writes a recalibrated BAM with GATK3 PrintReads and a BQSR table.
//
// input:
//   (file_id, snp_file, indel_file, bam, bam_idx) - alignment (and known sites)
//   (table_id, recal_data_table)                  - recalibration table
//   (ref_id, fasta, fai, dict)                    - reference with its indexes
// output:
//   bam - (file_id, *.bam); the command writes <file_prefix>_recal.bam
process recalibrate_snp {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.recalibrate_snp_out != "") {
    publishDir "results/${params.recalibrate_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx)
    tuple val(table_id), path(recal_data_table)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.bam"), emit: bam

  script:
  // A List id contributes only its first element to the output name.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  // User options are read from params, and the table is referenced through
  // its staged input path so the command does not depend on the file being
  // literally named "recal_data_table".
  """
  gatk3 -T PrintReads \
    --use_jdk_deflater \
    --use_jdk_inflater \
    ${params.recalibrate_snp} \
    -nct ${task.cpus} \
    -R ${fasta} \
    -I ${bam} \
    -BQSR ${recal_data_table} \
    -o ${file_prefix}_recal.bam
  """
}
// Extra options appended to the GATK3 HaplotypeCaller (GVCF mode) command line.
params.haplotype_caller = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.haplotype_caller_out = ""

// Runs GATK3 HaplotypeCaller with `-ERC GVCF` on a BAM file.
//
// input:
//   (file_id, bam)              - alignment
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   gvcf - (file_id, *.gvcf)
process haplotype_caller {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${file_id}"
  if (params.haplotype_caller_out != "") {
    publishDir "results/${params.haplotype_caller_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.gvcf"), emit: gvcf

  script:
  // A List id contributes only its first element to the output name.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  """
  gatk3 -T HaplotypeCaller \
    -nct ${task.cpus} \
    ${params.haplotype_caller} \
    -R ${fasta} \
    -I ${bam} \
    -ERC GVCF \
    -variant_index_type LINEAR -variant_index_parameter 128000 \
    -o ${file_prefix}.gvcf
  """
}
// Extra options appended to the GATK3 GenotypeGVCFs command line.
params.gvcf_genotyping = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.gvcf_genotyping_out = ""

// Genotypes a GVCF with GATK3 GenotypeGVCFs.
//
// input:
//   (file_id, gvcf)             - GVCF to genotype
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   vcf - (file_id, *.vcf); the command writes <file_prefix>_joint.vcf
process gvcf_genotyping {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${file_id}"
  if (params.gvcf_genotyping_out != "") {
    publishDir "results/${params.gvcf_genotyping_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(gvcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.vcf"), emit: vcf

  script:
  // A List id contributes only its first element to the output name.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  """
  gatk3 -T GenotypeGVCFs \
    -nct ${task.cpus} \
    ${params.gvcf_genotyping} \
    -R ${fasta} \
    -V ${gvcf} \
    -o ${file_prefix}_joint.vcf
  """
}
// Extra options appended to the GATK3 SelectVariants (SNP, joint VCF) command line.
params.select_variants_snp = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.select_variants_snp_out = ""

// Extracts SNP records from a VCF with GATK3 SelectVariants,
// writing <file_prefix>_joint_snp.vcf.
//
// input:
//   (file_id, vcf)              - variants to select from
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   vcf - (file_id, *_joint_snp.vcf)
process select_variants_snp {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${file_id}"
  if (params.select_variants_snp_out != "") {
    publishDir "results/${params.select_variants_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_joint_snp.vcf"), emit: vcf

  script:
  // A List id contributes only its first element to the output name.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  """
  gatk3 -T SelectVariants \
    -nct ${task.cpus} \
    ${params.select_variants_snp} \
    -R ${fasta} \
    -V ${vcf} \
    -selectType SNP \
    -o ${file_prefix}_joint_snp.vcf
  """
}
// Extra options appended to the GATK3 SelectVariants (INDEL, joint VCF) command line.
params.select_variants_indels = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.select_variants_indels_out = ""

// Extracts INDEL records from a VCF with GATK3 SelectVariants,
// writing <file_prefix>_joint_indel.vcf.
//
// input:
//   (file_id, vcf)              - variants to select from
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   vcf - (file_id, *_joint_indel.vcf)
process select_variants_indels {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${file_id}"
  if (params.select_variants_indels_out != "") {
    publishDir "results/${params.select_variants_indels_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_joint_indel.vcf"), emit: vcf

  script:
  // A List id contributes only its first element to the output name.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  """
  gatk3 -T SelectVariants \
    -nct ${task.cpus} \
    ${params.select_variants_indels} \
    -R ${fasta} \
    -V ${vcf} \
    -selectType INDEL \
    -o ${file_prefix}_joint_indel.vcf
  """
}
// Extra options appended to the GATK3 FastaAlternateReferenceMaker command line.
params.personalized_genome = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.personalized_genome_out = ""

// Builds an alternate reference FASTA from a reference and a VCF with
// GATK3 FastaAlternateReferenceMaker.
//
// input:
//   (file_id, vcf)              - variants to apply
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   fasta - (file_id, *_genome.fasta)
process personalized_genome {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  if (params.personalized_genome_out != "") {
    publishDir "results/${params.personalized_genome_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_genome.fasta"), emit: fasta

  script:
  // A List id contributes only its first element to the output name.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  // The reference is the staged `fasta` input (no `reference` variable is
  // declared by this process). A space is kept before each continuation
  // backslash so the tool name is never glued to the next argument.
  """
  gatk3 -T FastaAlternateReferenceMaker \
    ${params.personalized_genome} \
    -R ${fasta} \
    -V ${vcf} \
    -o ${file_prefix}_genome.fasta
  """
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "4.2.0.0"
container_url = "broadinstitute/gatk:${version}"
/**
 * Derive a file-name prefix from a sample identifier.
 *
 * - List: the first element is returned.
 * - Map with a 'library' key: the value stored under 'id' is returned.
 * - any other Map: the first value is returned (null for an empty Map).
 * - anything else: the identifier itself is returned unchanged.
 *
 * Only local state is used, so nothing is written to the script binding.
 *
 * @param file_id sample identifier (List, Map or scalar)
 * @return the value to use as file-name prefix
 */
def get_file_prefix(file_id) {
  if (file_id instanceof List) {
    return file_id[0]
  }
  if (file_id instanceof Map) {
    if (file_id.containsKey('library')) {
      return file_id.id
    }
    // `file_id[0]` on a Map is a lookup of the key 0, not "first entry";
    // take the first value explicitly instead.
    return file_id.isEmpty() ? null : file_id.values().iterator().next()
  }
  return file_id
}
include {
index_fasta as samtools_index_fasta;
index_bam;
} from './../samtools/main.nf'
include {
index_fasta as picard_index_fasta;
index_bam as picard_index_bam;
mark_duplicate;
} from './../picard/main.nf'
// Sub-directory of `results/` for published VCF output; empty disables publishing.
params.variant_calling_out = ""

// Marks duplicates, indexes BAM and reference with both samtools and picard,
// calls variants per sample, then hands the per-sample GVCFs to
// call_variants_all_sample.
//
// take:
//   bam   - channel of alignments
//   fasta - channel with the reference
// emit:
//   vcf   - output of call_variants_all_sample
workflow germline_cohort_data_variant_calling {
  take:
    bam
    fasta

  main:
    // data preparation
    mark_duplicate(bam)
    index_bam(mark_duplicate.out.bam)
    picard_index_bam(mark_duplicate.out.bam)
    indexed_bam = index_bam.out.bam_idx.join(picard_index_bam.out.index)

    picard_index_fasta(fasta)
    samtools_index_fasta(fasta)
    indexed_fasta = fasta
      .join(picard_index_fasta.out.index)
      .join(samtools_index_fasta.out.index)

    // variant calling
    call_variants_per_sample(indexed_bam, indexed_fasta.collect())
    call_variants_all_sample(call_variants_per_sample.out.gvcf, indexed_fasta)

  emit:
    vcf = call_variants_all_sample.out.vcf
}
/*******************************************************************/
// Indexes the known-sites VCF, computes a base recalibration table and
// applies it to the alignments.
//
// take:
//   bam_idx   - alignments with their indexes
//   fasta_idx - reference with its indexes
//   vcf       - known-sites variants
// emit:
//   bam       - output of apply_base_recalibration
workflow base_quality_recalibrator {
  take:
    bam_idx
    fasta_idx
    vcf

  main:
    index_vcf(vcf)
    known_sites = index_vcf.out.vcf_idx

    compute_base_recalibration(bam_idx, fasta_idx, known_sites)
    recal_table = compute_base_recalibration.out.table

    apply_base_recalibration(bam_idx, fasta_idx, recal_table)

  emit:
    bam = apply_base_recalibration.out.bam
}
// Indexes a VCF with GATK4 IndexFeatureFile.
//
// input:
//   (file_id, vcf)
// output:
//   vcf_idx - (file_id, the vcf itself, files matching "*")
process index_vcf {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"

  input:
    tuple val(file_id), path(vcf)

  output:
    tuple val(file_id), path("${vcf}"), path("*"), emit: vcf_idx

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}".minus(~/\s*GB/)
  file_prefix = get_file_prefix(file_id)
  """
  gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \
    -I ${vcf}
  """
}
// Computes a base recalibration table with GATK4 BaseRecalibrator.
//
// input:
//   (file_id, bam, bam_idx, bam_idx_bis) - alignment with two index files
//   (ref_id, fasta, fai, dict)           - reference with its indexes
//   (vcf_id, vcf, vcf_idx)               - known sites (one file or a list)
// output:
//   table - (file_id, <bam simple name>.table)
process compute_base_recalibration {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"

  input:
    tuple val(file_id), path(bam), path(bam_idx), path(bam_idx_bis)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
    tuple val(vcf_id), path(vcf), path(vcf_idx)

  output:
    tuple val(file_id), path("${bam.simpleName}.table"), emit: table

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}".minus(~/\s*GB/)
  file_prefix = get_file_prefix(file_id)
  // One `--known-sites` option per VCF file.
  def vcf_cmd = (vcf instanceof List)
    ? vcf.collect { known -> "--known-sites ${known} " }.join('')
    : "--known-sites ${vcf} "
  """
  gatk --java-options "-Xmx${xmx_memory}G" BaseRecalibrator \
    -I ${bam} \
    -R ${fasta} \
    ${vcf_cmd} \
    -O ${bam.simpleName}.table
  """
}
// Applies a recalibration table to a BAM with GATK4 ApplyBQSR.
//
// input:
//   (file_id, bam, bam_idx, bam_idx_bis) - alignment with two index files
//   (ref_id, fasta, fai, dict)           - reference with its indexes
//   (table_id, table)                    - recalibration table
// output:
//   bam - (file_id, <bam simple name>_recalibrate.bam)
process apply_base_recalibration {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"

  input:
    tuple val(file_id), path(bam), path(bam_idx), path(bam_idx_bis)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
    tuple val(table_id), path(table)

  output:
    tuple val(file_id), path("${bam.simpleName}_recalibrate.bam"), emit: bam

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}".minus(~/\s*GB/)
  file_prefix = get_file_prefix(file_id)
  """
  gatk --java-options "-Xmx${xmx_memory}G" ApplyBQSR \
    -R ${fasta} \
    -I ${bam} \
    --bqsr-recal-file ${table} \
    -O ${bam.simpleName}_recalibrate.bam
  """
}
/*******************************************************************/
// Sub-directory of `results/` for published GVCF output; empty disables publishing.
params.variant_calling_gvcf_out = ""

// Runs GATK4 HaplotypeCaller in `-ERC GVCF` mode on one alignment.
//
// input:
//   (file_id, bam, bam_idx, bam_idx_bis) - alignment with two index files
//   (ref_id, fasta, fai, dict)           - reference with its indexes
// output:
//   gvcf - (file_id, <bam simple name>.gvcf.gz)
process call_variants_per_sample {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.variant_calling_gvcf_out != "") {
    publishDir "results/${params.variant_calling_gvcf_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam), path(bam_idx), path(bam_idx_bis)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("${bam.simpleName}.gvcf.gz"), emit: gvcf

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}".minus(~/\s*GB/)
  file_prefix = get_file_prefix(file_id)
  """
  gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \
    -R ${fasta} \
    -I ${bam} \
    -O ${bam.simpleName}.gvcf.gz \
    -ERC GVCF
  """
}
/*******************************************************************/
// Indexes and validates each GVCF, groups them by key, combines each group
// and genotypes the combined GVCF.
//
// take:
//   gvcf      - per-sample GVCF files
//   fasta_idx - reference with its indexes
// emit:
//   vcf       - output of genomic_db_call
workflow call_variants_all_sample {
  take:
    gvcf
    fasta_idx

  main:
    index_gvcf(gvcf)
    validate_gvcf(index_gvcf.out.gvcf_idx, fasta_idx.collect())

    // Gather all validated GVCFs that share the same key.
    grouped_gvcf = validate_gvcf.out.gvcf.groupTuple()
    consolidate_gvcf(grouped_gvcf, fasta_idx.collect())

    genomic_db_call(consolidate_gvcf.out.gvcf_idx, fasta_idx.collect())

  emit:
    vcf = genomic_db_call.out.vcf
}
// Indexes a GVCF with GATK4 IndexFeatureFile and keeps its stderr as a report.
//
// input:
//   (file_id, gvcf)
// output:
//   gvcf_idx - (file_id, gvcf, gvcf.tbi)
//   report   - (file_id, <gvcf simple name>_IndexFeatureFile_report.txt)
process index_gvcf {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"

  input:
    tuple val(file_id), path(gvcf)

  output:
    tuple val(file_id), path("${gvcf}"), path("${gvcf}.tbi"), emit: gvcf_idx
    tuple val(file_id), path("${gvcf.simpleName}_IndexFeatureFile_report.txt"), emit: report

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}".minus(~/\s*GB/)
  file_prefix = get_file_prefix(file_id)
  """
  gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \
    -I ${gvcf} 2> ${gvcf.simpleName}_IndexFeatureFile_report.txt
  """
}
// Checks a GVCF with GATK4 ValidateVariants (`-gvcf`) and re-emits the
// same GVCF and index files.
//
// input:
//   (file_id, gvcf, gvcf_idx)   - GVCF with its index
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   gvcf - (file_id, gvcf, gvcf_idx)
process validate_gvcf {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"

  input:
    tuple val(file_id), path(gvcf), path(gvcf_idx)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("${gvcf}"), path("${gvcf_idx}"), emit: gvcf

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}".minus(~/\s*GB/)
  file_prefix = get_file_prefix(file_id)
  """
  gatk --java-options "-Xmx${xmx_memory}G" ValidateVariants \
    -V ${gvcf} \
    -R ${fasta} -gvcf
  """
}
// Merges one or several GVCFs with GATK4 CombineGVCFs, then indexes the
// merged file with IndexFeatureFile.
//
// input:
//   (file_id, gvcf, gvcf_idx)   - one GVCF or a list of GVCFs, with indexes
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   gvcf_idx - (file_id, <file_prefix>.gvcf, <file_prefix>.gvcf.idx)
//   report   - (file_id, <file_prefix>_CombineGVCFs_report.txt)
process consolidate_gvcf {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"

  input:
    tuple val(file_id), path(gvcf), path(gvcf_idx)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("${file_prefix}.gvcf"), path("${file_prefix}.gvcf.idx"), emit: gvcf_idx
    tuple val(file_id), path("${file_prefix}_CombineGVCFs_report.txt"), emit: report

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}".minus(~/\s*GB/)
  file_prefix = get_file_prefix(file_id)
  // One `-V` option per input GVCF.
  def gvcf_cmd = (gvcf instanceof List)
    ? gvcf.collect { single_gvcf -> "-V ${single_gvcf} " }.join('')
    : "-V ${gvcf} "
  """
  mkdir tmp
  gatk --java-options "-Xmx${xmx_memory}G" CombineGVCFs \
    ${gvcf_cmd} \
    -R ${fasta} \
    -O ${file_prefix}.gvcf 2> ${file_prefix}_CombineGVCFs_report.txt
  gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \
    -I ${file_prefix}.gvcf 2> ${file_prefix}_IndexFeatureFile_report.txt
  """
}
// Genotypes a combined GVCF with GATK4 GenotypeGVCFs.
//
// input:
//   (file_id, gvcf, gvcf_idx)   - combined GVCF with its index
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   vcf - (file_id, <gvcf simple name>.vcf.gz)
process genomic_db_call {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  if (params.variant_calling_out != "") {
    publishDir "results/${params.variant_calling_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(gvcf), path(gvcf_idx)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("${gvcf.simpleName}.vcf.gz"), emit: vcf

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
  file_prefix = get_file_prefix(file_id)
  // The previous `gvcf_cmd` string (built with `--V`) was never inserted in
  // the command and has been dropped; the command and the declared output
  // both address the single staged `gvcf` file.
  """
  mkdir tmp
  gatk --java-options "-Xmx${xmx_memory}G" GenotypeGVCFs \
    -R ${fasta} \
    -V ${gvcf} \
    -O ${gvcf.simpleName}.vcf.gz \
    --tmp-dir ./tmp
  """
}
/*******************************************************************/
// Extra options appended to the GATK4 HaplotypeCaller command line.
params.variant_calling = ""

// Calls variants on an indexed BAM with GATK4 HaplotypeCaller.
//
// input:
//   (file_id, bam, bai)         - alignment and its index
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   vcf - (file_id, *.vcf); the command writes <bam simple name>.vcf
process variant_calling {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.variant_calling_out != "") {
    publishDir "results/${params.variant_calling_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam), path(bai)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.vcf"), emit: vcf

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}".minus(~/\s*GB/)
  file_prefix = get_file_prefix(file_id)
  """
  gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \
    ${params.variant_calling} \
    -R ${fasta} \
    -I ${bam} \
    -O ${bam.simpleName}.vcf
  """
}
// Extra options appended to the GATK4 SelectVariants (SNP) command line.
params.filter_snp = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.filter_snp_out = ""

// Extracts SNP records from a VCF with GATK4 SelectVariants.
//
// input:
//   (file_id, vcf)              - variants to select from
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   vcf - (file_id, *_snp.vcf); the command writes <vcf simple name>_snp.vcf
process filter_snp {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.filter_snp_out != "") {
    publishDir "results/${params.filter_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_snp.vcf"), emit: vcf

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}".minus(~/\s*GB/)
  file_prefix = get_file_prefix(file_id)
  """
  gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \
    ${params.filter_snp} \
    -R ${fasta} \
    -V ${vcf} \
    -select-type SNP \
    -O ${vcf.simpleName}_snp.vcf
  """
}
// Extra options appended to the GATK4 SelectVariants (INDEL) command line.
params.filter_indels = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.filter_indels_out = ""

// Extracts INDEL records from a VCF with GATK4 SelectVariants.
//
// input:
//   (file_id, vcf)              - variants to select from
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   vcf - (file_id, *_indel.vcf); the command writes <vcf simple name>_indel.vcf
process filter_indels {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.filter_indels_out != "") {
    publishDir "results/${params.filter_indels_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_indel.vcf"), emit: vcf

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}".minus(~/\s*GB/)
  file_prefix = get_file_prefix(file_id)
  """
  gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \
    ${params.filter_indels} \
    -R ${fasta} \
    -V ${vcf} \
    -select-type INDEL \
    -O ${vcf.simpleName}_indel.vcf
  """
}
// Filter expression handed to VariantFiltration for SNPs.
params.high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)"
// Full option string built from the expression above; can be overridden as a whole.
params.high_confidence_snp = "--filter-expression \"${params.high_confidence_snp_filter}\" --filter-name \"basic_snp_filter\""
// Sub-directory of `results/` for published output; empty disables publishing.
params.high_confidence_snp_out = ""

// Applies GATK4 VariantFiltration to a SNP VCF using params.high_confidence_snp.
//
// input:
//   (file_id, vcf)              - SNP variants to filter
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   vcf - (file_id, *_snp.vcf); the command writes <vcf simple name>_filtered_snp.vcf
process high_confidence_snp {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.high_confidence_snp_out != "") {
    publishDir "results/${params.high_confidence_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_snp.vcf"), emit: vcf

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}".minus(~/\s*GB/)
  file_prefix = get_file_prefix(file_id)
  """
  gatk --java-options "-Xmx${xmx_memory}G" VariantFiltration \
    -R ${fasta} \
    -V ${vcf} \
    ${params.high_confidence_snp} \
    -O ${vcf.simpleName}_filtered_snp.vcf
  """
}
// Filter expression handed to VariantFiltration for INDELs.
params.high_confidence_indel_filter = "QD < 3.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0"
// Full option string built from the expression above; can be overridden as a whole.
params.high_confidence_indels = "--filter-expression \"${params.high_confidence_indel_filter}\" --filter-name \"basic_indel_filter\""
// Sub-directory of `results/` for published output; empty disables publishing.
params.high_confidence_indels_out = ""

// Applies GATK4 VariantFiltration to an INDEL VCF using params.high_confidence_indels.
//
// input:
//   (file_id, vcf)              - INDEL variants to filter
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   vcf - (file_id, *_indel.vcf); the command writes <vcf simple name>_filtered_indel.vcf
process high_confidence_indels {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.high_confidence_indels_out != "") {
    publishDir "results/${params.high_confidence_indels_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_indel.vcf"), emit: vcf

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}".minus(~/\s*GB/)
  file_prefix = get_file_prefix(file_id)
  """
  gatk --java-options "-Xmx${xmx_memory}G" VariantFiltration \
    -R ${fasta} \
    -V ${vcf} \
    ${params.high_confidence_indels} \
    -O ${vcf.simpleName}_filtered_indel.vcf
  """
}
// Extra options appended to the GATK4 BaseRecalibrator command line.
params.recalibrate_snp_table = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.recalibrate_snp_table_out = ""

// Indexes the SNP and INDEL files with IndexFeatureFile, then builds a
// recalibration table with GATK4 BaseRecalibrator using both as known sites.
//
// input:
//   (file_id, snp_file, indel_file, bam, bam_idx, bam_idx_bis)
//   (ref_id, fasta, fai, dict) - reference with its indexes
// output:
//   recal_table - (file_id, recal_data_table)
process recalibrate_snp_table {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  if (params.recalibrate_snp_table_out != "") {
    publishDir "results/${params.recalibrate_snp_table_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx), path(bam_idx_bis)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("recal_data_table"), emit: recal_table

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
  file_prefix = get_file_prefix(file_id)
  // `--known-sites` is the documented long option name and the spelling
  // already used by compute_base_recalibration in this module.
  """
  gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \
    -I ${snp_file}
  gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \
    -I ${indel_file}
  gatk --java-options "-Xmx${xmx_memory}G" BaseRecalibrator \
    ${params.recalibrate_snp_table} \
    -R ${fasta} \
    -I ${bam} \
    --known-sites ${snp_file} \
    --known-sites ${indel_file} \
    -O recal_data_table
  """
}
// Extra options appended to the GATK4 ApplyBQSR command line.
params.recalibrate_snp = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.recalibrate_snp_out = ""

// Applies a recalibration table to a BAM with GATK4 ApplyBQSR.
//
// input:
//   (file_id, snp_file, indel_file, bam, bam_idx, recal_table)
//   (ref_id, fasta, fai, dict) - reference with its indexes
// output:
//   bam - (file_id, *.bam); the command writes <bam simple name>_recal.bam
process recalibrate_snp {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  if (params.recalibrate_snp_out != "") {
    publishDir "results/${params.recalibrate_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx), path(recal_table)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.bam"), emit: bam

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
  file_prefix = get_file_prefix(file_id)
  // The table is addressed through the staged `recal_table` input instead of
  // a hard-coded "recal_data_table", so any table file name works.
  """
  gatk --java-options "-Xmx${xmx_memory}G" ApplyBQSR \
    ${params.recalibrate_snp} \
    -R ${fasta} \
    -I ${bam} \
    --bqsr-recal-file ${recal_table} \
    -O ${bam.simpleName}_recal.bam
  """
}
// Extra options appended to the GATK4 HaplotypeCaller (GVCF mode) command line.
params.haplotype_caller = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.haplotype_caller_out = ""

// Runs GATK4 HaplotypeCaller with `-ERC GVCF` on a BAM file.
//
// input:
//   (file_id, bam)              - alignment
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   gvcf - (file_id, *.gvcf); the command writes <bam simple name>.gvcf
process haplotype_caller {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.haplotype_caller_out != "") {
    publishDir "results/${params.haplotype_caller_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.gvcf"), emit: gvcf

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}".minus(~/\s*GB/)
  file_prefix = get_file_prefix(file_id)
  """
  gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \
    ${params.haplotype_caller} \
    -R ${fasta} \
    -I ${bam} \
    -ERC GVCF \
    -O ${bam.simpleName}.gvcf
  """
}
// Extra options appended to the GATK4 GenotypeGVCFs command line.
params.gvcf_genotyping = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.gvcf_genotyping_out = ""

// Genotypes a GVCF with GATK4 GenotypeGVCFs.
//
// input:
//   (file_id, gvcf)             - GVCF to genotype
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   vcf - (file_id, *.vcf.gz); the command writes <gvcf simple name>_joint.vcf.gz
process gvcf_genotyping {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.gvcf_genotyping_out != "") {
    publishDir "results/${params.gvcf_genotyping_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(gvcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*.vcf.gz"), emit: vcf

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}".minus(~/\s*GB/)
  file_prefix = get_file_prefix(file_id)
  """
  gatk --java-options "-Xmx${xmx_memory}G" GenotypeGVCFs \
    ${params.gvcf_genotyping} \
    -R ${fasta} \
    -V ${gvcf} \
    -O ${gvcf.simpleName}_joint.vcf.gz
  """
}
// Extra options appended to the GATK4 SelectVariants (SNP, joint VCF) command line.
params.select_variants_snp = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.select_variants_snp_out = ""

// Extracts SNP records from a VCF with GATK4 SelectVariants,
// writing <vcf simple name>_joint_snp.vcf.
//
// input:
//   (file_id, vcf)              - variants to select from
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   vcf - (file_id, *_joint_snp.vcf)
process select_variants_snp {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  if (params.select_variants_snp_out != "") {
    publishDir "results/${params.select_variants_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_joint_snp.vcf"), emit: vcf

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
  file_prefix = get_file_prefix(file_id)
  // The heap size takes a single "G" unit suffix, like every other gatk
  // call in this module (the former "GG" is not a valid -Xmx value).
  """
  gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \
    ${params.select_variants_snp} \
    -R ${fasta} \
    -V ${vcf} \
    -select-type SNP \
    -O ${vcf.simpleName}_joint_snp.vcf
  """
}
// Extra options appended to the GATK4 SelectVariants (INDEL, joint VCF) command line.
params.select_variants_indels = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.select_variants_indels_out = ""

// Extracts INDEL records from a VCF with GATK4 SelectVariants,
// writing <file_prefix>_joint_indel.vcf.
//
// input:
//   (file_id, vcf)              - variants to select from
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   vcf - (file_id, *_joint_indel.vcf)
process select_variants_indels {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.select_variants_indels_out != "") {
    publishDir "results/${params.select_variants_indels_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_joint_indel.vcf"), emit: vcf

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}".minus(~/\s*GB/)
  // Unlike the sibling processes, the output name here is based on the id prefix.
  file_prefix = get_file_prefix(file_id)
  """
  gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \
    ${params.select_variants_indels} \
    -R ${fasta} \
    -V ${vcf} \
    -select-type INDEL \
    -O ${file_prefix}_joint_indel.vcf
  """
}
// Extra options appended to the GATK4 FastaAlternateReferenceMaker command line.
params.personalized_genome = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.personalized_genome_out = ""

// Builds an alternate reference FASTA from a reference and a VCF with
// GATK4 FastaAlternateReferenceMaker.
//
// input:
//   (file_id, vcf)              - variants to apply
//   (ref_id, fasta, fai, dict)  - reference with its indexes
// output:
//   fasta - (file_id, *_genome.fasta); the command writes <vcf simple name>_genome.fasta
process personalized_genome {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  if (params.personalized_genome_out != "") {
    publishDir "results/${params.personalized_genome_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)

  output:
    tuple val(file_id), path("*_genome.fasta"), emit: fasta

  script:
  // task.memory as text with its "GB" suffix removed; reused as the -Xmx value.
  xmx_memory = "${task.memory}" - ~/\s*GB/
  file_prefix = get_file_prefix(file_id)
  // The reference is the staged `fasta` input (no `reference` variable is
  // declared by this process). A space is kept before each continuation
  // backslash so the tool name is never glued to the next argument.
  """
  gatk --java-options "-Xmx${xmx_memory}G" FastaAlternateReferenceMaker \
    ${params.personalized_genome} \
    -R ${fasta} \
    -V ${vcf} \
    -O ${vcf.simpleName}_genome.fasta
  """
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "0.12.2"
container_url = "lbmc/gffread:${version}"
// Declared for this process; not referenced by the command below.
params.gffread = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.gffread_out = ""

// Runs `gffread -M -x` on a GTF and a genome FASTA, then post-processes the
// result with awk/grep to drop repeated sequence lines and empty lines.
//
// input:
//   (file_id, gtf)     - annotation
//   (fasta_id, fasta)  - genome sequence
// output:
//   fasta - (fasta_id, <file_prefix>.fasta)
process gffread {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  // Tag with the input id: `file_prefix` is only assigned inside `script:`.
  tag "$file_id"
  if (params.gffread_out != "") {
    publishDir "results/${params.gffread_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(gtf)
    tuple val(fasta_id), path(fasta)

  output:
    tuple val(fasta_id), path("${file_prefix}.fasta"), emit: fasta

  script:
  // A List id contributes only its first element to the output name.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  """
  gffread ${gtf} -g ${fasta} -M -x dup_${file_prefix}.fasta
  awk 'BEGIN {i = 1;} { if (\$1 ~ /^>/) { tmp = h[i]; h[i] = \$1; } else if (!a[\$1]) { s[i] = \$1; a[\$1] = "1"; i++; } else { h[i] = tmp; } } END { for (j = 1; j < i; j++) { print h[j]; print s[j]; } }' < dup_${file_prefix}.fasta | grep -v -e "^\$" > ${file_prefix}.fasta
  """
}
// Declared for this process; not referenced by the command below.
params.spliced_cds = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.spliced_cds_out = ""

// Decompresses a gzipped FASTA and GTF, then runs `gffread -M -x` on them.
//
// input:
//   (file_id, gtf)     - gzipped annotation
//   (fasta_id, fasta)  - gzipped genome sequence
// output:
//   fasta - (fasta_id, <file_prefix>.fasta)
process spliced_cds {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  // Tag with the input id: `file_prefix` is only assigned inside `script:`.
  tag "$file_id"
  if (params.spliced_cds_out != "") {
    publishDir "results/${params.spliced_cds_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(gtf)
    tuple val(fasta_id), path(fasta)

  output:
    tuple val(fasta_id), path("${file_prefix}.fasta"), emit: fasta

  script:
  // A List id contributes only its first element to the output name.
  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
  """
  gzip -dck ${fasta} > ${fasta.simpleName}_un.fasta
  gzip -dck ${gtf} > ${gtf.simpleName}_un.gtf
  gffread ${gtf.simpleName}_un.gtf -g ${fasta.simpleName}_un.fasta -M \
    -x ${file_prefix}.fasta
  """
}
\ No newline at end of file
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "5.0.11"
container_url = "lbmc/guppy-cpu:${version}"
// Save path given to guppy (`-s`) and sub-directory of `results/` for publishing.
params.basecalling_out = ""
// Flowcell and kit identifiers passed to guppy_basecaller.
params.flowcell = "FLO-MIN106"
params.kit = "SQK-PCS109"
// CPU parallelism options passed to guppy_basecaller.
params.cpu_threads_per_caller = 4
params.num_callers = 1

// Basecalls fast5 data with the CPU build of guppy_basecaller.
//
// input:
//   (file_id, fast5)
// output:
//   fastq - (file_id, *.fastq*)
process basecall_fast5 {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.basecalling_out != "") {
    publishDir "results/${params.basecalling_out}", mode: 'copy'
  }
  // Warn through the Nextflow logger; the former `errorFlowcell` /
  // `errorKit` names were never defined anywhere.
  if (params.flowcell == "") {
    log.warn "WARNING ! No Flowcell type given..."
  }
  if (params.kit == "") {
    log.warn "WARNING ! No kit type given..."
  }

  input:
    tuple val(file_id), path(fast5)

  output:
    tuple val(file_id), path("*.fastq*"), emit: fastq

  script:
  // The staged input is referenced directly; `path(...)` is an input/output
  // qualifier, not something that can be called inside the command string.
  // NOTE(review): with the default empty params.basecalling_out, `-s` gets
  // no value — confirm the intended save path.
  """
  guppy_basecaller --compress_fastq \
    -i ${fast5} \
    -s ${params.basecalling_out} \
    --cpu_threads_per_caller ${params.cpu_threads_per_caller} \
    --num_callers ${params.num_callers} \
    --flowcell ${params.flowcell} \
    --kit ${params.kit}
  """
}
\ No newline at end of file
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "5.0.11"
container_url = "lbmc/guppy-gpu:${version}"
// Save path given to guppy (`-s`) and sub-directory of `results/` for publishing.
params.basecalling_out = ""
// Flowcell and kit identifiers passed to guppy_basecaller (no default).
params.flowcell = ""
params.kit = ""
// Value of guppy's --gpu_runners_per_device option.
params.gpu_runners_per_device = 16

// Basecalls fast5 data with the GPU build of guppy_basecaller
// (`-x "cuda:all"`, `--min_qscore 7.0`).
//
// input:
//   (file_id, fast5)
// output:
//   fastq - (file_id, *.fastq*)
process basecall_fast5 {
  container = "${container_url}"
  // Need to create a profile using GPUs
  label ""
  tag "$file_id"
  if (params.basecalling_out != "") {
    publishDir "results/${params.basecalling_out}", mode: 'copy'
  }
  // Warn through the Nextflow logger; the former `errorFlowcell` /
  // `errorKit` names were never defined anywhere, and with the empty
  // defaults above these branches are taken unless the params are set.
  if (params.flowcell == "") {
    log.warn "WARNING ! No Flowcell type given..."
  }
  if (params.kit == "") {
    log.warn "WARNING ! No kit type given..."
  }

  input:
    tuple val(file_id), path(fast5)

  output:
    tuple val(file_id), path("*.fastq*"), emit: fastq

  script:
  // The staged input is referenced directly; `path(...)` is an input/output
  // qualifier, not something that can be called inside the command string.
  // NOTE(review): with the default empty params.basecalling_out, `-s` gets
  // no value — confirm the intended save path.
  """
  guppy_basecaller --compress_fastq -x "cuda:all" --min_qscore 7.0 \
    -i ${fast5} \
    -s ${params.basecalling_out} \
    --gpu_runners_per_device ${params.gpu_runners_per_device} \
    --flowcell ${params.flowcell} \
    --kit ${params.kit}
  """
}
\ No newline at end of file
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "2.2.1"
container_url = "lbmc/hisat2:${version}"
// Declared for this process; not referenced by the command below.
params.index_fasta = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.index_fasta_out = ""

// Gunzips a FASTA and builds a HISAT2 index from it with `hisat2-build`.
// The task exits with status 1 when the build log contains "Error".
//
// input:
//   (file_id, fasta)  - gzipped reference
// output:
//   index  - (file_id, *.ht2*)
//   report - (file_id, *_report.txt) log of hisat2-build
process index_fasta {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${file_id}"
  if (params.index_fasta_out != "") {
    publishDir "results/${params.index_fasta_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(fasta)

  output:
    tuple val(file_id), path("*.ht2*"), emit: index
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
  // After gunzip the sequence file is named fasta.baseName; the index
  // basename is fasta.simpleName.
  """
  gunzip ${fasta}
  hisat2-build -p ${task.cpus} \
    ${fasta.baseName} \
    ${fasta.simpleName} &> \
    ${fasta.simpleName}_hisat2_index_report.txt
  if grep -q "Error" ${fasta.simpleName}_hisat2_index_report.txt; then
    exit 1
  fi
  """
}
// Extra options placed right after `hisat2` on the command line.
params.mapping_fastq = ""
// Sub-directory of `results/` for published output; empty disables publishing.
params.mapping_fastq_out = ""

// Maps single- or paired-end reads with HISAT2 and writes a sorted BAM
// through `samtools view | samtools sort`. The task exits with status 1
// when the mapping report contains "Error".
//
// input:
//   (index_id, index)  - HISAT2 index files
//   (file_id, reads)   - one or two read files
// output:
//   bam    - (file_id, *.bam)
//   report - *_report.txt (stderr of hisat2)
process mapping_fastq {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${file_id}"
  if (params.mapping_fastq_out != "") {
    publishDir "results/${params.mapping_fastq_out}", mode: 'copy'
  }

  input:
    tuple val(index_id), path(index)
    tuple val(file_id), path(reads)

  output:
    tuple val(file_id), path("*.bam"), emit: bam
    path "*_report.txt", emit: report

  script:
  // Index basename: whatever precedes ".1.ht2" in the index file names,
  // falling back to the first index file when no such file is present.
  index_id = index[0]
  for (index_file in index) {
    if (index_file =~ /.*\.1\.ht2.*/) {
      index_id = ( index_file =~ /(.*)\.1\.ht2.*/)[0][1]
    }
  }

  // Output prefix: first element of a List id, first value of a Map id,
  // otherwise the id itself.
  if (file_id instanceof List) {
    file_prefix = file_id[0]
  } else if (file_id instanceof Map) {
    file_prefix = file_id.values()[0]
  } else {
    file_prefix = file_id
  }

  // Two read files: paired-end command; otherwise the `-U` command.
  if (reads.size() == 2)
  """
  hisat2 ${params.mapping_fastq} \
    -p ${task.cpus} \
    -x ${index_id} \
    -1 ${reads[0]} \
    -2 ${reads[1]} 2> \
    ${file_prefix}_ht2_mapping_report.txt \
    | samtools view -@ ${task.cpus} -bS - \
    | samtools sort -@ ${task.cpus} -o ${file_prefix}.bam
  if grep -q "Error" ${file_prefix}_ht2_mapping_report.txt; then
    exit 1
  fi
  """
  else
  """
  hisat2 ${params.mapping_fastq} \
    -p ${task.cpus} \
    -x ${index_id} \
    -U ${reads} 2> \
    ${file_prefix}_ht2_mapping_report.txt \
    | samtools view -@ ${task.cpus} -bS - \
    | samtools sort -@ ${task.cpus} -o ${file_prefix}.bam
  if grep -q "Error" ${file_prefix}_ht2_mapping_report.txt; then
    exit 1
  fi
  """
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "1.99.2"
container_url = "lbmc/htseq:${version}"
// Sub-directory of `results/` used by htseq_count for publishing; empty disables it.
params.htseq_out = ""

// Converts a GFF3 annotation to GTF with `gffread -T`.
//
// input:
//   (genome_id, gff3_file)
// output:
//   gtf - <genome_id>.gtf
process gff3_2_gtf {
  // Uses its own image rather than the module-wide container_url.
  container = "dceoy/cufflinks"
  label "small_mem_mono_cpus"

  input:
    tuple val(genome_id), path(gff3_file)

  output:
    path("${genome_id}.gtf"), emit: gtf

  script:
  """
  gffread ${gff3_file} -T -o ${genome_id}.gtf
  """
}
// Counts reads per gene with `htseq-count` (position-sorted input, minimum
// quality 10, stranded, `exon` features grouped by `gene_id`).
//
// input:
//   (file_id, bam, bai)  - alignment and its index
//   gtf                  - annotation
// output:
//   counts - <file_id>.tsv
process htseq_count {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "file_id: ${file_id}"
  if (params.htseq_out != "") {
    publishDir "results/${params.htseq_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam), path(bai)
    path(gtf)

  output:
    path("${file_id}.tsv"), emit: counts

  script:
  """
  htseq-count -n ${task.cpus} -r pos -a 10 -s yes -t exon -i gene_id ${bam} ${gtf} > ${file_id}.tsv
  """
}
// Converts a GFF3 annotation to GTF, then counts reads with htseq_count.
//
// take:
//   bam_tuple - (file_id, bam, bai) tuples
//   gff_file  - (genome_id, gff3) tuples
// emit:
//   counts    - output of htseq_count
workflow htseq_count_with_gff {
  take:
    bam_tuple
    gff_file

  main:
    gff3_2_gtf(gff_file)
    converted_gtf = gff3_2_gtf.out.gtf
    htseq_count(bam_tuple, converted_gtf)

  emit:
    counts = htseq_count.out.counts
}