Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • LBMC/RMI2/rmi2_pipelines
  • LBMC/Palladino/RNAseq_nextflow
  • rseraphi/nextflow
  • elabaron/nextflow
  • pberna01/nextflow
  • jblin/nextflow
  • cginevra/nextflow
  • carpin/nextflow
  • cfournea/nextflow
  • dtorresc/nextflow
  • LBMC/nextflow
  • nlecouvr/nextflow-nathan
  • lpicard/nextflow
  • vvanoost/nextflow
  • fmortreu/nextflow
  • hpolvech/nextflow
  • lanani/nextflow
  • mcariou/nextflow
  • fduveau/nextflow
  • jshapiro/nextflow
  • hregue/nextflow
  • yjia01/nextflow
  • acorbin/nextflow
  • ggirau03/nextflow
  • letien02/nextflow
  • ogandril/nextflow
  • jclaud01/nextflow
  • mshamjal/nextflow
  • mprieux/nextflow
  • z483801/nextflow
  • mparis/nextflow
  • alapendr/nextflow
  • cbourgeo/nextflow
  • jvalat/nextflow
  • z483800/nextflow
  • ecombe01/nextflow
  • dchalopi/nextflow
  • mherbett/nextflow
  • jprobin/nextflow
  • lestrada/nextflow
  • gyvert/nextflow
  • nfontrod/nextflow
  • gbenoit/nextflow
  • aguill09/nextflow
  • LBMC/regards/nextflow
  • mvilcot/nextflow
  • jkleine/nextflow
  • jseimand/nextflow
  • LBMC/Delattre/JU28_59vs17_SNP
  • mdjaffar/nextflow
  • pmarie01/nextflow
  • rhoury/nextflow
  • mlepetit/nextflow
  • lgely/nextflow
54 results
Show changes
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "407"
container_url = "lbmc/ucsc:${version}"
include {
index_fasta
} from './../samtools/main'
params.bedgraph_to_bigwig = ""
params.bedgraph_to_bigwig_out = ""
// Convert a bedGraph track into a bigWig file with UCSC bedGraphToBigWig.
//
// inputs:
//   - tuple (file_id, bg):  bedGraph file to convert
//   - tuple (file_id, bed): bed file whose columns 1 and 3 are used as
//                           "chromosome <tab> size"
// output:
//   - bw: tuple (file_id, <bg.simpleName>_norm.bw)
// params:
//   - params.bedgraph_to_bigwig:     extra options passed to bedGraphToBigWig
//   - params.bedgraph_to_bigwig_out: if not empty, results are copied to
//                                    results/<bedgraph_to_bigwig_out>
//
// NOTE(review): both input tuples bind the name `file_id`; confirm which one
// is expected to end up in the tag and in the emitted tuple.
process bedgraph_to_bigwig {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.bedgraph_to_bigwig_out != "") {
    publishDir "results/${params.bedgraph_to_bigwig_out}", mode: 'copy'
  }

  input:
  tuple val(file_id), path(bg)
  tuple val(file_id), path(bed)

  output:
  tuple val(file_id), path("*.bw"), emit: bw

  script:
"""
# exported so that sort actually runs with the C collation
export LC_COLLATE=C
# transform bed file of start-stop chromosome size to stop chromosome size
awk -v OFS="\\t" '{print \$1, \$3}' ${bed} > chromsize.txt
# sort into a regular file, then convert it: the previous
# "sort ... > bedGraphToBigWig ..." form wrote the sorted data into a file
# named bedGraphToBigWig and never ran the converter
sort -T ./ -k1,1 -k2,2n ${bg} > ${bg.simpleName}.sorted.tmp.bedgraph
bedGraphToBigWig ${params.bedgraph_to_bigwig} \
${bg.simpleName}.sorted.tmp.bedgraph \
chromsize.txt \
${bg.simpleName}_norm.bw
"""
}
params.wig_to_bedgraph = ""
params.wig_to_bedgraph_out = ""
// Sub-workflow: wig -> bigWig -> bedGraph.
//   take: fasta, wig (both forwarded unchanged to wig_to_bigwig)
//   emit: bg, the bedGraph channel produced by bigwig_to_bedgraph
workflow wig_to_bedgraph {
  take:
    fasta
    wig

  main:
    // first convert the wig files to bigWig ...
    wig_to_bigwig(fasta, wig)
    // ... then the bigWig files to bedGraph
    bigwig_to_bedgraph(wig_to_bigwig.out.bw)

  emit:
    bg = bigwig_to_bedgraph.out.bg
}
// Sub-workflow: same chain as wig_to_bedgraph, built on the two-file
// variants of the processes (wig2_to_bigwig2, bigwig2_to_bedgraph2).
//   take: fasta, wig (both forwarded unchanged to wig2_to_bigwig2)
//   emit: bg, the output channel of bigwig2_to_bedgraph2
workflow wig2_to_bedgraph2 {
  take:
    fasta
    wig

  main:
    wig2_to_bigwig2(fasta, wig)
    bigwig2_to_bedgraph2(wig2_to_bigwig2.out.bw)

  emit:
    bg = bigwig2_to_bedgraph2.out.bg
}
params.bigwig_to_bedgraph = ""
params.bigwig_to_bedgraph_out = ""
// Convert one bigWig file to bedGraph with UCSC bigWigToBedGraph.
//   input:  tuple (file_id, bw)
//   output: bg, tuple (file_id, <bw.simpleName>.bg)
//   params.bigwig_to_bedgraph_out: if not empty, results are copied to
//   results/<bigwig_to_bedgraph_out>
process bigwig_to_bedgraph {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.bigwig_to_bedgraph_out != "") {
    publishDir "results/${params.bigwig_to_bedgraph_out}", mode: 'copy'
  }

  input:
  tuple val(file_id), path(bw)

  output:
  tuple val(file_id), path("*.bg"), emit: bg

  script:
  // output name derived from the input file name
  def bedgraph = "${bw.simpleName}.bg"
"""
bigWigToBedGraph ${bw} ${bedgraph}
"""
}
params.bigwig2_to_bedgraph2 = ""
params.bigwig2_to_bedgraph2_out = ""
// Convert a pair of bigWig files to bedGraph with UCSC bigWigToBedGraph.
//   input:  tuple (file_id, bw_a, bw_b)
//   output: bg, tuple (file_id, <bw_a.simpleName>.bg, <bw_b.simpleName>.bg)
//   publication: results/<bigwig2_to_bedgraph2_out> when that parameter is
//   set; otherwise results/<bigwig_to_bedgraph_out> is still honoured so
//   that existing callers relying on the former behaviour keep working.
process bigwig2_to_bedgraph2 {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.bigwig2_to_bedgraph2_out != "") {
    publishDir "results/${params.bigwig2_to_bedgraph2_out}", mode: 'copy'
  } else if (params.bigwig_to_bedgraph_out != "") {
    // legacy fallback: this process used to read the single-file parameter
    publishDir "results/${params.bigwig_to_bedgraph_out}", mode: 'copy'
  }

  input:
  tuple val(file_id), path(bw_a), path(bw_b)

  output:
  tuple val(file_id), path("${bw_a.simpleName}.bg"), path("${bw_b.simpleName}.bg"), emit: bg

  script:
"""
bigWigToBedGraph ${bw_a} ${bw_a.simpleName}.bg
bigWigToBedGraph ${bw_b} ${bw_b.simpleName}.bg
"""
}
params.bigwig_to_wig = ""
params.bigwig_to_wig_out = ""
// Convert one bigWig file to wig: bigWigToBedGraph first, then
// bedgraph_to_wig.pl with a step of 10.
//   input:  tuple (file_id, bw)
//   output: wig, tuple (file_id, <bw.simpleName>.wig)
//   params.bigwig_to_wig_out: if not empty, results are copied to
//   results/<bigwig_to_wig_out>
process bigwig_to_wig {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.bigwig_to_wig_out != "") {
    publishDir "results/${params.bigwig_to_wig_out}", mode: 'copy'
  }

  input:
  tuple val(file_id), path(bw)

  output:
  tuple val(file_id), path("*.wig"), emit: wig

  script:
  // intermediate and final file names share the input's simple name
  def stem = bw.simpleName
"""
bigWigToBedGraph ${bw} ${stem}.bg
bedgraph_to_wig.pl --bedgraph ${stem}.bg --wig ${stem}.wig --step 10
"""
}
params.bigwig2_to_wig2 = ""
params.bigwig2_to_wig2_out = ""
// Convert a pair of bigWig files to wig (bigWigToBedGraph, then
// bedgraph_to_wig.pl with a step of 10) for each file of the pair.
//   input:  tuple (file_id, bw_a, bw_b)
//   output: wig, tuple (file_id, <bw_a.simpleName>.wig, <bw_b.simpleName>.wig)
//   publication: results/<bigwig2_to_wig2_out> when that parameter is set;
//   otherwise results/<bigwig_to_wig_out> is still honoured so that existing
//   callers relying on the former behaviour keep working.
process bigwig2_to_wig2 {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.bigwig2_to_wig2_out != "") {
    publishDir "results/${params.bigwig2_to_wig2_out}", mode: 'copy'
  } else if (params.bigwig_to_wig_out != "") {
    // legacy fallback: this process used to read the single-file parameter
    publishDir "results/${params.bigwig_to_wig_out}", mode: 'copy'
  }

  input:
  tuple val(file_id), path(bw_a), path(bw_b)

  output:
  tuple val(file_id), path("${bw_a.simpleName}.wig"), path("${bw_b.simpleName}.wig"), emit: wig

  script:
"""
bigWigToBedGraph ${bw_a} ${bw_a.simpleName}.bg
bedgraph_to_wig.pl --bedgraph ${bw_a.simpleName}.bg --wig ${bw_a.simpleName}.wig --step 10
bigWigToBedGraph ${bw_b} ${bw_b.simpleName}.bg
bedgraph_to_wig.pl --bedgraph ${bw_b.simpleName}.bg --wig ${bw_b.simpleName}.wig --step 10
"""
}
params.wig_to_bigwig = ""
params.wig_to_bigwig_out = ""
// Sub-workflow: index the fasta, then convert wig files to bigWig using
// that index.
//   take: fasta, wig
//   emit: bw, the output channel of wig_to_bigwig_sub
workflow wig_to_bigwig {
  take:
    fasta
    wig

  main:
    index_fasta(fasta)
    wig_to_bigwig_sub(wig, index_fasta.out.index)

  emit:
    bw = wig_to_bigwig_sub.out.bw
}
// Convert one wig file to bigWig with UCSC wigToBigWig (-clip).
// The chromosome sizes file is made from the first two columns of the
// fasta index.
//   input:  tuple (file_id, w), tuple (idx_id, fasta_idx)
//   output: bw, tuple (file_id, <w.simpleName>.bw)
//   publication: results/<wig_to_bigwig_out> when that parameter is set;
//   otherwise results/<bigwig_to_wig_out> is still honoured so that existing
//   callers relying on the former behaviour keep working.
process wig_to_bigwig_sub {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.wig_to_bigwig_out != "") {
    publishDir "results/${params.wig_to_bigwig_out}", mode: 'copy'
  } else if (params.bigwig_to_wig_out != "") {
    // legacy fallback: this process used to read the reverse conversion's
    // parameter
    publishDir "results/${params.bigwig_to_wig_out}", mode: 'copy'
  }

  input:
  tuple val(file_id), path(w)
  tuple val(idx_id), path(fasta_idx)

  output:
  tuple val(file_id), path("${w.simpleName}.bw"), emit: bw

  script:
"""
cut -f 1,2 ${fasta_idx} > ${fasta_idx.simpleName}.sizes
wigToBigWig -clip ${w} ${fasta_idx.simpleName}.sizes ${w.simpleName}.bw
"""
}
params.wig2_to_bigwig2 = ""
params.wig2_to_bigwig2_out = ""
// Sub-workflow: index the fasta, then convert pairs of wig files to bigWig
// using that index.
//   take: fasta, wigs
//   emit: bw, the output channel of wig2_to_bigwig2_sub
workflow wig2_to_bigwig2 {
  take:
    fasta
    wigs

  main:
    index_fasta(fasta)
    wig2_to_bigwig2_sub(wigs, index_fasta.out.index)

  emit:
    bw = wig2_to_bigwig2_sub.out.bw
}
// Convert a pair of wig files to bigWig with UCSC wigToBigWig (-clip).
// The chromosome sizes file is made from the first two columns of the
// fasta index and shared by both conversions.
//   input:  tuple (file_id, w_a, w_b), tuple (idx_id, fasta_idx)
//   output: bw, tuple (file_id, <w_a.simpleName>.bw, <w_b.simpleName>.bw)
//   publication: results/<wig2_to_bigwig2_out> when that parameter is set;
//   otherwise results/<bigwig_to_wig_out> is still honoured so that existing
//   callers relying on the former behaviour keep working.
process wig2_to_bigwig2_sub {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"
  if (params.wig2_to_bigwig2_out != "") {
    publishDir "results/${params.wig2_to_bigwig2_out}", mode: 'copy'
  } else if (params.bigwig_to_wig_out != "") {
    // legacy fallback: this process used to read the reverse conversion's
    // parameter
    publishDir "results/${params.bigwig_to_wig_out}", mode: 'copy'
  }

  input:
  tuple val(file_id), path(w_a), path(w_b)
  tuple val(idx_id), path(fasta_idx)

  output:
  tuple val(file_id), path("${w_a.simpleName}.bw"), path("${w_b.simpleName}.bw"), emit: bw

  script:
"""
cut -f 1,2 ${fasta_idx} > ${fasta_idx.simpleName}.sizes
wigToBigWig -clip ${w_a} ${fasta_idx.simpleName}.sizes ${w_a.simpleName}.bw
wigToBigWig -clip ${w_b} ${fasta_idx.simpleName}.sizes ${w_b.simpleName}.bw
"""
}
\ No newline at end of file
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "d62c1f8"
container_url = "lbmc/urqt:${version}"
trim_quality = "20"
params.trimming = "--t 20"
// Quality-trim reads with UrQt.
//   input:  tuple (file_id, reads); two files are processed as a pair,
//           anything else as single-end (only reads[0] is used)
//   output: fastq,  tuple (file_id, trimmed fastq.gz file(s))
//           report, the UrQt standard output saved as *_trimming_report.txt
//   params.trimming: options passed to UrQt
// When file_id is a list, its first element is used as file name prefix.
process trimming {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${file_id}"

  input:
  tuple val(file_id), path(reads)

  output:
  // file_id (not the undefined pair_id) is emitted; the glob covers both the
  // paired (*_trim_R1/_R2.fastq.gz) and the single-end (*_trim.fastq.gz) names
  tuple val(file_id), path("*_trim*.fastq.gz"), emit: fastq
  path "*_report.txt", emit: report

  script:
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
  if (reads.size() == 2)
"""
UrQt ${params.trimming} --m ${task.cpus} --gz \
--in ${reads[0]} --inpair ${reads[1]} \
--out ${file_prefix}_trim_R1.fastq.gz --outpair ${file_prefix}_trim_R2.fastq.gz \
> ${file_prefix}_trimming_report.txt
"""
  else
"""
UrQt ${params.trimming} --m ${task.cpus} --gz \
--in ${reads[0]} \
--out ${file_prefix}_trim.fastq.gz \
> ${file_prefix}_trimming_report.txt
"""
}
\ No newline at end of file
sge_modules @ 94be868e
Subproject commit 94be868ea503b4810b110b35520d61f129035967
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
nextflow.enable.dsl=2
include { fastp } from "./nf_modules/fastp/main.nf"
include { fasta_from_bed } from "./nf_modules/bedtools/main.nf"
include { index_fasta; mapping_fastq } from './nf_modules/kallisto/main.nf' addParams(mapping_fastq_out: "quantification/")
// Default location of the input reads; params.fasta and params.bed have no
// default and are read as given on the command line or in the config.
params.fastq = "data/fastq/*_{1,2}.fastq"

log.info "fastq files: ${params.fastq}"
log.info "fasta file : ${params.fasta}"
log.info "bed file : ${params.bed}"

// reads grouped by the fromFilePairs pattern, any number of files per group
fastq_files = channel.fromFilePairs( params.fastq, size: -1)

// [simpleName, file] tuples for the reference fasta
fasta_files = channel
  .fromPath( params.fasta )
  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
  .map { fasta -> [fasta.simpleName, fasta] }

// [simpleName, file] tuples for the bed annotation
bed_files = channel
  .fromPath( params.bed )
  .ifEmpty { error "Cannot find any bed files matching: ${params.bed}" }
  .map { bed -> [bed.simpleName, bed] }

// fastp on the reads, fasta extraction from the bed regions, kallisto index
// of the extracted fasta, then mapping of the fastp output on that index
workflow {
  fastp(fastq_files)
  fasta_from_bed(fasta_files, bed_files)
  index_fasta(fasta_from_bed.out.fasta)
  mapping_fastq(index_fasta.out.index.collect(), fastp.out.fastq)
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
profiles {
// Local execution with Docker containers. Processes are grouped by the
// tool they run, using regular-expression process selectors.
docker {
  docker.temp = "auto"
  docker.enabled = true
  process {
    // bedtools steps
    withName: 'build_synthetic_bed|fasta_from_bed' {
      container = "lbmc/bedtools:2.25.0"
      cpus = 1
    }
    // bowtie2 steps (indexing and mapping)
    withName: 'index_fasta|mapping_fastq_(paired|single)' {
      container = "lbmc/bowtie2:2.3.4.1"
      cpus = 4
    }
    // samtools steps
    withName: '(bam_2_fastq|filter_bam|sort_bam|index_bam)_(paired|single)' {
      container = "lbmc/samtools:1.7"
      cpus = 4
    }
  }
}
// Local execution with Singularity; images are cached in ./bin/.
// Processes are grouped by the tool they run, using regular-expression
// process selectors.
singularity {
  singularity.enabled = true
  singularity.cacheDir = "./bin/"
  process {
    // bedtools steps
    withName: 'build_synthetic_bed|fasta_from_bed' {
      container = "lbmc/bedtools:2.25.0"
      cpus = 1
    }
    // bowtie2 steps (indexing and mapping)
    withName: 'index_fasta|mapping_fastq_(paired|single)' {
      container = "lbmc/bowtie2:2.3.4.1"
      cpus = 4
    }
    // samtools steps
    withName: '(bam_2_fastq|filter_bam|sort_bam|index_bam)_(paired|single)' {
      container = "lbmc/samtools:1.7"
      cpus = 4
    }
  }
}
// PSMN cluster: SGE executor with conda environments.
// Processes are grouped by tool with regular-expression selectors. The
// samtools group also covers filter_bam_paired and filter_bam_single, which
// previously had no entry here and would therefore have run without the
// SGE executor and without the samtools environment.
psmn {
  process{
    // bedtools steps: one core on the mono queue
    withName: 'build_synthetic_bed|fasta_from_bed' {
      beforeScript = "source $baseDir/.conda_psmn.sh"
      conda = "$baseDir/.conda_envs/bedtools_2.25.0"
      executor = "sge"
      clusterOptions = "-m e -cwd -V"
      cpus = 1
      memory = "20GB"
      time = "12h"
      queue = "monointeldeb128"
    }
    // bowtie2 indexing
    withName: index_fasta {
      beforeScript = "source $baseDir/.conda_psmn.sh"
      conda = "$baseDir/.conda_envs/bowtie2_2.3.4.1"
      executor = "sge"
      clusterOptions = "-m e -cwd -V"
      cpus = 32
      memory = "20GB"
      time = "12h"
      queue = "CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D"
      penv = "openmp32"
    }
    // bowtie2 mapping
    withName: 'mapping_fastq_(paired|single)' {
      beforeScript = "source $baseDir/.conda_psmn.sh"
      conda = "$baseDir/.conda_envs/bowtie2_2.3.4.1"
      executor = "sge"
      clusterOptions = "-m e -cwd -V"
      cpus = 32
      memory = "30GB"
      time = "24h"
      queue = "CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D"
      penv = "openmp32"
    }
    // samtools steps
    withName: '(bam_2_fastq|filter_bam|sort_bam|index_bam)_(paired|single)' {
      beforeScript = "source $baseDir/.conda_psmn.sh"
      conda = "$baseDir/.conda_envs/samtools_1.7"
      executor = "sge"
      clusterOptions = "-m e -cwd -V"
      cpus = 32
      memory = "30GB"
      time = "24h"
      queue = "CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D"
      penv = "openmp32"
    }
  }
}
// CC-IN2P3 cluster: SGE executor with Singularity containers.
// The two former process scopes (fasta_from_bed was configured twice, with
// identical settings) are merged into one, and processes are grouped by
// tool with regular-expression selectors. The samtools group also covers
// filter_bam_paired and filter_bam_single, which previously had no entry.
ccin2p3 {
  singularity.enabled = true
  singularity.cacheDir = "$baseDir/.singularity_in2p3/"
  singularity.runOptions = "--bind /pbs,/sps,/scratch"
  process{
    // bedtools steps
    withName: 'build_synthetic_bed|fasta_from_bed' {
      container = "lbmc/bedtools:2.25.0"
      scratch = true
      stageInMode = "copy"
      stageOutMode = "rsync"
      executor = "sge"
      clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
      cpus = 1
      queue = "huge"
    }
    // bowtie2 steps (indexing and mapping)
    withName: 'index_fasta|mapping_fastq_(paired|single)' {
      container = "lbmc/bowtie2:2.3.4.1"
      scratch = true
      stageInMode = "copy"
      stageOutMode = "rsync"
      executor = "sge"
      clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
      cpus = 1
      queue = "huge"
    }
    // samtools steps
    withName: '(bam_2_fastq|filter_bam|sort_bam|index_bam)_(paired|single)' {
      container = "lbmc/samtools:1.7"
      scratch = true
      stageInMode = "copy"
      stageOutMode = "rsync"
      executor = "sge"
      clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
      cpus = 1
      queue = "huge"
    }
  }
}
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
/*
small pipeline to build a training dataset from whole genome data
input:
- fasta
- fastq
- chromosome
- start position
- stop position
output:
- sort fasta
- sort fastq
example for paired-end data:
./nextflow src/training_dataset.nf -c src/training_dataset.config --fasta "data/genome.fa" --fastq_paired "data/*_R{1,2}.fastq.gz" --chromosome "X" --start 5305683 --stop 5333928 -resume
example for single-end data:
./nextflow src/training_dataset.nf -c src/training_dataset.config --fasta "data/genome.fa" --fastq_single "data/*_R1.fastq.gz" --chromosome "X" --start 5305683 --stop 5333928 -resume
*/
// Both read inputs are optional; an empty string disables the matching
// branch of the pipeline.
params.fastq_paired = ""
params.fastq_single = ""

// echo the run parameters
log.info "fasta files : ${params.fasta}"
log.info "fastq paired files : ${params.fastq_paired}"
log.info "fastq single files : ${params.fastq_single}"
log.info "chromosome : ${params.chromosome}"
log.info "start position : ${params.start}"
log.info "stop position : ${params.stop}"

// reference fasta; the run stops if the pattern matches nothing
fasta_file = Channel
  .fromPath( params.fasta )
  .ifEmpty { error "Cannot find any index files matching: ${params.fasta}" }
// Write a one-line, tab-separated bed file (synthetic.bed) describing the
// requested region: chromosome, start, stop.
process build_synthetic_bed {
  tag "${chromosome}:${start}-${stop}"
  cpus 4

  input:
    val chromosome from params.chromosome
    val start from params.start
    val stop from params.stop

  output:
    file "*.bed" into bed_files

  script:
  // the three fields joined with tabs
  def bed_line = [chromosome, start, stop].join("\t")
"""
echo "${bed_line}" > synthetic.bed
"""
}
// Extract the bed region(s) from the fasta with bedtools getfasta.
// The result is named s<fasta baseName>.fasta and copied to
// results/training/fasta/. The chromosome input is declared but not used by
// the script.
process fasta_from_bed {
  tag "${fasta.baseName}"
  cpus 4
  publishDir "results/training/fasta/", mode: 'copy'

  input:
    file fasta from fasta_file
    file bed from bed_files
    val chromosome from params.chromosome

  output:
    file "*.fasta" into fasta_files_extracted

  script:
  def extracted = "s${fasta.baseName}.fasta"
"""
bedtools getfasta -fi ${fasta} -bed ${bed} -fo ${extracted}
"""
}
// Build a bowtie2 index named <fasta baseName>.index from the extracted
// fasta. Everything bowtie2-build prints goes to
// <fasta baseName>_bowtie2_report.txt, and the task fails if that report
// contains "Error". Index and report are copied to
// results/training/mapping/index/.
process index_fasta {
  tag "$fasta.baseName"
  cpus 4
  publishDir "results/training/mapping/index/", mode: 'copy'

  input:
    file fasta from fasta_files_extracted

  output:
    file "*.index*" into index_files
    file "*_report.txt" into indexing_report

  script:
  def prefix = fasta.baseName
  def report = "${prefix}_bowtie2_report.txt"
"""
bowtie2-build --threads ${task.cpus} ${fasta} ${prefix}.index &> ${report}
if grep -q "Error" ${report}; then
exit 1
fi
"""
}
// Paired-end branch: only defined when --fastq_paired is given.
// Chain: mapping_fastq_paired -> (bam_2_fastq_paired, filter_bam_paired)
//        filter_bam_paired -> sort_bam_paired -> index_bam_paired
if ( params.fastq_paired != "" ) {
// read pairs grouped by fromFilePairs; the run stops if nothing matches
Channel
.fromFilePairs( params.fastq_paired )
.ifEmpty { error "Cannot find any fastq files matching: ${params.fastq_paired}" }
.set { fastq_files_paired }
// Map each read pair on the bowtie2 index and convert the output to BAM.
// bowtie2's stderr is saved as <pair_id>_bowtie2_report.txt and the task
// fails if that report contains "Error".
process mapping_fastq_paired {
tag "$pair_id"
cpus 4
input:
set pair_id, file(reads) from fastq_files_paired
file index from index_files.collect()
output:
set pair_id, "*.bam" into bam_files_paired
file "*_report.txt" into mapping_report
script:
// default to the first index file, then prefer the prefix of the file
// ending in .1.bt2 (the .rev.1.bt2 file is skipped)
index_id = index[0]
for (index_file in index) {
if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
}
}
"""
bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \
-1 ${reads[0]} -2 ${reads[1]} 2> \
${pair_id}_bowtie2_report.txt | \
samtools view -Sb - > ${pair_id}.bam
if grep -q "Error" ${pair_id}_bowtie2_report.txt; then
exit 1
fi
"""
}
// the BAM channel is duplicated: one copy for fastq extraction (_fa), one
// for BAM filtering (_ba)
bam_files_paired.into{ bam_files_paired_fa; bam_files_paired_ba}
// Write the reads of the BAM back to s<file_id>_R1.fastq / _R2.fastq with
// samtools fastq -F 0x4, and copy them to results/training/fastq/.
process bam_2_fastq_paired {
tag "$file_id"
publishDir "results/training/fastq/", mode: 'copy'
input:
set file_id, file(bam) from bam_files_paired_fa
output:
set file_id, "*.fastq" into fastq_files_extracted
script:
"""
samtools fastq -1 s${file_id}_R1.fastq -2 s${file_id}_R2.fastq -F 0x4 ${bam}
"""
}
// Filter the BAM with samtools view -F 0x4 into f<file_id>.bam.
// The bed input is declared but not used by the script.
process filter_bam_paired {
tag "$file_id"
cpus 4
input:
set file_id, file(bam) from bam_files_paired_ba
file bed from bed_files
output:
set file_id, "*.bam" into filtered_bam_files_paired
script:
"""
samtools view -@ ${task.cpus} -hb ${bam} -F 0x4 > f${file_id}.bam
"""
}
// Sort the filtered BAM into s<file_id>.bam, copied to
// results/training/bams/.
process sort_bam_paired {
tag "$file_id"
publishDir "results/training/bams/", mode: 'copy'
cpus 4
input:
set file_id, file(bam) from filtered_bam_files_paired
output:
set file_id, "*.bam" into sorted_bam_files_paired
script:
"""
samtools sort -@ ${task.cpus} -O BAM -o s${file_id}.bam ${bam}
"""
}
// Index the sorted BAM with samtools index; files matching *.bam* are
// copied to results/training/bams/.
process index_bam_paired {
tag "$file_id"
publishDir "results/training/bams/", mode: 'copy'
input:
set file_id, file(bam) from sorted_bam_files_paired
output:
set file_id, "*.bam*" into indexed_bam_file_paired
script:
"""
samtools index ${bam}
"""
}
}
// Single-end branch: only defined when --fastq_single is given.
// Chain: mapping_fastq_single -> (bam_2_fastq_single, filter_bam_single)
//        filter_bam_single -> sort_bam_single -> index_bam_single
// NOTE(review): this branch writes into the same channel names as the
// paired-end branch (mapping_report, fastq_files_extracted) and also reads
// index_files; confirm that both branches are never enabled together.
if ( params.fastq_single != "" ) {
  // [file_id, file] tuples; file_id is the base name up to its first dot
  Channel
    .fromPath( params.fastq_single )
    .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq_single}" }
    .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
    .set { fastq_files_single }

  // Map each fastq on the bowtie2 index and convert the output to BAM.
  // bowtie2's stderr is saved as <file_id>_bowtie2_report.txt and the task
  // fails if that report contains "Error".
  process mapping_fastq_single {
    tag "$file_id"
    cpus 4

    input:
    set file_id, file(reads) from fastq_files_single
    file index from index_files.collect()

    output:
    set file_id, "*.bam" into bam_files_single
    file "*_report.txt" into mapping_report

    script:
    // default to the first index file, then prefer the prefix of the file
    // ending in .1.bt2 (the .rev.1.bt2 file is skipped)
    index_id = index[0]
    for (index_file in index) {
      if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
      }
    }
"""
bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \
-U ${reads} 2> \
${file_id}_bowtie2_report.txt | \
samtools view -Sb - > ${file_id}.bam
if grep -q "Error" ${file_id}_bowtie2_report.txt; then
exit 1
fi
"""
  }

  // the BAM channel is duplicated: one copy for fastq extraction (_fa), one
  // for BAM filtering (_ba)
  bam_files_single.into{ bam_files_single_fa; bam_files_single_ba}

  // Write the reads of the BAM back to s<file_id>.fastq with
  // samtools fastq -F 0x4. The result is now copied to
  // results/training/fastq/, as bam_2_fastq_paired does; without this the
  // single-end fastq was never published.
  process bam_2_fastq_single {
    tag "$file_id"
    publishDir "results/training/fastq/", mode: 'copy'

    input:
    set file_id, file(bam) from bam_files_single_fa

    output:
    set file_id, "*.fastq" into fastq_files_extracted

    script:
"""
samtools fastq -0 s${file_id}.fastq -F 0x4 ${bam}
"""
  }

  // Filter the BAM with samtools view -F 0x4 into f<file_id>.bam.
  // The bed input is declared but not used by the script.
  process filter_bam_single {
    tag "$file_id"
    cpus 4

    input:
    set file_id, file(bam) from bam_files_single_ba
    file bed from bed_files

    output:
    set file_id, "*.bam" into filtered_bam_files_single

    script:
"""
samtools view -@ ${task.cpus} -hb ${bam} -F 0x4 > f${file_id}.bam
"""
  }

  // Sort the filtered BAM into s<file_id>.bam, copied to
  // results/training/bams/.
  process sort_bam_single {
    tag "$file_id"
    publishDir "results/training/bams/", mode: 'copy'
    cpus 4

    input:
    set file_id, file(bam) from filtered_bam_files_single

    output:
    set file_id, "*.bam" into sorted_bam_files_single

    script:
"""
samtools sort -@ ${task.cpus} -O BAM -o s${file_id}.bam ${bam}
"""
  }

  // Index the sorted BAM with samtools index; files matching *.bam* are
  // copied to results/training/bams/.
  process index_bam_single {
    tag "$file_id"
    publishDir "results/training/bams/", mode: 'copy'

    input:
    set file_id, file(bam) from sorted_bam_files_single

    output:
    set file_id, "*.bam*" into indexed_bam_file_single

    script:
"""
samtools index ${bam}
"""
  }
}