From 5f215dd440f8fff1ad083d4cc9ca36fb910b9853 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Wed, 29 Mar 2023 13:55:56 +0200
Subject: [PATCH] add nextflow pipeline for fastkmers

---
 data/sample.csv          | 14 ++++++++++++++
 src/main.nf              | 32 ++++++++++++++++++++++++++++++++
 src/modules/fastkmers.nf | 38 ++++++++++++++++++++++++++++++++++++++
 src/nextflow.config      | 44 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 128 insertions(+)
 create mode 100644 data/sample.csv
 create mode 100644 src/main.nf
 create mode 100644 src/modules/fastkmers.nf
 create mode 100644 src/nextflow.config

diff --git a/data/sample.csv b/data/sample.csv
new file mode 100644
index 0000000..091534e
--- /dev/null
+++ b/data/sample.csv
@@ -0,0 +1,14 @@
+specie; sex; r1; r2
+mbelari; female; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/1_S1_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/1_S1_L001_R2_001.fasta.gz
+mbelari; female; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/2_S2_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/2_S2_L001_R2_001.fasta.gz
+mbelari; female; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/3_S3_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/3_S3_L001_R2_001.fasta.gz
+mbelari; female; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/4_S4_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/4_S4_L001_R2_001.fasta.gz
+mbelari; female; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/5_S5_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/5_S5_L001_R2_001.fasta.gz
+mbelari; female; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/6_S6_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/6_S6_L001_R2_001.fasta.gz
+mbelari; female; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/MRDR5_R1.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/MRDR5_R2.fasta.gz
+mbelari; male; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/JU2817_males_S11_L002_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/JU2817_males_S11_L002_R2_001.fasta.gz
+mbelari; male; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/MRDR6_R1.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/MRDR6_R2.fasta.gz
+mlongespiculosa; female; /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_females/MRDR3_R1.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_females/MRDR3_R2.fasta.gz
+mlongespiculosa; male; /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_males/MRDR4_R1.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_males/MRDR4_R2.fasta.gz
+mspiculigera; female; /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_females/AF72-females_BIS_S8_L002_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_females/AF72-females_BIS_S8_L002_R2_001.fasta.gz
+mspiculigera; male; /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_males/AF72-males_S7_L002_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_males/AF72-males_S7_L002_R2_001.fasta.gz
diff --git a/src/main.nf b/src/main.nf
new file mode 100644
index 0000000..a0194eb
--- /dev/null
+++ b/src/main.nf
@@ -0,0 +1,32 @@
+nextflow.enable.dsl=2
+
+/*
+ * Run fastkmers on every paired read set listed in the sample sheet.
+ *
+ * params.csv : ';'-separated sheet with a header line "specie; sex; r1; r2",
+ *              one row per sample; r1 / r2 are the paths of the two read files.
+ */
+
+include { FASTKMERS } from "./modules/fastkmers.nf"
+
+workflow {
+    if (!params.csv) {
+        error "Missing sample sheet: run with --csv path/to/sample.csv"
+    }
+
+    // Fields are written "a; b; c": sep splits on ';', strip removes the
+    // blanks surrounding each field.
+    Channel.fromPath(params.csv, checkIfExists: true)
+        .splitCsv(header: true, sep: ';', strip: true)
+        .map { row ->
+            // `def` keeps these local to the closure; undeclared variables
+            // would be script-global and shared between rows.
+            def fastq_r1 = file(row.r1)
+            def fastq_r2 = file(row.r2)
+            def meta = [id: fastq_r1.simpleName, specie: row.specie, sex: row.sex]
+            [meta, [fastq_r1, fastq_r2]]
+        }
+        .set { fastq }
+
+    FASTKMERS(fastq)
+}
diff --git a/src/modules/fastkmers.nf b/src/modules/fastkmers.nf
new file mode 100644
index 0000000..5a77b0a
--- /dev/null
+++ b/src/modules/fastkmers.nf
@@ -0,0 +1,38 @@
+/*
+ * Run fastkmers (k = 31) on the read files of one sample.
+ *
+ * input : meta  - map describing the sample; meta.id tags the task and
+ *                 names the output file
+ *         fastq - read file(s) of the sample
+ * output: csv      - [meta, PREFIX.csv], the standard output of fastkmers
+ *         versions - versions.yml recording the fastkmers version
+ */
+process FASTKMERS {
+    tag "$meta.id"
+    // NOTE(review): src/nextflow.config has no `withLabel: process_single`
+    // selector, so this task only gets the profile-wide process defaults.
+    label 'process_single'
+
+    container "lbmc/fastkmers:025efdf"
+
+    input:
+    tuple val(meta), path(fastq)
+
+    output:
+    tuple val(meta), path("*.csv"), emit: csv
+    path "versions.yml"           , emit: versions
+
+    script:
+    def args = task.ext.args ?: ''
+    // `fastq` holds several files, so `fastq.simpleName` is not a single
+    // file name; derive the output name from the sample id instead.
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    fastkmers -k 31 ${args} ${fastq} > ${prefix}.csv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastkmers: \$(echo \$(fastkmers --version 2>&1))
+    END_VERSIONS
+    """
+}
diff --git a/src/nextflow.config b/src/nextflow.config
new file mode 100644
index 0000000..6b90004
--- /dev/null
+++ b/src/nextflow.config
@@ -0,0 +1,44 @@
+// Execution profiles; select one with `-profile NAME` on the command line.
+profiles {
+  // psmn: Charliecloud containers, tasks submitted through SLURM (queue "Lake").
+  psmn {
+    charliecloud.enabled = true
+    charliecloud.cacheDir = "/Xnfs/abc/charliecloud"
+    charliecloud.readOnlyInputs = true
+    charliecloud.runOptions = "--bind /scratch:/scratch --bind /Xnfs:/Xnfs --bind /home:/home"
+
+    process{
+      // Retry a failed task up to maxRetries times, sleeping 2^attempt * 200 ms first.
+      errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' }
+      maxRetries = 3
+      executor = "slurm"
+      queue = "Lake"
+      withLabel: big_mem_mono_cpus {
+        cpus = 1
+        memory = "128GB"
+        time = "24h"
+      }
+      withLabel: big_mem_multi_cpus {
+        cpus = 32
+        memory = "192GB"
+        time = "24h"
+      }
+      withLabel: small_mem_mono_cpus {
+        cpus = 1
+        memory = "16GB"
+        time = "24h"
+      }
+      withLabel: small_mem_multi_cpus {
+        cpus = 32
+        memory = "16GB"
+        time = "24h"
+      }
+    }
+
+    params {
+      max_memory = 512.GB
+      max_cpus = 32
+      max_time = 24.h
+    }
+  }
+}
-- 
GitLab