From 5f215dd440f8fff1ad083d4cc9ca36fb910b9853 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Wed, 29 Mar 2023 13:55:56 +0200
Subject: [PATCH] add nextflow pipeline for fastkmers

---
 data/sample.csv          | 14 ++++++++++++++
 src/main.nf              | 22 ++++++++++++++++++++++
 src/modules/fastkmers.nf | 28 ++++++++++++++++++++++++++++
 src/nextflow.config      | 39 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 103 insertions(+)
 create mode 100644 data/sample.csv
 create mode 100644 src/main.nf
 create mode 100644 src/modules/fastkmers.nf
 create mode 100644 src/nextflow.config

diff --git a/data/sample.csv b/data/sample.csv
new file mode 100644
index 0000000..091534e
--- /dev/null
+++ b/data/sample.csv
@@ -0,0 +1,14 @@
+specie; sex; r1; r2
+mbelari; female; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/1_S1_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/1_S1_L001_R2_001.fasta.gz
+mbelari; female; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/2_S2_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/2_S2_L001_R2_001.fasta.gz
+mbelari; female; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/3_S3_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/3_S3_L001_R2_001.fasta.gz
+mbelari; female; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/4_S4_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/4_S4_L001_R2_001.fasta.gz
+mbelari; female; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/5_S5_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/5_S5_L001_R2_001.fasta.gz
+mbelari; female; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/6_S6_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/6_S6_L001_R2_001.fasta.gz
+mbelari; female; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/MRDR5_R1.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/MRDR5_R2.fasta.gz
+mbelari; male; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/JU2817_males_S11_L002_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/JU2817_males_S11_L002_R2_001.fasta.gz
+mbelari; male; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/MRDR6_R1.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/MRDR6_R2.fasta.gz
+mlongespiculosa; female; /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_females/MRDR3_R1.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_females/MRDR3_R2.fasta.gz
+mlongespiculosa; male; /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_males/MRDR4_R1.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_males/MRDR4_R2.fasta.gz
+mspiculigera; female; /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_females/AF72-females_BIS_S8_L002_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_females/AF72-females_BIS_S8_L002_R2_001.fasta.gz
+mspiculigera; male; /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_males/AF72-males_S7_L002_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_males/AF72-males_S7_L002_R2_001.fasta.gz
diff --git a/src/main.nf b/src/main.nf
new file mode 100644
index 0000000..a0194eb
--- /dev/null
+++ b/src/main.nf
@@ -0,0 +1,22 @@
+nextflow.enable.dsl=2
+
+/*
+Nextflow pipeline running fastkmers on the paired read files listed in params.csv
+*/
+
+include { FASTKMERS } from "./modules/fastkmers.nf"
+
+
+Channel.fromPath( params.csv, checkIfExists: true ) // ';'-separated sheet: specie; sex; r1; r2
+                    .splitCsv(header: true, sep: ';', strip: true) // strip: fields are padded with blanks
+                    .map{ row ->
+                        def fastq_r1 = file(row.r1) // `def` keeps the variables local to each closure call
+                        def fastq_r2 = file(row.r2)
+                        def meta = ['id': fastq_r1.simpleName, 'specie': row.specie, 'sex': row.sex]
+                        [meta, [fastq_r1, fastq_r2]] // shape expected by FASTKMERS: tuple val(meta), path(fastq)
+                    }
+                    .set{ fastq }
+
+workflow {
+  FASTKMERS(fastq) // entry point: run the included FASTKMERS process on every item of the fastq channel
+}
diff --git a/src/modules/fastkmers.nf b/src/modules/fastkmers.nf
new file mode 100644
index 0000000..5a77b0a
--- /dev/null
+++ b/src/modules/fastkmers.nf
@@ -0,0 +1,28 @@
+/*
+ * Run `fastkmers -k 31` on the read files of one sample; stdout is saved as <meta.id>.csv
+ */
+process FASTKMERS {
+    tag "$meta.id"
+    label 'process_single' // NOTE(review): src/nextflow.config has no withLabel selector for this label — confirm
+
+    container "lbmc/fastkmers:025efdf"
+
+    input:
+    tuple val(meta), path(fastq) // fastq: list of the sample's read files (r1, r2)
+
+    output:
+    tuple val(meta), path("*.csv"), emit: csv
+    path "versions.yml"           , emit: versions
+
+    script:
+    def args = task.ext.args ?: ''                // extra command-line options, empty by default
+    def prefix = task.ext.prefix ?: "${meta.id}"  // fastq is a list, so fastq.simpleName would render as "[a, b]"
+    """
+    fastkmers -k 31 ${args} ${fastq} > ${prefix}.csv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastkmers: \$(echo \$(fastkmers --version 2>&1))
+    END_VERSIONS
+    """
+}
diff --git a/src/nextflow.config b/src/nextflow.config
new file mode 100644
index 0000000..6b90004
--- /dev/null
+++ b/src/nextflow.config
@@ -0,0 +1,39 @@
+profiles { // profiles must live in this scope to be selectable with `-profile <name>`
+  psmn { // Slurm + Charliecloud settings, enabled with `-profile psmn`
+    charliecloud.enabled = true
+    charliecloud.cacheDir = "/Xnfs/abc/charliecloud"
+    charliecloud.readOnlyInputs = true
+    charliecloud.runOptions = "--bind /scratch:/scratch --bind /Xnfs:/Xnfs --bind /home:/home"
+    process {
+      errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' } // wait 200 ms * 2^attempt, then retry
+      maxRetries = 3
+      executor = "slurm"
+      queue = "Lake"
+      withLabel: big_mem_mono_cpus {
+        cpus = 1
+        memory = "128GB"
+        time = "24h"
+      }
+      withLabel: big_mem_multi_cpus {
+        cpus = 32
+        memory = "192GB"
+        time = "24h"
+      }
+      withLabel: small_mem_mono_cpus {
+        cpus = 1
+        memory = "16GB"
+        time = "24h"
+      }
+      withLabel: small_mem_multi_cpus {
+        cpus = 32
+        memory = "16GB"
+        time = "24h"
+      }
+    }
+    params {
+      max_memory = 512.GB
+      max_cpus = 32
+      max_time = 24.h
+    }
+  }
+}
-- 
GitLab