From 7d386a5ed1c6769e72f67af7e320b30e8cefcffb Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Wed, 29 Sep 2021 10:07:47 +0200
Subject: [PATCH] nf_modules: add rasusa

---
 src/nf_modules/rasusa/main.nf | 74 +++++++++++++++++++++++++++++++++++
 src/nf_modules/rasusa/test.nf | 26 ++++++++++++
 2 files changed, 100 insertions(+)
 create mode 100644 src/nf_modules/rasusa/main.nf
 create mode 100644 src/nf_modules/rasusa/test.nf

diff --git a/src/nf_modules/rasusa/main.nf b/src/nf_modules/rasusa/main.nf
new file mode 100644
index 00000000..c530af43
--- /dev/null
+++ b/src/nf_modules/rasusa/main.nf
@@ -0,0 +1,74 @@
+version = "0.6.0" // rasusa release, used as the container image tag below
+container_url = "quay.io/mbhall88/rasusa:${version}"
+
+include { index_fasta } from "./../samtools/main.nf"
+
+params.sample_fastq = "" // NOTE(review): looks redundant with sample_fastq_out — confirm which one is intended
+params.sample_fastq_coverage = "1.0" // value passed to rasusa `-c` by sub_sample_fastq
+params.sample_fastq_size = "" // when non-empty, sub_sample_fastq passes it to rasusa `-b` instead of `-c`
+params.sample_fastq_out = "" // presumably the publish folder under results/ ("" = do not publish) — TODO confirm
+workflow sample_fastq { // runs index_fasta on the reference, then sub_sample_fastq on the reads
+  take:
+  fastq // forwarded unchanged as the first input of sub_sample_fastq
+  fasta // forwarded unchanged to index_fasta
+
+  main:
+  index_fasta(fasta)
+  sub_sample_fastq(fastq, index_fasta.out.index) // the `index` output is the second input of sub_sample_fastq
+
+  emit:
+  fastq = sub_sample_fastq.out.fastq
+
+}
+
+// Subsample a FASTQ file, or a pair of FASTQ files, with rasusa.
+// Inputs:  [file_id, fastq file(s)] and [index_id, idx]; idx is handed to
+//          rasusa `-g` (presumably the FASTA index made by index_fasta, from
+//          which rasusa derives the genome size - confirm against rasusa docs).
+// Output:  [file_id, sub_*.fastq.gz], emitted as `fastq`.
+// Target:  `-c params.sample_fastq_coverage`, or `-b params.sample_fastq_size`
+//          when that parameter is not empty.
+process sub_sample_fastq {
+  container = "${container_url}"
+  label "small_mem_multi_cpus"
+  tag "$file_id"
+  // publish only when an output folder was requested for this module
+  if (params.sample_fastq_out != "") {
+    publishDir "results/${params.sample_fastq_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fastq)
+    // distinct name so the index id cannot overwrite the fastq file_id
+    tuple val(index_id), path(idx)
+
+  output:
+    tuple val(file_id), path("sub_*.fastq.gz"), emit: fastq
+
+  script:
+
+  // a bases target (-b) takes precedence over the coverage target (-c)
+  sample_option = "-c " + params.sample_fastq_coverage
+  if (params.sample_fastq_size != ""){
+    sample_option = "-b " + params.sample_fastq_size
+  }
+
+  // two staged files => paired-end command; otherwise a single-file command
+  // NOTE(review): for a lone path, size() may not be an element count - confirm
+  if (fastq.size() == 2)
+"""
+rasusa \
+  -i ${fastq[0]} ${fastq[1]} \
+  -g ${idx} \
+  ${sample_option} \
+  -o sub_${fastq[0].simpleName}.fastq.gz sub_${fastq[1].simpleName}.fastq.gz
+"""
+  else
+"""
+rasusa \
+  -i ${fastq} \
+  -g ${idx} \
+  ${sample_option} \
+  -o sub_${fastq.simpleName}.fastq.gz
+"""
+}
\ No newline at end of file
diff --git a/src/nf_modules/rasusa/test.nf b/src/nf_modules/rasusa/test.nf
new file mode 100644
index 00000000..0ee8e599
--- /dev/null
+++ b/src/nf_modules/rasusa/test.nf
@@ -0,0 +1,26 @@
+nextflow.enable.dsl=2
+
+/*
+./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" --coverage 1.0
+./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq" --size "1Mb"
+*/
+
+params.fastq = "data/fastq/*R{1,2}*"
+params.fasta = "data/fasta/*.fasta"
+params.coverage = "2.0"
+params.size = ""
+// defaults are declared first: addParams() reads params.* when the include line runs
+include { sample_fastq } from "./main.nf" addParams(sample_fastq_coverage: params.coverage, sample_fastq_size: params.size)
+
+// reads: grouped by the glob in params.fastq; size: -1 lifts the "exactly two files" constraint
+fastq_files = channel
+  .fromFilePairs(params.fastq, size: -1)
+
+// reference: one [simpleName, file] tuple per file matching params.fasta
+fasta_files = channel
+  .fromPath(params.fasta)
+  .map { fasta -> [fasta.simpleName, fasta] }
+
+workflow { // entry point: subsample the reads in fastq_files against the reference in fasta_files
+  sample_fastq(fastq_files, fasta_files.collect()) // collect() gathers the fasta items into a single emitted list
+}
\ No newline at end of file
-- 
GitLab