Merge branch 'Alia' of http://10.69.67.13/testoni-lab/bolero

4f985908 · Xavier Grand · 077d1693 · ca26a648 · 4f985908 · 4f985908
Commit 4f985908 authored 1 year ago by Xavier Grand
--- a/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R
+++ b/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R
@@ -13,7 +13,9 @@ option_list = list(
  make_option(c("-s", "--SPvariants"), type="character", default=NULL, 
              help="input identified SP variants table (.csv)", metavar="character"),
  make_option(c("-c", "--classification"), type="character", default=NULL, 
-              help="input classification of reads file (.txt)", metavar="character"))
+              help="input classification of reads file (.txt)", metavar="character"),
+  make_option(c("-b", "--barcode"), type="character", default=NULL, 
+              help="input barcode", metavar="character"))
 opt_parser = OptionParser(option_list=option_list)
 opt = parse_args(opt_parser)
@@ -85,7 +87,7 @@ ggplot(countSP, aes(x = "percent",
  scale_fill_manual(values = countSP$teinte) +
  labs(fill = "spliced-variants")
-ggsave(file = "SP_proportion_camembert.png",
+ggsave(file = paste0(opt$barcode, "_SP_proportion_piechart.png"),
       scale = 2,
       width = 1920,
       height = 1080,
@@ -100,7 +102,7 @@ ggplot(countSP, aes(x = nom, y = proportion, fill = nom)) +
  xlab(label = "spliced-variants") +
  ylab(label = "percent")
-ggsave(file = "SP_proportion.png",
+ggsave(file = paste0(opt$barcode, "_SP_proportion.png"),
       scale = 2,
       width = 1920,
       height = 1080,
@@ -127,7 +129,7 @@ count_species <- df_species %>% count(species)
 count_species <- dplyr::mutate(count_species,
                        percent = (as.numeric(n)/sum(as.numeric(n))*100))
 #print(count_species)
-write.table(df_species, file = "All_reads_identified.csv", 
+write.table(df_species, file = paste0(opt$barcode, "_all_reads_identified.csv"), 
            sep = "\t", quote = FALSE, row.names = FALSE)
 # Null dataset:
@@ -165,7 +167,7 @@ count_species_SPxx <- dplyr::mutate(count_species_SPxx,
                             percent=(as.numeric(n)/sum(as.numeric(n))*100))
 #print(count_species_SPxx)
 # save the tab:
-write.csv(count_species_SPxx, file = "Count_canonical_species_SPxx.csv")
+write.csv(count_species_SPxx, file = paste0(opt$barcode, "_count_canonical_species_SPxx.csv"))
 # prepare to plot:
 count_species_SPxx <- dplyr::inner_join(palette_complete,
@@ -176,7 +178,7 @@ count_species_SPxx <- dplyr::inner_join(palette_complete,
 count_species_SPxx$nom <- factor(count_species_SPxx$nom, levels = all_species_name)
 # Save:
-write.csv(count_species, file = "Count_species.csv")
+write.csv(count_species, file = paste0(opt$barcode, "_count_species.csv"))
 # RNA species composition all species:
 count_species <- dplyr::inner_join(palette_complete, count_species,
@@ -194,7 +196,7 @@ ggplot(count_species,
  labs(fill = "RNA species & spliced-variants") +
  xlab(label = "RNA species & spliced-variants")
-ggsave(file = "Count_RNAs_species.png",
+ggsave(file = paste0(opt$barcode, "_count_RNAs_species.png"),
       scale = 2,
       width = 1920,
       height = 1080,
@@ -220,7 +222,7 @@ ggplot(count_species_clear,
  labs(fill = "RNA species & spliced-variants") +
  xlab(label = "RNA species & spliced-variants")
-ggsave(file = "Count_RNAs_species_clear.png",
+ggsave(file = paste0(opt$barcode, "_count_RNAs_species_clear.png"),
       scale = 2,
       width = 1920,
       height = 1080,
@@ -246,7 +248,7 @@ ggplot(count_clear, aes(x = "percent",
  scale_fill_manual(values = count_clear$teinte) +
  labs(fill = "spliced-variants")
-ggsave(file = "SP_clear_proportion_camembert.png",
+ggsave(file = paste0(opt$barcode, "_SP_clear_proportion_piechart.png"),
       scale = 2,
       width = 1920,
       height = 1080,
@@ -263,7 +265,7 @@ ggplot(count_clear, aes(x = nom,
  xlab(label = "spliced-variants") +
  ylab(label = "percent")
-ggsave(file = "SP_clear_proportion.png",
+ggsave(file = paste0(opt$barcode, "_SP_clear_proportion.png"),
       scale = 2,
       width = 1920,
       height = 1080,
@@ -281,7 +283,7 @@ ggplot(count_species_SPxx, aes(x = "species",
  ylab(label = "TSS usage") +
  xlab(label = "percent")
-ggsave(file = "Count_RNAs_species_camembert.png",
+ggsave(file = paste0(opt$barcode, "_count_RNAs_species_piechart.png"),
       scale = 2,
       width = 1920,
       height = 1080,

--- a/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R
+++ b/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R
@@ -15,7 +15,9 @@ option_list = list(
  make_option(c("-c", "--classification"), type="character", default="./classification.txt", 
              help="input classification or reads file (.txt)", metavar="character"),
  make_option(c("-j", "--jwr"), type="character", default=NULL, 
-              help="input nanosplicer results table (.csv)", metavar="character"))
+              help="input nanosplicer results table (.csv)", metavar="character"),
+  make_option(c("-b", "--barcode"), type="character", default=NULL, 
+              help="input barcode", metavar="character"))
 opt_parser = OptionParser(option_list=option_list)
 opt = parse_args(opt_parser)
 reads_pos <- read.table(opt$classification,
@@ -113,7 +115,7 @@ df$acceptor_site <- sapply(df$pg_acceptor, assignation_acceptor)
 df <- dplyr::mutate(df,
             junction = paste0(donor_site, acceptor_site))
-write.table(df, file = "JWR_check_parsed.csv", row.names = FALSE, sep = "\t")
+write.table(df, file = paste0(opt$barcode, "_JWR_check_parsed.csv"), row.names = FALSE, sep = "\t")
 duplicated2 <- function(x){
  if (sum(dup <- duplicated(x))==0)
@@ -293,107 +295,8 @@ SP_variant_unique <- df_SPvariants %>% select(id, SP_name)
 SP_variant_unique <- SP_variant_unique[!duplicated(SP_variant_unique$id),] # distinct(SP_variant_unique, id)
 write.table(df_SPvariants, 
-            "identified_SPvariants.csv", 
+            paste0(opt$barcode, "_identified_SPvariants.csv"), 
            row.names = FALSE, 
            sep = "\t", 
            quote = FALSE)
-ggplot(df, aes(x=pg_donor)) +
-  geom_histogram(aes(y=after_stat(density)),color="darkblue", fill="lightblue") +
-  geom_density(alpha=.2, fill="lightblue") +
-  geom_vline(aes(xintercept=median(pg_donor)),
-              color="blue", linetype="dashed", linewidth=1) +
-  geom_vline(aes(xintercept=quantile(pg_donor, 0.025)),
-           linetype="dashed", linewidth=0.25) +
-  geom_vline(aes(xintercept=quantile(pg_donor, 0.975)),
-           linetype="dashed", linewidth=0.25) +
-  geom_vline(aes(xintercept=quantile(pg_donor, 0.01)),
-             color="green", linetype="dashed", linewidth=0.25) +
-  geom_vline(aes(xintercept=quantile(pg_donor, 0.99)),
-             color="green", linetype="dashed", linewidth=0.25) +
-  geom_vline(aes(xintercept=(median(pg_donor)+sd(pg_donor))),
-             color="red", linewidth=0.5) +
-  geom_vline(aes(xintercept=(median(pg_donor)-sd(pg_donor))),
-             color="red", linewidth=0.5) +
-  scale_x_continuous(breaks=c(min(df$pg_donor), 
-                              quantile(df$pg_donor, 0.025),
-                              quantile(df$pg_donor, 0.005),
-                              median(df$pg_donor)-sd(df$pg_donor),
-                              median(df$pg_donor),
-                              median(df$pg_donor)+sd(df$pg_donor),
-                              quantile(df$pg_donor, 0.975),
-                              quantile(df$pg_donor, 0.995),
-                              max(df$pg_donor)),
-                     label = c(min(df$pg_donor),
-                               floor(quantile(df$pg_donor, 0.025)),
-                               floor(quantile(df$pg_donor, 0.005)),
-                               round(median(df$pg_donor)-sd(df$pg_donor)),
-                               median(df$pg_donor),
-                               round(median(df$pg_donor)+sd(df$pg_donor)),
-                               floor(quantile(df$pg_donor, 0.975))+1,
-                               round(quantile(df$pg_donor, 0.995))+1,
-                               max(df$pg_donor))) +
-  theme(axis.text.x = element_text(angle = 45))
-ggsave(filename = "Donor_curve.png",
-       device = "png",
-       scale = 1,
-       width = 1920,
-       height = 1080,
-       units = "px",
-       dpi = 320)
-ggplot(df, aes(x=pg_acceptor)) +
-  geom_histogram(aes(y=after_stat(density)),color="red", fill="darksalmon") +
-  geom_density(alpha=.2, fill="darksalmon") +
-  geom_vline(aes(xintercept=median(pg_acceptor)),
-             color="red", linetype="dashed", linewidth=1) +
-  geom_vline(aes(xintercept=quantile(pg_acceptor, 0.025)),
-           linetype="dashed", linewidth=0.25) +
-  geom_vline(aes(xintercept=quantile(pg_acceptor, 0.975)),
-           linetype="dashed", linewidth=0.25) +
-  geom_vline(aes(xintercept=quantile(pg_acceptor, 0.005)),
-             color="green", linetype="dashed", linewidth=0.25) +
-  geom_vline(aes(xintercept=quantile(pg_acceptor, 0.995)),
-             color="green", linetype="dashed", linewidth=0.25) +
-  geom_vline(aes(xintercept=(median(pg_acceptor)+sd(pg_acceptor))),
-             color="blue", linewidth=0.5) +
-  geom_vline(aes(xintercept=(median(pg_acceptor)-sd(pg_acceptor))),
-             color="blue", linewidth=0.5) +
-  scale_x_continuous(breaks=c(min(df$pg_acceptor), 
-                              quantile(df$pg_acceptor, 0.025),
-                              quantile(df$pg_acceptor, 0.005),
-                              median(df$pg_acceptor)-sd(df$pg_acceptor),
-                              median(df$pg_acceptor),
-                              median(df$pg_acceptor)+sd(df$pg_acceptor),
-                              quantile(df$pg_acceptor, 0.975),
-                              quantile(df$pg_acceptor, 0.995),
-                              max(df$pg_acceptor)),
-                       label = c(min(df$pg_acceptor),
-                                 floor(quantile(df$pg_acceptor, 0.025)),
-                                 floor(quantile(df$pg_acceptor, 0.005)),
-                                 round(median(df$pg_acceptor)-sd(df$pg_acceptor)),
-                                 median(df$pg_acceptor),
-                                 round(median(df$pg_acceptor)+sd(df$pg_acceptor)),
-                                 floor(quantile(df$pg_acceptor, 0.975))+1,
-                                 floor(quantile(df$pg_acceptor, 0.995))+1,
-                                 max(df$pg_acceptor))) +
-  theme(axis.text.x = element_text(angle = 45))
-ggsave(filename = "Acceptor_curve.png",
-       device = "png",
-       scale = 1,
-       width = 1920,
-       height = 1080,
-       units = "px",
-       dpi = 320)
-# Graphs and tests:
-# sink("test_shapiro.txt")
-# print("Normality test: Shapiro-Wilk")
-# print("Donor site:")
-# print(shapiro.test(df$pg_donor))
-# print("Acceptor site:")
-# print(shapiro.test(df$pg_acceptor))
-# sink()
\ No newline at end of file
--- a/src/.docker_modules/r-bolero/1.0/Start_positions.R
+++ b/src/.docker_modules/r-bolero/1.0/Start_positions.R
@@ -14,7 +14,9 @@ conflict_prefer("lag", "dplyr")
 # Load Start_positions_count files:
 option_list = list(
  make_option(c("-i", "--input"), type="character", default=NULL, 
-              help="input start position file (.txt)", metavar="character")
+              help="input start position file (.txt)", metavar="character"),
+  make_option(c("-b", "--barcode"), type="character", default=NULL, 
+              help="input barcode", metavar="character")
 )
 opt_parser = OptionParser(option_list=option_list)
@@ -109,7 +111,7 @@ sam_bc01$promoter <- sapply(sam_bc01$start_position,
                            classify_reads)
 write.table(sam_bc01,
-            file = "classification_of_reads_per_RNA.txt",
+            file = paste0(opt$barcode, "_classification_of_reads_per_RNA.txt"),
            quote = FALSE, 
            sep = "\t", 
            row.names = FALSE)
@@ -164,7 +166,7 @@ abs_count_reads <- cbind(c(as.vector(promoters),"total"), abs_count_reads)
 colnames(abs_count_reads) <- c("promoter", "read_number")
 write.table(abs_count_reads,
-            file = "Count_reads_per_promoter.tsv",
+            file = paste0(opt$barcode, "_count_reads_per_promoter.tsv"),
            quote = FALSE, 
            sep = "\t", 
            row.names = FALSE)
@@ -201,7 +203,7 @@ plot_camembert <- function(barcode, df, tot) {
  print(camembert)
-  ggsave(filename = paste0("./Reads_start_promoters_", barcode, "_camembert.png"),
+  ggsave(filename = paste0("./", opt$barcode, "_reads_start_promoters_piechart.png"),
         plot = last_plot(),
         scale = 1,
         width = 1920,

--- a/src/bolero.nf
+++ b/src/bolero.nf
@@ -129,9 +129,9 @@ Channel
    .set { input }
 Channel
-  .of( params.adapt )
+    .of( params.adapt )
-  .ifEmpty { error "No adapter sequence defined." }
+    .ifEmpty { error "No adapter sequence defined." }
-  .set { adapt }
+    .set { adapt }
 Channel
    .fromPath( params.genome )
@@ -143,7 +143,10 @@ Channel
    .ifEmpty { error "No annotation defined, a gtf file describing transcripts and splice variants." }
    .set { gtf }
-// .map( it -> [it.baseName, it])
+Channel
+    .fromPath(params.input+'*/', type: 'dir')
+    .map(it -> [it.baseName, it])
+    .set{barcodes}
 /*
 ****************************************************************
@@ -161,10 +164,8 @@ if(!params.skipBC) {
  }
 }
-// Replace concatenate by seqkit fct to parallelization:
+include { barecode } from "./nf_modules/barecode/main.nf" 
 include { concatenate } from "./nf_modules/seqkit/main.nf"
-//include { concatenate } from "./nf_modules/concatenate/main.nf"
 include { cut_5pRACE } from "./nf_modules/cutadapt/main.nf"
 include { hbv_genome } from "./nf_modules/minimap2/main.nf"
 include { seqkit_grep } from "./nf_modules/seqkit/main.nf"
@@ -178,9 +179,6 @@ include { junctions_nanosplicer } from "./nf_modules/junction_nanosplicer/main.n
 include { rna_count } from "./nf_modules/rna_count/main.nf"
-// creation des fonctions NanoSplicer:
-// include { jwr_check } from "./nf_modules/nanosplicer/main.nf"
 /*
 ****************************************************************
                          Workflow
@@ -189,42 +187,41 @@ include { rna_count } from "./nf_modules/rna_count/main.nf"
 workflow {
  //######################## BASECALLING ########################
-  if(params.skipBC) {
+  if(params.skipBC) { // we take fastq files as input and skip basecalling
-    concatenate(params.input)
+    concatenate(barcodes)
-    // Replace by seqkit scat to parallelization
  }
-  else {
+  // this part still needs to be adapted
+  else { // we take fast5 files as input and proceed to basecalling with guppy
    if(params.gpu_mode) {
      basecall_fast5_gpu(input)
      concatenate(basecall_fast5_gpu.out.pass)
-      // Replace by seqkit scat to parallelization
    }
    else {
      basecall_fast5_cpu(input)
      concatenate(basecall_fast5_cpu.out.pass)
-      // Replace by seqkit scat to parallelization
    }
  }
  //####################### PREPROCESSING #######################
  //Filtration (seqkit_grep looks for the 5'RACE and the gsp patterns in the reads to keep only mature ARNs)
  seqkit_grep(concatenate.out.merged_fastq, params.adapt, params.gsp)
  //Cut of the 5'RACE sequence
  cut_5pRACE(seqkit_grep.out.filtered_fastq, params.adapt)
  //########################## MAPPING ##########################
+  hbv_genome(cut_5pRACE.out.fastq_cutadapt, genome.collect())
-  hbv_genome(cut_5pRACE.out.fastq_cutadapt, genome)
  sort_index_bam(hbv_genome.out.bam)
-  // index_bam(sort_bam_genome.out.sorted_bam.collect())
  //###################### START POSITIONS #######################

--- a/src/nf_modules/cutadapt/main.nf
+++ b/src/nf_modules/cutadapt/main.nf
@@ -4,23 +4,23 @@ container_url = "xgrand/cutadapt:${version}"
 process cut_5pRACE {
  container = "${container_url}"
  label "small_mem_mono_cpus"
-  tag "cutadapt"
+  tag "${barcode}"
  if (params.cutadapt_out != "") {
    publishDir "results/${params.cutadapt_out}", mode: 'copy'
  }
  input:
-  path(fastq)
+  tuple val(barcode), path(fastq)
  val(adapt)
  output:
-  path("*_cut_*"), emit: fastq_cutadapt
+  tuple val(barcode), path("${barcode}_merged_porechoped_cut_fastq.fastq"), emit: fastq_cutadapt
  """
  cutadapt -e 0.2 -g ${adapt} \
   --revcomp \
-   -o "merged_porechoped_cut_fastq.fastq" \
+   -o "${barcode}_merged_porechoped_cut_fastq.fastq" \
   ${fastq}
  """
 }
\ No newline at end of file
--- a/src/nf_modules/junction_nanosplicer/main.nf
+++ b/src/nf_modules/junction_nanosplicer/main.nf
@@ -5,23 +5,24 @@ params.nanosplicer_out = ""
 process junctions_nanosplicer{
  container = "${container_url}"
  label "small_mem_mono_cpus"
-  tag "identification de variants d'épissage"
+  tag "${barcode}"
  if (params.nanosplicer_out != "") {
    publishDir "results/${params.nanosplicer_out}", mode: 'copy'
  }
  input:
-    path(txt)
+    tuple val(barcode), path(txt)
-    path(csv)
+    tuple val(barcode), path(csv)
  output:
-    path("Rplots.pdf")
+    path("${barcode}/JWR_check_parsed.csv")
-    path("JWR_check_parsed.csv")
+    tuple val(barcode), path("${barcode}/${barcode}_identified_SPvariants.csv"), emit: identified_SPvariants
-    path("*.png")
-    path("identified_SPvariants.csv"), emit: identified_SPvariants
  script:
    """
-    Rscript /Junctions_NanoSplicer.R -c ${txt} -j ${csv}
+    mkdir ${barcode}
+    cd ${barcode}/
+    Rscript /Junctions_NanoSplicer.R -c ../${txt} -j ../${csv}
+    mv identified_SPvariants.csv ${barcode}_identified_SPvariants.csv
    """
 }
\ No newline at end of file
--- a/src/nf_modules/minimap2/main.nf
+++ b/src/nf_modules/minimap2/main.nf
@@ -89,22 +89,25 @@ params.mapping_hbv_genome = "-ax splice --secondary=no -G 1650 -u n --eqx"
 process hbv_genome {
  container = "${container_url}"
  label "big_mem_multi_cpus"
+  tag "${barcode}"
  if (params.minimap2_genome_out != "") {
    publishDir "results/${params.minimap2_genome_out}", mode: 'copy'
  }
  input:
-  path(fastq)
+  tuple val(barcode), path(fastq)
  path(genome)
  output:
-  path("*"), emit: bam
+  tuple val(barcode), path("${barcode}/${barcode}_res.bam"), emit: bam
  script:
  memory = "${task.memory}" - ~/\s*GB/
  memory = memory.toInteger() / (task.cpus + 1.0)
  """
-  minimap2 ${params.mapping_hbv_genome} -t${task.cpus} -K ${memory} ${genome} ${fastq} |
+  mkdir ${barcode}
-    samtools view -Shb - > res.bam
+  cd ${barcode}/
+  minimap2 ${params.mapping_hbv_genome} -t ${task.cpus} -K ${memory} ../${genome} ../${fastq} |
+    samtools view -Shb - > ${barcode}_res.bam
  """
 }
\ No newline at end of file
--- a/src/nf_modules/nanosplicer/main.nf
+++ b/src/nf_modules/nanosplicer/main.nf
+version = "1.0"
+container_url = "xgrand/nanosplicer:${version}"
+params.nanosplicer_out = ""
+process jwr_checker {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "${barcode}"
+  if (params.nanosplicer_out != "") {
+    publishDir "results/${params.nanosplicer_out}", mode: 'copy'
+  }
+  input:
+    tuple val(barcode), path(bam), path(index)
+  output:
+    tuple val(barcode), path("${barcode}/${barcode}_JWR_check.h5.csv"), emit: nanosplicer_jwr
+  script:
+    """
+    mkdir ${barcode}
+    cd ${barcode}/
+    python3 /NanoSplicer/bin/JWR_checker.py --output_csv ../${bam} ${barcode}_JWR_check.h5 
+    """
+}
--- a/src/nf_modules/rna_count/main.nf
+++ b/src/nf_modules/rna_count/main.nf
@@ -5,22 +5,24 @@ params.rna_count_out = ""
 process rna_count{
  container = "${container_url}"
  label "small_mem_mono_cpus"
-  tag "RNA quantification"
+  tag "${barcode}"
  if (params.rna_count_out != "") {
    publishDir "results/${params.rna_count_out}", mode: 'copy'
  }
  input:
-    path(spvariants)
+    tuple val(barcode), path(spvariants)
-    path(classification)
+    tuple val(barcode), path(classification)
  output:
-    path("*.csv")
+    path("${barcode}/*.csv")
-    path("*.pdf")
+    path("${barcode}/*.pdf")
-    path("*.png")
+    path("${barcode}/*.png")
  script:
    """
-    Rscript /HBV_RNAs_count.R -s ${spvariants} -c ${classification}
+    mkdir ${barcode}
+    cd ${barcode}/
+    Rscript /HBV_RNAs_count.R -s ../${spvariants} -c ../${classification}
    """
 }
--- a/src/nf_modules/samtools/main.nf
+++ b/src/nf_modules/samtools/main.nf
@@ -24,21 +24,23 @@ samtools sort -@ ${task.cpus} ${bam} -O BAM -o ${bam.simpleName}_sorted.bam
 params.start_position_counts_out = ""
 process start_position_counts {
-    tag "Start positions count"
+    tag "${barcode}"
    label "big_mem_multi_cpus"
    publishDir "results/${params.start_position_counts_out}", mode: 'copy'
    input:
-        tuple path(bam), path(index)
+        tuple val(barcode), path(bam), path(index)
    output:
-        path "*", emit: count
+        tuple val(barcode), path("${barcode}/${barcode}_start_positions_counts.txt"), emit: count
    script:
 """
-samtools view -F 260 ${bam} |
+mkdir ${barcode}
+cd ${barcode}/
+samtools view -F 260 ../${bam} |
  cut -f 1,4 |
-  sort > Start_positions_counts.txt
+  sort > ${barcode}_start_positions_counts.txt
 """
 }
@@ -67,20 +69,22 @@ params.indexed_bam_out =""
 process sort_index_bam {
  container = "${container_url}"
  label "big_mem_multi_cpus"
-  tag "sorting"
+  tag "${barcode}"
  if (params.indexed_bam_out != "") {
    publishDir "results/${params.indexed_bam_out}", mode: 'copy'
  }
  input:
-    path(bam)
+    tuple val(barcode), path(bam)
  output:
-    tuple path("*sorted.bam"), path("*.bai"), emit: indexed_bam
+    tuple val(barcode), path("${barcode}/*sorted.bam"), path("${barcode}/*.bai"), emit: indexed_bam
  script:
 """
-samtools sort -@ ${task.cpus} ${bam} -o ${bam.simpleName}_sorted.bam
+mkdir ${barcode}
-samtools index -@ ${task.cpus} ${bam.simpleName}_sorted.bam
+cd ${barcode}/
+samtools sort -@ ${task.cpus} ../${bam} -o ${barcode}_sorted.bam
+samtools index -@ ${task.cpus} ${barcode}_sorted.bam
 """
 }
\ No newline at end of file
--- a/src/nf_modules/seqkit/main.nf
+++ b/src/nf_modules/seqkit/main.nf
@@ -29,35 +29,37 @@ params.seqkit_grep_out = ""
 process seqkit_grep {
  container = "${container_url}"
  label "small_mem_multi_cpus"
-  tag "Filter_reads"
+  tag "${barcode}"
  if (params.seqkit_grep_out != "") {
    publishDir "results/${params.seqkit_grep_out}", mode: 'copy'
  }
  input:
-    path(fastq)
+    tuple val(barcode), path(fastq)
    val(adapt)
    val(gsp)
  output:
-    path("filtered_5RACE_GSP.fastq"), emit: filtered_fastq
+    tuple val(barcode), path("${barcode}/${barcode}_filtered_5RACE_GSP.fastq"), emit: filtered_fastq
-    path("seq_stats.csv")
+    path("${barcode}/*.csv")
-    path("*.txt")
+    path("${barcode}/*.txt")
-    path("filtered_5RACE.fastq")
+    path("${barcode}/${barcode}_filtered_5RACE.fastq")
  script:
    lgadapt = Math.round(adapt.size().div(10))
    lggsp = Math.round(gsp.size().div(10))
    """
+    mkdir ${barcode}
+    cd ${barcode}/
    echo "mismatch allowed to 5'RACE adapter:  ${lgadapt}" > mismatch.txt
    echo "mismatch allowed to Gene Specific primer:  ${lggsp}" >> mismatch.txt
    echo ${adapt} > adapt.txt
    echo ${gsp} > gsp.txt
-    seqkit grep -i -f adapt.txt -m ${lgadapt} ${fastq} -o filtered_5RACE.fastq -j ${task.cpus}
+    seqkit grep -i -f adapt.txt -m ${lgadapt} ../${fastq} -o ${barcode}_filtered_5RACE.fastq -j ${task.cpus}
-    seqkit grep -i -f gsp.txt -m ${lggsp} filtered_5RACE.fastq -o filtered_5RACE_GSP.fastq -j ${task.cpus}
+    seqkit grep -i -f gsp.txt -m ${lggsp} ${barcode}_filtered_5RACE.fastq -o ${barcode}_filtered_5RACE_GSP.fastq -j ${task.cpus}
-    seqkit stats ${fastq} -T -j ${task.cpus} > seq_stats.csv
+    seqkit stats ../${fastq} -T -j ${task.cpus} > ${barcode}_seq_stats.csv
-    seqkit stats filtered_5RACE.fastq -T -j ${task.cpus} | tail -n1 >> seq_stats.csv
+    seqkit stats ${barcode}_filtered_5RACE.fastq -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv
-    seqkit stats filtered_5RACE_GSP.fastq -T -j ${task.cpus} | tail -n1 >> seq_stats.csv
+    seqkit stats ${barcode}_filtered_5RACE_GSP.fastq -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv
    """
 }
@@ -65,21 +67,24 @@ params.fastq_out = ""
 process concatenate {
  container = "${container_url}"
  label "big_mem_multi_cpus"
-  tag "Concatenate_reads"
+  tag "${barcode}"
  if (params.fastq_out != "") {
    publishDir "results/${params.fastq_out}", mode: 'copy'
  }
  input:
-    path fastq
+    tuple val(barcode), path(fastq)
  output:
-    path "merged.fastq.gz", emit: merged_fastq
+    tuple val(barcode), path("${barcode}/${barcode}_merged.fastq.gz"), emit: merged_fastq
  script:
    """
-    path=\$(readlink -f ${fastq})
+    mv ${fastq} path_${fastq}
-    seqkit scat -j ${task.cpus} -f \${path} --gz-only > merged.fastq
+    mkdir ${barcode}
-    gzip merged.fastq
+    cd ${barcode}/
+    path=\$(readlink -f ../path_${fastq})
+    seqkit scat -j ${task.cpus} -f \${path} --gz-only > ${barcode}_merged.fastq
+    gzip ${barcode}_merged.fastq
    """
 }
\ No newline at end of file
--- a/src/nf_modules/start_positions/main.nf
+++ b/src/nf_modules/start_positions/main.nf
@@ -5,21 +5,26 @@ params.start_position_counts_out =""
 process start_position_individuals{
  container = "${container_url}"
  label "small_mem_mono_cpus"
-  tag "start positions"
+  tag "${barcode}"
  if (params.start_position_counts_out != "") {
    publishDir "results/${params.start_position_counts_out}", mode: 'copy'
  }
  input:
-    path(start_position_counts)
+    tuple val(barcode), path(start_position_counts)
  output:
-    path("Rplots.pdf")
+    path("${barcode}/*.pdf")
-    path("*.png")
+    path("${barcode}/*.png")
-    path("Count_reads_per_promoter.tsv")
+    path("${barcode}/*.tsv")
-    path("classification_of_reads_per_RNA.txt"), emit: classification_of_reads
+    tuple val(barcode), path("${barcode}/${barcode}_classification_of_reads_per_RNA.txt"), emit: classification_of_reads
  script:
    """
-    Rscript /Start_positions.R -i ${start_position_counts}
+    mkdir ${barcode}
+    cd ${barcode}/
+    Rscript /Start_positions.R -i ../${start_position_counts}
+    mv classification_of_reads_per_RNA.txt ${barcode}_classification_of_reads_per_RNA.txt
+    mv Count_reads_per_promoter.tsv ${barcode}_count_reads_per_promoter.tsv
+    mv Rplots.pdf ${barcode}_Rplots.pdf
    """
 }
\ No newline at end of file