From 372438eee97aa26614455436e680e2f91146dd4f Mon Sep 17 00:00:00 2001
From: xgrand <xavier.grand@ens-lyon.fr>
Date: Thu, 17 Aug 2023 17:55:12 +0200
Subject: [PATCH] Add alignment-score (AS) filtering; read length still needs
 to be taken into account.

---
 .../samtools/1.14/docker_init.sh              |  8 +++---
 src/.docker_modules/samtools/1.17/Dockerfile  |  1 +
 .../samtools/1.17/docker_init.sh              |  5 ++++
 src/bolero.nf                                 | 12 ++++++---
 src/nextflow.config                           |  2 +-
 src/nf_modules/porechop/main.nf               |  6 +++--
 src/nf_modules/samtools/main.nf               | 27 +++++++++++++++++--
 7 files changed, 48 insertions(+), 13 deletions(-)
 create mode 100644 src/.docker_modules/samtools/1.17/Dockerfile
 create mode 100755 src/.docker_modules/samtools/1.17/docker_init.sh

diff --git a/src/.docker_modules/samtools/1.14/docker_init.sh b/src/.docker_modules/samtools/1.14/docker_init.sh
index 1d4a215..d76951f 100755
--- a/src/.docker_modules/samtools/1.14/docker_init.sh
+++ b/src/.docker_modules/samtools/1.14/docker_init.sh
@@ -1,5 +1,5 @@
 #!/bin/sh
-docker pull lbmc/samtools:1.14
-# docker build src/.docker_modules/samtools/1.14 -t 'lbmc/samtools:1.14'
-# docker push lbmc/samtools:1.14
-docker buildx build --platform linux/amd64,linux/arm64 -t "lbmc/samtools:1.14" --push src/.docker_modules/samtools/1.14
+# docker pull xgrand/samtools:1.14
+docker build src/.docker_modules/samtools/1.14 -t 'xgrand/samtools:1.14'
+docker push xgrand/samtools:1.14
+# docker buildx build --platform linux/amd64,linux/arm64 -t "xgrand/samtools:1.14" --push src/.docker_modules/samtools/1.14
diff --git a/src/.docker_modules/samtools/1.17/Dockerfile b/src/.docker_modules/samtools/1.17/Dockerfile
new file mode 100644
index 0000000..b7d7973
--- /dev/null
+++ b/src/.docker_modules/samtools/1.17/Dockerfile
@@ -0,0 +1 @@
+FROM staphb/samtools:1.17-2023-06
\ No newline at end of file
diff --git a/src/.docker_modules/samtools/1.17/docker_init.sh b/src/.docker_modules/samtools/1.17/docker_init.sh
new file mode 100755
index 0000000..df2e0b6
--- /dev/null
+++ b/src/.docker_modules/samtools/1.17/docker_init.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# docker pull xgrand/samtools:1.17
+docker build src/.docker_modules/samtools/1.17 -t 'xgrand/samtools:1.17'
+docker push xgrand/samtools:1.17
+# docker buildx build --platform linux/amd64,linux/arm64 -t "xgrand/samtools:1.17" --push src/.docker_modules/samtools/1.17
diff --git a/src/bolero.nf b/src/bolero.nf
index 159be2a..2bb2162 100755
--- a/src/bolero.nf
+++ b/src/bolero.nf
@@ -104,10 +104,11 @@ params.kit_barcoding = ""
 params.basecalling_out = "01_basecalling/"
 params.barcoding_out = "02_barcoding/"
 params.fastq_out = "03_fastq/"
-params.seqkit_grep_out = "14_seqkit/"
-params.porechop_out = "15_porechop/"
+params.seqkit_grep_out = "03_fastq/"
+params.porechop_out = "03_fastq/"
 params.cutadapt_out = "04_cutadapt/"
 params.minimap2_genome_out = "05_minimap2/"
+params.filtered_bam_out = "05_minimap2/"
 params.start_position_counts_out = "06_start_positions/"
 params.nanosplicer_out = "07_nanosplicer/"
 params.rna_count_out = "08_RNA_count/"
@@ -171,7 +172,6 @@ if(!params.skipBC) {
   }
 }
 
-// include { barecode } from "./nf_modules/barecode/main.nf" 
 include { barcoding_cpu } from "./nf_modules/ont-guppy/main.nf"
 include { control_basecalling } from "./nf_modules/pycoqc/main.nf"
 include { control_bam } from "./nf_modules/pycoqc/main.nf"
@@ -182,6 +182,7 @@ include { seqkit_grep } from "./nf_modules/seqkit/main.nf"
 include { sort_bam } from './nf_modules/samtools/main.nf' addParams(sort_bam_out: params.minimap2_genome_out)
 include { index_bam } from './nf_modules/samtools/main.nf' addParams(index_bam_out: params.minimap2_genome_out)
 include { sort_index_bam } from './nf_modules/samtools/main.nf' addParams(indexed_bam_out: params.minimap2_genome_out)
+include { filter_as } from './nf_modules/samtools/main.nf'
 include { start_position_counts } from "./nf_modules/samtools/main.nf"
 include { start_position_individuals } from "./nf_modules/start_positions/main.nf"
 include { jwr_checker } from "./nf_modules/nanosplicer/main.nf"
@@ -258,8 +259,11 @@ workflow {
 
   hbv_genome(cut_5pRACE.out.fastq_cutadapt, genome.collect())
 
+  //Filter alignments on alignment score (keep AS >= 500)
+  filter_as(hbv_genome.out.bam)
+
   //Index
-  sort_index_bam(hbv_genome.out.bam)
+  sort_index_bam(filter_as.out.filtered_bam)
   
   //Quality control
   if(params.skipBC == false) {
diff --git a/src/nextflow.config b/src/nextflow.config
index 4b6a530..b087fb3 100755
--- a/src/nextflow.config
+++ b/src/nextflow.config
@@ -18,7 +18,7 @@ profiles {
     docker.enabled = true
     process {
       errorStrategy = 'finish'
-      memory = '12GB'
+      memory = '16GB'
       withLabel: big_mem_mono_cpus {
         cpus = 1
       }
diff --git a/src/nf_modules/porechop/main.nf b/src/nf_modules/porechop/main.nf
index 8f69462..8513104 100755
--- a/src/nf_modules/porechop/main.nf
+++ b/src/nf_modules/porechop/main.nf
@@ -4,7 +4,7 @@ container_url = "xgrand/porechop:${version}"
 params.porechop_out = ""
 process porechop {
     container = "${container_url}"
-    label "small_mem_multi_cpus"
+    label "big_mem_multi_cpus"
     tag "$barcode"
     if (params.porechop_out != "") {
       publishDir "results/${params.porechop_out}", mode: 'copy'
@@ -14,9 +14,11 @@ process porechop {
     tuple val(barcode), path(fastq)
 
   output:
-    tuple val(barcode), path("*"), emit: porechoped_fastq
+    tuple val(barcode), path("${barcode}/${barcode}_merged_porechoped.fastq.gz"), emit: porechoped_fastq
   script:
 """
-porechop --input ${fastq} -o ${barcode}_merged_porechoped.fastq.gz --threads ${task.cpus}
+mkdir ${barcode}
+cd ${barcode}/
+porechop --input ../${fastq} -o ${barcode}_merged_porechoped.fastq.gz --threads ${task.cpus} # input is staged in the task dir, one level up
 """
 }
\ No newline at end of file
diff --git a/src/nf_modules/samtools/main.nf b/src/nf_modules/samtools/main.nf
index 854c4f1..eae5423 100755
--- a/src/nf_modules/samtools/main.nf
+++ b/src/nf_modules/samtools/main.nf
@@ -1,5 +1,5 @@
-version = "1.7"
-container_url = "lbmc/samtools:${version}"
+version = "1.17"
+container_url = "xgrand/samtools:${version}"
 
 params.sort_bam_out =""
 process sort_bam {
@@ -87,4 +87,27 @@ cd ${barcode}/
 samtools sort -@ ${task.cpus} ../${bam} -o ${barcode}_sorted.bam
 samtools index -@ ${task.cpus} ${barcode}_sorted.bam
 """
+}
+
+params.filtered_bam_out = ""
+process filter_as {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "${barcode}"
+  if (params.filtered_bam_out != "") {
+    publishDir "results/${params.filtered_bam_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(barcode), path(bam)
+
+  output:
+    tuple val(barcode), path("${barcode}/*_AS500.bam"), emit: filtered_bam
+
+  script:
+"""
+mkdir ${barcode}
+cd ${barcode}/
+samtools view -Shb -e '[AS]>=500' -@ ${task.cpus} ../${bam} -o ${barcode}_AS500.bam
+"""
 }
\ No newline at end of file
-- 
GitLab