Replace the umi_tools dedup command with a samtools + awk filter in the four
dedup script blocks of RNAseq.nf and RibosomeProfiling.nf, and switch the
dedup_genome / dedup_postgenome containers to lbmc/hisat2:2.1.0.

Notes for reviewers:
- Line breaks and indentation in this patch were reconstructed from a
  whitespace-collapsed copy; apply with `git apply --ignore-whitespace` or
  re-indent against the target files. The `index` lines are kept from the
  original patch and no longer describe the new blobs.
- The filter writes <file_id>_dedup.bam while the removed command wrote
  <file_id>.dedup.bam; the matching `output:` declarations are not part of
  this patch - confirm they expect the new name.
- Assumes samtools is available inside lbmc/hisat2:2.1.0 - TODO confirm.
- The removed RNAseq command passed --paired; the filter evaluates every
  alignment record on its own - confirm this is intended for paired-end data.

diff --git a/src/RNAseq.config b/src/RNAseq.config
index a6909dd4b252f54881da1f3eda28a3afc814af3e..7dfd997fab1adb881f634d57e3ca0fca5e55cfac 100644
--- a/src/RNAseq.config
+++ b/src/RNAseq.config
@@ -82,8 +82,9 @@ profiles {
         queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
       }
       withName: dedup_genome {
-        container = "lbmc/umi_tools:1.0.0"
-        executor = "sge"
+        container = "lbmc/hisat2:2.1.0"
+        beforeScript = "source ~/.bashrc"
+        executor = "sge"
         clusterOptions = "-cwd -V"
         cpus = 1
         memory = "20GB"
@@ -91,7 +92,7 @@ profiles {
         queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
       }
       withName: dedup_postgenome {
-        container = "lbmc/umi_tools:1.0.0"
+        container = "lbmc/hisat2:2.1.0"
         executor = "sge"
         clusterOptions = "-cwd -V"
         cpus = 1
diff --git a/src/RNAseq.nf b/src/RNAseq.nf
index 7db3b045f6dbd98515c766e4529ec41822e85da4..53edf63dc37a6dafc7445be99a78db25ee79cb6f 100644
--- a/src/RNAseq.nf
+++ b/src/RNAseq.nf
@@ -369,12 +369,31 @@ if (params.do_dedup) {
     when:
       params.do_dedup
 
-    """
-    umi_tools dedup -I ${bam[0]} \
-      -S ${file_id}.dedup.bam \
-      --paired \
-      2> ${file_id}_dedup.log
-    """
+    shell:
+    '''
+    # De-duplicate alignments in a single pass and write a summary log.
+    # - SAM header lines (leading @) are copied through untouched and are
+    #   not counted as reads.
+    # - A read is a duplicate when the last 6 characters of its name
+    #   (presumably the UMI - TODO confirm), RNAME, POS and SEQ were all
+    #   seen before; the comma-separated subscript joins them with SUBSEP
+    #   so that adjacent fields cannot run together.
+    # - The percentage is truncated to an integer and reported as 0 when
+    #   the BAM holds no reads, which avoids a division by zero.
+    samtools view -h !{bam[0]} |
+      awk -v log_file="!{file_id}_dedup.log" '
+        /^@/ { print; next }
+        { input++ }
+        !seen[substr($1, length($1) - 5), $3, $4, $10]++ { output++; print }
+        END {
+          per = (input > 0) ? int((input - output) * 100 / input) : 0
+          print "Input : " (input + 0) " reads" > log_file
+          print "Output : " (output + 0) " reads" > log_file
+          print per " % duplicated reads" > log_file
+        }
+      ' |
+      samtools view -bS -o !{file_id}_dedup.bam -
+    '''
   }
 } else {
   HISAT_ALIGNED_DEDUP.set{DEDUP_GENOME}
@@ -531,12 +550,31 @@ if (params.do_dedup) {
     when:
       params.do_dedup
 
-    """
-    umi_tools dedup -I ${bam[0]} \
-      -S ${file_id}.dedup.bam \
-      --paired \
-      2> ${file_id}_dedup.log
-    """
+    shell:
+    '''
+    # De-duplicate alignments in a single pass and write a summary log.
+    # - SAM header lines (leading @) are copied through untouched and are
+    #   not counted as reads.
+    # - A read is a duplicate when the last 6 characters of its name
+    #   (presumably the UMI - TODO confirm), RNAME, POS and SEQ were all
+    #   seen before; the comma-separated subscript joins them with SUBSEP
+    #   so that adjacent fields cannot run together.
+    # - The percentage is truncated to an integer and reported as 0 when
+    #   the BAM holds no reads, which avoids a division by zero.
+    samtools view -h !{bam[0]} |
+      awk -v log_file="!{file_id}_dedup.log" '
+        /^@/ { print; next }
+        { input++ }
+        !seen[substr($1, length($1) - 5), $3, $4, $10]++ { output++; print }
+        END {
+          per = (input > 0) ? int((input - output) * 100 / input) : 0
+          print "Input : " (input + 0) " reads" > log_file
+          print "Output : " (output + 0) " reads" > log_file
+          print per " % duplicated reads" > log_file
+        }
+      ' |
+      samtools view -bS -o !{file_id}_dedup.bam -
+    '''
   }
 } else {
 
diff --git a/src/RibosomeProfiling.nf b/src/RibosomeProfiling.nf
index 7f572a23155e9f5e0a7ff39d7803a6f7379edd1e..b65679ec0f30939ce00e31e48761233b2f28ed5c 100644
--- a/src/RibosomeProfiling.nf
+++ b/src/RibosomeProfiling.nf
@@ -353,12 +353,32 @@ if (params.do_dedup) {
 
     when:
       params.do_dedup
-
-    """
-    umi_tools dedup -I ${bam[0]} \
-      -S ${file_id}.dedup.bam \
-      > ${file_id}_dedup.log
-    """
+
+    shell:
+    '''
+    # De-duplicate alignments in a single pass and write a summary log.
+    # - SAM header lines (leading @) are copied through untouched and are
+    #   not counted as reads.
+    # - A read is a duplicate when the last 6 characters of its name
+    #   (presumably the UMI - TODO confirm), RNAME, POS and SEQ were all
+    #   seen before; the comma-separated subscript joins them with SUBSEP
+    #   so that adjacent fields cannot run together.
+    # - The percentage is truncated to an integer and reported as 0 when
+    #   the BAM holds no reads, which avoids a division by zero.
+    samtools view -h !{bam[0]} |
+      awk -v log_file="!{file_id}_dedup.log" '
+        /^@/ { print; next }
+        { input++ }
+        !seen[substr($1, length($1) - 5), $3, $4, $10]++ { output++; print }
+        END {
+          per = (input > 0) ? int((input - output) * 100 / input) : 0
+          print "Input : " (input + 0) " reads" > log_file
+          print "Output : " (output + 0) " reads" > log_file
+          print per " % duplicated reads" > log_file
+        }
+      ' |
+      samtools view -bS -o !{file_id}_dedup.bam -
+    '''
   }
 } else {
   HISAT_ALIGNED_DEDUP.set{DEDUP_GENOME}
@@ -513,12 +533,33 @@ if (params.do_dedup){
 
     when:
       params.do_dedup
+
+    shell:
+    '''
+    # De-duplicate alignments in a single pass and write a summary log.
+    # - SAM header lines (leading @) are copied through untouched and are
+    #   not counted as reads.
+    # - A read is a duplicate when the last 6 characters of its name
+    #   (presumably the UMI - TODO confirm), RNAME, POS and SEQ were all
+    #   seen before; the comma-separated subscript joins them with SUBSEP
+    #   so that adjacent fields cannot run together.
+    # - The percentage is truncated to an integer and reported as 0 when
+    #   the BAM holds no reads, which avoids a division by zero.
+    samtools view -h !{bam[0]} |
+      awk -v log_file="!{file_id}_dedup.log" '
+        /^@/ { print; next }
+        { input++ }
+        !seen[substr($1, length($1) - 5), $3, $4, $10]++ { output++; print }
+        END {
+          per = (input > 0) ? int((input - output) * 100 / input) : 0
+          print "Input : " (input + 0) " reads" > log_file
+          print "Output : " (output + 0) " reads" > log_file
+          print per " % duplicated reads" > log_file
+        }
+      ' |
+      samtools view -bS -o !{file_id}_dedup.bam -
+    '''
 
-    """
-    umi_tools dedup -I ${bam[0]} \
-      -S ${file_id}.dedup.bam \
-      > ${file_id}_dedup.log
-    """
   }
 } else {
 