Skip to content
Snippets Groups Projects
Commit 7fa9c529 authored by elabaron
Browse files

modify dedup option using awk

parent 3b7be876
No related branches found
No related tags found
No related merge requests found
...@@ -82,7 +82,8 @@ profiles { ...@@ -82,7 +82,8 @@ profiles {
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
} }
withName: dedup_genome { withName: dedup_genome {
container = "lbmc/umi_tools:1.0.0" container = "lbmc/hisat2:2.1.0"
beforeScript = "source ~/.bashrc"
executor = "sge" executor = "sge"
clusterOptions = "-cwd -V" clusterOptions = "-cwd -V"
cpus = 1 cpus = 1
...@@ -91,7 +92,7 @@ profiles { ...@@ -91,7 +92,7 @@ profiles {
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
} }
withName: dedup_postgenome { withName: dedup_postgenome {
container = "lbmc/umi_tools:1.0.0" container = "lbmc/hisat2:2.1.0"
executor = "sge" executor = "sge"
clusterOptions = "-cwd -V" clusterOptions = "-cwd -V"
cpus = 1 cpus = 1
......
...@@ -369,12 +369,17 @@ if (params.do_dedup) { ...@@ -369,12 +369,17 @@ if (params.do_dedup) {
when: when:
params.do_dedup params.do_dedup
""" shell:
umi_tools dedup -I ${bam[0]} \ '''
-S ${file_id}.dedup.bam \ samtools view -h !{bam[0]} | awk '!seen[substr($1, length($1)-5) $3 $4 $10]++' | samtools view -bS -o !{file_id}_dedup.bam
--paired \ input=$(samtools view -h !{bam[0]} | wc -l)
2> ${file_id}_dedup.log output=$(samtools view -h !{bam[0]} | awk '!seen[substr($1, length($1)-5) $3 $4 $10]++' | wc -l)
""" diff=$(($input - $output))
per=$(($diff * 100 / $input))
echo "Input : $input reads" > !{file_id}_dedup.log
echo "Output : $output reads" >> !{file_id}_dedup.log
echo "$per % duplicated reads" >> !{file_id}_dedup.log
'''
} }
} else { } else {
HISAT_ALIGNED_DEDUP.set{DEDUP_GENOME} HISAT_ALIGNED_DEDUP.set{DEDUP_GENOME}
...@@ -531,12 +536,17 @@ if (params.do_dedup) { ...@@ -531,12 +536,17 @@ if (params.do_dedup) {
when: when:
params.do_dedup params.do_dedup
""" shell:
umi_tools dedup -I ${bam[0]} \ '''
-S ${file_id}.dedup.bam \ samtools view -h !{bam[0]} | awk '!seen[substr($1, length($1)-5) $3 $4 $10]++' | samtools view -bS -o !{file_id}_dedup.bam
--paired \ input=$(samtools view -h !{bam[0]} | wc -l)
2> ${file_id}_dedup.log output=$(samtools view -h !{bam[0]} | awk '!seen[substr($1, length($1)-5) $3 $4 $10]++' | wc -l)
""" diff=$(($input - $output))
per=$(($diff * 100 / $input))
echo "Input : $input reads" > !{file_id}_dedup.log
echo "Output : $output reads" >> !{file_id}_dedup.log
echo "$per % duplicated reads" >> !{file_id}_dedup.log
'''
} }
} else { } else {
......
...@@ -354,11 +354,17 @@ if (params.do_dedup) { ...@@ -354,11 +354,17 @@ if (params.do_dedup) {
when: when:
params.do_dedup params.do_dedup
""" shell:
umi_tools dedup -I ${bam[0]} \ '''
-S ${file_id}.dedup.bam \ samtools view -h !{bam[0]} | awk '!seen[substr($1, length($1)-5) $3 $4 $10]++' | samtools view -bS -o !{file_id}_dedup.bam
> ${file_id}_dedup.log input=$(samtools view -h !{bam[0]} | wc -l)
""" output=$(samtools view -h !{bam[0]} | awk '!seen[substr($1, length($1)-5) $3 $4 $10]++' | wc -l)
diff=$(($input - $output))
per=$(($diff * 100 / $input))
echo "Input : $input reads" > !{file_id}_dedup.log
echo "Output : $output reads" >> !{file_id}_dedup.log
echo "$per % duplicated reads" >> !{file_id}_dedup.log
'''
} }
} else { } else {
HISAT_ALIGNED_DEDUP.set{DEDUP_GENOME} HISAT_ALIGNED_DEDUP.set{DEDUP_GENOME}
...@@ -514,11 +520,18 @@ if (params.do_dedup){ ...@@ -514,11 +520,18 @@ if (params.do_dedup){
when: when:
params.do_dedup params.do_dedup
""" shell:
umi_tools dedup -I ${bam[0]} \ '''
-S ${file_id}.dedup.bam \ samtools view -h !{bam[0]} | awk '!seen[substr($1, length($1)-5) $3 $4 $10]++' | samtools view -bS -o !{file_id}_dedup.bam
> ${file_id}_dedup.log input=$(samtools view -h !{bam[0]} | wc -l)
""" output=$(samtools view -h !{bam[0]} | awk '!seen[substr($1, length($1)-5) $3 $4 $10]++' | wc -l)
diff=$(($input - $output))
per=$(($diff * 100 / $input))
echo "Input : $input reads" > !{file_id}_dedup.log
echo "Output : $output reads" >> !{file_id}_dedup.log
echo "$per % duplicated reads" >> !{file_id}_dedup.log
'''
} }
} else { } else {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment