HTSeq module: name-sort alignments with samtools before counting.

Changes carried by this patch:
  * Dockerfile: pin pysam so htseq-count can read alignment files.
  * htseq.config: add a sort_bam selector to both profiles; point the sge
    HTSeq module load at the `counting` process (it was attached to a
    non-existent `trimming` process, so it never took effect).
  * htseq.nf: add a sort_bam process (samtools sort -n) feeding counting;
    pass `-r name` to htseq-count to match that name sort; turn the gtf
    channel into a value channel with collect() so every bam is counted,
    not just the first one; fix the "fastq" wording of the bam error message.
  * tests: run the module itself instead of a duplicated tests/counting.nf.

NOTE(review): this patch was re-flowed from a whitespace-collapsed copy.
Leading indentation and blank context lines were reconstructed from the hunk
line counts, and the `index` post-image hashes are those of the original
patch. Regenerate with `git diff` from a working tree before applying.

diff --git a/src/docker_modules/HTSeq/0.8.0/Dockerfile b/src/docker_modules/HTSeq/0.8.0/Dockerfile
index c655738fea3d89292f9ef5a9068893aafb6f38a1..a786b8026f66e9538c543f79624325ccb75952c6 100644
--- a/src/docker_modules/HTSeq/0.8.0/Dockerfile
+++ b/src/docker_modules/HTSeq/0.8.0/Dockerfile
@@ -14,4 +14,5 @@ RUN apt-get update && \
     apt-get clean
 
 RUN pip3 install numpy==1.14.3
+RUN pip3 install pysam==0.15.0
 RUN pip3 install HTSeq==${HTSEQ_VERSION}
diff --git a/src/nf_modules/HTSeq/htseq.config b/src/nf_modules/HTSeq/htseq.config
index ab3cc3a268f8c3d0f0233beb184c2ace5d9b7031..00f2fafb921828b21fe18d57240b9a943dcd2fb9 100644
--- a/src/nf_modules/HTSeq/htseq.config
+++ b/src/nf_modules/HTSeq/htseq.config
@@ -3,6 +3,9 @@ profiles {
     docker.temp = 'auto'
     docker.enabled = true
     process {
+      $sort_bam {
+        container = "samtools:1.7"
+      }
       $counting {
         container = "htseq:0.8.0"
       }
@@ -10,6 +13,9 @@ profiles {
   }
   sge {
     process{
+      $sort_bam {
+        beforeScript = "module purge; module load SAMtools/1.7"
+      }
-      $trimming {
+      $counting {
         beforeScript = "module purge; module load HTSeq/0.8.0"
       }
diff --git a/src/nf_modules/HTSeq/htseq.nf b/src/nf_modules/HTSeq/htseq.nf
index 5aa2f739bd64381724450640e9828a0b4fce1494..7cade9a55b17ced135f32a36ffd90dc5354b72af 100644
--- a/src/nf_modules/HTSeq/htseq.nf
+++ b/src/nf_modules/HTSeq/htseq.nf
@@ -1,11 +1,3 @@
-/*
-* htseq :
-* Imputs : sorted bams files
-* Imputs : gtf
-* Output : counts files
-*/
-/* quality trimming */
-
 params.bam = "$baseDir/data/bam/*.bam"
 params.gtf = "$baseDir/data/annotation/*.gtf"
 
@@ -15,18 +7,36 @@ log.info "gtf files : ${params.gtf}"
 Channel
   .fromPath( params.bam )
-  .ifEmpty { error "Cannot find any fastq files matching: ${params.bam}" }
+  .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" }
+  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
   .set { bam_files }
 Channel
   .fromPath( params.gtf )
   .ifEmpty { error "Cannot find any gtf file matching: ${params.gtf}" }
   .set { gtf_file }
 
+process sort_bam {
+  tag "$file_id"
+  cpus 4
+
+  input:
+    set file_id, file(bam) from bam_files
+
+  output:
+    set file_id, "*_sorted.sam" into sorted_bam_files
+
+  script:
+"""
+# sort bam by name
+samtools sort -@ ${task.cpus} -n -O SAM -o ${file_id}_sorted.sam ${bam}
+"""
+}
+
 process counting {
-  tag "$bam.baseName"
+  tag "$file_id"
   publishDir "results/quantification/", mode: 'copy'
 
   input:
-  file bam from bam_files
+  set file_id, file(bam) from sorted_bam_files
-  file gtf from gtf_file
+  file gtf from gtf_file.collect()
 
   output:
@@ -34,7 +44,9 @@ process counting {
 
   script:
 """
-htseq-count -r pos --mode=intersection-nonempty -a 10 -s no -t exon -i gene_id \
---format=bam ${bam} ${gtf} > ${bam.baseName}.count
+htseq-count ${bam} ${gtf} \
+-r name --mode=intersection-nonempty -a 10 -s no -t exon -i gene_id \
+> ${file_id}.count
 """
 }
+
diff --git a/src/nf_modules/HTSeq/tests/tests.sh b/src/nf_modules/HTSeq/tests.sh
similarity index 75%
rename from src/nf_modules/HTSeq/tests/tests.sh
rename to src/nf_modules/HTSeq/tests.sh
index 7ccef1815eb2f2e430095f764230160b26be85a6..f7255c2384d1b84e1f301c9494834954f8daa621 100755
--- a/src/nf_modules/HTSeq/tests/tests.sh
+++ b/src/nf_modules/HTSeq/tests.sh
@@ -1,4 +1,4 @@
-nextflow src/nf_modules/HTSeq/tests/counting.nf \
+nextflow src/nf_modules/HTSeq/htseq.nf \
   -c src/nf_modules/HTSeq/htseq.config \
   -profile docker \
   --gtf "data/tiny_dataset/annot/tiny.gff" \
diff --git a/src/nf_modules/HTSeq/tests/counting.nf b/src/nf_modules/HTSeq/tests/counting.nf
deleted file mode 100644
index f11736b1443f36e13b1986518f5de1c9187ca62e..0000000000000000000000000000000000000000
--- a/src/nf_modules/HTSeq/tests/counting.nf
+++ /dev/null
@@ -1,33 +0,0 @@
-params.bam = "$baseDir/data/bam/*.bam"
-params.gtf = "$baseDir/data/annotation/*.gtf"
-
-log.info "bam files : ${params.bam}"
-log.info "gtf files : ${params.gtf}"
-
-Channel
-  .fromPath( params.bam )
-  .ifEmpty { error "Cannot find any fastq files matching: ${params.bam}" }
-  .set { bam_files }
-Channel
-  .fromPath( params.gtf )
-  .ifEmpty { error "Cannot find any gtf file matching: ${params.gtf}" }
-  .set { gtf_file }
-
-process counting {
-  tag "$bam.baseName"
-  publishDir "results/quantification/", mode: 'copy'
-
-  input:
-  file bam from bam_files
-  file gtf from gtf_file
-
-  output:
-  file "*.count" into count_files
-
-  script:
-"""
-htseq-count -r pos --mode=intersection-nonempty -a 10 -s no -t exon -i gene_id \
---format=bam ${bam} ${gtf} > ${bam.baseName}.count
-"""
-}
-