Skip to content
Snippets Groups Projects
Verified Commit 0d1550a8 authored by Laurent Modolo's avatar Laurent Modolo
Browse files

HTSeq: update nf structure

parent 419991ef
No related branches found
No related tags found
No related merge requests found
......@@ -14,4 +14,5 @@ RUN apt-get update && \
apt-get clean
RUN pip3 install numpy==1.14.3
RUN pip3 install pysam==0.15.0
RUN pip3 install HTSeq==${HTSEQ_VERSION}
......@@ -3,6 +3,9 @@ profiles {
docker.temp = 'auto'
docker.enabled = true
process {
$sort_bam {
container = "samtools:1.7"
}
$counting {
container = "htseq:0.8.0"
}
......@@ -10,6 +13,9 @@ profiles {
}
sge {
process{
$sort_bam {
beforeScript = "module purge; module load SAMtools/1.7"
}
// Load the HTSeq module before running the `counting` process, which is the
// process that calls htseq-count. The selector must match the process name,
// as it does in the docker profile.
$counting {
beforeScript = "module purge; module load HTSeq/0.8.0"
}
......
/*
* htseq :
* Inputs : sorted bam files
* Inputs : gtf
* Output : counts files
*/
/* quality trimming */
params.bam = "$baseDir/data/bam/*.bam"
params.gtf = "$baseDir/data/annotation/*.gtf"
......@@ -15,18 +7,36 @@ log.info "gtf files : ${params.gtf}"
// BAM inputs: emit [file_id, bam] pairs, where file_id is the part of the
// file's base name that precedes the first dot.
Channel
  .fromPath( params.bam )
  .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" }
  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
  .set { bam_files }

// Annotation input: every file matching params.gtf.
Channel
  .fromPath( params.gtf )
  .ifEmpty { error "Cannot find any gtf file matching: ${params.gtf}" }
  .set { gtf_file }
// sort_bam: sort one alignment file by read name with samtools and write the
// result in SAM format.
// Input : [file_id, bam] pairs taken from the `bam_files` channel.
// Output: [file_id, <file_id>_sorted.sam] pairs sent to `sorted_bam_files`.
// NOTE(review): the sort is by name (`-n`), while the htseq-count call in the
// counting process is given `-r pos` — confirm which read order is intended.
process sort_bam {
// label each task with the sample identifier
tag "$file_id"
// number of CPUs requested; forwarded to `samtools sort -@` via ${task.cpus}
cpus 4
input:
set file_id, file(bam) from bam_files
output:
// the glob picks up the ${file_id}_sorted.sam file written by the script
set file_id, "*_sorted.sam" into sorted_bam_files
script:
"""
# sort bam by name
samtools sort -@ ${task.cpus} -n -O SAM -o ${file_id}_sorted.sam ${bam}
"""
}
process counting {
tag "$bam.baseName"
tag "$file_id"
publishDir "results/quantification/", mode: 'copy'
input:
file bam from bam_files
set file_id, file(bam) from sorted_bam_files
file gtf from gtf_file
output:
......@@ -34,7 +44,9 @@ process counting {
script:
"""
htseq-count -r pos --mode=intersection-nonempty -a 10 -s no -t exon -i gene_id \
--format=bam ${bam} ${gtf} > ${bam.baseName}.count
htseq-count ${bam} ${gtf} \
-r pos --mode=intersection-nonempty -a 10 -s no -t exon -i gene_id \
> ${file_id}.count
"""
}
nextflow src/nf_modules/HTSeq/tests/counting.nf \
nextflow src/nf_modules/HTSeq/htseq.nf \
-c src/nf_modules/HTSeq/htseq.config \
-profile docker \
--gtf "data/tiny_dataset/annot/tiny.gff" \
......
// Default input locations; a `--gtf` / `--bam` command-line option replaces them.
params.bam = "$baseDir/data/bam/*.bam"
params.gtf = "$baseDir/data/annotation/*.gtf"

log.info "bam files : ${params.bam}"
log.info "gtf files : ${params.gtf}"

// BAM inputs: one item per file matching params.bam.
Channel
  .fromPath( params.bam )
  .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" }
  .set { bam_files }

// Annotation input: every file matching params.gtf.
Channel
  .fromPath( params.gtf )
  .ifEmpty { error "Cannot find any gtf file matching: ${params.gtf}" }
  .set { gtf_file }
/*
 * counting: run htseq-count on one BAM file against the annotation file.
 * Inputs : one BAM file per task (bam_files) and the annotation (gtf_file)
 * Output : <bam base name>.count, copied to results/quantification/
 */
process counting {
  tag "$bam.baseName"
  publishDir "results/quantification/", mode: 'copy'

  input:
    file bam from bam_files
    // first() yields a value channel, so the annotation is reused for every
    // BAM file. Read straight from the queue channel, a single annotation
    // file would be consumed by the first task and the remaining BAM files
    // would never be processed.
    file gtf from gtf_file.first()

  output:
    file "*.count" into count_files

  script:
"""
htseq-count -r pos --mode=intersection-nonempty -a 10 -s no -t exon -i gene_id \
--format=bam ${bam} ${gtf} > ${bam.baseName}.count
"""
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment