Patch: hisat2 2.0.0 modules (container images + Nextflow processes)

Everything above the first "diff --git" header is commentary; git apply and
patch(1) skip it.

What this patch does:
  * docker_modules/hisat2/2.0.0/Dockerfile: the base image changes from
    ubuntu:18.04 to samtools:1.7 and packages are installed with apk.
  * nf_modules/hisat2/indexing.{nf,config}: new index_fasta process that
    runs hisat2-build on each fasta file.
  * nf_modules/hisat2/mapping_{paired,single}.{nf,config}: new mapping_fastq
    processes that pipe hisat2 into samtools view and fail the task when the
    hisat2 report contains "Error".
  * nf_modules/hisat2/tests.sh: runs the three pipelines with the docker
    profile, then with the singularity profile when singularity is installed.
  * singularity_modules/hisat2/2.0.0/hisat2.def: the base image changes to
    alpine:3.8 and samtools 1.7 is built from source before hisat2.

Review notes:
  * Line structure, indentation and blank context lines were restored from a
    whitespace-collapsed copy of this patch. Hunk line counts match their
    headers; if a context line does not match the target file, apply with
    `git apply --ignore-whitespace`.
  * The header comment of indexing.nf had "Imputs" corrected to "Inputs", so
    the blob id on that file's index line describes the uncorrected content.
  * indexing.config names its cluster profile "psmn" while both mapping
    configs name it "sge" -- confirm which name is intended.

diff --git a/src/docker_modules/hisat2/2.0.0/Dockerfile b/src/docker_modules/hisat2/2.0.0/Dockerfile
index 8b253adc6eb8ef62b89563ab932c5bd89c8afe5d..640d69196c2da6ea52e6aeb5fb69cf241e955d44 100644
--- a/src/docker_modules/hisat2/2.0.0/Dockerfile
+++ b/src/docker_modules/hisat2/2.0.0/Dockerfile
@@ -1,17 +1,15 @@
-FROM ubuntu:18.04
+FROM samtools:1.7
 MAINTAINER Nicolas Fontrodona
 
 ENV HISAT2_VERSION=2.0.0
-ENV PACKAGES unzip=6.0* \
-    gcc=4:7.3.0* \
-    g++=4:7.3.0* \
-    make=4.1* \
-    curl=7.58.0* \
-    ca-certificates=20180409
+ENV PACKAGES curl \
+    zip \
+    g++ \
+    perl \
+    python
 
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends ${PACKAGES} && \
-    apt-get clean
+RUN apk update && \
+    apk add ${PACKAGES}
 
 RUN curl -k -L http://ccb.jhu.edu/software/hisat2/downloads/hisat2-${HISAT2_VERSION}-beta-source.zip -o hisat2_linux-v${HISAT2_VERSION}.zip && \
 unzip hisat2_linux-v${HISAT2_VERSION}.zip && \
@@ -19,5 +17,4 @@ cd hisat2-${HISAT2_VERSION}-beta && \
 make && \
 cp hisat2 /usr/bin && \
 cp hisat2-* /usr/bin && \
-rm -Rf hisat2-${HISAT2_VERSION}-beta
-
+rm -Rf hisat2-${HISAT2_VERSION}-beta
diff --git a/src/nf_modules/hisat2/indexing.config b/src/nf_modules/hisat2/indexing.config
new file mode 100644
index 0000000000000000000000000000000000000000..47c14a572a4849eac8ebbe87dcb9a88c76e7a2c1
--- /dev/null
+++ b/src/nf_modules/hisat2/indexing.config
@@ -0,0 +1,36 @@
+profiles {
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      withName: index_fasta {
+        container = "hisat2:2.0.0"
+        cpus = 4
+      }
+    }
+  }
+  singularity {
+    singularity.enabled = true
+    process {
+      withName: index_fasta {
+        cpus = 4
+        container = "file://bin/hisat2:2.0.0.sif"
+      }
+    }
+  }
+  psmn {
+    process{
+      withName: index_fasta {
+        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
+        module = "hisat2/2.0.0"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        memory = "20GB"
+        cpus = 16
+        time = "12h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16'
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/hisat2/indexing.nf b/src/nf_modules/hisat2/indexing.nf
new file mode 100644
index 0000000000000000000000000000000000000000..1b11b3ef7ec09a21e84e2dfd33cf6bea129bfa52
--- /dev/null
+++ b/src/nf_modules/hisat2/indexing.nf
@@ -0,0 +1,32 @@
+/*
+* Hisat2 :
+* Inputs : fastq files
+* Inputs : fasta files
+* Output : bam files
+*/
+
+/* fasta indexing */
+params.fasta = "$baseDir/data/bam/*.fasta"
+
+log.info "fasta files : ${params.fasta}"
+
+Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+  .set { fasta_file }
+
+process index_fasta {
+  tag "$fasta.baseName"
+  publishDir "results/mapping/index/", mode: 'copy'
+
+  input:
+    file fasta from fasta_file
+
+  output:
+    file "*.index*" into index_files
+
+  script:
+"""
+hisat2-build -p ${task.cpus} ${fasta} ${fasta.baseName}.index
+"""
+}
diff --git a/src/nf_modules/hisat2/mapping_paired.config b/src/nf_modules/hisat2/mapping_paired.config
new file mode 100644
index 0000000000000000000000000000000000000000..6f3aa47b1d1d7fc417230c0807aefd4d30d7d234
--- /dev/null
+++ b/src/nf_modules/hisat2/mapping_paired.config
@@ -0,0 +1,36 @@
+profiles {
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      withName: mapping_fastq {
+        cpus = 4
+        container = "hisat2:2.0.0"
+      }
+    }
+  }
+  singularity {
+    singularity.enabled = true
+    process {
+      withName: mapping_fastq {
+        cpus = 4
+        container = "file://bin/hisat2:2.0.0.sif"
+      }
+    }
+  }
+  sge {
+    process{
+      withName: mapping_fastq {
+        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
+        module = "hisat2/2.0.0"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        memory = "20GB"
+        cpus = 16
+        time = "12h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16'
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/hisat2/mapping_paired.nf b/src/nf_modules/hisat2/mapping_paired.nf
new file mode 100644
index 0000000000000000000000000000000000000000..28b37e005db84e248fa72209ec1dfa2a290bd264
--- /dev/null
+++ b/src/nf_modules/hisat2/mapping_paired.nf
@@ -0,0 +1,48 @@
+params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
+params.index = "$baseDir/data/index/*.index.*"
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+
+Channel
+  .fromFilePairs( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files }
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+  .set { index_files }
+
+process mapping_fastq {
+  tag "$pair_id"
+  publishDir "results/mapping/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files
+  file index from index_files.collect()
+
+  output:
+  file "*" into counts_files
+  set pair_id, "*.bam" into bam_files
+  file "*_report.txt" into mapping_report
+
+  script:
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
+    }
+  }
+"""
+hisat2 -p ${task.cpus} \
+  -x ${index_id} \
+  -1 ${reads[0]} \
+  -2 ${reads[1]} 2> \
+${pair_id}_hisat2_report.txt | \
+samtools view -Sb - > ${pair_id}.bam
+
+if grep -q "Error" ${pair_id}_hisat2_report.txt; then
+  exit 1
+fi
+"""
+}
diff --git a/src/nf_modules/hisat2/mapping_single.config b/src/nf_modules/hisat2/mapping_single.config
new file mode 100644
index 0000000000000000000000000000000000000000..6f3aa47b1d1d7fc417230c0807aefd4d30d7d234
--- /dev/null
+++ b/src/nf_modules/hisat2/mapping_single.config
@@ -0,0 +1,36 @@
+profiles {
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      withName: mapping_fastq {
+        cpus = 4
+        container = "hisat2:2.0.0"
+      }
+    }
+  }
+  singularity {
+    singularity.enabled = true
+    process {
+      withName: mapping_fastq {
+        cpus = 4
+        container = "file://bin/hisat2:2.0.0.sif"
+      }
+    }
+  }
+  sge {
+    process{
+      withName: mapping_fastq {
+        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
+        module = "hisat2/2.0.0"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        memory = "20GB"
+        cpus = 16
+        time = "12h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16'
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/hisat2/mapping_single.nf b/src/nf_modules/hisat2/mapping_single.nf
new file mode 100644
index 0000000000000000000000000000000000000000..0fdb729e90f9bdc097209846e20af08a6a0ce41c
--- /dev/null
+++ b/src/nf_modules/hisat2/mapping_single.nf
@@ -0,0 +1,53 @@
+/*
+* for single-end data
+*/
+
+params.fastq = "$baseDir/data/fastq/*.fastq"
+params.index = "$baseDir/data/index/*.index*"
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+
+Channel
+  .fromPath( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
+  .set { fastq_files }
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+  .set { index_files }
+
+process mapping_fastq {
+  tag "$file_id"
+  publishDir "results/mapping/", mode: 'copy'
+
+  input:
+  set file_id, file(reads) from fastq_files
+  file index from index_files.collect()
+
+  output:
+  file "*" into count_files
+  set file_id, "*.bam" into bam_files
+  file "*_report.txt" into mapping_report
+
+  script:
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
+    }
+  }
+"""
+hisat2 -p ${task.cpus} \
+  -x ${index_id} \
+  -U ${reads} 2> \
+${file_id}_hisat2_report.txt | \
+samtools view -Sb - > ${file_id}.bam
+
+if grep -q "Error" ${file_id}_hisat2_report.txt; then
+  exit 1
+fi
+
+"""
+}
diff --git a/src/nf_modules/hisat2/tests.sh b/src/nf_modules/hisat2/tests.sh
new file mode 100755
index 0000000000000000000000000000000000000000..50e4396652e867a3bc114db2ba79b4af4dc7de9a
--- /dev/null
+++ b/src/nf_modules/hisat2/tests.sh
@@ -0,0 +1,39 @@
+./nextflow src/nf_modules/hisat2/indexing.nf \
+  -c src/nf_modules/hisat2/indexing.config \
+  -profile docker \
+  --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \
+  -resume
+
+./nextflow src/nf_modules/hisat2/mapping_paired.nf \
+  -c src/nf_modules/hisat2/mapping_paired.config \
+  -profile docker \
+  --index "results/mapping/index/tiny_v2.index*" \
+  --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq" \
+  -resume
+
+./nextflow src/nf_modules/hisat2/mapping_single.nf \
+  -c src/nf_modules/hisat2/mapping_single.config \
+  -profile docker \
+  --index "results/mapping/index/tiny_v2.index*" \
+  --fastq "data/tiny_dataset/fastq/tiny*_S.fastq" \
+  -resume
+
+if [ -x "$(command -v singularity)" ]; then
+./nextflow src/nf_modules/hisat2/indexing.nf \
+  -c src/nf_modules/hisat2/indexing.config \
+  -profile singularity \
+  --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \
+  -resume
+
+./nextflow src/nf_modules/hisat2/mapping_paired.nf \
+  -c src/nf_modules/hisat2/mapping_paired.config \
+  -profile singularity \
+  --index "results/mapping/index/tiny_v2.index*" \
+  --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq"
+
+./nextflow src/nf_modules/hisat2/mapping_single.nf \
+  -c src/nf_modules/hisat2/mapping_single.config \
+  -profile singularity \
+  --index "results/mapping/index/tiny_v2.index*" \
+  --fastq "data/tiny_dataset/fastq/tiny*_S.fastq"
+fi
diff --git a/src/singularity_modules/hisat2/2.0.0/hisat2.def b/src/singularity_modules/hisat2/2.0.0/hisat2.def
index a3941bf3f874f72d0a6600d327959987db34a866..d9d77b688e89480cf7db1a3327fc99b50fec9092 100644
--- a/src/singularity_modules/hisat2/2.0.0/hisat2.def
+++ b/src/singularity_modules/hisat2/2.0.0/hisat2.def
@@ -1,21 +1,38 @@
 Bootstrap: docker
-From: ubuntu:18.04
+From: alpine:3.8
 
 %labels
-MAINTAINER Nicolas Fontrodona
+MAINTAINER Laurent Modolo
 
 %post
+SAMTOOLS_VERSION=1.7
 HISAT2_VERSION=2.0.0
-PACKAGES="unzip \
-gcc \
-g++ \
-make \
-curl \
-ca-certificates"
+PACKAGES="git \
+    make \
+    gcc \
+    musl-dev \
+    zlib-dev \
+    ncurses-dev \
+    bzip2-dev \
+    xz-dev \
+    bash \
+    curl \
+    zip \
+    g++ \
+    perl \
+    python"
+apk update && \
+apk add ${PACKAGES}
 
-apt-get update && \
-apt-get install -y --no-install-recommends ${PACKAGES} && \
-apt-get clean
+git clone https://github.com/samtools/htslib.git && \
+cd htslib && \
+git checkout ${SAMTOOLS_VERSION} && \
+cd .. && \
+git clone https://github.com/samtools/samtools.git && \
+cd samtools && \
+git checkout ${SAMTOOLS_VERSION} && \
+make && \
+cp samtools /usr/bin/
 
 curl -k -L http://ccb.jhu.edu/software/hisat2/downloads/hisat2-${HISAT2_VERSION}-beta-source.zip -o hisat2_linux-v${HISAT2_VERSION}.zip && \
 unzip hisat2_linux-v${HISAT2_VERSION}.zip && \
@@ -26,6 +43,7 @@ cp hisat2-* /usr/bin && \
 rm -Rf hisat2-${HISAT2_VERSION}-beta
 
 %environment
+export SAMTOOLS_VERSION=1.7
 export HISAT2_VERSION=2.0.0
 
 %runscript