# Patch summary (free text before the first "diff --git" header).
#
# - Ignores `results` from the top-level .gitignore and deletes results/.gitignore.
# - Adds Docker build contexts (Dockerfile + docker_init.sh) for BWA 0.7.17,
#   GATK 4.0.8.1, bcftools 1.7, bioawk 1.0, sambamba 0.6.7 and samblaster 0.1.24.
# - Adds Nextflow modules (.nf + .config + tests.sh) for BWA (indexing, paired
#   mapping), sambamba (sort, index, split by strand) and samblaster (dedup).
#
# Review fixes carried by this revision of the patch:
# - Dockerfiles: curl no longer uses -k; downloads are checked against the
#   ca-certificates package installed in the same image.
# - sambamba/split_bams.nf: `sambamba view` is given `-f bam` so the *.bam
#   outputs are BAM (its default output format is SAM).
# - BWA/indexing.nf: the "no input" error message now says "fasta files".
# - BWA/mapping_paired.nf: tasks are tagged with the pair id, as in the
#   other modules, instead of the list of read files.
#
# NOTE(review): the `index` blob ids of the edited files were not recomputed.
diff --git a/.gitignore b/.gitignore
index e6966e87db118e219ac28c2aeaa064003245a426..73051c90863e872cd39cdaa7dc3c86cbd0e47c3e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ nextflow
 .nextflow.log*
 .nextflow/
 work/
+results
diff --git a/results/.gitignore b/results/.gitignore
deleted file mode 100644
index 72e8ffc0db8aad71a934dd11e5968bd5109e54b4..0000000000000000000000000000000000000000
--- a/results/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-*
diff --git a/src/docker_modules/BWA/0.7.17/Dockerfile b/src/docker_modules/BWA/0.7.17/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..7a538498bc4a1489f31fe2692224c02203042c44
--- /dev/null
+++ b/src/docker_modules/BWA/0.7.17/Dockerfile
@@ -0,0 +1,31 @@
+FROM sambamba:0.6.7
+MAINTAINER Laurent Modolo
+
+ENV BWA_VERSION=0.7.17
+ENV SAMBLASTER_VERSION=0.1.24
+
+ENV PACKAGES curl=7.58.0* \
+    ca-certificates=20180409 \
+    build-essential=12.4* \
+    zlib1g-dev=1:1.2.11*
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends ${PACKAGES} && \
+    apt-get clean
+
+# No "curl -k": sources are compiled below, so TLS is verified with the ca-certificates installed above.
+RUN curl -L https://github.com/lh3/bwa/releases/download/v${BWA_VERSION}/bwa-${BWA_VERSION}.tar.bz2 -o bwa-v${BWA_VERSION}.tar.bz2 && \
+tar xjf bwa-v${BWA_VERSION}.tar.bz2 && \
+cd bwa-${BWA_VERSION}/ && \
+make && \
+cp bwa /usr/bin && \
+cd .. && \
+rm -R bwa-${BWA_VERSION}/
+
+RUN curl -L https://github.com/GregoryFaust/samblaster/releases/download/v.${SAMBLASTER_VERSION}/samblaster-v.${SAMBLASTER_VERSION}.tar.gz -o samblaster-v.${SAMBLASTER_VERSION}.tar.gz && \
+tar xvf samblaster-v.${SAMBLASTER_VERSION}.tar.gz && \
+cd samblaster-v.${SAMBLASTER_VERSION}/ && \
+make && \
+cp samblaster /usr/bin && \
+cd .. && \
+rm -R samblaster-v.${SAMBLASTER_VERSION}/
diff --git a/src/docker_modules/BWA/0.7.17/docker_init.sh b/src/docker_modules/BWA/0.7.17/docker_init.sh
new file mode 100755
index 0000000000000000000000000000000000000000..888bec687cbd7e16eb8908d3ef1d402bfaf9dc6e
--- /dev/null
+++ b/src/docker_modules/BWA/0.7.17/docker_init.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+docker build src/docker_modules/BWA/0.7.17 -t 'bwa:0.7.17'
diff --git a/src/docker_modules/GATK/4.0.8.1/Dockerfile b/src/docker_modules/GATK/4.0.8.1/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..aceded338897254a31dca826413b63cffd933532
--- /dev/null
+++ b/src/docker_modules/GATK/4.0.8.1/Dockerfile
@@ -0,0 +1,6 @@
+FROM broadinstitute/gatk:4.0.8.1
+MAINTAINER Laurent Modolo
+
+ENV GATK_VERSION=4.0.8.1
+
+RUN cp gatk /usr/bin/
diff --git a/src/docker_modules/GATK/4.0.8.1/docker_init.sh b/src/docker_modules/GATK/4.0.8.1/docker_init.sh
new file mode 100755
index 0000000000000000000000000000000000000000..34c497758976492553c1f5cb173203622382db06
--- /dev/null
+++ b/src/docker_modules/GATK/4.0.8.1/docker_init.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+docker build src/docker_modules/GATK/4.0.8.1 -t 'gatk:4.0.8.1'
diff --git a/src/docker_modules/bcftools/1.7/Dockerfile b/src/docker_modules/bcftools/1.7/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..b602f187b41a8d54429b06ca45822482292ed68a
--- /dev/null
+++ b/src/docker_modules/bcftools/1.7/Dockerfile
@@ -0,0 +1,9 @@
+FROM ubuntu:18.04
+MAINTAINER Laurent Modolo
+
+ENV BCFTOOLS_VERSION=1.7
+ENV PACKAGES bcftools=${BCFTOOLS_VERSION}*
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends ${PACKAGES} && \
+    apt-get clean
diff --git a/src/docker_modules/bcftools/1.7/docker_init.sh b/src/docker_modules/bcftools/1.7/docker_init.sh
new file mode 100755
index 0000000000000000000000000000000000000000..6d19c6899eebecf2dcd8270ca2aa042a507a74ea
--- /dev/null
+++ b/src/docker_modules/bcftools/1.7/docker_init.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+docker build src/docker_modules/bcftools/1.7 -t 'bcftools:1.7'
diff --git a/src/docker_modules/bioawk/1.0/Dockerfile b/src/docker_modules/bioawk/1.0/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..f7ca9803e60926ed90bce0abfe4cf7af90d72672
--- /dev/null
+++ b/src/docker_modules/bioawk/1.0/Dockerfile
@@ -0,0 +1,21 @@
+FROM ubuntu:18.04
+MAINTAINER Laurent Modolo
+
+ENV BIOAWK_VERSION=1.0
+ENV PACKAGES git=1:2.17* \
+    build-essential=12.4* \
+    ca-certificates=20180409 \
+    zlib1g-dev=1:1.2.11* \
+    byacc
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends ${PACKAGES} && \
+    apt-get clean
+
+RUN git clone https://github.com/lh3/bioawk.git && \
+    cd bioawk && \
+    git checkout tags/v${BIOAWK_VERSION} && \
+    make && \
+    cd .. && \
+    mv bioawk/bioawk /usr/bin/ && \
+    rm -Rf bioawk
diff --git a/src/docker_modules/bioawk/1.0/docker_init.sh b/src/docker_modules/bioawk/1.0/docker_init.sh
new file mode 100755
index 0000000000000000000000000000000000000000..23c163352f6d380d6dcb0accb6de24d8281af8b6
--- /dev/null
+++ b/src/docker_modules/bioawk/1.0/docker_init.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+docker build src/docker_modules/bioawk/1.0 -t 'bioawk:1.0'
diff --git a/src/docker_modules/sambamba/0.6.7/Dockerfile b/src/docker_modules/sambamba/0.6.7/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..5858a176917fc301e165ba18a6d69c34a1bf786a
--- /dev/null
+++ b/src/docker_modules/sambamba/0.6.7/Dockerfile
@@ -0,0 +1,18 @@
+FROM ubuntu:18.04
+MAINTAINER Laurent Modolo
+
+ENV SAMBAMBA_VERSION=0.6.7
+ENV PACKAGES curl=7.58.0* \
+    ca-certificates=20180409 \
+    build-essential=12.4* \
+    zlib1g-dev=1:1.2.11*
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends ${PACKAGES} && \
+    apt-get clean
+
+# No "curl -k": the downloaded binary is installed as-is, so TLS is verified with the ca-certificates installed above.
+RUN curl -L https://github.com/biod/sambamba/releases/download/v${SAMBAMBA_VERSION}/sambamba_v${SAMBAMBA_VERSION}_linux.tar.bz2 -o sambamba_v${SAMBAMBA_VERSION}_linux.tar.bz2 && \
+tar xvjf sambamba_v${SAMBAMBA_VERSION}_linux.tar.bz2 && \
+mv sambamba /usr/bin/ && \
+rm -R sambamba_v${SAMBAMBA_VERSION}_linux*
diff --git a/src/docker_modules/sambamba/0.6.7/docker_init.sh b/src/docker_modules/sambamba/0.6.7/docker_init.sh
new file mode 100755
index 0000000000000000000000000000000000000000..2db68c5b8aec2734b47d91930c2285cb8cfe799f
--- /dev/null
+++ b/src/docker_modules/sambamba/0.6.7/docker_init.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+docker build src/docker_modules/sambamba/0.6.7 -t 'sambamba:0.6.7'
diff --git a/src/docker_modules/samblaster/0.1.24/Dockerfile b/src/docker_modules/samblaster/0.1.24/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..deea1fd3be8e9c032a417d90fb4255bf26dcc805
--- /dev/null
+++ b/src/docker_modules/samblaster/0.1.24/Dockerfile
@@ -0,0 +1,21 @@
+FROM ubuntu:18.04
+MAINTAINER Laurent Modolo
+
+ENV SAMBLASTER_VERSION=0.1.24
+ENV PACKAGES curl=7.58.0* \
+    ca-certificates=20180409 \
+    build-essential=12.4* \
+    zlib1g-dev=1:1.2.11*
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends ${PACKAGES} && \
+    apt-get clean
+
+# No "curl -k": sources are compiled below, so TLS is verified with the ca-certificates installed above.
+RUN curl -L https://github.com/GregoryFaust/samblaster/releases/download/v.${SAMBLASTER_VERSION}/samblaster-v.${SAMBLASTER_VERSION}.tar.gz -o samblaster-v.${SAMBLASTER_VERSION}.tar.gz && \
+tar xvf samblaster-v.${SAMBLASTER_VERSION}.tar.gz && \
+cd samblaster-v.${SAMBLASTER_VERSION}/ && \
+make && \
+cp samblaster /usr/bin && \
+cd .. && \
+rm -R samblaster-v.${SAMBLASTER_VERSION}/
diff --git a/src/docker_modules/samblaster/0.1.24/docker_init.sh b/src/docker_modules/samblaster/0.1.24/docker_init.sh
new file mode 100755
index 0000000000000000000000000000000000000000..24927903f57b4433e501d3ec94bf83670b51c057
--- /dev/null
+++ b/src/docker_modules/samblaster/0.1.24/docker_init.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+docker build src/docker_modules/samblaster/0.1.24 -t 'samblaster:0.1.24'
diff --git a/src/nf_modules/BWA/indexing.config b/src/nf_modules/BWA/indexing.config
new file mode 100644
index 0000000000000000000000000000000000000000..607bda58f1061ef449df15d8eef17aa98a2b992d
--- /dev/null
+++ b/src/nf_modules/BWA/indexing.config
@@ -0,0 +1,26 @@
+profiles {
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      $index_fasta {
+        container = "bwa:0.7.17"
+      }
+    }
+  }
+  sge {
+    process{
+      $index_fasta {
+        beforeScript = "module purge; module load BWA/0.7.17"
+        executor = "sge"
+        cpus = 1
+        memory = "5GB"
+        time = "6h"
+        queueSize = 1000
+        pollInterval = '60sec'
+        queue = 'h6-E5-2667v4deb128'
+        penv = 'openmp8'
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/BWA/indexing.nf b/src/nf_modules/BWA/indexing.nf
new file mode 100644
index 0000000000000000000000000000000000000000..67ea3287bf374808ab4b01007fe05c381d711151
--- /dev/null
+++ b/src/nf_modules/BWA/indexing.nf
@@ -0,0 +1,29 @@
+params.fasta = "$baseDir/data/bam/*.fasta"
+
+log.info "fasta files : ${params.fasta}"
+
+Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
+  .set { fasta_file }
+
+process index_fasta {
+  tag "$fasta_id"
+  cpus 4
+  publishDir "results/mapping/index/", mode: 'copy'
+
+  input:
+    set fasta_id, file(fasta) from fasta_file
+
+  output:
+    set fasta_id, "${fasta.baseName}.*" into index_files
+    file "*_bwa_report.txt" into index_files_report
+
+  script:
+"""
+bwa index -p ${fasta.baseName} ${fasta} \
+&> ${fasta.baseName}_bwa_report.txt
+"""
+}
+
diff --git a/src/nf_modules/BWA/mapping_paired.config b/src/nf_modules/BWA/mapping_paired.config
new file mode 100644
index 0000000000000000000000000000000000000000..c0370c3cc76f7048a03382a6ee7985ec1085d9b5
--- /dev/null
+++ b/src/nf_modules/BWA/mapping_paired.config
@@ -0,0 +1,26 @@
+profiles {
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      $mapping_fastq {
+        container = "bwa:0.7.17"
+      }
+    }
+  }
+  sge {
+    process{
+      $mapping_fastq {
+        beforeScript = "module purge; module load BWA/0.7.17"
+        executor = "sge"
+        cpus = 4
+        memory = "5GB"
+        time = "6h"
+        queueSize = 1000
+        pollInterval = '60sec'
+        queue = 'h6-E5-2667v4deb128'
+        penv = 'openmp8'
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/BWA/mapping_paired.nf b/src/nf_modules/BWA/mapping_paired.nf
new file mode 100644
index 0000000000000000000000000000000000000000..5ac50ef555dc42b2415f76e72c1a36e932637894
--- /dev/null
+++ b/src/nf_modules/BWA/mapping_paired.nf
@@ -0,0 +1,38 @@
+params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
+params.index = "$baseDir/data/index/*.index.*"
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+
+Channel
+  .fromFilePairs( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files }
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
+  .groupTuple()
+  .set { index_files }
+
+process mapping_fastq {
+  tag "$pair_id"
+  cpus 4
+  publishDir "results/mapping/sam/", mode: 'copy'
+
+  input:
+    set pair_id, file(reads) from fastq_files
+    set index_id, file(index) from index_files.collect()
+
+  output:
+    file "${pair_id}.sam" into sam_files
+    file "${pair_id}_bwa_report.txt" into mapping_repport_files
+
+  script:
+"""
+bwa mem -t ${task.cpus} \
+${index_id} ${reads[0]} ${reads[1]} \
+-o ${pair_id}.sam &> ${pair_id}_bwa_report.txt
+"""
+}
+
diff --git a/src/nf_modules/BWA/tests.sh b/src/nf_modules/BWA/tests.sh
new file mode 100755
index 0000000000000000000000000000000000000000..1f3150cf8debc2d6be9ccf19167d25418af42223
--- /dev/null
+++ b/src/nf_modules/BWA/tests.sh
@@ -0,0 +1,17 @@
+./nextflow src/nf_modules/BWA/indexing.nf \
+  -c src/nf_modules/BWA/indexing.config \
+  -profile docker \
+  --fasta "data/tiny_dataset/fasta/tiny_v2.fasta"
+
+# ./nextflow src/nf_modules/BWA/mapping_single.nf \
+#   -c src/nf_modules/BWA/mapping_single.config \
+#   -profile docker \
+#   --index "results/mapping/index/tiny_v2.index" \
+#   --fastq "data/tiny_dataset/fastq/tiny*_S.fastq"
+
+./nextflow src/nf_modules/BWA/mapping_paired.nf \
+  -c src/nf_modules/BWA/mapping_paired.config \
+  -profile docker \
+  --index "results/mapping/index/tiny_v2*" \
+  --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq"
+
diff --git a/src/nf_modules/sambamba/index_bams.config b/src/nf_modules/sambamba/index_bams.config
new file mode 100644
index 0000000000000000000000000000000000000000..0830a67ad8b1f6dd8eb993ec1c64b8ba94777f3e
--- /dev/null
+++ b/src/nf_modules/sambamba/index_bams.config
@@ -0,0 +1,18 @@
+profiles {
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      $index_bam {
+        container = "sambamba:0.6.7"
+      }
+    }
+  }
+  sge {
+    process{
+      $index_bam {
+        beforeScript = "module purge; module load sambamba/0.6.7"
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/sambamba/index_bams.nf b/src/nf_modules/sambamba/index_bams.nf
new file mode 100644
index 0000000000000000000000000000000000000000..0dc2dac682b103c043018bb9e7522eb2bfcf932a
--- /dev/null
+++ b/src/nf_modules/sambamba/index_bams.nf
@@ -0,0 +1,26 @@
+params.bam = "$baseDir/data/bam/*.bam"
+
+log.info "bams files : ${params.bam}"
+
+Channel
+  .fromPath( params.bam )
+  .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" }
+  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
+  .set { bam_files }
+
+process index_bam {
+  tag "$file_id"
+  cpus 4
+
+  input:
+    set file_id, file(bam) from bam_files
+
+  output:
+    set file_id, "*.bam*" into indexed_bam_file
+
+  script:
+"""
+sambamba index -t ${task.cpus} ${bam}
+"""
+}
+
diff --git a/src/nf_modules/sambamba/sort_bams.config b/src/nf_modules/sambamba/sort_bams.config
new file mode 100644
index 0000000000000000000000000000000000000000..2e335ad319401bfa1a7e1422989a234e4faedecc
--- /dev/null
+++ b/src/nf_modules/sambamba/sort_bams.config
@@ -0,0 +1,18 @@
+profiles {
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      $sort_bam {
+        container = "sambamba:0.6.7"
+      }
+    }
+  }
+  sge {
+    process{
+      $sort_bam {
+        beforeScript = "module purge; module load sambamba/0.6.7"
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/sambamba/sort_bams.nf b/src/nf_modules/sambamba/sort_bams.nf
new file mode 100644
index 0000000000000000000000000000000000000000..ac610cdca146693df69d8b765928d406b36652b6
--- /dev/null
+++ b/src/nf_modules/sambamba/sort_bams.nf
@@ -0,0 +1,26 @@
+params.bam = "$baseDir/data/bam/*.bam"
+
+log.info "bams files : ${params.bam}"
+
+Channel
+  .fromPath( params.bam )
+  .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" }
+  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
+  .set { bam_files }
+
+process sort_bam {
+  tag "$file_id"
+  cpus 4
+
+  input:
+    set file_id, file(bam) from bam_files
+
+  output:
+    set file_id, "*_sorted.bam" into sorted_bam_files
+
+  script:
+"""
+sambamba sort -t ${task.cpus} -o ${file_id}_sorted.bam ${bam}
+"""
+}
+
diff --git a/src/nf_modules/sambamba/split_bams.config b/src/nf_modules/sambamba/split_bams.config
new file mode 100644
index 0000000000000000000000000000000000000000..d2182ba92d4c6f7c40d8490ceca973e06becaad3
--- /dev/null
+++ b/src/nf_modules/sambamba/split_bams.config
@@ -0,0 +1,18 @@
+profiles {
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      $split_bam {
+        container = "sambamba:0.6.7"
+      }
+    }
+  }
+  sge {
+    process{
+      $split_bam {
+        beforeScript = "module purge; module load sambamba/0.6.7"
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/sambamba/split_bams.nf b/src/nf_modules/sambamba/split_bams.nf
new file mode 100644
index 0000000000000000000000000000000000000000..ba64d2e2b77eb3a9b24ec1355c2b3a68b95c7a4d
--- /dev/null
+++ b/src/nf_modules/sambamba/split_bams.nf
@@ -0,0 +1,28 @@
+params.bam = "$baseDir/data/bam/*.bam"
+
+log.info "bams files : ${params.bam}"
+
+Channel
+  .fromPath( params.bam )
+  .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" }
+  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
+  .set { bam_files }
+
+process split_bam {
+  tag "$file_id"
+  cpus 4
+
+  input:
+    set file_id, file(bam) from bam_files
+
+  output:
+    set file_id, "*_forward.bam*" into forward_bam_files
+    set file_id, "*_reverse.bam*" into reverse_bam_files
+  // "-f bam" is required: sambamba view writes SAM by default, but the outputs are named *.bam.
+  script:
+"""
+sambamba view -t ${task.cpus} -h -f bam -F "strand == '+'" ${bam} > ${file_id}_forward.bam
+sambamba view -t ${task.cpus} -h -f bam -F "strand == '-'" ${bam} > ${file_id}_reverse.bam
+"""
+}
+
diff --git a/src/nf_modules/sambamba/tests.sh b/src/nf_modules/sambamba/tests.sh
new file mode 100755
index 0000000000000000000000000000000000000000..d76db22cd3f3675675dbe660704ec114f1ba7b02
--- /dev/null
+++ b/src/nf_modules/sambamba/tests.sh
@@ -0,0 +1,14 @@
+./nextflow src/nf_modules/sambamba/sort_bams.nf \
+  -c src/nf_modules/sambamba/sort_bams.config \
+  -profile docker \
+  --bam "data/tiny_dataset/map/tiny_v2.bam"
+
+./nextflow src/nf_modules/sambamba/index_bams.nf \
+  -c src/nf_modules/sambamba/index_bams.config \
+  -profile docker \
+  --bam "data/tiny_dataset/map/tiny_v2.sort.bam"
+
+./nextflow src/nf_modules/sambamba/split_bams.nf \
+  -c src/nf_modules/sambamba/split_bams.config \
+  -profile docker \
+  --bam "data/tiny_dataset/map/tiny_v2.bam"
diff --git a/src/nf_modules/samblaster/dedup_sams.config b/src/nf_modules/samblaster/dedup_sams.config
new file mode 100644
index 0000000000000000000000000000000000000000..69cff9971858dfba138ec5c4b335d566ece4235b
--- /dev/null
+++ b/src/nf_modules/samblaster/dedup_sams.config
@@ -0,0 +1,18 @@
+profiles {
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      $dedup_sam {
+        container = "samblaster:0.1.24"
+      }
+    }
+  }
+  sge {
+    process{
+      $dedup_sam {
+        beforeScript = "module purge; module load samblaster/0.1.24"
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/samblaster/dedup_sams.nf b/src/nf_modules/samblaster/dedup_sams.nf
new file mode 100644
index 0000000000000000000000000000000000000000..fd9f2b1778bb66a3625d0de5f540458960193a29
--- /dev/null
+++ b/src/nf_modules/samblaster/dedup_sams.nf
@@ -0,0 +1,26 @@
+params.sam = "$baseDir/data/sam/*.sam"
+
+log.info "sams files : ${params.sam}"
+
+Channel
+  .fromPath( params.sam )
+  .ifEmpty { error "Cannot find any sam files matching: ${params.sam}" }
+  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
+  .set { sam_files }
+
+process dedup_sam {
+  tag "$file_id"
+  cpus 4
+
+  input:
+    set file_id, file(sam) from sam_files
+
+  output:
+    set file_id, "*_dedup.sam*" into dedup_sam_files
+  script:
+"""
+samblaster --addMateTags -i ${sam} -o ${file_id}_dedup.sam
+"""
+}
+
+
diff --git a/src/nf_modules/samblaster/tests.sh b/src/nf_modules/samblaster/tests.sh
new file mode 100755
index 0000000000000000000000000000000000000000..63bbfc846928adae8300acc1d1b6623acf4213dc
--- /dev/null
+++ b/src/nf_modules/samblaster/tests.sh
@@ -0,0 +1,4 @@
+./nextflow src/nf_modules/samblaster/dedup_sams.nf \
+  -c src/nf_modules/samblaster/dedup_sams.config \
+  -profile docker \
+  --sam "data/tiny_dataset/map/tiny_v2.sam"