diff --git a/.gitignore b/.gitignore index 73051c90863e872cd39cdaa7dc3c86cbd0e47c3e..cb95d1259483cf134e476684b1d08738b02270c7 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ nextflow .nextflow/ work/ results +workspace.code-workspace diff --git a/src/.docker_modules/alntools/dd96682/Dockerfile b/src/.docker_modules/alntools/dd96682/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..01fdd8995a3501ba9e3e6ceb727055dc26aa5656 --- /dev/null +++ b/src/.docker_modules/alntools/dd96682/Dockerfile @@ -0,0 +1,20 @@ +FROM python:3.9-buster as build +MAINTAINER Laurent Modolo + +ENV ALNTOOLS_VERSION=dd96682 +ENV PACKAGES git \ + ca-certificates \ + procps + +RUN apt-get update \ + && apt-get install -y --no-install-recommends ${PACKAGES}\ + && apt-get clean \ + && git clone https://github.com/churchill-lab/alntools.git \ + && cd alntools \ + && git checkout ${ALNTOOLS_VERSION} \ + && python setup.py install \ + && cd .. \ + && rm -R alntools \ + && apt-get autoremove --purge -y git ca-certificates + +CMD ["bash"] \ No newline at end of file diff --git a/src/.docker_modules/alntools/dd96682/docker_init.sh b/src/.docker_modules/alntools/dd96682/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..48190c462975649ace8430a6e0769cf742611b42 --- /dev/null +++ b/src/.docker_modules/alntools/dd96682/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/alntools:dd96682 +docker build src/.docker_modules/alntools/dd96682 -t 'lbmc/alntools:dd96682' +docker push lbmc/alntools:dd96682 diff --git a/src/.docker_modules/bwa/0.7.17/Dockerfile b/src/.docker_modules/bwa/0.7.17/Dockerfile index 654cf85e6c005f5be0fbae74fb5624848a367981..b5b5859a8f29564c5f260a094890223d4f04ebda 100644 --- a/src/.docker_modules/bwa/0.7.17/Dockerfile +++ b/src/.docker_modules/bwa/0.7.17/Dockerfile @@ -1,17 +1,14 @@ -FROM lbmc/sambamba:0.6.9 +FROM lbmc/samtools:1.11 MAINTAINER Laurent Modolo ENV BWA_VERSION=0.7.17 ENV SAMBLASTER_VERSION=0.1.24 
-ENV PACKAGES curl=7.58* \ - ca-certificates=20180409 \ - build-essential=12.4* \ - zlib1g-dev=1:1.2.11* +ENV PACKAGES curl \ + g++ -RUN apt-get update && \ - apt-get install -y --no-install-recommends ${PACKAGES} && \ - apt-get clean +RUN apk update && \ + apk add ${PACKAGES} RUN curl -k -L https://github.com/lh3/bwa/releases/download/v${BWA_VERSION}/bwa-${BWA_VERSION}.tar.bz2 -o bwa-v${BWA_VERSION}.tar.bz2 && \ tar xjf bwa-v${BWA_VERSION}.tar.bz2 && \ @@ -21,10 +18,3 @@ cp bwa /usr/bin && \ cd .. && \ rm -R bwa-${BWA_VERSION}/ -RUN curl -k -L https://github.com/GregoryFaust/samblaster/releases/download/v.${SAMBLASTER_VERSION}/samblaster-v.${SAMBLASTER_VERSION}.tar.gz -o samblaster-v.${SAMBLASTER_VERSION}.tar.gz && \ -tar xvf samblaster-v.${SAMBLASTER_VERSION}.tar.gz && \ -cd samblaster-v.${SAMBLASTER_VERSION}/ && \ -make && \ -cp samblaster /usr/bin && \ -cd .. && \ -rm -R samblaster-v.${SAMBLASTER_VERSION}/ diff --git a/src/.docker_modules/deeptools/3.5.0/Dockerfile b/src/.docker_modules/deeptools/3.5.0/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..3940e3905f4ecc776f9a8158823c42599d49b829 --- /dev/null +++ b/src/.docker_modules/deeptools/3.5.0/Dockerfile @@ -0,0 +1,14 @@ +FROM python:3.9-slim +MAINTAINER Lauret Modolo + +ENV DEEPTOOLS_VERSION=3.5.0 +RUN apt-get update -qq \ + && apt-get install --no-install-recommends --yes \ + build-essential \ + zlib1g-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + libssl-dev \ + libncurses5-dev +RUN pip3 install deeptools==${DEEPTOOLS_VERSION} diff --git a/src/.docker_modules/deeptools/3.5.0/docker_init.sh b/src/.docker_modules/deeptools/3.5.0/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..47b9e608149fac2739fc1200170d98981e6c4f78 --- /dev/null +++ b/src/.docker_modules/deeptools/3.5.0/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/deeptools:3.5.0 +docker build src/.docker_modules/deeptools/3.5.0 -t 'lbmc/deeptools:3.5.0' 
+docker push lbmc/deeptools:3.5.0 diff --git a/src/.docker_modules/emase-zero/0.3.1/Dockerfile b/src/.docker_modules/emase-zero/0.3.1/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..ac82286db088ea6363fef626d164abe43831f6c4 --- /dev/null +++ b/src/.docker_modules/emase-zero/0.3.1/Dockerfile @@ -0,0 +1,35 @@ +FROM debian:buster as build +MAINTAINER Laurent Modolo + +ENV EMASEZERO_VERSION=0.3.1 +ENV PACKAGES build-essential \ + ca-certificates \ + zlib1g-dev \ + git + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN git clone https://github.com/churchill-lab/emase-zero.git \ + && cd emase-zero/src/ \ + && make \ + && mv emase-zero /usr/local/bin/ \ + && ldd /usr/local/bin/emase-zero +# linux-vdso.so.1 (0x00007ffe8e35f000) +# libz.so.1 => /lib/x86_64-linux-gnu/libz.so.1 (0x00007fbd358b5000) +# libstdc++.so.6 => /usr/lib/x86_64-linux-gnu/libstdc++.so.6 (0x00007fbd35731000) +# libm.so.6 => /lib/x86_64-linux-gnu/libm.so.6 (0x00007fbd355ae000) +# libgcc_s.so.1 => /lib/x86_64-linux-gnu/libgcc_s.so.1 (0x00007fbd35594000) +# libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007fbd353d3000) +# /lib64/ld-linux-x86-64.so.2 (0x00007fbd35af7000) + +FROM debian:buster-slim + +COPY --from=build /usr/local/bin/emase-zero /usr/local/bin/ +COPY --from=build /lib/x86_64-linux-gnu/libz.so.1 /lib/x86_64-linux-gnu/ +COPY --from=build /lib/x86_64-linux-gnu/libm.so.6 /lib/x86_64-linux-gnu/ +COPY --from=build /lib/x86_64-linux-gnu/libgcc_s.so.1 /lib/x86_64-linux-gnu/ + +RUN apt-get update && \ + apt-get install -y procps bash diff --git a/src/.docker_modules/emase-zero/0.3.1/docker_init.sh b/src/.docker_modules/emase-zero/0.3.1/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..cb295bd192aca5d48e4dae5c44729b13a43650cc --- /dev/null +++ b/src/.docker_modules/emase-zero/0.3.1/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/emase-zero:0.3.1 +docker build 
src/.docker_modules/emase-zero/0.3.1 -t 'lbmc/emase-zero:0.3.1' +docker push lbmc/emase-zero:0.3.1 diff --git a/src/.docker_modules/fastp/0.20.1/Dockerfile b/src/.docker_modules/fastp/0.20.1/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..0461d5ea260b3e894af982a27d1dc8d3b2860c16 --- /dev/null +++ b/src/.docker_modules/fastp/0.20.1/Dockerfile @@ -0,0 +1,2 @@ +FROM quay.io/biocontainers/fastp:0.20.1--h8b12597_0 +MAINTAINER Laurent Modolo diff --git a/src/.docker_modules/fastp/0.20.1/docker_init.sh b/src/.docker_modules/fastp/0.20.1/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..2b1f3bee40fb05504488fe026ff39811f9fef47d --- /dev/null +++ b/src/.docker_modules/fastp/0.20.1/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/fastp:0.20.1 +docker build src/.docker_modules/fastp/0.20.1 -t 'lbmc/fastp:0.20.1' +docker push lbmc/fastp:0.20.1 diff --git a/src/.docker_modules/g2gtools/0.2.8/Dockerfile b/src/.docker_modules/g2gtools/0.2.8/Dockerfile index c57b061fd1380ba0c432ba515ce7247044e0e957..2163f345c06c5478ca72fe8890cece852f0e78a6 100644 --- a/src/.docker_modules/g2gtools/0.2.8/Dockerfile +++ b/src/.docker_modules/g2gtools/0.2.8/Dockerfile @@ -1,11 +1,16 @@ -FROM python:3.8-alpine +FROM python:3.9-slim MAINTAINER Laurent Modolo ENV G2GTOOLS_VERSION=0.2.8 -RUN apk add --update --no-cache bash musl-dev linux-headers g++ cmake make build-base bzip2-dev zlib-dev xz-dev autoconf \ +RUN apt update \ + && apt install -y wget build-essential zlib1g-dev libbz2-dev liblzma-dev procps \ && wget https://github.com/churchill-lab/g2gtools/archive/v${G2GTOOLS_VERSION}.tar.gz \ && tar -xvf v${G2GTOOLS_VERSION}.tar.gz \ && cd g2gtools-${G2GTOOLS_VERSION} \ && pip install numpy \ + && pip install pysam \ && make install + + +CMD ["bash"] diff --git a/src/.docker_modules/macs3/3.0.0a6/Dockerfile b/src/.docker_modules/macs3/3.0.0a6/Dockerfile new file mode 100644 index 
0000000000000000000000000000000000000000..375b8f631fe40f9e05f94400d15057f303713438 --- /dev/null +++ b/src/.docker_modules/macs3/3.0.0a6/Dockerfile @@ -0,0 +1,15 @@ +FROM python:3.9-slim +MAINTAINER Laurent Modolo + +ENV MACS3_VERSION=3.0.0a6 +RUN apt-get update -qq \ + && apt-get install --no-install-recommends --yes \ + build-essential \ + zlib1g-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + libssl-dev \ + libncurses5-dev \ + procps +RUN pip install macs3==${MACS3_VERSION} diff --git a/src/.docker_modules/macs3/3.0.0a6/docker_init.sh b/src/.docker_modules/macs3/3.0.0a6/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..3c830318a39076b8f2ca4dc2e7442c8c046a320a --- /dev/null +++ b/src/.docker_modules/macs3/3.0.0a6/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/macs3:3.0.0a6 +docker build src/.docker_modules/macs3/3.0.0a6 -t 'lbmc/macs3:3.0.0a6' +docker push lbmc/macs3:3.0.0a6 diff --git a/src/.docker_modules/minimap2/2.17/Dockerfile b/src/.docker_modules/minimap2/2.17/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..1f5b05c02b573cc8d7392c53213cc78cbb6abeb5 --- /dev/null +++ b/src/.docker_modules/minimap2/2.17/Dockerfile @@ -0,0 +1,25 @@ +FROM quay.io/biocontainers/samtools:0.1.18--hfb9b9cc_10 AS samtools +# /usr/local/bin/samtools +# / # ldd /usr/local/bin/samtools +# /lib64/ld-linux-x86-64.so.2 (0x7efddcdcc000) +# libncurses.so.6 => /usr/local/bin/../lib/libncurses.so.6 (0x7efddcfad000) +# libtinfo.so.6 => /usr/local/bin/../lib/libtinfo.so.6 (0x7efddcf6f000) +# libm.so.6 => /lib64/ld-linux-x86-64.so.2 (0x7efddcdcc000) +# libz.so.1 => /usr/local/bin/../lib/libz.so.1 (0x7efddcf55000) +# libc.so.6 => /lib64/ld-linux-x86-64.so.2 (0x7efddcdcc000) + +FROM quay.io/biocontainers/minimap2:2.17--hed695b0_3 +MAINTAINER Laurent Modolo +ENV MINIMAP2_VERSION=2.17 + +COPY --from=samtools /usr/local/bin/samtools /usr/local/bin/ +COPY --from=samtools /usr/local//lib/libncurses.so.6 
/usr/local/lib/ +COPY --from=samtools /usr/local//lib/libtinfo.so.6 /usr/local/lib/ + +# /usr/local/bin/minimap2 +# / # ldd /usr/local/bin/minimap2 +# /lib64/ld-linux-x86-64.so.2 (0x7fe14f5a8000) +# libm.so.6 => /lib64/ld-linux-x86-64.so.2 (0x7fe14f5a8000) +# libz.so.1 => /usr/local/bin/../lib/libz.so.1 (0x7fe14f7c4000) +# libpthread.so.0 => /lib64/ld-linux-x86-64.so.2 (0x7fe14f5a8000) +# libc.so.6 => /lib64/ld-linux-x86-64.so.2 (0x7fe14f5a8000) \ No newline at end of file diff --git a/src/.docker_modules/minimap2/2.17/docker_init.sh b/src/.docker_modules/minimap2/2.17/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..773f0cf6d1ec3f29c3e60e4b1fa359d28223e601 --- /dev/null +++ b/src/.docker_modules/minimap2/2.17/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/minimap2:2.17 +docker build src/.docker_modules/minimap2/2.17 -t 'lbmc/minimap2:2.17' +docker push lbmc/minimap2:2.17 diff --git a/src/.docker_modules/multiqc/1.9/Dockerfile b/src/.docker_modules/multiqc/1.9/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..3e82ccb436f841876ef71c73f4e6a611454bd7f2 --- /dev/null +++ b/src/.docker_modules/multiqc/1.9/Dockerfile @@ -0,0 +1,25 @@ +FROM debian:stretch +MAINTAINER Laurent Modolo + +ENV MULTIQC_VERSION=1.9 +ENV PACKAGES build-essential \ + python3-pip \ + python3-setuptools \ + python3-dev \ + python3-wheel \ + procps \ + locales + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN locale-gen en_US.UTF-8 +ENV LC_ALL=en_US.utf-8 +ENV LANG=en_US.utf-8 +ENV LC_ALL=C.UTF-8 +ENV LANG=C.UTF-8 + + +RUN pip3 install multiqc==${MULTIQC_VERSION} + diff --git a/src/.docker_modules/multiqc/1.9/docker_init.sh b/src/.docker_modules/multiqc/1.9/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..dcb2897242cb084d9bc851e2274c21aca99f00c2 --- /dev/null +++ b/src/.docker_modules/multiqc/1.9/docker_init.sh @@ -0,0 +1,4 @@ 
+#!/bin/sh +docker pull lbmc/multiqc:1.9 +docker build src/.docker_modules/multiqc/1.9 -t 'lbmc/multiqc:1.9' +docker push lbmc/multiqc:1.9 diff --git a/src/.docker_modules/pandoc/2.11/Dockerfile b/src/.docker_modules/pandoc/2.11/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..f62b84b960032314a2fb43daedd10d6b539a0c91 --- /dev/null +++ b/src/.docker_modules/pandoc/2.11/Dockerfile @@ -0,0 +1,8 @@ +FROM alpine:3.13 +MAINTAINER Laurent Modolo + +ENV PANDOC_VERSION=2.11 + +RUN echo "https://dl-cdn.alpinelinux.org/alpine/edge/testing" >> /etc/apk/repositories \ + && apk update \ + && apk add pandoc~=${PANDOC_VERSION} make diff --git a/src/.docker_modules/pandoc/2.11/docker_init.sh b/src/.docker_modules/pandoc/2.11/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..3bbc7b6adc58f5f9fb2f8c554bbdfea9724cb4b2 --- /dev/null +++ b/src/.docker_modules/pandoc/2.11/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/pandoc:2.11 +docker build src/.docker_modules/pandoc/2.11 -t 'lbmc/pandoc:2.11' +docker push lbmc/pandoc:2.11 diff --git a/src/.docker_modules/r-base/4.0.2/Dockerfile b/src/.docker_modules/r-base/4.0.2/Dockerfile index f9ad39e2fe3ad59963ccdadb916a1d182ab91452..97e111459c9b685fa6abd6b18fbdf880ffe81bc9 100644 --- a/src/.docker_modules/r-base/4.0.2/Dockerfile +++ b/src/.docker_modules/r-base/4.0.2/Dockerfile @@ -23,7 +23,9 @@ ENV R_DEPS g++ \ libexecinfo-dev \ file \ ttf-linux-libertine \ - git + git \ + openssl \ + bash RUN echo "http://ftp.acc.umu.se/mirror/alpinelinux.org/v3.11/main" > /etc/apk/repositories \ && echo "http://ftp.acc.umu.se/mirror/alpinelinux.org/v3.11/community" >> /etc/apk/repositories \ diff --git a/src/.docker_modules/r-base/4.0.2/docker_init.sh b/src/.docker_modules/r-base/4.0.2/docker_init.sh index 16f69fd3450819f58f071fb6f4ea06ee15e38e27..d07371190e4360bb9ebca95c0cb16eef8b88e32d 100755 --- a/src/.docker_modules/r-base/4.0.2/docker_init.sh +++ 
b/src/.docker_modules/r-base/4.0.2/docker_init.sh @@ -1,4 +1,4 @@ #!/bin/sh docker pull lbmc/r-base:4.0.2 -docker build src/.docker_modules/r/4.0.2 -t 'lbmc/r-base:4.0.2' +docker build src/.docker_modules/r-base/4.0.2 -t 'lbmc/r-base:4.0.2' docker push lbmc/r-base:4.0.2 diff --git a/src/.docker_modules/r-base/4.0.3/Dockerfile b/src/.docker_modules/r-base/4.0.3/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..58f752ff581aca0270beebd74e4c9abf8d0e7e8a --- /dev/null +++ b/src/.docker_modules/r-base/4.0.3/Dockerfile @@ -0,0 +1,36 @@ +FROM alpine:3.13.1 +MAINTAINER Lauret Modolo + +ENV R_PKGS R=~4.0.3 \ + R-mathlib=~4.0.3 \ + R-dev=~4.0.3 \ + R-doc=~4.0.3 + +ENV R_DEPS g++ \ + libxml2-dev \ + make \ + cmake \ + linux-headers \ + cairo-dev \ + libxmu-dev \ + pango-dev \ + perl \ + tiff-dev \ + icu-dev \ + libjpeg-turbo \ + pcre-dev \ + readline-dev \ + libexecinfo-dev \ + file \ + ttf-linux-libertine \ + git \ + openssl \ + autoconf \ + automake \ + libuv \ + http-parser \ + tzdata \ + libgit2-dev \ + bash + +RUN apk add --update --no-cache ${R_PKGS} ${R_DEPS} diff --git a/src/.docker_modules/r-base/4.0.3/docker_init.sh b/src/.docker_modules/r-base/4.0.3/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..2b4e97048e502f00ec3447bbced8d9f53d529c6c --- /dev/null +++ b/src/.docker_modules/r-base/4.0.3/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/r-base:4.0.3 +docker build src/.docker_modules/r-base/4.0.3 -t 'lbmc/r-base:4.0.3' +docker push lbmc/r-base:4.0.3 diff --git a/src/.docker_modules/samtools/1.11/Dockerfile b/src/.docker_modules/samtools/1.11/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..054c3e6e421ee3056d43556d6435a23775934ba3 --- /dev/null +++ b/src/.docker_modules/samtools/1.11/Dockerfile @@ -0,0 +1,27 @@ +FROM alpine:3.8 +MAINTAINER Laurent Modolo + +ENV SAMTOOLS_VERSION=1.11 +ENV PACKAGES git \ + make \ + gcc \ + musl-dev \ + zlib-dev \ + ncurses-dev \ + 
bzip2-dev \ + xz-dev \ + curl-dev \ + bash + +RUN apk update && \ + apk add ${PACKAGES} + +RUN git clone https://github.com/samtools/htslib.git && \ +cd htslib && \ +git checkout ${SAMTOOLS_VERSION} && \ +cd .. && \ +git clone https://github.com/samtools/samtools.git && \ +cd samtools && \ +git checkout ${SAMTOOLS_VERSION} && \ +make && \ +cp samtools /usr/bin/ diff --git a/src/.docker_modules/samtools/1.11/docker_init.sh b/src/.docker_modules/samtools/1.11/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..e5cf9c2896e0679b9124bdb4e38f852184f993f6 --- /dev/null +++ b/src/.docker_modules/samtools/1.11/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/samtools:1.11 +docker build src/.docker_modules/samtools/1.11 -t 'lbmc/samtools:1.11' +docker push lbmc/samtools:1.11 diff --git a/src/.docker_modules/star/2.5.3/Dockerfile b/src/.docker_modules/star/2.5.3/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..a045a2352dd59dc8de04e07b233a0399b891cde8 --- /dev/null +++ b/src/.docker_modules/star/2.5.3/Dockerfile @@ -0,0 +1,2 @@ +FROM quay.io/biocontainers/star:2.7.3a--0 +MAINTAINER Laurent Modolo diff --git a/src/.docker_modules/star/2.5.3/docker_init.sh b/src/.docker_modules/star/2.5.3/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..50beecfcc7fcb7a9b1943a418651cafb55851495 --- /dev/null +++ b/src/.docker_modules/star/2.5.3/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/star:2.7.3a +docker build src/.docker_modules/star/2.7.3a/ -t 'lbmc/star:2.7.3a' +docker push lbmc/star:2.7.3a diff --git a/src/.docker_modules/ucsc/407/Dockerfile b/src/.docker_modules/ucsc/407/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..1499bdb1d58e48a64ee1a7dee550444527b7c82e --- /dev/null +++ b/src/.docker_modules/ucsc/407/Dockerfile @@ -0,0 +1,27 @@ +FROM debian:jessie +MAINTAINER Laurent Modolo + +ENV PACKAGES apt-utils \ + curl \ + build-essential 
\ + libssl-dev \ + libpng-dev \ + uuid-dev \ + libmysqlclient-dev \ + procps \ + rsync + + +RUN apt-get update && \ + apt-get install -y ${PACKAGES} + +ENV UCSC_VERSION=407 + +RUN curl -k -L http://hgdownload.soe.ucsc.edu/admin/exe/userApps.v${UCSC_VERSION}.src.tgz -o userApps.v${UCSC_VERSION}.src.tgz &&\ +tar xvf userApps.v${UCSC_VERSION}.src.tgz &&\ +cd userApps/ && \ +make &&\ +cd .. &&\ +mv userApps/bin/* /usr/bin/ &&\ +rm -R userApps.v${UCSC_VERSION}.src.tgz &&\ +rm -R userApps diff --git a/src/.docker_modules/ucsc/407/docker_init.sh b/src/.docker_modules/ucsc/407/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..1f092a8f48aa56e22b30716949337871950795a2 --- /dev/null +++ b/src/.docker_modules/ucsc/407/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/ucsc:407 +docker build src/.docker_modules/ucsc/407/ -t 'lbmc/ucsc:407' +docker push lbmc/ucsc:407 diff --git a/src/nextflow.config b/src/nextflow.config new file mode 100644 index 0000000000000000000000000000000000000000..a30fd44fa68eec707aafbb03aaabb0749af83429 --- /dev/null +++ b/src/nextflow.config @@ -0,0 +1,101 @@ +nextflowVersion = '>=20' + +manifest { + homePage = 'https://gitbio.ens-lyon.fr/LBMC/nextflow' + description = 'pipeline to ' + mainScript = 'main.nf' + version = '0.0.0' +} + +report { + enabled = true + file = "$baseDir/../results/report.html" +} + +profiles { + docker { + docker.temp = "auto" + docker.enabled = true + process { + errorStrategy = 'finish' + memory = '16GB' + withLabel: big_mem_mono_cpus { + cpus = 1 + } + withLabel: big_mem_multi_cpus { + cpus = 4 + } + } + } + singularity { + singularity.enabled = true + singularity.cacheDir = "./bin/" + process { + errorStrategy = 'finish' + memory = '16GB' + withLabel: big_mem_mono_cpus { + cpus = 1 + } + withLabel: big_mem_multi_cpus { + cpus = 4 + } + } + } + psmn { + singularity.enabled = true + singularity.cacheDir = "$baseDir/.singularity_psmn/" + singularity.runOptions = "--bind 
/Xnfs,/scratch" + process{ + errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' } + maxRetries = 3 + withLabel: big_mem_mono_cpus { + executor = "sge" + clusterOptions = "-cwd -V" + cpus = 1 + memory = "128GB" + time = "12h" + queue = "monointeldeb128,monointeldeb192" + } + withLabel: big_mem_multi_cpus { + executor = "sge" + clusterOptions = "-cwd -V" + cpus = 32 + memory = "192GB" + time = "24h" + queue = "CLG*,SLG*,Epyc*" + penv = "openmp32" + + } + } + } + ccin2p3 { + singularity.enabled = true + singularity.cacheDir = "$baseDir/.singularity_in2p3/" + singularity.runOptions = "--bind /pbs,/sps,/scratch" + process{ + errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' } + maxRetries = 3 + withLabel: big_mem_mono_cpus { + scratch = true + stageInMode = "copy" + stageOutMode = "rsync" + executor = "sge" + clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n" + cpus = 1 + memory = "8GB" + queue = "huge" + } + withLabel: big_mem_multi_cpus { + container = "lbmc/urqt:d62c1f8" + scratch = true + stageInMode = "copy" + stageOutMode = "rsync" + executor = "sge" + clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n" + cpus = 1 + memory = "8GB" + queue = "huge" + } + } + } +} diff --git a/src/nf_modules/bedtools/main.nf b/src/nf_modules/bedtools/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..50a848e76e1c41f1217c7c47cc7d3f9e025d99b7 --- /dev/null +++ b/src/nf_modules/bedtools/main.nf @@ -0,0 +1,93 @@ +version = "2.25.0" +container_url = "lbmc/bedtools:${version}" + +process fasta_from_bed { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "${bed.baseName}" + + input: + path fasta + path bed + + output: + tuple val(bed.baseName), path("*_extracted.fasta"), emit: fasta + + script: +""" +bedtools getfasta -name \ +-fi ${fasta} -bed ${bed} -fo ${bed.baseName}_extracted.fasta +""" +} + +process merge_bed { + container = "${container_url}" + label "big_mem_mono_cpus" + tag 
"${bed.baseName}" + + input: + path bed + + output: + tuple val(bed[0].simpleName), path("*_merged.fasta"), emit: bed + + script: +""" +bedtools merge -i ${bed} > ${bed[0].simpleName}_merged.bed +""" +} + +process bam_to_fastq_singleend { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "${bam_id}" + + input: + tuple val(bam_id), path(bam) + + output: + tuple val(bam_id), path("*.fastq"), emit: fastq + + script: +""" +bedtools bamtofastq \ +-i ${bam} -fq ${bam.baseName}.fastq +""" +} + +process bam_to_fastq_pairedend { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "${bam_id}" + + input: + tuple val(bam_id), path(bam) + + output: + tuple val(bam_id), path("*.fastq"), emit: fastq + + script: +""" +bedtools bamtofastq \ +-i ${bam} -fq ${bam.baseName}_R1.fastq -fq2 ${bam.baseName}_R2.fastq +""" +} + +process bam_to_bedgraph { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "${bam_id}" + + input: + tuple val(bam_id), path(bam) + + output: + tuple val(bam_id), path("*.bg"), emit: bedgraph + + script: +""" +bedtools genomecov \ + -ibam ${bam} \ + -bg > ${bam.simpleName}.bg +""" +} diff --git a/src/nf_modules/bowtie/main.nf b/src/nf_modules/bowtie/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..d250e21f754a9ecb9c9c1bb84d174feb8b528fdd --- /dev/null +++ b/src/nf_modules/bowtie/main.nf @@ -0,0 +1,147 @@ +version = "1.2.2" +container_url = "lbmc/bowtie:${version}" + +process index_fasta { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$fasta.baseName" + + input: + path fasta + + output: + path "*.index*", emit: index + path "*_report.txt", emit: report + + script: +""" +bowtie-build --threads ${task.cpus} \ + -f ${fasta} ${fasta.baseName}.index &> \ + ${fasta.baseName}_bowtie_index_report.txt + +if grep -q "Error" ${fasta.baseName}_bowtie_index_report.txt; then + exit 1 +fi +""" +} + +process mapping_fastq { + container = "${container_url}" + label "big_mem_multi_cpus" 
+ tag "$pair_id" + + input: + path index + tuple val(pair_id), path(reads) + + output: + tuple val(pair_id), path("*.bam"), emit: bam + path "*_report.txt", emit: report + + script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + } + } +if (reads instanceof List) +""" +# -v specify the max number of missmatch, -k the number of match reported per +# reads +bowtie --best -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \ + -1 ${reads[0]} -2 ${reads[1]} 2> \ + ${pair_id}_bowtie_report_tmp.txt | \ + samtools view -Sb - > ${pair_id}.bam + +if grep -q "Error" ${pair_id}_bowtie_report_tmp.txt; then + exit 1 +fi +tail -n 19 ${pair_id}_bowtie_report_tmp.txt > \ + ${pair_id}_bowtie_mapping_report.txt +""" +else +""" +bowtie --best -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \ + -q ${reads} 2> \ + ${file_id}_bowtie_report_tmp.txt | \ + samtools view -Sb - > ${file_id}.bam + +if grep -q "Error" ${file_id}_bowtie_report_tmp.txt; then + exit 1 +fi +tail -n 19 ${file_id}_bowtie_report_tmp.txt > \ + ${file_id}_bowtie_mapping_report.txt +""" +} + +process mapping_fastq_pairedend { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$pair_id" + + input: + path index + tuple val(pair_id), path(reads) + + output: + tuple val(pair_id), path("*.bam"), emit: bam + path "*_report.txt", emit: report + + script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + } + } +""" +# -v specify the max number of missmatch, -k the number of match reported per +# reads +bowtie --best -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \ + -1 ${reads[0]} -2 ${reads[1]} 2> \ + ${pair_id}_bowtie_report_tmp.txt | \ + samtools view -Sb - > ${pair_id}.bam + +if grep -q "Error" ${pair_id}_bowtie_report_tmp.txt; then + exit 1 +fi +tail -n 19 
${pair_id}_bowtie_report_tmp.txt > \ + ${pair_id}_bowtie_mapping_report.txt +""" +} + + +process mapping_fastq_singleend { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + path index + tuple val(file_id), path(reads) + + output: + set file_id, "*.bam", emit: bam + file "*_report.txt", emit: report + + script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + } + } +""" +bowtie --best -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \ + -q ${reads} 2> \ + ${file_id}_bowtie_report_tmp.txt | \ + samtools view -Sb - > ${file_id}.bam + +if grep -q "Error" ${file_id}_bowtie_report_tmp.txt; then + exit 1 +fi +tail -n 19 ${file_id}_bowtie_report_tmp.txt > \ + ${file_id}_bowtie_mapping_report.txt +""" +} diff --git a/src/nf_modules/bowtie2/main.nf b/src/nf_modules/bowtie2/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..02d2663540b393f17b935bb4f4f0623bb5e2b2f3 --- /dev/null +++ b/src/nf_modules/bowtie2/main.nf @@ -0,0 +1,155 @@ +version = "2.3.4.1" +container_url = "lbmc/bowtie2:${version}" + +process index_fasta { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$fasta.baseName" + + input: + path fasta + + output: + path "*.index*", emit: index + path "*_report.txt", emit: report + + script: +""" +bowtie2-build --threads ${task.cpus} \ + ${fasta} \ + ${fasta.baseName}.index &> \ + ${fasta.baseName}_bowtie2_index_report.txt + +if grep -q "Error" ${fasta.baseName}_bowtie2_index_report.txt; then + exit 1 +fi +""" +} + + +process mapping_fastq { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$pair_id" + + input: + path index + tuple val(pair_id), path(reads) + + output: + tuple val(pair_id), path("*.bam"), emit: bam + path "*_report.txt", emit: report + + script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ 
/.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + } + } +if (reads instanceof List) +""" +bowtie2 --very-sensitive \ + -p ${task.cpus} \ + -x ${index_id} \ + -1 ${reads[0]} \ + -2 ${reads[1]} 2> \ + ${pair_id}_bowtie2_mapping_report_tmp.txt | \ + samtools view -Sb - > ${pair_id}.bam + +if grep -q "Error" ${pair_id}_bowtie2_mapping_report_tmp.txt; then + exit 1 +fi +tail -n 19 ${pair_id}_bowtie2_mapping_report_tmp.txt > \ + ${pair_id}_bowtie2_mapping_report.txt +""" +else +""" +bowtie2 --very-sensitive \ + -p ${task.cpus} \ + -x ${index_id} \ + -U ${reads} 2> \ + ${reads.baseName}_bowtie2_mapping_report_tmp.txt | \ + samtools view -Sb - > ${reads.baseName}.bam + +if grep -q "Error" ${reads.baseName}_bowtie2_mapping_report_tmp.txt; then + exit 1 +fi +tail -n 19 ${reads.baseName}_bowtie2_mapping_report_tmp.txt > \ + ${reads.baseName}_bowtie2_mapping_report.txt +""" +} + +process mapping_fastq_pairedend { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$pair_id" + + input: + path index + tuple val(pair_id), path(reads) + + output: + tuple val(pair_id), path("*.bam"), emit: bam + path "*_report.txt", emit: report + + script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + } + } +""" +bowtie2 --very-sensitive \ + -p ${task.cpus} \ + -x ${index_id} \ + -1 ${reads[0]} \ + -2 ${reads[1]} 2> \ + ${pair_id}_bowtie2_mapping_report_tmp.txt | \ + samtools view -Sb - > ${pair_id}.bam + +if grep -q "Error" ${pair_id}_bowtie2_mapping_report_tmp.txt; then + exit 1 +fi +tail -n 19 ${pair_id}_bowtie2_mapping_report_tmp.txt > \ + ${pair_id}_bowtie2_mapping_report.txt +""" +} + + +process mapping_fastq_singleend { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + path index + tuple val(file_id), path(reads) + + output: + tuple val(file_id), 
path("*.bam"), emit: bam + path "*_report.txt", emit: report + + script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + } + } +""" +bowtie2 --very-sensitive \ + -p ${task.cpus} \ + -x ${index_id} \ + -U ${reads} 2> \ + ${reads.baseName}_bowtie2_mapping_report_tmp.txt | \ + samtools view -Sb - > ${reads.baseName}.bam + +if grep -q "Error" ${reads.baseName}_bowtie2_mapping_report_tmp.txt; then + exit 1 +fi +tail -n 19 ${reads.baseName}_bowtie2_mapping_report_tmp.txt > \ + ${reads.baseName}_bowtie2_mapping_report.txt +""" +} diff --git a/src/nf_modules/bwa/main.nf b/src/nf_modules/bwa/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..c6551dce588281d973d8b14a7bf10a1e446fad2b --- /dev/null +++ b/src/nf_modules/bwa/main.nf @@ -0,0 +1,64 @@ +version = "0.7.17" +container_url = "lbmc/bwa:${version}" + +process index_fasta { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(fasta) + + output: + tuple val(file_id), path("${fasta.simpleName}.*"), emit: index + tuple val(file_id), path("*_bwa_report.txt"), emit: report + + script: +""" +bwa index -p ${fasta.simpleName} ${fasta} \ +&> ${fasta.simpleName}_bwa_report.txt +""" +} + + +process mapping_fastq { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(reads) + tuple val(index_id), path(index) + + output: + tuple val(file_id), path("*.bam"), emit: bam + tuple val(file_id), path("${id}_bwa_report.txt"), emit: report + + script: +if (file_id.containsKey('library')) { + library = file_id.library + id = file_id.id +} else { + library = file_id + id = file_id +} +bwa_mem_R = "@RG\\tID:${library}\\tSM:${library}\\tLB:lib_${library}\\tPL:illumina" +if (reads instanceof List) +""" +bwa mem -t ${task.cpus} \ +-R '${bwa_mem_R}' \ +${index_id} 
${reads[0]} ${reads[1]} 2> \ + ${id}_bwa_report.txt | \ + samtools view -@ ${task.cpus} -Sb - > ${id}.bam +""" +else + +""" +bwa mem -t ${task.cpus} \ +-R '${bwa_mem_R}' \ +${index_id} ${reads} 2> \ + ${id}_bwa_report.txt | \ + samtools view -@ ${task.cpus} -Sb - > ${id}.bam +""" +} + diff --git a/src/nf_modules/cutadapt/main.nf b/src/nf_modules/cutadapt/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..6464935194f9cb9ce88f7175669a15d770fbd74f --- /dev/null +++ b/src/nf_modules/cutadapt/main.nf @@ -0,0 +1,142 @@ +version = "2.1" +container_url = "lbmc/cutadapt:${version}" + +adapter_3_prim = "AGATCGGAAGAG" +adapter_5_prim = "CTCTTCCGATCT" +trim_quality = "20" + + +process adaptor_removal { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$pair_id" + + input: + tuple val(pair_id), path(reads) + + output: + tuple val(pair_id), path("*_cut_R{1,2}.fastq.gz"), emit: fastq + path "*_report.txt", emit: report + + script: +if (reads instanceof List) + """ + cutadapt -a ${adapter_3_prim} -g ${adapter_5_prim} -A ${adapter_3_prim} -G ${adapter_5_prim} \ + -o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_cut_R2.fastq.gz \ + ${reads[0]} ${reads[1]} > ${pair_id}_report.txt + """ +else: + """ + cutadapt -a ${adapter_3_prim} -g ${adapter_5_prim} \ + -o ${file_id}_cut.fastq.gz \ + ${reads} > ${file_id}_report.txt + """ +} + +process adaptor_removal_pairedend { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$pair_id" + + input: + tuple val(pair_id), path(reads) + + output: + tuple val(pair_id), path("*_cut_R{1,2}.fastq.gz"), emit: fastq + path "*_report.txt", emit: report + + script: + """ + cutadapt -a ${adapter_3_prim} -g ${adapter_5_prim} -A ${adapter_3_prim} -G ${adapter_5_prim} \ + -o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_cut_R2.fastq.gz \ + ${reads[0]} ${reads[1]} > ${pair_id}_report.txt + """ +} + +process adaptor_removal_singleend { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + 
+ input: + tuple val(file_id), path(reads) + + output: + tuple val(file_id), path("*_cut.fastq.gz"), emit: fastq + path "*_report.txt", emit: report + + script: + """ + cutadapt -a ${adapter_3_prim} -g ${adapter_5_prim} \ + -o ${file_id}_cut.fastq.gz \ + ${reads} > ${file_id}_report.txt + """ +} + +process trimming_pairedend { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$pair_id" + + input: + tuple val(pair_id), path(reads) + + output: + tuple val(pair_id), path("*_trim_R{1,2}.fastq.gz"), emit:fastq + path "*_report.txt", emit: report + + script: +if (reads instanceof List) + """ + cutadapt -q ${trim_quality},${trim_quality} \ + -o ${pair_id}_trim_R1.fastq.gz -p ${pair_id}_trim_R2.fastq.gz \ + ${reads[0]} ${reads[1]} > ${pair_id}_report.txt + """ +else + """ + cutadapt -q ${trim_quality},${trim_quality} \ + -o ${file_id}_trim.fastq.gz \ + ${reads} > ${file_id}_report.txt + """ +} + +process trimming_pairedend { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$pair_id" + + input: + tuple val(pair_id), path(reads) + + output: + tuple val(pair_id), path("*_trim_R{1,2}.fastq.gz"), emit:fastq + path "*_report.txt", emit: report + + script: + """ + cutadapt -q ${trim_quality},${trim_quality} \ + -o ${pair_id}_trim_R1.fastq.gz -p ${pair_id}_trim_R2.fastq.gz \ + ${reads[0]} ${reads[1]} > ${pair_id}_report.txt + """ +} + +process trimming_singleend { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(reads) + + output: + tuple val(file_id), path("*_trim.fastq.gz"), emit: fastq + path "*_report.txt", emit: report + + script: + """ + cutadapt -q ${trim_quality},${trim_quality} \ + -o ${file_id}_trim.fastq.gz \ + ${reads} > ${file_id}_report.txt + """ +} + diff --git a/src/nf_modules/deeptools/main.nf b/src/nf_modules/deeptools/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..ccf3657a3752ffff487bf81135d672e18ef63b84 --- /dev/null +++ 
b/src/nf_modules/deeptools/main.nf @@ -0,0 +1,87 @@
// deeptools module: build coverage tracks (bigWig) and meta-gene profile plots
// from sorted, indexed BAM files.
version = "3.1.1"
container_url = "lbmc/deeptools:${version}"

// Index a sorted BAM file so that bamCoverage can random-access it.
process index_bam {
  container = "${container_url}"
  // fixed: label was "big_mem__cpus" (double underscore), which matches no label
  // used anywhere else in these modules; the command threads on ${task.cpus},
  // so the multi-cpu label is the intended one.
  label "big_mem_multi_cpus"
  tag "$file_id"

  input:
    tuple val(file_id), path(bam)

  output:
    // emits the BAM together with its freshly created .bai index
    tuple val(file_id), path("*.bam*"), emit: bam

  script:
// NOTE(review): sambamba is not an advertised part of the lbmc/deeptools image —
// this looks copy-pasted from the sambamba module; confirm the container ships
// sambamba, or index with the sambamba/samtools module instead.
"""
sambamba index -t ${task.cpus} ${bam}
"""
}

// Convert an indexed BAM into a bigWig coverage track, skipping duplicate reads.
process bam_to_bigwig {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"

  input:
    tuple val(file_id), path(bam), path(idx)

  output:
    tuple val(file_id), path("*.bw"), emit: bw

  script:
"""
bamCoverage -p ${task.cpus} --ignoreDuplicates -b ${bam} \
  -o ${bam.simpleName}.bw
"""
}

// Score bigWig signal over the regions of a BED file (scale-regions mode,
// with a 100 bp flank on each side) for downstream profile plotting.
process compute_matrix {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "${bed_file_id}"

  input:
    tuple val(file_id), path(bw)
    tuple val(bed_file_id), path(bed)

  output:
    tuple val(bed_file_id), path("*.mat.gz"), emit: matrix

  script:
"""
computeMatrix scale-regions -S ${bw} \
  -p ${task.cpus} \
  -R ${bed} \
  --beforeRegionStartLength 100 \
  --afterRegionStartLength 100 \
  -o ${bed.simpleName}.mat.gz
"""
}

// Render a computeMatrix output as a per-group filled profile plot (PDF).
process plot_profile {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"

  input:
    tuple val(file_id), path(matrix)

  output:
    tuple val(file_id), path("*.pdf"), emit: pdf

  script:
/*
see more option at
https://deeptools.readthedocs.io/en/develop/content/tools/plotProfile.html
*/
// NOTE(review): relies on params.title being set by the calling pipeline.
"""
plotProfile -m ${matrix} \
  --plotFileFormat=pdf \
  -out ${matrix.simpleName}.pdf \
  --plotType=fill \
  --perGroup \
  --plotTitle "${params.title}"
"""
}
diff --git a/src/nf_modules/fastp/main.nf b/src/nf_modules/fastp/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..82f8d88062e982cd784f8673bc0267507dce3e69 --- /dev/null +++ b/src/nf_modules/fastp/main.nf @@ -0,0 +1,103 @@
// fastp module: adapter trimming + quality filtering with HTML/JSON QC reports.
version = "0.20.1"
container_url = "lbmc/fastp:${version}"

// Generic entry point: dispatches on the shape of `reads` — a List means
// paired-end, a single path means single-end.
process fastp {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$pair_id"
  publishDir "results/QC/fastp/", mode: 'copy', pattern: "*.html"

  input:
    tuple val(pair_id), path(reads)

  output:
    tuple val(pair_id), path("*.fastq.gz"), emit: fastq
    tuple val(pair_id), path("*.html"), emit: html
    tuple val(pair_id), path("*.json"), emit: report

  script:
if (reads instanceof List)
"""
fastp --thread ${task.cpus} \
--qualified_quality_phred 20 \
--disable_length_filtering \
--detect_adapter_for_pe \
--in1 ${reads[0]} \
--in2 ${reads[1]} \
--out1 ${pair_id}_R1_trim.fastq.gz \
--out2 ${pair_id}_R2_trim.fastq.gz \
--html ${pair_id}.html \
--json ${pair_id}_fastp.json \
--report_title ${pair_id}
"""
else
// fixed: dropped --detect_adapter_for_pe from the single-end branch — it is a
// paired-end-only fastp option and this branch provides only --in1.
"""
fastp --thread ${task.cpus} \
--qualified_quality_phred 20 \
--disable_length_filtering \
--in1 ${reads} \
--out1 ${pair_id}_trim.fastq.gz \
--html ${pair_id}.html \
--json ${pair_id}_fastp.json \
--report_title ${pair_id}
"""
}

// Explicit paired-end variant (kept for pipelines that wire channels by layout).
process fastp_pairedend {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$pair_id"
  publishDir "results/QC/fastp/", mode: 'copy', pattern: "*.html"

  input:
    tuple val(pair_id), path(reads)

  output:
    tuple val(pair_id), path("*.fastq.gz"), emit: fastq
    tuple val(pair_id), path("*.html"), emit: html
    tuple val(pair_id), path("*.json"), emit: report

  script:
"""
fastp --thread ${task.cpus} \
--qualified_quality_phred 20 \
--disable_length_filtering \
--detect_adapter_for_pe \
--in1 ${reads[0]} \
--in2 ${reads[1]} \
--out1 ${pair_id}_R1_trim.fastq.gz \
--out2 ${pair_id}_R2_trim.fastq.gz \
--html ${pair_id}.html \
--json ${pair_id}_fastp.json \
--report_title ${pair_id}
"""
}

// Explicit single-end variant.
process fastp_singleend {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$pair_id"
  publishDir "results/QC/fastp/", mode: 'copy', pattern: "*.html"

  input:
    tuple val(pair_id), path(reads)

  output:
    tuple val(pair_id), path("*.fastq.gz"), emit: fastq
    tuple val(pair_id), path("*.html"), emit: html
    tuple val(pair_id), path("*.json"), emit: report

  script:
// fixed: dropped --detect_adapter_for_pe — paired-end-only option passed to a
// single-end run (only --in1 is given here).
"""
fastp --thread ${task.cpus} \
--qualified_quality_phred 20 \
--disable_length_filtering \
--in1 ${reads} \
--out1 ${pair_id}_trim.fastq.gz \
--html ${pair_id}.html \
--json ${pair_id}_fastp.json \
--report_title ${pair_id}
"""
}
diff --git a/src/nf_modules/fastqc/main.nf b/src/nf_modules/fastqc/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..5e770297d16b3a7dd3591d73091698d01738c4a3 --- /dev/null +++ b/src/nf_modules/fastqc/main.nf @@ -0,0 +1,61 @@
// fastqc module: per-fastq quality-control reports (zip + html).
version = "0.11.5"
container_url = "lbmc/fastqc:${version}"

// Generic entry point: a List of reads means paired-end, otherwise single-end.
process fastqc_fastq {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$pair_id"

  input:
    tuple val(pair_id), path(reads)

  output:
    path "*.{zip,html}", emit: report

  script:
if (reads instanceof List)
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
  ${reads[0]} ${reads[1]}
"""
else
"""
  fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
"""
}

// Explicit paired-end variant.
process fastqc_fastq_pairedend {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$pair_id"

  input:
    tuple val(pair_id), path(reads)

  output:
    path "*.{zip,html}", emit: report

  script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
  ${reads[0]} ${reads[1]}
"""
}

// Explicit single-end variant.
process fastqc_fastq_singleend {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"

  input:
    tuple val(file_id), path(reads)

  output:
    path "*.{zip,html}", emit: report

  script:
"""
  fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
"""
}
diff --git a/src/nf_modules/g2gtools/main.nf b/src/nf_modules/g2gtools/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..18a05b640b79a30c862caab6538316da5d3031e7 --- /dev/null +++
b/src/nf_modules/g2gtools/main.nf @@ -0,0 +1,129 @@ +version = "0.2.8" +container_url = "lbmc/g2gtools:${version}" + +process vci_build { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta) + output: + tuple val(file_id), path("*.vci.gz"), path("*.vci.gz.tbi"), emit: vci + tuple val(file_id), path("*_report.txt"), emit: report + script: + input_vcf = "" + for (vcf_file in vcf) { + input_vcf += " -i ${vcf_file}" + } +""" +g2gtools vcf2vci \ + -p ${task.cpus} \ + -f ${fasta} \ + ${input_vcf} \ + -s ${file_id} \ + -o ${file_id}.vci 2> ${file_id}_g2gtools_vcf2vci_report.txt +""" +} + +process incorporate_snp { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(vci), path(tbi) + tuple val(ref_id), path(fasta) + output: + tuple val(file_id), path("${file_id}_snp.fa"), path("${vci}"), path("${tbi}"), emit: fasta + tuple val(file_id), path("*_report.txt"), emit: report + script: +""" +g2gtools patch \ + -p ${task.cpus} \ + -i ${fasta} \ + -c ${vci} \ + -o ${file_id}_snp.fa 2> ${file_id}_g2gtools_path_report.txt +""" +} + +process incorporate_indel { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(fasta), path(vci), path(tbi) + output: + tuple val(file_id), path("${file_id}_snp_indel.fa"), path("${vci}"), path("${tbi}"), emit: fasta + tuple val(file_id), path("*_report.txt"), emit: report + script: +""" +g2gtools transform \ + -p ${task.cpus} \ + -i ${fasta} \ + -c ${vci} \ + -o ${file_id}_snp_indel.fa 2> ${file_id}_g2gtools_transform_report.txt +""" +} + +process convert_gtf { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(vci), path(tbi) + tuple val(annot_id), path(gtf) + output: + tuple val(file_id), path("${file_id}.gtf"), emit: gtf + tuple val(file_id), 
path("*_report.txt"), emit: report + script: +""" +g2gtools convert \ + -i ${gtf} \ + -c ${vci} \ + -o ${file_id}.gtf 2> ${file_id}_g2gtools_convert_report.txt +""" +} + +process convert_bed { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(vci), path(tbi) + tuple val(annot_id), path(bed) + output: + tuple val(file_id), path("${file_id}.bed"), emit: bed + tuple val(file_id), path("*_report.txt"), emit: report + script: +""" +g2gtools convert \ + -i ${bed} \ + -c ${vci} \ + -o ${file_id}.bed 2> ${file_id}_g2gtools_convert_report.txt +""" +} + +process convert_bam { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "${bam_id} ${file_id}" + + input: + tuple val(file_id), path(vci), path(tbi) + tuple val(bam_id), path(bam) + output: + tuple val(file_id), path("${file_id}_${bam_id.baseName}.bam"), emit: bam + tuple val(file_id), path("*_report.txt"), emit: report + script: +""" +g2gtools convert \ + -i ${bam} \ + -c ${vci} \ + -o ${file_id}_${bam.baseName}.bam 2> ${file_id}_g2gtools_convert_report.txt +""" +} \ No newline at end of file diff --git a/src/nf_modules/gatk3/main.nf b/src/nf_modules/gatk3/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..cb3656f4191dba556bcff54a7c6a675c49a5e93e --- /dev/null +++ b/src/nf_modules/gatk3/main.nf @@ -0,0 +1,265 @@ +version = "3.8.0" +container_url = "lbmc/gatk:${version}" + +process variant_calling { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam), path(bai) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*.vcf"), emit: vcf + + script: +""" +gatk3 -T HaplotypeCaller \ + -nct ${task.cpus} \ + -R ${fasta} \ + -I ${bam} \ + -o ${file_id}.vcf +""" +} + +process filter_snp { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(vcf) + tuple 
val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_snp.vcf"), emit: vcf + script: +""" +gatk3 -T SelectVariants \ + -nct ${task.cpus} \ + -R ${fasta} \ + -V ${vcf} \ + -selectType SNP \ + -o ${file_id}_snp.vcf +""" +} + +process filter_indels { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_indel.vcf"), emit: vcf + script: +""" +gatk3 -T SelectVariants \ + -nct ${task.cpus} \ + -R ${fasta} \ + -V ${vcf} \ + -selectType INDEL \ + -o ${file_id}_indel.vcf +""" +} + +high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)" + +process high_confidence_snp { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_snp.vcf"), emit: vcf + script: +""" +gatk3 -T VariantFiltration \ + -nct ${task.cpus} \ + -R ${fasta} \ + -V ${vcf} \ + --filterExpression "${high_confidence_snp_filter}" \ + --filterName "basic_snp_filter" \ + -o ${file_id}_filtered_snp.vcf +""" +} + +high_confidence_indel_filter = "QD < 3.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0" + +process high_confidence_indels { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_indel.vcf"), emit: vcf + script: +""" +gatk3 -T VariantFiltration \ + -nct ${task.cpus} \ + -R ${fasta} \ + -V ${vcf} \ + --filterExpression "${high_confidence_indel_filter}" \ + --filterName "basic_indel_filter" \ + -o ${file_id}_filtered_indel.vcf +""" +} + +process recalibrate_snp_table { + container = "${container_url}" + 
label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("recal_data_table"), emit: recal_table + script: +""" +gatk3 -T BaseRecalibrator \ + -nct ${task.cpus} \ + -R ${fasta} \ + -I ${bam} \ + -knownSites ${snp_file} \ + -knownSites ${indel_file} \ + -o recal_data_table +""" +} + +process recalibrate_snp { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx) + tuple val(table_id), path(recal_data_table) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*.bam"), emit: bam + script: +""" +gatk3 -T PrintReads \ + --use_jdk_deflater \ + --use_jdk_inflater \ + -nct ${task.cpus} \ + -R ${fasta} \ + -I ${bam} \ + -BQSR recal_data_table \ + -o ${file_id}_recal.bam +""" +} + +process haplotype_caller { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*.gvcf"), emit: gvcf + script: +""" +gatk3 -T HaplotypeCaller \ + -nct ${task.cpus} \ + -R ${fasta} \ + -I ${bam} \ + -ERC GVCF \ + -variant_index_type LINEAR -variant_index_parameter 128000 \ + -o ${file_id}.gvcf +""" +} + +process gvcf_genotyping { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(gvcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*.vcf"), emit: vcf + script: +""" +gatk3 -T GenotypeGVCFs \ + -nct ${task.cpus} \ + -R ${fasta} \ + -V ${gvcf} \ + -o ${file_id}_joint.vcf +""" +} + +process select_variants_snp { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple 
val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_joint_snp.vcf"), emit: vcf + script: +""" +gatk3 -T SelectVariants \ + -nct ${task.cpus} \ + -R ${fasta} \ + -V ${vcf} \ + -selectType SNP \ + -o ${file_id}_joint_snp.vcf +""" +} + +process select_variants_indels { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_joint_indel.vcf"), emit: vcf + script: +""" +gatk3 -T SelectVariants \ + -nct ${task.cpus} \ + -R ${fasta} \ + -V ${vcf} \ + -selectType INDEL \ + -o ${file_id}_joint_indel.vcf +""" +} + +process personalized_genome { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_genome.fasta"), emit: fasta + + script: + library = pick_library(file_id, library_list) +""" +gatk3 -T FastaAlternateReferenceMaker\ + -R ${reference} \ + -V ${vcf} \ + -o ${library}_genome.fasta +""" +} + diff --git a/src/nf_modules/gatk4/main.nf b/src/nf_modules/gatk4/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..22efa0e0c0253f9f6a57db529d018c7978a93568 --- /dev/null +++ b/src/nf_modules/gatk4/main.nf @@ -0,0 +1,265 @@ +version = "4.2.0.0" +container_url = "broadinstitute/gatk:${version}" + +process variant_calling { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam), path(bai) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*.vcf"), emit: vcf + + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ +""" +gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \ + -R ${fasta} \ + -I ${bam} \ + -O ${bam.simpleName}.vcf +""" +} + +process filter_snp { + 
container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_snp.vcf"), emit: vcf + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ +""" +gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ + -R ${fasta} \ + -V ${vcf} \ + -select-type SNP \ + -O ${vcf.simpleName}_snp.vcf +""" +} + +process filter_indels { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_indel.vcf"), emit: vcf + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ +""" +gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ + -R ${fasta} \ + -V ${vcf} \ + -select-type INDEL \ + -O ${vcf.simpleName}_indel.vcf +""" +} + +high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)" + +process high_confidence_snp { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_snp.vcf"), emit: vcf + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ +""" +gatk --java-options "-Xmx${xmx_memory}G" VariantFiltration \ + -R ${fasta} \ + -V ${vcf} \ + --filter-expression "${high_confidence_snp_filter}" \ + --filter-name "basic_snp_filter" \ + -O ${vcf.simpleName}_filtered_snp.vcf +""" +} + +high_confidence_indel_filter = "QD < 3.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0" + +process high_confidence_indels { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_indel.vcf"), emit: vcf + 
script: + xmx_memory = "${task.memory}" - ~/\s*GB/ +""" +gatk --java-options "-Xmx${xmx_memory}G" VariantFiltration \ + -R ${fasta} \ + -V ${vcf} \ + --filter-expression "${high_confidence_indel_filter}" \ + --filter-name "basic_indel_filter" \ + -O ${vcf.simpleName}_filtered_indel.vcf +""" +} + +process recalibrate_snp_table { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("recal_data_table"), emit: recal_table + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ +""" +gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \ + -I ${snp_file} +gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \ + -I ${indel_file} +gatk --java-options "-Xmx${xmx_memory}G" BaseRecalibrator \ + -R ${fasta} \ + -I ${bam} \ + -known-sites ${snp_file} \ + -known-sites ${indel_file} \ + -O recal_data_table +""" +} + +process recalibrate_snp { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx), path(recal_table) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*.bam"), emit: bam + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ +""" +gatk --java-options "-Xmx${xmx_memory}G" ApplyBQSR \ + -R ${fasta} \ + -I ${bam} \ + --bqsr-recal-file recal_data_table \ + -O ${bam.simpleName}_recal.bam +""" +} + +process haplotype_caller { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*.gvcf"), emit: gvcf + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ +""" +gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \ + -R ${fasta} \ + -I ${bam} \ + 
-ERC GVCF \ + -O ${bam.simpleName}.gvcf +""" +} + +process gvcf_genotyping { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(gvcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*.vcf.gz"), emit: vcf + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ +""" +gatk --java-options "-Xmx${xmx_memory}G" GenotypeGVCFs \ + -R ${fasta} \ + -V ${gvcf} \ + -O ${gvcf.simpleName}_joint.vcf.gz +""" +} + +process select_variants_snp { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_joint_snp.vcf"), emit: vcf + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ +""" +gatk --java-options "-Xmx${xmx_memory}GG" SelectVariants \ + -R ${fasta} \ + -V ${vcf} \ + -select-type SNP \ + -O ${vcf.simpleName}_joint_snp.vcf +""" +} + +process select_variants_indels { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_joint_indel.vcf"), emit: vcf + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ +""" +gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ + -R ${fasta} \ + -V ${vcf} \ + -select-type INDEL \ + -O ${file_id}_joint_indel.vcf +""" +} + +process personalized_genome { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_genome.fasta"), emit: fasta + + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ +""" +gatk --java-options "-Xmx${xmx_memory}G" FastaAlternateReferenceMaker\ + -R ${reference} \ + -V ${vcf} \ + -O ${vcf.simpleName}_genome.fasta +""" +} + diff --git 
a/src/nf_modules/kallisto/main.nf b/src/nf_modules/kallisto/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..bb80e4b361d08b3a34598ee1857268005545a030 --- /dev/null +++ b/src/nf_modules/kallisto/main.nf @@ -0,0 +1,68 @@
// kallisto module: transcriptome index build and pseudo-alignment quantification.
version = "0.44.0"
container_url = "lbmc/kallisto:${version}"

// Build a kallisto index (k=31, unique names) from a transcript fasta.
process index_fasta {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$fasta.baseName"

  input:
    path fasta

  output:
    path "*.index*", emit: index
    path "*_report.txt", emit: report

  script:
"""
kallisto index -k 31 --make-unique -i ${fasta.baseName}.index ${fasta} \
2> ${fasta.baseName}_kallisto_index_report.txt
"""
}


// Quantify a paired-end sample; emits the per-sample output directory.
process mapping_fastq_pairedend {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$pair_id"

  input:
    path index
    tuple val(pair_id), path(reads)

  output:
    path "${pair_id}", emit: counts
    path "*_report.txt", emit: report

  script:
"""
mkdir ${pair_id}
kallisto quant -i ${index} -t ${task.cpus} \
--bias --bootstrap-samples 100 -o ${pair_id} \
${reads[0]} ${reads[1]} &> ${pair_id}_kallisto_mapping_report.txt
"""
}


// Quantify a single-end sample; fragment length mean/sd come from params.
process mapping_fastq_singleend {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"

  input:
    path index
    tuple val(file_id), path(reads)

  output:
    // fixed: emitted path was "${pair_id}", a variable that does not exist in
    // this process — the directory actually created below is ${file_id}.
    tuple val(file_id), path("${file_id}"), emit: counts
    path "*_report.txt", emit: report

  script:
// NOTE(review): relies on params.mean and params.sd being set by the pipeline.
"""
mkdir ${file_id}
kallisto quant -i ${index} -t ${task.cpus} --single \
--bias --bootstrap-samples 100 -o ${file_id} \
-l ${params.mean} -s ${params.sd} \
${reads} &> ${reads.simpleName}_kallisto_mapping_report.txt
"""
}
diff --git a/src/nf_modules/macs2/main.nf b/src/nf_modules/macs2/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..a350fb438e71b9bfa7b5a3f42fe77e84e4fec651 --- /dev/null +++ b/src/nf_modules/macs2/main.nf @@ -0,0 +1,76 @@
// macs2 module: peak calling on BAM or bedGraph IP/control pairs.
version = "2.1.2"
container_url = "lbmc/macs2:${version}"

params.macs_gsize=3e9
// two space-separated bounds passed straight to `--mfold`
params.macs_mfold="5 50"

// Call peaks from IP vs control BAM files.
process peak_calling {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"

  input:
    tuple val(file_id), path(bam_ip), path(bam_control)

  output:
    tuple val(file_id), path("*.narrowPeak"), emit: peak
    tuple val(file_id), path("*.bed"), emit: summits
    tuple val(file_id), path("*_peaks.xls"), path("*_report.txt"), emit: report

  script:
/* remove --nomodel option for real dataset */
// macs2 exits 0 even on some failures, so the report is scanned for ERROR.
"""
macs2 callpeak \
  --treatment ${bam_ip} \
  --call-summits \
  --control ${bam_control} \
  --keep-dup all \
  --name ${bam_ip.simpleName} \
  --mfold ${params.macs_mfold} \
  --gsize ${params.macs_gsize} 2> \
  ${bam_ip.simpleName}_macs2_report.txt

if grep -q "ERROR" ${bam_ip.simpleName}_macs2_report.txt; then
  echo "MACS2 error"
  exit 1
fi
"""
}

// Call peaks from IP vs control bedGraph files (converted to BED first).
process peak_calling_bg {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"

  input:
    tuple val(file_id), path(bg_ip), path(bg_control)

  output:
    tuple val(file_id), path("*.narrowPeak"), emit: peak
    tuple val(file_id), path("*.bed"), emit: summits
    tuple val(file_id), path("*_report.txt"), emit: report

  script:
/* remove --nomodel option for real dataset */
"""
awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_ip} > \
  ${bg_ip.simpleName}.bed
awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_control} > \
  ${bg_control.simpleName}.bed
macs2 callpeak \
  --treatment ${bg_ip.simpleName}.bed \
  --call-summits \
  --control ${bg_control.simpleName}.bed \
  --keep-dup all \
  --name ${bg_ip.simpleName} \
  --mfold ${params.macs_mfold} \
  --gsize ${params.macs_gsize} 2> \
  ${bg_ip.simpleName}_macs2_report.txt

if grep -q "ERROR" ${bg_ip.simpleName}_macs2_report.txt; then
  echo "MACS2 error"
  exit 1
fi
"""
}
diff --git a/src/nf_modules/macs3/main.nf b/src/nf_modules/macs3/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..c36140aa236ad220213d8bc61ad726dcda170145 --- /dev/null +++
b/src/nf_modules/macs3/main.nf @@ -0,0 +1,74 @@
// macs3 module: peak calling on BAM or bedGraph IP/control pairs (MACS3 alpha).
version = "3.0.0a6"
container_url = "lbmc/macs3:${version}"

params.macs_gsize=3e9
// [lower, upper] bounds for the --mfold model-building filter
params.macs_mfold=[5, 50]

// Call peaks from IP vs control BAM files.
process peak_calling {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"

  input:
    tuple val(file_id), path(bam_ip), path(bam_control)

  output:
    path "*", emit: peak
    path "*_report.txt", emit: report

  script:
/* remove --nomodel option for real dataset */
// fixed: the --mfold line passed the literal text "params.macs_mfold[0]" (no
// ${} interpolation) and lacked the trailing backslash, so "--name ..." was
// executed as a separate broken shell command.
// macs3 exits 0 even on some failures, so the report is scanned for ERROR.
"""
macs3 callpeak \
  --treatment ${bam_ip} \
  --call-summits \
  --control ${bam_control} \
  --keep-dup all \
  --mfold ${params.macs_mfold[0]} ${params.macs_mfold[1]} \
  --name ${bam_ip.simpleName} \
  --gsize ${params.macs_gsize} 2> \
  ${bam_ip.simpleName}_macs3_report.txt

if grep -q "ERROR" ${bam_ip.simpleName}_macs3_report.txt; then
  echo "MACS3 error"
  exit 1
fi
"""
}

// Call peaks from IP vs control bedGraph files (converted to BED first).
process peak_calling_bg {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "${file_id}"

  input:
    tuple val(file_id), path(bg_ip), path(bg_control)

  output:
    path "*", emit: peak
    path "*_report.txt", emit: report

  script:
/* remove --nomodel option for real dataset */
// fixed: same --mfold interpolation + missing line-continuation bug as above.
"""
awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_ip} > \
  ${bg_ip.simpleName}.bed
awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_control} > \
  ${bg_control.simpleName}.bed
macs3 callpeak \
  --treatment ${bg_ip.simpleName}.bed \
  --call-summits \
  --control ${bg_control.simpleName}.bed \
  --keep-dup all \
  --mfold ${params.macs_mfold[0]} ${params.macs_mfold[1]} \
  --name ${bg_ip.simpleName} \
  --gsize ${params.macs_gsize} 2> \
  ${bg_ip.simpleName}_macs3_report.txt

if grep -q "ERROR" ${bg_ip.simpleName}_macs3_report.txt; then
  echo "MACS3 error"
  exit 1
fi
"""
}
diff --git a/src/nf_modules/minimap2/main.nf b/src/nf_modules/minimap2/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..73dc0e34560110c010da21bd6b3154beac131535 --- /dev/null +++ b/src/nf_modules/minimap2/main.nf @@ -0,0 +1,50 @@
+version = "2.17" +container_url = "lbmc/minimap2:${version}" + +process index_fasta { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$fasta.baseName" + + input: + path fasta + + output: + tuple path("${fasta}"), path("*.mmi*"), emit: index + path "*_report.txt", emit: report + + script: + memory = "${task.memory}" - ~/\s*GB/ +""" +minimap2 -t ${task.cpus} -I ${memory}G -d ${fasta.baseName}.mmi ${fasta} +""" +} + + +process mapping_fastq { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$pair_id" + + input: + tuple path(fasta), path(index) + tuple val(pair_id), path(reads) + + output: + tuple val(pair_id), path("*.bam"), emit: bam + path "*_report.txt", emit: report + + script: + memory = "${task.memory}" - ~/\s*GB/ + memory = memory / (task.cpus + 1.0) +if (reads instanceof List) +""" +minimap2 -ax sr -t ${task.cpus} -K ${memory} ${fasta} ${reads[0]} ${reads[1]} | + samtools view -Sb - > ${pair_id}.bam +""" +else +""" +minimap2 -ax sr -t ${task.cpus} -K ${memory} ${fasta} ${reads} | + samtools view -Sb - > ${reads.baseName}.bam +""" +} \ No newline at end of file diff --git a/src/nf_modules/multiqc/main.nf b/src/nf_modules/multiqc/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..64cecaace1fbce77b5fecd99f47e253b33ded071 --- /dev/null +++ b/src/nf_modules/multiqc/main.nf @@ -0,0 +1,19 @@ +version = "1.9" +container_url = "lbmc/multiqc:${version}" + +process multiqc { + container = "${container_url}" + label "big_mem_mono_cpus" + publishDir "results/QC/", mode: 'copy' + + input: + path report + + output: + path "*multiqc_*", emit: report + + script: +""" +multiqc -f . 
+""" +} diff --git a/src/nf_modules/picard/main.nf b/src/nf_modules/picard/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..aa24096cef2ee44322e8648532b2a7ebe092411a --- /dev/null +++ b/src/nf_modules/picard/main.nf @@ -0,0 +1,61 @@ +version = "2.18.11" +container_url = "lbmc/picard:${version}" + +process mark_duplicate { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam) + output: + tuple val(file_id) , path("*.bam"), emit: bam + path "*_report.txt", emit: report + + + script: +""" +PicardCommandLine MarkDuplicates \ + VALIDATION_STRINGENCY=LENIENT \ + REMOVE_DUPLICATES=true \ + INPUT=${bam} \ + OUTPUT=${bam.baseName}_dedup.bam \ + METRICS_FILE=${bam.baseName}_picard_dedup_report.txt &> \ + picard_${bam.baseName}.log +""" +} + +process index_fasta { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(fasta) + output: + tuple val(file_id), path("*.dict"), emit: index + + script: +""" +PicardCommandLine CreateSequenceDictionary \ +REFERENCE=${fasta} \ +OUTPUT=${fasta.baseName}.dict +""" +} + +process index_bam { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam) + output: + tuple val(file_id), path("*"), emit: index + + script: +""" +PicardCommandLine BuildBamIndex \ +INPUT=${bam} +""" +} diff --git a/src/nf_modules/sambamba/main.nf b/src/nf_modules/sambamba/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..e07210bb98312e6ac52db4d8a59462df7bf5b738 --- /dev/null +++ b/src/nf_modules/sambamba/main.nf @@ -0,0 +1,57 @@ +version = "0.6.7" +container_url = "lbmc/sambamba:${version}" + +process index_bam { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam) + + output: + tuple val(file_id), path("*.bam*"), emit: bam + + script: +""" +sambamba index -t 
${task.cpus} ${bam} +""" +} + +process sort_bam { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam) + + output: + tuple val(file_id), path("*.bam*"), emit: bam + + script: +""" +sambamba sort -t ${task.cpus} -o ${bam.baseName}_sorted.bam ${bam} +""" +} + + +process split_bam { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam) + + output: + tuple val(file_id), path("*_forward.bam*"), emit: bam_forward + tuple val(file_id), path("*_reverse.bam*"), emit: bam_reverse + script: +""" +sambamba view -t ${task.cpus} -h -F "strand == '+'" ${bam} > \ + ${bam.baseName}_forward.bam +sambamba view -t ${task.cpus} -h -F "strand == '-'" ${bam} > \ + ${bam.baseName}_reverse.bam +""" +} diff --git a/src/nf_modules/samtools/main.nf b/src/nf_modules/samtools/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..8143a81468fbd210c2c2bdc3b3eff1bd0d3f55b3 --- /dev/null +++ b/src/nf_modules/samtools/main.nf @@ -0,0 +1,228 @@ +version = "1.11" +container_url = "lbmc/samtools:${version}" + +process index_fasta { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(fasta) + output: + tuple val(file_id), path("*.fai"), emit: index + + script: +""" +samtools faidx ${fasta} +""" +} + +filter_bam_quality_threshold = 30 + +process filter_bam_quality { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam) + + output: + tuple val(file_id), path("*_filtered.bam"), emit: bam + script: +""" +samtools view -@ ${task.cpus} -hb ${bam} -q ${filter_bam_quality_threshold} > \ + ${bam.simpleName}_filtered.bam +""" +} + + +process filter_bam { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam) + path bed + + output: + tuple val(file_id), 
path("*_filtered.bam"), emit: bam + script: +""" +samtools view -@ ${task.cpus} -hb ${bam} -L ${bed} > \ + ${bam.simpleName}_filtered.bam +""" +} + +process filter_bam_mapped { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam) + + output: + tuple val(file_id), path("*_mapped.bam"), emit: bam + script: +""" +samtools view -@ ${task.cpus} -F 4 -hb ${bam} > \ + ${bam.simpleName}_mapped.bam +""" +} + +process filter_bam_unmapped { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam) + + output: + tuple val(file_id), path("*_unmapped.bam"), emit: bam + script: +""" +samtools view -@ ${task.cpus} -f 4 -hb ${bam} > ${bam.simpleName}_unmapped.bam +""" +} + + +process index_bam { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam) + + output: + tuple val(file_id), path(bam), emit: bam + tuple val(file_id), path("*.bam.bai"), emit: bam_idx + + script: +""" +samtools index ${bam} +""" +} + +process sort_bam { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam) + + output: + tuple val(file_id), path("*.bam*"), emit: bam + + script: +""" +samtools sort -@ ${task.cpus} -O BAM -o ${bam.simpleName}_sorted.bam ${bam} +""" +} + + +process split_bam { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam) + + output: + tuple val(file_id), path("*_forward.bam*"), emit: bam_forward + tuple val(file_id), path("*_reverse.bam*"), emit: bam_reverse + script: +""" +samtools view --@ ${Math.round(task.cpus/2)} \ + -hb -F 0x10 ${bam} > ${bam.simpleName}_forward.bam & +samtools view --@ ${Math.round(task.cpus/2)} \ + -hb -f 0x10 ${bam} > ${bam.simpleName}_reverse.bam +""" +} + + +process merge_bam { + container = "${container_url}" + label 
"big_mem_multi_cpus" + tag "$file_id" + cpus = 2 + + input: + tuple val(first_file_id), path(first_bam) + tuple val(second_file_id), path(second_bam) + + output: + tuple val(file_id), path("*.bam*"), emit: bam + script: +""" +samtools merge ${first_bam} ${second_bam} \ + ${first_bam.simpleName}_${second_file.simpleName}.bam +""" +} + +process merge_multi_bam { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + cpus = 2 + + input: + tuple val(file_id), path(bams) + + output: + tuple val(file_id), path("*_merged.bam*"), emit: bam + script: +""" +samtools merge -@ ${task.cpus} \ + ${bams[0].simpleName}_merged.bam \ + ${bams} +""" +} + +process stats_bam { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + cpus = 2 + + input: + tuple val(file_id), path(bam) + + output: + tuple val(file_id), path("*.tsv"), emit: tsv + script: +""" +samtools flagstat -@ ${task.cpus} -O tsv ${bam} > ${bam.simpleName}_stats.tsv +""" +} + +process flagstat_2_multiqc { + tag "$file_id" + + input: + tuple val(file_id), path(tsv) + + output: + path "*.txt" , emit: report +""" +mv ${tsv} ${tsv.simpleName}.flagstat.txt +""" +} + +process idxstat_2_multiqc { + tag "$file_id" + + input: + tuple val(file_id), path(tsv) + + output: + path "*.txt", emit: report +""" +mv ${tsv} ${tsv.simpleName}.idxstats.txt +""" +} \ No newline at end of file diff --git a/src/nf_modules/sratoolkit/main.nf b/src/nf_modules/sratoolkit/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..3396d6e3e749eca10e6d69b3aa72e47ef81217f1 --- /dev/null +++ b/src/nf_modules/sratoolkit/main.nf @@ -0,0 +1,27 @@ +version = "2.8.2" +container_url = "lbmc/sratoolkit:${version}" + +process fastq_dump { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$sra" + + input: + val sra + + output: + tuple val(sra), path("*.fastq"), emit: fastq + + script: +""" +fastq-dump --split-files --gzip ${sra} +if [ -f ${sra}_1.fastq ] +then + mv 
${sra}_1.fastq ${sra}_R1.fastq
+fi
+if [ -f ${sra}_2.fastq ]
+then
+  mv ${sra}_2.fastq ${sra}_R2.fastq
+fi
+"""
+}
diff --git a/src/nf_modules/ucsc/main.nf b/src/nf_modules/ucsc/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..1e288e835c03a7564587e7862d28944683ce4780
--- /dev/null
+++ b/src/nf_modules/ucsc/main.nf
@@ -0,0 +1,28 @@
+version = "407"
+container_url = "lbmc/ucsc:${version}"
+
+process bedgraph_to_bigwig {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+
+  input:
+    tuple val(file_id), path(bg)
+    tuple val(bed_id), path(bed)
+
+  output:
+    tuple val(file_id), path("*.bw"), emit: bw
+
+  script:
+"""
+LC_COLLATE=C
+# transform bed file of start-stop chromosome size to stop chromosome size
+awk -v OFS="\\t" '{print \$1, \$3}' ${bed} > chromsize.txt
+
+sort -T ./ -k1,1 -k2,2n ${bg} > ${bg.simpleName}_sorted.bg
+bedGraphToBigWig ${bg.simpleName}_sorted.bg \
+  chromsize.txt \
+  ${bg.simpleName}_norm.bw
+"""
+}
+
diff --git a/src/nf_modules/urqt/main.nf b/src/nf_modules/urqt/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..48200cc0487938db3c8295e12c5077d8e721b39d
--- /dev/null
+++ b/src/nf_modules/urqt/main.nf
@@ -0,0 +1,76 @@
+version = "d62c1f8"
+container_url = "lbmc/urqt:${version}"
+
+trim_quality = "20"
+
+process trimming {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "${reads}"
+
+  input:
+    tuple val(pair_id), path(reads)
+
+  output:
+    tuple val(pair_id), path("*_trim_R{1,2}.fastq.gz"), emit: fastq
+    path "*_report.txt", emit: report
+
+  script:
+if (reads instanceof List)
+"""
+UrQt --t 20 --m ${task.cpus} --gz \
+  --in ${reads[0]} --inpair ${reads[1]} \
+  --out ${pair_id}_trim_R1.fastq.gz --outpair ${pair_id}_trim_R2.fastq.gz \
+  > ${pair_id}_trimming_report.txt
+"""
+else
+"""
+UrQt --t 20 --m ${task.cpus} --gz \
+  --in ${reads} \
+  --out ${pair_id}_trim.fastq.gz \
+  > ${pair_id}_trimming_report.txt
+"""
+}
+
+process trimming_pairedend {
+  container = "${container_url}"
+  label
"big_mem_multi_cpus" + tag "${reads}" + + input: + tuple val(pair_id), path(reads) + + output: + tuple val(pair_id), path("*_trim_R{1,2}.fastq.gz"), emit: fastq + path "*_report.txt", emit: report + + script: +""" +UrQt --t 20 --m ${task.cpus} --gz \ + --in ${reads[0]} --inpair ${reads[1]} \ + --out ${pair_id}_trim_R1.fastq.gz --outpair ${pair_id}_trim_R2.fastq.gz \ + > ${pair_id}_trimming_report.txt +""" +} + +process trimming_singleend { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(reads) + + output: + tuple val(file_id), path("*_trim.fastq.gz"), emit: fastq + path "*_report.txt", emit: report + + script: +""" +UrQt --t 20 --m ${task.cpus} --gz \ + --in ${reads} \ + --out ${file_id}_trim.fastq.gz \ + > ${file_id}_trimming_report.txt +""" +} + diff --git a/src/solution_RNASeq.nf b/src/solution_RNASeq.nf index 73940d6595aa3828629292cff067fcf20d3603f0..1f43fd1ff6be5c42453c07b581f722ad07feafa6 100644 --- a/src/solution_RNASeq.nf +++ b/src/solution_RNASeq.nf @@ -1,3 +1,9 @@ +nextflow.enable.dsl=2 + +/* +./nextflow src/solution_RNASeq.nf --fastq "data/tiny_dataset/fastq/tiny2_R{1,2}.fastq.gz" --fasta "data/tiny_dataset/fasta/tiny_v2_10.fasta" --bed "data/tiny_dataset/annot/tiny.bed" -profile docker +*/ + log.info "fastq files : ${params.fastq}" log.info "fasta file : ${params.fasta}" log.info "bed file : ${params.bed}" @@ -15,97 +21,16 @@ Channel .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } .set { fastq_files } -process adaptor_removal { - tag "$pair_id" - publishDir "results/fastq/adaptor_removal/", mode: 'copy' - - input: - set pair_id, file(reads) from fastq_files - - output: - set pair_id, "*_cut_R{1,2}.fastq.gz" into fastq_files_cut - - script: - """ - - cutadapt -a AGATCGGAAGAG -g CTCTTCCGATCT -A AGATCGGAAGAG -G CTCTTCCGATCT \ - -o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_cut_R2.fastq.gz \ - ${reads[0]} ${reads[1]} > ${pair_id}_report.txt - """ -} - -process 
trimming { - tag "${reads}" - publishDir "results/fastq/trimming/", mode: 'copy' - - input: - set pair_id, file(reads) from fastq_files_cut - - output: - set pair_id, "*_trim_R{1,2}.fastq.gz" into fastq_files_trim - - script: -""" -UrQt --t 20 --m ${task.cpus} --gz \ ---in ${reads[0]} --inpair ${reads[1]} \ ---out ${pair_id}_trim_R1.fastq.gz --outpair ${pair_id}_trim_R2.fastq.gz \ -> ${pair_id}_trimming_report.txt -""" -} - -process fasta_from_bed { - tag "${bed.baseName}" - publishDir "results/fasta/", mode: 'copy' - - input: - file fasta from fasta_files - file bed from bed_files - - output: - file "*_extracted.fasta" into fasta_files_extracted - - script: -""" -bedtools getfasta -name \ --fi ${fasta} -bed ${bed} -fo ${bed.baseName}_extracted.fasta -""" +include { adaptor_removal_pairedend } from './nf_modules/cutadapt/main' +include { trimming_pairedend } from './nf_modules/urqt/main' +include { fasta_from_bed } from './nf_modules/bedtools/main' +include { index_fasta; mapping_fastq_pairedend } from './nf_modules/kallisto/main' + +workflow { + adaptor_removal_pairedend(fastq_files) + trimming_pairedend(adaptor_removal_pairedend.out.fastq) + fasta_from_bed(fasta_files, bed_files) + index_fasta(fasta_from_bed.out.fasta) + mapping_fastq_pairedend(index_fasta.out.index.collect(), trimming_pairedend.out.fastq) } -process index_fasta { - tag "$fasta.baseName" - publishDir "results/mapping/index/", mode: 'copy' - - input: - file fasta from fasta_files_extracted - - output: - file "*.index*" into index_files - file "*_kallisto_report.txt" into index_files_report - - script: -""" -kallisto index -k 31 --make-unique -i ${fasta.baseName}.index ${fasta} \ -2> ${fasta.baseName}_kallisto_report.txt -""" -} - -process mapping_fastq { - tag "$reads" - publishDir "results/mapping/quantification/", mode: 'copy' - - input: - set pair_id, file(reads) from fastq_files_trim - file index from index_files.collect() - - output: - file "*" into counts_files - - script: -""" -mkdir 
${pair_id} - -kallisto quant -i ${index} -t ${task.cpus} \ ---bias --bootstrap-samples 100 -o ${pair_id} \ -${reads[0]} ${reads[1]} &> ${pair_id}/kallisto_report.txt -""" -}