diff --git a/CHANGELOG b/CHANGELOG deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000000000000000000000000000000000000..43ec4e433b23a3fbfd4ec2b2f6e1cb3ac2a28e2b --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,81 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [0.2.6] - 2018-08-23 +### Added +- Added `src/training_dataset.nf` to build a small training dataset from NGS data + +### Changed +- the structure of `src/nf_modules`: the `tests` folder was removed + +## [0.2.5] - 2018-08-22 +### Added +- This fine changelog + +### Changed +- the structure of `src/nf_modules`: the `tests` folder was removed + + +## [0.2.4] - 2018-08-02 +### Changed +- add `paired_id` variable in the output of every single-end data processes to match the paired output + + +## [0.2.3] - 2018-07-25 +### Added +- List of tools available as nextflow, docker or sge module to the `README.md` + + +## [0.2.2] - 2018-07-23 +### Added +- SRA module from cigogne/nextflow-master 52b510e48daa1fb7 + + +## [0.2.1] - 2018-07-23 +### Added +- List of tools available as nextflow, docker or sge module + + +## [0.2.0] - 2018-06-18 +### Added +- `doc/TP_computational_biologists.md` +- Kallisto/0.44.0 + +### Changed +- add `paired_id` variable in the output of every paired data processes +- BEDtools: fixes for fasta handling +- UrQt: fix git version in Docker + + +## [0.1.2] - 2018-06-18 +### Added +- `doc/tp_experimental_biologist.md` and Makefile to build the pdf +- tests files for BEDtools + +### Changed +- Kallisto: various fixes +- UrQt: improve output and various fixes + +### Removed +- `src/nf_test.config` modules have their own `.config` + + +## [0.1.2] - 
2018-06-18 +### Added +- `doc/tp_experimental_biologist.md` and Makefile to build the pdf +- tests files for BEDtools + +### Changed +- Kallisto: various fixes +- UrQt: improve output and various fixes + +### Removed +- `src/nf_test.config` modules have their own `.config` + + +## [0.1.0] - 2018-05-06 +This is the first working version of the repository as a nextflow module repository + diff --git a/README.md b/README.md index 6f07ab7b151808aa7ddc91c0d7a55e54e1741d4c..2322dbb488ec7be4ec0407a8fec6c99a0fd7f9d6 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ You can fork this repository to build your own pipeline. To get the last commits from this repository into your fork use the following commands: ```sh -git remote add upstream https://gitlab.biologie.ens-lyon.fr/pipelines/nextflow.git +git remote add upstream gitlab_lbmc:pipelines/nextflow.git git pull upstream master ``` @@ -35,19 +35,19 @@ To install nextflow on you computer simply run the following command: src/install_nextflow.sh ``` -Then to initialise a given tools run the following command: +Then to initialize a given tool run the following command: ```sh src/docker_modules/<tool_name>/<tool_version>/docker_init.sh ``` -for example to initialise `file_handle` version `0.1.1`, run: +For example, to initialize `file_handle` version `0.1.1`, run: ```sh src/docker_modules/file_handle/0.1.1/docker_init.sh ``` -To initialise all the tools: +To initialize all the tools: ```sh find src/docker_modules/ -name "docker_init.sh" | awk '{system($0)}' ``` @@ -67,15 +67,44 @@ cd ..
Then to run the tests for a given tools run the following command: ```sh -src/nf_modules/<tool_name>/<tool_version>/tests/tests.sh +src/nf_modules/<tool_name>/<tool_version>/tests.sh ``` -for example to run the tests on `Bowtie2` run: +For example to run the tests on `Bowtie2` run: ```sh -src/nf_modules/Bowtie2/tests/tests.sh +src/nf_modules/Bowtie2/tests.sh ``` +## Available tools + +| tool | nf module | docker module | sge module | +|------|:---------:|:-------------:|:----------:| +BEDtools | ok | ok | ok +Bowtie | ok | ok | **no** +Bowtie2 | ok | ok | ok +canu | ok | ok | ok +cutadapt | ok | ok | ok +deepTools | **no** | ok | ok +FastQC | ok | ok | ok +file_handle | **no** | ok | ok +HISAT2 | **no** | ok | **no** +HTSeq | ok | ok | ok +Kallisto | ok | ok | ok +MACS2 | **no** | ok | ok +MultiQC | ok | ok | ok +MUSIC | ok | ok | ok +picard | **no** | ok | ok +pigz | **no** | ok | ok +RSEM | ok | ok | ok +SAMtools | ok | ok | ok +SRAtoolkit | ok | ok | ok +Salmon | **no** | ok | ok +TopHat | **no** | ok | ok +Trimmomatic | **no** | ok | ok +UrQt | ok | ok | ok + + ## Contributing Please read [CONTRIBUTING.md](CONTRIBUTING.md) for details on our code of conduct, and the process for submitting pull requests to us. 
diff --git a/src/Rnaseq.config b/src/Rnaseq.config deleted file mode 100644 index 1170cb83eb825ee111311c118e77de57c1c55dd7..0000000000000000000000000000000000000000 --- a/src/Rnaseq.config +++ /dev/null @@ -1,121 +0,0 @@ -profiles { - docker { - docker.temp = 'auto' - docker.enabled = true - process { - $adaptor_removal { - container = "cutadapt:1.14" - } - } - } - sge { - process{ - $adaptor_removal { - beforeScript = "module purge; module load cutadapt/1.14" - executor = "sge" - cpus = 1 - memory = "5GB" - time = "6h" - queueSize = 1000 - pollInterval = '60sec' - queue = 'h6-E5-2667v4deb128' - penv = 'openmp8' - } - } - } -} - -profiles { - docker { - docker.temp = 'auto' - docker.enabled = true - process { - $trimming { - container = "urqt:d62c1f8" - } - } - } - sge { - process{ - $trimming { - beforeScript = "module purge; module load UrQt/d62c1f8" - executor = "sge" - cpus = 4 - memory = "5GB" - time = "6h" - queueSize = 1000 - pollInterval = '60sec' - queue = 'h6-E5-2667v4deb128' - penv = 'openmp8' - } - } - } -} -profiles { - docker { - docker.temp = 'auto' - docker.enabled = true - process { - $fasta_from_bed { - container = "bedtools:2.25.0" - } - } - } - sge { - process{ - $fasta_from_bed { - beforeScript = "module purge; module load BEDtools/2.25.0" - executor = "sge" - cpus = 1 - memory = "5GB" - time = "6h" - queueSize = 1000 - pollInterval = '60sec' - queue = 'h6-E5-2667v4deb128' - penv = 'openmp8' - } - } - } -} - -profiles { - docker { - docker.temp = 'auto' - docker.enabled = true - process { - $index_fasta { - container = "kallisto:0.43.1" - } - $mapping_fastq { - container = "kallisto:0.43.1" - } - } - } - sge { - process{ - $index_fasta { - beforeScript = "module purge; module load Kallisto/0.43.1" - executor = "sge" - cpus = 1 - memory = "5GB" - time = "6h" - queueSize = 1000 - pollInterval = '60sec' - queue = 'h6-E5-2667v4deb128' - penv = 'openmp8' - } - $mapping_fastq { - beforeScript = "module purge; module load Kallisto/0.43.1" - executor = 
"sge" - cpus = 4 - memory = "5GB" - time = "6h" - queueSize = 1000 - pollInterval = '60sec' - queue = 'h6-E5-2667v4deb128' - penv = 'openmp8' - } - } - } -} - diff --git a/src/Rnaseq.nf b/src/Rnaseq.nf deleted file mode 100644 index 1b59e883aa31c4925197b580791e19d739b9190d..0000000000000000000000000000000000000000 --- a/src/Rnaseq.nf +++ /dev/null @@ -1,122 +0,0 @@ -params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq" -params.fasta = "$baseDir/data/fasta/*.fasta" -params.bed = "$baseDir/data/annot/*.bed" - -log.info "fasta file : ${params.fasta}" -log.info "bed file : ${params.bed}" -log.info "fastq files : ${params.fastq}" - -Channel - .fromPath( params.fasta ) - .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" } - .set { fasta_files } - -Channel - .fromPath( params.bed ) - .ifEmpty { error "Cannot find any bed files matching: ${params.bed}" } - .set { bed_files } - -Channel - .fromFilePairs( params.fastq ) - .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } - .set { fastq_files } - -process adaptor_removal { - tag "$pair_id" - publishDir "results/fastq/adaptor_removal/", mode: 'copy' - - input: - set pair_id, file(reads) from fastq_files - - output: - file "*_cut_R{1,2}.fastq.gz" into fastq_files_cut - - script: - """ - cutadapt -a AGATCGGAAGAG -g CTCTTCCGATCT -A AGATCGGAAGAG -G CTCTTCCGATCT \ - -o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_cut_R2.fastq.gz \ - ${reads[0]} ${reads[1]} > ${pair_id}_report.txt - """ -} - -process trimming { - tag "${reads}" - cpus 4 - publishDir "results/fastq/trimming/", mode: 'copy' - - input: - file reads from fastq_files_cut - - output: - file "*_trim_R{1,2}.fastq.gz" into fastq_files_trim - - script: -""" -UrQt --t 20 --m ${task.cpus} --gz \ ---in ${reads[0]} --inpair ${reads[1]} \ ---out ${reads[0].baseName}_trim_R1.fastq.gz --outpair ${reads[1].baseName}_trim_R2.fastq.gz \ -> ${reads[0].baseName}_trimming_report.txt -""" -} - -process fasta_from_bed { - tag "${bed.baseName}" - cpus 
4 - publishDir "results/fasta/", mode: 'copy' - - input: - file fasta from fasta_files - file bed from bed_files - - output: - file "*_extracted.fasta" into fasta_files_extracted - - script: -""" -bedtools getfasta -name \ --fi ${fasta} -bed ${bed} -fo ${bed.baseName}_extracted.fasta -""" -} - -process index_fasta { - tag "$fasta.baseName" - publishDir "results/mapping/index/", mode: 'copy' - - input: - file fasta from fasta_files_extracted - - output: - file "*.index*" into index_files - - script: -""" -kallisto index -k 31 --make-unique -i ${fasta.baseName}.index ${fasta} \ -> ${fasta.baseName}_kallisto_report.txt -""" -} - - -process mapping_fastq { - tag "$reads" - cpus 4 - publishDir "results/mapping/quantification/", mode: 'copy' - - input: - file reads from fastq_files_trim - file index from index_files - - output: - file "*" into counts_files - - script: -""" -mkdir ${reads[0].baseName} -kallisto quant -i ${index} -t ${task.cpus} \ ---bias --bootstrap-samples 100 -o ${reads[0].baseName} \ -${reads[0]} ${reads[1]} &> ${reads[0].baseName}_kallisto_report.txt -""" -} - - - - diff --git a/src/docker_modules/Bowtie/1.2.2/Dockerfile b/src/docker_modules/Bowtie/1.2.2/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..128e68acde94e20c03130a0a3551978231f7a9cd --- /dev/null +++ b/src/docker_modules/Bowtie/1.2.2/Dockerfile @@ -0,0 +1,12 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV BOWTIE_VERSION=1.2.2 +ENV SAMTOOLS_VERSION=1.7 +ENV PACKAGES bowtie=${BOWTIE_VERSION}* \ + samtools=${SAMTOOLS_VERSION}* + + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean diff --git a/src/docker_modules/Bowtie/1.2.2/docker_init.sh b/src/docker_modules/Bowtie/1.2.2/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..8e010a67b74c0e5519d639c3cb06a14c1fb6f987 --- /dev/null +++ b/src/docker_modules/Bowtie/1.2.2/docker_init.sh @@ -0,0 +1,2 @@ +#!/bin/sh +docker build 
src/docker_modules/Bowtie/1.2.2 -t 'bowtie:1.2.2' diff --git a/src/docker_modules/Bowtie2/2.3.4.1/Dockerfile b/src/docker_modules/Bowtie2/2.3.4.1/Dockerfile index f8b1c5c048257ada6d0892dc787bd943aa2739b5..0f4ac75e48b5390714a22765c93939b72650b47e 100644 --- a/src/docker_modules/Bowtie2/2.3.4.1/Dockerfile +++ b/src/docker_modules/Bowtie2/2.3.4.1/Dockerfile @@ -4,8 +4,8 @@ MAINTAINER Laurent Modolo ENV BOWTIE2_VERSION=2.3.4.1 ENV SAMTOOLS_VERSION=1.7 ENV PACKAGES bowtie2=${BOWTIE2_VERSION}* \ - samtools=${SAMTOOLS_VERSION}* - + samtools=${SAMTOOLS_VERSION}* \ + perl=5.26.1* RUN apt-get update && \ apt-get install -y --no-install-recommends ${PACKAGES} && \ diff --git a/src/docker_modules/FastQC/0.11.5/Dockerfile b/src/docker_modules/FastQC/0.11.5/Dockerfile index f7b999e418fc358eaf063989e0ff058820164a95..999edf6a3bb5ad548289a0de1740899802ed335d 100644 --- a/src/docker_modules/FastQC/0.11.5/Dockerfile +++ b/src/docker_modules/FastQC/0.11.5/Dockerfile @@ -2,7 +2,8 @@ FROM ubuntu:18.04 MAINTAINER Laurent Modolo ENV FASTQC_VERSION=0.11.5 -ENV PACKAGES fastqc=${FASTQC_VERSION}* +ENV PACKAGES fastqc=${FASTQC_VERSION}* \ + perl=5.26* RUN apt-get update && \ apt-get install -y --no-install-recommends ${PACKAGES} && \ diff --git a/src/docker_modules/HISAT2/2.0.0/docker_init.sh b/src/docker_modules/HISAT2/2.0.0/docker_init.sh old mode 100644 new mode 100755 diff --git a/src/docker_modules/HTSeq/0.8.0/Dockerfile b/src/docker_modules/HTSeq/0.8.0/Dockerfile index c655738fea3d89292f9ef5a9068893aafb6f38a1..a786b8026f66e9538c543f79624325ccb75952c6 100644 --- a/src/docker_modules/HTSeq/0.8.0/Dockerfile +++ b/src/docker_modules/HTSeq/0.8.0/Dockerfile @@ -14,4 +14,5 @@ RUN apt-get update && \ apt-get clean RUN pip3 install numpy==1.14.3 +RUN pip3 install pysam==0.15.0 RUN pip3 install HTSeq==${HTSEQ_VERSION} diff --git a/src/docker_modules/MACS2/2.1.0/Dockerfile b/src/docker_modules/MACS2/2.1.0/Dockerfile new file mode 100644 index 
0000000000000000000000000000000000000000..faf2e9088ce84eadc75612627cf74aa7fd2c7381 --- /dev/null +++ b/src/docker_modules/MACS2/2.1.0/Dockerfile @@ -0,0 +1,20 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV MACS_VERSION=2.1.1.20160309 +ENV PACKAGES git=1:2.17* \ + build-essential=12.4* \ + python-pip=9.0.1* \ + ca-certificates=20180409 \ + python-setuptools=39.0.1* \ + python-dev=2.7* \ + python-numpy=1:1.13* \ + python-wheel=0.30.0* \ + zlib1g-dev=1:1.2.11* + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN pip install MACS2==${MACS_VERSION} + diff --git a/src/docker_modules/MACS2/2.1.0/docker_init.sh b/src/docker_modules/MACS2/2.1.0/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..8af6fd7fe7139608a894fc053f987e6ef4b3380a --- /dev/null +++ b/src/docker_modules/MACS2/2.1.0/docker_init.sh @@ -0,0 +1,2 @@ +#!/bin/sh +docker build src/docker_modules/MACS2/2.1.0 -t 'macs2:2.1.0' diff --git a/src/docker_modules/MUSIC/6613c53/Dockerfile b/src/docker_modules/MUSIC/6613c53/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..0c6f4e4c018d75cafc0812278fb9899b354d4da2 --- /dev/null +++ b/src/docker_modules/MUSIC/6613c53/Dockerfile @@ -0,0 +1,27 @@ +FROM samtools:1.7 +MAINTAINER Laurent Modolo + +# MUSIC commit to build; keep in sync with the module directory name and image tag +ENV MUSIC_VERSION=6613c53 +ENV PACKAGES git=1:2.17* \ + build-essential=12.4* \ + ca-certificates=20180409 \ + zlib1g-dev=1:1.2.11* + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN git clone https://github.com/gersteinlab/MUSIC.git && \ + cd MUSIC && \ + git checkout ${MUSIC_VERSION} && \ + make clean && \ + make && \ + cd ..
&& \ + mv MUSIC/bin/MUSIC /usr/bin/ && \ + mv MUSIC/bin/generate_multimappability_signal.csh /usr/bin/ && \ + mv MUSIC/bin/run_MUSIC.csh /usr/bin/ && \ + rm -Rf MUSIC + +RUN chmod +x /usr/bin/* + diff --git a/src/docker_modules/MUSIC/6613c53/docker_init.sh b/src/docker_modules/MUSIC/6613c53/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..c6163d6b58877cd3a7fd31be65fa2b9ee958c3d5 --- /dev/null +++ b/src/docker_modules/MUSIC/6613c53/docker_init.sh @@ -0,0 +1,2 @@ +#!/bin/sh +docker build src/docker_modules/MUSIC/6613c53 -t 'music:6613c53' diff --git a/src/docker_modules/MultiQC/1.0/Dockerfile b/src/docker_modules/MultiQC/1.0/Dockerfile index 09b218a95597c3df504e78bc2043d2a139730016..d351f1c6a979de6616edce3fd276be262bad8b43 100644 --- a/src/docker_modules/MultiQC/1.0/Dockerfile +++ b/src/docker_modules/MultiQC/1.0/Dockerfile @@ -6,10 +6,16 @@ ENV PACKAGES build-essential=12.4* \ python3-pip=9.0.1* \ python3-setuptools=39.0.1* \ python3-dev=3.6.5* \ - python3-wheel=0.30.0* + python3-wheel=0.30.0* \ + locales RUN apt-get update && \ apt-get install -y --no-install-recommends ${PACKAGES} && \ apt-get clean +RUN locale-gen en_US.UTF-8 +ENV LC_ALL=en_US.utf-8 +ENV LANG=en_US.utf-8 + RUN pip3 install multiqc==${MULTIQC_VERSION} + diff --git a/src/docker_modules/RSEM/1.3.0/Dockerfile b/src/docker_modules/RSEM/1.3.0/Dockerfile index 1ccdaa74586943d175058c9c9c478e4c04bcdcec..337521c0496db33c93c20c8fd4d756efcba603a8 100644 --- a/src/docker_modules/RSEM/1.3.0/Dockerfile +++ b/src/docker_modules/RSEM/1.3.0/Dockerfile @@ -4,7 +4,7 @@ MAINTAINER Laurent Modolo ENV RSEM_VERSION=1.3.0 ENV BOWTIE2_VERSION=2.3.4.1 ENV SAMTOOLS_VERSION=1.7 -ENV PACKAGES git=1:2.17.0* \ +ENV PACKAGES git=1:2.17* \ build-essential=12.4* \ ca-certificates=20180409 \ zlib1g-dev=1:1.2.11* \ diff --git a/src/docker_modules/picard/2.18.11/Dockerfile b/src/docker_modules/picard/2.18.11/Dockerfile new file mode 100644 index 
0000000000000000000000000000000000000000..6a358e55bf36a4be4842335f47491ec8ac3a4ced --- /dev/null +++ b/src/docker_modules/picard/2.18.11/Dockerfile @@ -0,0 +1,20 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV PICARD_VERSION=2.18.11 +ENV PACKAGES default-jre=2:1.1* \ + curl=7.58.0* \ + ca-certificates=20180409 + + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +# Fetch the release jar over verified TLS; -f fails the build on an HTTP error instead of saving the error page +RUN curl -f -L https://github.com/broadinstitute/picard/releases/download/${PICARD_VERSION}/picard.jar -o picard.jar && \ + mkdir -p /usr/share/java/ && \ + mv picard.jar /usr/share/java/ + +COPY PicardCommandLine /usr/bin/ +RUN chmod +x /usr/bin/PicardCommandLine diff --git a/src/docker_modules/picard/2.18.11/PicardCommandLine b/src/docker_modules/picard/2.18.11/PicardCommandLine new file mode 100644 index 0000000000000000000000000000000000000000..ce067365785f2f03c722668eff8d80a94b9b34d3 --- /dev/null +++ b/src/docker_modules/picard/2.18.11/PicardCommandLine @@ -0,0 +1,15 @@ +#!/bin/sh +set -eu +PRG="$(basename -- "$0")" +case "$PRG" in +picard-tools) + echo 1>&2 'Warning: picard-tools is deprecated and should be replaced by PicardCommandLine' + ;; +PicardCommandLine) + ;; +*) + set -- "$PRG" "$@" + ;; +esac + +exec java ${JAVA_OPTIONS-} -jar /usr/share/java/picard.jar "$@" diff --git a/src/docker_modules/picard/2.18.11/docker_init.sh b/src/docker_modules/picard/2.18.11/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..7546e343730c0380b749031baec984c1838b3a88 --- /dev/null +++ b/src/docker_modules/picard/2.18.11/docker_init.sh @@ -0,0 +1,2 @@ +#!/bin/sh +docker build src/docker_modules/picard/2.18.11 -t 'picard:2.18.11' diff --git a/src/fasta_sampler.nf b/src/fasta_sampler.nf deleted file mode 100644 index d1200ed496c77756cde525835f581b71b2528990..0000000000000000000000000000000000000000 --- a/src/fasta_sampler.nf +++ /dev/null @@ -1,18 +0,0 @@ -Channel - .fromPath( "data/tiny_dataset/fasta/*.fasta" ) - .set
{ fasta_file } - -process sample_fasta { - publishDir "results/sampling/", mode: 'copy' - - input: -file fasta from fasta_file - - output: -file "*_sample.fasta" into fasta_sample - - script: -""" -head ${fasta} > ${fasta.baseName}_sample.fasta -""" -} diff --git a/src/nf_modules/BEDtools/bedtools.config b/src/nf_modules/BEDtools/fasta_from_bed.config similarity index 100% rename from src/nf_modules/BEDtools/bedtools.config rename to src/nf_modules/BEDtools/fasta_from_bed.config diff --git a/src/nf_modules/BEDtools/bedtools.nf b/src/nf_modules/BEDtools/fasta_from_bed.nf similarity index 100% rename from src/nf_modules/BEDtools/bedtools.nf rename to src/nf_modules/BEDtools/fasta_from_bed.nf diff --git a/src/nf_modules/BEDtools/tests/tests.sh b/src/nf_modules/BEDtools/tests.sh similarity index 52% rename from src/nf_modules/BEDtools/tests/tests.sh rename to src/nf_modules/BEDtools/tests.sh index f27c274f45cdf0fab1003f81986d96b29f50fdc2..632ba5bff13f65685e3ff521f4a1496b337b55d9 100755 --- a/src/nf_modules/BEDtools/tests/tests.sh +++ b/src/nf_modules/BEDtools/tests.sh @@ -1,5 +1,5 @@ -nextflow src/nf_modules/BEDtools/tests/fasta_from_bed.nf \ - -c src/nf_modules/BEDtools/bedtools.config \ +./nextflow src/nf_modules/BEDtools/fasta_from_bed.nf \ + -c src/nf_modules/BEDtools/fasta_from_bed.config \ -profile docker \ --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \ --bed "data/tiny_dataset/annot/tiny.bed" \ diff --git a/src/nf_modules/BEDtools/tests/fasta_from_bed.nf b/src/nf_modules/BEDtools/tests/fasta_from_bed.nf deleted file mode 100644 index 372f89e958f5fcf9256c37000f86e6b552e31def..0000000000000000000000000000000000000000 --- a/src/nf_modules/BEDtools/tests/fasta_from_bed.nf +++ /dev/null @@ -1,33 +0,0 @@ -params.fastq = "$baseDir/data/fasta/*.fasta" -params.bed = "$baseDir/data/annot/*.bed" - -log.info "fasta file : ${params.fasta}" -log.info "bed file : ${params.bed}" - -Channel - .fromPath( params.fasta ) - .ifEmpty { error "Cannot find any fasta files matching: 
${params.fasta}" } - .set { fasta_files } -Channel - .fromPath( params.bed ) - .ifEmpty { error "Cannot find any bed files matching: ${params.bed}" } - .set { bed_files } - -process fasta_from_bed { - tag "${bed.baseName}" - cpus 4 - publishDir "results/fasta/", mode: 'copy' - - input: - file fasta from fasta_files - file bed from bed_files - - output: - file "*_extracted.fasta" into fasta_files_extracted - - script: -""" -bedtools getfasta -name \ --fi ${fasta} -bed ${bed} -fo ${bed.baseName}_extracted.fasta -""" -} diff --git a/src/nf_modules/Bowtie/indexing.config b/src/nf_modules/Bowtie/indexing.config new file mode 100644 index 0000000000000000000000000000000000000000..d5851003aa4c3c95b34eb78de95e20ff17aee390 --- /dev/null +++ b/src/nf_modules/Bowtie/indexing.config @@ -0,0 +1,18 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $index_fasta { + container = "bowtie:1.2.2" + } + } + } + sge { + process{ + $index_fasta { + beforeScript = "module purge; module load Bowtie/1.2.2" + } + } + } +} diff --git a/src/nf_modules/Bowtie/indexing.nf b/src/nf_modules/Bowtie/indexing.nf new file mode 100644 index 0000000000000000000000000000000000000000..537d684b4bb39412eccfe31aec7c07d9312981d9 --- /dev/null +++ b/src/nf_modules/Bowtie/indexing.nf @@ -0,0 +1,33 @@ +/* fasta indexing */ + +params.fasta = "$baseDir/data/bam/*.fasta" + +log.info "fasta files : ${params.fasta}" + +Channel + .fromPath( params.fasta ) + .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" } + .set { fasta_file } + +process index_fasta { + tag "$fasta.baseName" + cpus 4 + publishDir "results/mapping/index/", mode: 'copy' + + input: + file fasta from fasta_file + + output: + file "*.index*" into index_files + file "*_report.txt" into indexing_report + + script: +""" +bowtie-build --threads ${task.cpus} -f ${fasta} ${fasta.baseName}.index &> ${fasta.baseName}_bowtie_report.txt + +if grep -q "Error" ${fasta.baseName}_bowtie_report.txt; then + exit
1 +fi +""" +} + diff --git a/src/nf_modules/Bowtie/mapping_paired.config b/src/nf_modules/Bowtie/mapping_paired.config new file mode 100644 index 0000000000000000000000000000000000000000..86cc4bb7f3c6c553811f18ed64f03c003e4780ab --- /dev/null +++ b/src/nf_modules/Bowtie/mapping_paired.config @@ -0,0 +1,18 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $mapping_fastq { + container = "bowtie:1.2.2" + } + } + } + sge { + process{ + $mapping_fastq { + beforeScript = "module purge; module load SAMtools/1.7; module load Bowtie/1.2.2" + } + } + } +} diff --git a/src/nf_modules/Bowtie/mapping_paired.nf b/src/nf_modules/Bowtie/mapping_paired.nf new file mode 100644 index 0000000000000000000000000000000000000000..cc9f40b265c61cf0609970fe7501d7267318cf9f --- /dev/null +++ b/src/nf_modules/Bowtie/mapping_paired.nf @@ -0,0 +1,54 @@ +/* +* mapping paired fastq +*/ + +params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq" +params.index = "$baseDir/data/index/*.index.*" + +log.info "fastq files : ${params.fastq}" +log.info "index files : ${params.index}" + +Channel + .fromFilePairs( params.fastq ) + .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } + .set { fastq_files } +Channel + .fromPath( params.index ) + .ifEmpty { error "Cannot find any index files matching: ${params.index}" } + .set { index_files } + +process mapping_fastq { + tag "$pair_id" + cpus 4 + publishDir "results/mapping/bams/", mode: 'copy' + + input: + set pair_id, file(reads) from fastq_files + file index from index_files.collect() + + output: + file "*.bam" into bam_files + file "*_report.txt" into mapping_report + + script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.ebwt/ && !(index_file =~ /.*\.rev\.1\.ebwt/)) { + index_id = ( index_file =~ /(.*)\.1\.ebwt/)[0][1] + } + } +""" +# -v specify the max number of missmatch, -k the number of match reported per +# reads +bowtie --best -v 3 -k 1 --sam -p ${task.cpus} 
${index_id} \ +-1 ${reads[0]} -2 ${reads[1]} 2> \ +${pair_id}_bowtie_report.txt | \ +samtools view -Sb - > ${pair_id}.bam + +if grep -q "Error" ${pair_id}_bowtie_report.txt; then + exit 1 +fi +""" +} + + diff --git a/src/nf_modules/Bowtie/mapping_single.config b/src/nf_modules/Bowtie/mapping_single.config new file mode 100644 index 0000000000000000000000000000000000000000..86cc4bb7f3c6c553811f18ed64f03c003e4780ab --- /dev/null +++ b/src/nf_modules/Bowtie/mapping_single.config @@ -0,0 +1,18 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $mapping_fastq { + container = "bowtie:1.2.2" + } + } + } + sge { + process{ + $mapping_fastq { + beforeScript = "module purge; module load SAMtools/1.7; module load Bowtie/1.2.2" + } + } + } +} diff --git a/src/nf_modules/Bowtie/mapping_single.nf b/src/nf_modules/Bowtie/mapping_single.nf new file mode 100644 index 0000000000000000000000000000000000000000..ad9754d1545e1154e2fa9bf18bcfde065738723a --- /dev/null +++ b/src/nf_modules/Bowtie/mapping_single.nf @@ -0,0 +1,51 @@ +/* +* mapping single end fastq +*/ + +params.fastq = "$baseDir/data/fastq/*.fastq" +params.index = "$baseDir/data/index/*.index.*" // default index glob; override with --index + +log.info "fastq files : ${params.fastq}" +log.info "index files : ${params.index}" + +Channel + .fromPath( params.fastq ) + .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} + .set { fastq_files } +Channel + .fromPath( params.index ) + .ifEmpty { error "Cannot find any index files matching: ${params.index}" } + .set { index_files } + +process mapping_fastq { + tag "$file_id" + cpus 4 + publishDir "results/mapping/bams/", mode: 'copy' + + input: + set file_id, file(reads) from fastq_files + file index from index_files.collect() + + output: + set file_id, "*.bam" into bam_files + file "*_report.txt" into mapping_report + + script: +index_id = index[0] +for (index_file in index) { + if (index_file =~ /.*\.1\.ebwt/ && !(index_file =~ /.*\.rev\.1\.ebwt/)) { +
index_id = ( index_file =~ /(.*)\.1\.ebwt/)[0][1] + } +} +""" +bowtie --best -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \ +-q ${reads} 2> \ +${file_id}_bowtie_report.txt | \ +samtools view -Sb - > ${file_id}.bam + +if grep -q "Error" ${file_id}_bowtie_report.txt; then + exit 1 +fi +""" +} diff --git a/src/nf_modules/Bowtie/tests.sh b/src/nf_modules/Bowtie/tests.sh new file mode 100755 index 0000000000000000000000000000000000000000..803f62814bc7d12e6b595b6a17ae9e96f684b6e2 --- /dev/null +++ b/src/nf_modules/Bowtie/tests.sh @@ -0,0 +1,17 @@ +./nextflow src/nf_modules/Bowtie/indexing.nf \ + -c src/nf_modules/Bowtie/indexing.config \ + -profile docker \ + --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" + +./nextflow src/nf_modules/Bowtie/mapping_single.nf \ + -c src/nf_modules/Bowtie/mapping_single.config \ + -profile docker \ + --index "results/mapping/index/*.ebwt" \ + --fastq "data/tiny_dataset/fastq/tiny*_S.fastq" + +./nextflow src/nf_modules/Bowtie/mapping_paired.nf \ + -c src/nf_modules/Bowtie/mapping_paired.config \ + -profile docker \ + --index "results/mapping/index/*.ebwt" \ + --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq" + diff --git a/src/nf_modules/Bowtie2/bowtie2.nf b/src/nf_modules/Bowtie2/bowtie2.nf deleted file mode 100644 index 30b75312ceba63ba5df43fb2c17e7d2a73102e2a..0000000000000000000000000000000000000000 --- a/src/nf_modules/Bowtie2/bowtie2.nf +++ /dev/null @@ -1,125 +0,0 @@ -/* -* Bowtie2 : -* Imputs : fastq files -* Imputs : fasta files -* Output : bam files -*/ - -/* fasta indexing */ -params.fasta = "$baseDir/data/bam/*.fasta" - -log.info "fasta files : ${params.fasta}" - -Channel - .fromPath( params.fasta ) - .ifEmpty { error "Cannot find any bam files matching: ${params.fasta}" } - .set { fasta_file } - -process index_fasta { - tag "$fasta.baseName" - cpus 4 - publishDir "results/mapping/index/", mode: 'copy' - - input: - file fasta from fasta_file - - output: - file "*.index*" into index_files - - script: -""" -bowtie2-build 
--threads ${task.cpus} ${fasta} ${fasta.baseName}.index &> ${fasta.baseName}_bowtie2_report.txt - -if grep -q "Error" ${fasta.baseName}_bowtie2_report.txt; then - exit 1 -fi -""" -} - - -/* -* for paired-end data -*/ - -params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq" -params.index = "$baseDir/data/index/*.index.*" - -log.info "fastq files : ${params.fastq}" -log.info "index files : ${params.index}" - -Channel - .fromFilePairs( params.fastq ) - .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } - .set { fastq_files } -Channel - .fromPath( params.index ) - .ifEmpty { error "Cannot find any index files matching: ${params.index}" } - .set { index_files } - -process mapping_fastq { - tag "$pair_id" - cpus 4 - publishDir "results/mapping/bams/", mode: 'copy' - - input: - set pair_id, file(reads) from fastq_files - file index from index_files.toList() - - output: - set pair_id, "*.bam" into bam_files - - script: -""" - bowtie2 --very-sensitive -p ${task.cpus} -x ${index[0].baseName} \ - -1 ${reads[0]} -2 ${reads[1]} 2> \ - ${pair_id}_bowtie2_report.txt | \ - samtools view -Sb - > ${pair_id}.bam - -if grep -q "Error" ${pair_id}_bowtie2_report.txt; then - exit 1 -fi -""" -} - -/* -* for single-end data -*/ - -params.fastq = "$baseDir/data/fastq/*.fastq" - -log.info "fastq files : ${params.fastq}" -log.info "index files : ${params.index}" - -Channel - .fromPath( params.fastq ) - .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } - .set { fastq_files } -Channel - .fromPath( params.index ) - .ifEmpty { error "Cannot find any index files matching: ${params.index}" } - .set { index_files } - -process mapping_fastq { - tag "$reads.baseName" - cpus 4 - publishDir "results/mapping/bams/", mode: 'copy' - - input: - file reads from fastq_files - file index from index_files.toList() - - output: - file "*.bam" into bam_files - - script: -""" -bowtie2 --very_sensitive -p ${task.cpus} -x ${index[0].baseName} \ --U ${reads} 2> \ 
-${reads.baseName}_bowtie2_report.txt | \ -samtools view -Sb - > ${reads.baseName}.bam - -if grep -q "Error" ${fasta.baseName}_bowtie2_report.txt; then - exit 1 -fi -""" -} diff --git a/src/nf_modules/Bowtie2/indexing.config b/src/nf_modules/Bowtie2/indexing.config new file mode 100644 index 0000000000000000000000000000000000000000..60f60547c31e5c6fea842e3c798940a2ba6b98c5 --- /dev/null +++ b/src/nf_modules/Bowtie2/indexing.config @@ -0,0 +1,18 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $index_fasta { + container = "bowtie2:2.3.4.1" + } + } + } + sge { + process{ + $index_fasta { + beforeScript = "module purge; module load Bowtie2/2.3.4.1" + } + } + } +} diff --git a/src/nf_modules/Bowtie2/tests/index.nf b/src/nf_modules/Bowtie2/indexing.nf similarity index 93% rename from src/nf_modules/Bowtie2/tests/index.nf rename to src/nf_modules/Bowtie2/indexing.nf index 2636f4be09ab1ff24142ab64ba9bf932f44a2233..4f3cde5e990f3528f2f000c8e68131edafdecf97 100644 --- a/src/nf_modules/Bowtie2/tests/index.nf +++ b/src/nf_modules/Bowtie2/indexing.nf @@ -17,6 +17,7 @@ process index_fasta { output: file "*.index*" into index_files + file "*_report.txt" into indexing_report script: """ diff --git a/src/nf_modules/Bowtie2/bowtie2.config b/src/nf_modules/Bowtie2/mapping_paired.config similarity index 66% rename from src/nf_modules/Bowtie2/bowtie2.config rename to src/nf_modules/Bowtie2/mapping_paired.config index e34b42f06980b9d1ac941fe184a899f2081f1ece..a8cd2991e8775c96045263645cf98079002275b1 100644 --- a/src/nf_modules/Bowtie2/bowtie2.config +++ b/src/nf_modules/Bowtie2/mapping_paired.config @@ -3,9 +3,6 @@ profiles { docker.temp = 'auto' docker.enabled = true process { - $index_fasta { - container = "bowtie2:2.3.4.1" - } $mapping_fastq { container = "bowtie2:2.3.4.1" } @@ -13,9 +10,6 @@ profiles { } sge { process{ - $index_fasta { - beforeScript = "module purge; module load Bowtie2/2.3.4.1" - } $mapping_fastq { beforeScript = "module purge; 
module load SAMtools/1.7; module load Bowtie2/2.3.4.1" } diff --git a/src/nf_modules/Bowtie2/tests/mapping_paired.nf b/src/nf_modules/Bowtie2/mapping_paired.nf similarity index 63% rename from src/nf_modules/Bowtie2/tests/mapping_paired.nf rename to src/nf_modules/Bowtie2/mapping_paired.nf index 835d0ca3c96e5f68c1650aeadff280eced9d5a11..7422b143606e720138d07ea3b04fe16d4301c117 100644 --- a/src/nf_modules/Bowtie2/tests/mapping_paired.nf +++ b/src/nf_modules/Bowtie2/mapping_paired.nf @@ -20,17 +20,24 @@ process mapping_fastq { input: set pair_id, file(reads) from fastq_files - file index from index_files.toList() + file index from index_files.collect() output: set pair_id, "*.bam" into bam_files + file "*_report.txt" into mapping_report script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + } + } """ - bowtie2 --very-sensitive -p ${task.cpus} -x ${index[0].baseName} \ - -1 ${reads[0]} -2 ${reads[1]} 2> \ - ${pair_id}_bowtie2_report.txt | \ - samtools view -Sb - > ${pair_id}.bam +bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \ +-1 ${reads[0]} -2 ${reads[1]} 2> \ +${pair_id}_bowtie2_report.txt | \ +samtools view -Sb - > ${pair_id}.bam if grep -q "Error" ${pair_id}_bowtie2_report.txt; then exit 1 diff --git a/src/nf_modules/Bowtie2/mapping_single.config b/src/nf_modules/Bowtie2/mapping_single.config new file mode 100644 index 0000000000000000000000000000000000000000..a8cd2991e8775c96045263645cf98079002275b1 --- /dev/null +++ b/src/nf_modules/Bowtie2/mapping_single.config @@ -0,0 +1,18 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $mapping_fastq { + container = "bowtie2:2.3.4.1" + } + } + } + sge { + process{ + $mapping_fastq { + beforeScript = "module purge; module load SAMtools/1.7; module load Bowtie2/2.3.4.1" + } + } + } +} diff --git a/src/nf_modules/Bowtie2/mapping_single.nf 
b/src/nf_modules/Bowtie2/mapping_single.nf new file mode 100644 index 0000000000000000000000000000000000000000..66676991aae710a5263d763dee2ad7dbde6b7a51 --- /dev/null +++ b/src/nf_modules/Bowtie2/mapping_single.nf @@ -0,0 +1,46 @@ +params.fastq = "$baseDir/data/fastq/*.fastq" + +log.info "fastq files : ${params.fastq}" +log.info "index files : ${params.index}" + +Channel + .fromPath( params.fastq ) + .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} + .set { fastq_files } +Channel + .fromPath( params.index ) + .ifEmpty { error "Cannot find any index files matching: ${params.index}" } + .set { index_files } + +process mapping_fastq { + tag "$file_id" + cpus 4 + publishDir "results/mapping/bams/", mode: 'copy' + + input: + set file_id, file(reads) from fastq_files + file index from index_files.collect() + + output: + set file_id, "*.bam" into bam_files + file "*_report.txt" into mapping_report + + script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + } + } +""" +bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \ +-U ${reads} 2> \ +${file_id}_bowtie2_report.txt | \ +samtools view -Sb - > ${file_id}.bam + +if grep -q "Error" ${file_id}_bowtie2_report.txt; then + exit 1 +fi +""" +} diff --git a/src/nf_modules/Bowtie2/tests/tests.sh b/src/nf_modules/Bowtie2/tests.sh similarity index 50% rename from src/nf_modules/Bowtie2/tests/tests.sh rename to src/nf_modules/Bowtie2/tests.sh index 55e88edb145255aea143a632aea6e4b3cb5f2833..ab1483824cd1f9755d09495cd050c45cc4a82d30 100755 --- a/src/nf_modules/Bowtie2/tests/tests.sh +++ b/src/nf_modules/Bowtie2/tests.sh @@ -1,16 +1,16 @@ -nextflow src/nf_modules/Bowtie2/tests/index.nf \ - -c src/nf_modules/Bowtie2/bowtie2.config \ +./nextflow src/nf_modules/Bowtie2/indexing.nf \ + -c src/nf_modules/Bowtie2/indexing.config 
\ -profile docker \ --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" -nextflow src/nf_modules/Bowtie2/tests/mapping_single.nf \ - -c src/nf_modules/Bowtie2/bowtie2.config \ +./nextflow src/nf_modules/Bowtie2/mapping_single.nf \ + -c src/nf_modules/Bowtie2/mapping_single.config \ -profile docker \ --index "data/tiny_dataset/fasta/*.bt2" \ --fastq "data/tiny_dataset/fastq/tiny*_S.fastq" -nextflow src/nf_modules/Bowtie2/tests/mapping_paired.nf \ - -c src/nf_modules/Bowtie2/bowtie2.config \ +./nextflow src/nf_modules/Bowtie2/mapping_paired.nf \ + -c src/nf_modules/Bowtie2/mapping_paired.config \ -profile docker \ --index "data/tiny_dataset/fasta/*.bt2" \ --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq" diff --git a/src/nf_modules/Bowtie2/tests/mapping_single.nf b/src/nf_modules/Bowtie2/tests/mapping_single.nf deleted file mode 100644 index c5c7f585514c7ba97da98a660c02964a4d5a91b1..0000000000000000000000000000000000000000 --- a/src/nf_modules/Bowtie2/tests/mapping_single.nf +++ /dev/null @@ -1,38 +0,0 @@ -params.fastq = "$baseDir/data/fastq/*.fastq" - -log.info "fastq files : ${params.fastq}" -log.info "index files : ${params.index}" - -Channel - .fromPath( params.fastq ) - .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } - .set { fastq_files } -Channel - .fromPath( params.index ) - .ifEmpty { error "Cannot find any index files matching: ${params.index}" } - .set { index_files } - -process mapping_fastq { - tag "$reads.baseName" - cpus 4 - publishDir "results/mapping/bams/", mode: 'copy' - - input: - file reads from fastq_files - file index from index_files.toList() - - output: - file "*.bam" into bam_files - - script: -""" -bowtie2 --very_sensitive -p ${task.cpus} -x ${index[0].baseName} \ --U ${reads} 2> \ -${reads.baseName}_bowtie2_report.txt | \ -samtools view -Sb - > ${reads.baseName}.bam - -if grep -q "Error" ${reads.baseName}_bowtie2_report.txt; then - exit 1 -fi -""" -} diff --git a/src/nf_modules/FastQC/fastqc_paired.config 
b/src/nf_modules/FastQC/fastqc_paired.config new file mode 100644 index 0000000000000000000000000000000000000000..a6589845aa98db0b4f6eaa663246db2604eb210b --- /dev/null +++ b/src/nf_modules/FastQC/fastqc_paired.config @@ -0,0 +1,25 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $fastqc_fastq { + container = "fastqc:0.11.5" + } + } + } + sge { + process{ + $fastqc_fastq { + beforeScript = "module purge; module load FastQC/0.11.5" + executor = "sge" + cpus = 1 + memory = "5GB" + time = "6h" + queueSize = 1000 + pollInterval = '60sec' + queue = 'monointeldeb128' + } + } + } +} diff --git a/src/nf_modules/FastQC/fastqc_paired.nf b/src/nf_modules/FastQC/fastqc_paired.nf new file mode 100644 index 0000000000000000000000000000000000000000..6755edec7dca244b1c1581dc6459cd2b8afcc996 --- /dev/null +++ b/src/nf_modules/FastQC/fastqc_paired.nf @@ -0,0 +1,26 @@ +params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq" + +log.info "fastq files : ${params.fastq}" + +Channel + .fromFilePairs( params.fastq ) + .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } + .set { fastq_files } + +process fastqc_fastq { + tag "$pair_id" + publishDir "results/fastq/fastqc/", mode: 'copy' + + input: + set pair_id, file(reads) from fastq_files + + output: + file "*.{zip,html}" into fastqc_report + + script: +""" +fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \ +${reads[0]} ${reads[1]} +""" +} + diff --git a/src/nf_modules/FastQC/fastqc_single.config b/src/nf_modules/FastQC/fastqc_single.config new file mode 100644 index 0000000000000000000000000000000000000000..a6589845aa98db0b4f6eaa663246db2604eb210b --- /dev/null +++ b/src/nf_modules/FastQC/fastqc_single.config @@ -0,0 +1,25 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $fastqc_fastq { + container = "fastqc:0.11.5" + } + } + } + sge { + process{ + $fastqc_fastq { + beforeScript = "module purge; module load FastQC/0.11.5" + 
executor = "sge" + cpus = 1 + memory = "5GB" + time = "6h" + queueSize = 1000 + pollInterval = '60sec' + queue = 'monointeldeb128' + } + } + } +} diff --git a/src/nf_modules/FastQC/fastqc_single.nf b/src/nf_modules/FastQC/fastqc_single.nf new file mode 100644 index 0000000000000000000000000000000000000000..459841651ec85102f38a980bc621d0ca1c8626bb --- /dev/null +++ b/src/nf_modules/FastQC/fastqc_single.nf @@ -0,0 +1,27 @@ +params.fastq = "$baseDir/data/fastq/*.fastq" + +log.info "fastq files : ${params.fastq}" + +Channel + .fromPath( params.fastq ) + .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} + .set { fastq_files } + +process fastqc_fastq { + tag "$file_id" + publishDir "results/fastq/fastqc/", mode: 'copy' + cpus = 1 + + input: + set file_id, file(reads) from fastq_files + + output: + file "*.{zip,html}" into fastqc_report + + script: +""" +fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads} +""" +} + diff --git a/src/nf_modules/FastQC/tests.sh b/src/nf_modules/FastQC/tests.sh new file mode 100755 index 0000000000000000000000000000000000000000..de58b1028e221f05b7d084a7df6df5e8a077c4ec --- /dev/null +++ b/src/nf_modules/FastQC/tests.sh @@ -0,0 +1,9 @@ +./nextflow src/nf_modules/FastQC/fastqc_paired.nf \ + -c src/nf_modules/FastQC/fastqc_paired.config \ + -profile docker \ + --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" + +./nextflow src/nf_modules/FastQC/fastqc_single.nf \ + -c src/nf_modules/FastQC/fastqc_single.config \ + -profile docker \ + --fastq "data/tiny_dataset/fastq/tiny_S.fastq" diff --git a/src/nf_modules/HTSeq/htseq.config b/src/nf_modules/HTSeq/htseq.config index ab3cc3a268f8c3d0f0233beb184c2ace5d9b7031..00f2fafb921828b21fe18d57240b9a943dcd2fb9 100644 --- a/src/nf_modules/HTSeq/htseq.config +++ b/src/nf_modules/HTSeq/htseq.config @@ -3,6 +3,9 @@ profiles { docker.temp = 'auto' docker.enabled = true process { + $sort_bam { + container =
"samtools:1.7" + } $counting { container = "htseq:0.8.0" } @@ -10,6 +13,9 @@ profiles { } sge { process{ + $sort_bam { + beforeScript = "module purge; module load SAMtools/1.7" + } $trimming { beforeScript = "module purge; module load HTSeq/0.8.0" } diff --git a/src/nf_modules/HTSeq/htseq.nf b/src/nf_modules/HTSeq/htseq.nf index 5aa2f739bd64381724450640e9828a0b4fce1494..7cade9a55b17ced135f32a36ffd90dc5354b72af 100644 --- a/src/nf_modules/HTSeq/htseq.nf +++ b/src/nf_modules/HTSeq/htseq.nf @@ -1,11 +1,3 @@ -/* -* htseq : -* Imputs : sorted bams files -* Imputs : gtf -* Output : counts files -*/ -/* quality trimming */ - params.bam = "$baseDir/data/bam/*.bam" params.gtf = "$baseDir/data/annotation/*.gtf" @@ -15,18 +7,36 @@ log.info "gtf files : ${params.gtf}" Channel .fromPath( params.bam ) .ifEmpty { error "Cannot find any fastq files matching: ${params.bam}" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} .set { bam_files } Channel .fromPath( params.gtf ) .ifEmpty { error "Cannot find any gtf file matching: ${params.gtf}" } .set { gtf_file } +process sort_bam { + tag "$file_id" + cpus 4 + + input: + set file_id, file(bam) from bam_files + + output: + set file_id, "*_sorted.sam" into sorted_bam_files + + script: +""" +# sort bam by name +samtools sort -@ ${task.cpus} -n -O SAM -o ${file_id}_sorted.sam ${bam} +""" +} + process counting { - tag "$bam.baseName" + tag "$file_id" publishDir "results/quantification/", mode: 'copy' input: - file bam from bam_files + set file_id, file(bam) from sorted_bam_files file gtf from gtf_file output: @@ -34,7 +44,9 @@ process counting { script: """ -htseq-count -r pos --mode=intersection-nonempty -a 10 -s no -t exon -i gene_id \ ---format=bam ${bam} ${gtf} > ${bam.baseName}.count +htseq-count ${bam} ${gtf} \ +-r name --mode=intersection-nonempty -a 10 -s no -t exon -i gene_id \ +> ${file_id}.count """ } + diff --git a/src/nf_modules/HTSeq/tests/tests.sh b/src/nf_modules/HTSeq/tests.sh similarity index 75% rename from
src/nf_modules/HTSeq/tests/tests.sh rename to src/nf_modules/HTSeq/tests.sh index 7ccef1815eb2f2e430095f764230160b26be85a6..4a2b5ceb62651dc0178fd026f29cd4310ecab29b 100755 --- a/src/nf_modules/HTSeq/tests/tests.sh +++ b/src/nf_modules/HTSeq/tests.sh @@ -1,4 +1,4 @@ -nextflow src/nf_modules/HTSeq/tests/counting.nf \ +./nextflow src/nf_modules/HTSeq/htseq.nf \ -c src/nf_modules/HTSeq/htseq.config \ -profile docker \ --gtf "data/tiny_dataset/annot/tiny.gff" \ diff --git a/src/nf_modules/HTSeq/tests/counting.nf b/src/nf_modules/HTSeq/tests/counting.nf deleted file mode 100644 index f11736b1443f36e13b1986518f5de1c9187ca62e..0000000000000000000000000000000000000000 --- a/src/nf_modules/HTSeq/tests/counting.nf +++ /dev/null @@ -1,33 +0,0 @@ -params.bam = "$baseDir/data/bam/*.bam" -params.gtf = "$baseDir/data/annotation/*.gtf" - -log.info "bam files : ${params.bam}" -log.info "gtf files : ${params.gtf}" - -Channel - .fromPath( params.bam ) - .ifEmpty { error "Cannot find any fastq files matching: ${params.bam}" } - .set { bam_files } -Channel - .fromPath( params.gtf ) - .ifEmpty { error "Cannot find any gtf file matching: ${params.gtf}" } - .set { gtf_file } - -process counting { - tag "$bam.baseName" - publishDir "results/quantification/", mode: 'copy' - - input: - file bam from bam_files - file gtf from gtf_file - - output: - file "*.count" into count_files - - script: -""" -htseq-count -r pos --mode=intersection-nonempty -a 10 -s no -t exon -i gene_id \ ---format=bam ${bam} ${gtf} > ${bam.baseName}.count -""" -} - diff --git a/src/nf_modules/Kallisto/indexing.config b/src/nf_modules/Kallisto/indexing.config new file mode 100644 index 0000000000000000000000000000000000000000..94c14cd210981318ff555888b96f3c43796a9c66 --- /dev/null +++ b/src/nf_modules/Kallisto/indexing.config @@ -0,0 +1,26 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $index_fasta { + container = "kallisto:0.44.0" + } + } + } + sge { + process{ + $index_fasta { + 
beforeScript = "module purge; module load Kallisto/0.44.0" + executor = "sge" + cpus = 1 + memory = "5GB" + time = "6h" + queueSize = 1000 + pollInterval = '60sec' + queue = 'h6-E5-2667v4deb128' + penv = 'openmp8' + } + } + } +} diff --git a/src/nf_modules/Kallisto/tests/index.nf b/src/nf_modules/Kallisto/indexing.nf similarity index 83% rename from src/nf_modules/Kallisto/tests/index.nf rename to src/nf_modules/Kallisto/indexing.nf index cae4bb03384a919d998562aeefe98c9b1557fea3..9e38260f87e9bfa4384b69ef440c73acb6feba05 100644 --- a/src/nf_modules/Kallisto/tests/index.nf +++ b/src/nf_modules/Kallisto/indexing.nf @@ -17,11 +17,12 @@ process index_fasta { output: file "*.index*" into index_files + file "*_kallisto_report.txt" into index_files_report script: """ kallisto index -k 31 --make-unique -i ${fasta.baseName}.index ${fasta} \ -> ${fasta.baseName}_kallisto_report.txt +2> ${fasta.baseName}_kallisto_report.txt """ } diff --git a/src/nf_modules/Kallisto/kallisto.nf b/src/nf_modules/Kallisto/kallisto.nf deleted file mode 100644 index 8867a0c3ab74393a43229e9bee4a39e71f4eddc2..0000000000000000000000000000000000000000 --- a/src/nf_modules/Kallisto/kallisto.nf +++ /dev/null @@ -1,121 +0,0 @@ -/* -* Kallisto : -* Imputs : fastq files -* Imputs : fasta files -* Output : bam files -*/ - -/* fasta indexing */ -params.fasta = "$baseDir/data/bam/*.fasta" - -log.info "fasta files : ${params.fasta}" - -Channel - .fromPath( params.fasta ) - .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" } - .set { fasta_file } - -process index_fasta { - tag "$fasta.baseName" - publishDir "results/mapping/index/", mode: 'copy' - - input: - file fasta from fasta_file - - output: - file "*.index*" into index_files - - script: -""" -kallisto index -k 31 --make-unique -i ${fasta.baseName}.index ${fasta} \ -> ${fasta.baseName}_kallisto_report.txt -""" -} - - -/* -* for paired-end data -*/ - -params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq" -params.index = 
"$baseDir/data/index/*.index.*" - -log.info "fastq files : ${params.fastq}" -log.info "index files : ${params.index}" - -Channel - .fromFilePairs( params.fastq ) - .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } - .set { fastq_files } -Channel - .fromPath( params.index ) - .ifEmpty { error "Cannot find any index files matching: ${params.index}" } - .set { index_files } - -process mapping_fastq { - tag "$reads" - cpus 4 - publishDir "results/mapping/quantification/", mode: 'copy' - - input: - set pair_id, file(reads) from fastq_files - file index from index_files.toList() - - output: - file "*" into counts_files - - script: -""" -mkdir ${reads[0].baseName} -kallisto quant -i ${index} -t ${task.cpus} \ ---bias --bootstrap-samples 100 -o ${pair_id} \ -${reads[0]} ${reads[1]} &> ${pair_id}_kallisto_report.txt -""" -} - - -/* -* for single-end data -*/ - -params.fastq = "$baseDir/data/fastq/*.fastq" -params.index = "$baseDir/data/index/*.index*" -params.mean = 200 -params.sd = 100 - -log.info "fastq files : ${params.fastq}" -log.info "index files : ${params.index}" -log.info "mean read size: ${params.mean}" -log.info "sd read size: ${params.sd}" - -Channel - .fromPath( params.fastq ) - .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } - .set { fastq_files } -Channel - .fromPath( params.index ) - .ifEmpty { error "Cannot find any index files matching: ${params.index}" } - .set { index_files } - -process mapping_fastq { - tag "$reads.baseName" - cpus 4 - publishDir "results/mapping/quantification/", mode: 'copy' - - input: - file reads from fastq_files - file index from index_files.toList() - - output: - file "*" into count_files - - script: -""" -mkdir ${reads.baseName} -kallisto quant -i ${index} -t ${task.cpus} --single ---bias --bootstrap-samples 100 -o ${reads.baseName} \ --l ${params.mean} -s ${params.sd} -o ./ \ -${reads} > ${reads.baseName}_kallisto_report.txt -""" -} - diff --git 
a/src/nf_modules/Kallisto/kallisto.config b/src/nf_modules/Kallisto/mapping_paired.config similarity index 57% rename from src/nf_modules/Kallisto/kallisto.config rename to src/nf_modules/Kallisto/mapping_paired.config index da7f54e0dfa345ef1b599ce328ceca40747dd9c6..de674527ae0735b3797dc69fe7ae90b6557e1fa1 100644 --- a/src/nf_modules/Kallisto/kallisto.config +++ b/src/nf_modules/Kallisto/mapping_paired.config @@ -3,9 +3,6 @@ profiles { docker.temp = 'auto' docker.enabled = true process { - $index_fasta { - container = "kallisto:0.44.0" - } $mapping_fastq { container = "kallisto:0.44.0" } @@ -13,17 +10,6 @@ profiles { } sge { process{ - $index_fasta { - beforeScript = "module purge; module load Kallisto/0.44.0" - executor = "sge" - cpus = 1 - memory = "5GB" - time = "6h" - queueSize = 1000 - pollInterval = '60sec' - queue = 'h6-E5-2667v4deb128' - penv = 'openmp8' - } $mapping_fastq { beforeScript = "module purge; module load Kallisto/0.44.0" executor = "sge" diff --git a/src/nf_modules/Kallisto/tests/mapping_paired.nf b/src/nf_modules/Kallisto/mapping_paired.nf similarity index 86% rename from src/nf_modules/Kallisto/tests/mapping_paired.nf rename to src/nf_modules/Kallisto/mapping_paired.nf index 8e8f94c9172c409a5af3be0be02f3970e5983030..4f4ad3d167292dcf9919506f89103dbbbab7f709 100644 --- a/src/nf_modules/Kallisto/tests/mapping_paired.nf +++ b/src/nf_modules/Kallisto/mapping_paired.nf @@ -20,17 +20,17 @@ process mapping_fastq { input: set pair_id, file(reads) from fastq_files - file index from index_files.toList() + file index from index_files.collect() output: file "*" into counts_files script: """ -mkdir ${reads[0].baseName} +mkdir ${pair_id} kallisto quant -i ${index} -t ${task.cpus} \ --bias --bootstrap-samples 100 -o ${pair_id} \ -${reads[0]} ${reads[1]} &> ${pair_id}_kallisto_report.txt +${reads[0]} ${reads[1]} &> ${pair_id}/kallisto_report.txt """ } diff --git a/src/nf_modules/Kallisto/mapping_single.config b/src/nf_modules/Kallisto/mapping_single.config new 
file mode 100644 index 0000000000000000000000000000000000000000..de674527ae0735b3797dc69fe7ae90b6557e1fa1 --- /dev/null +++ b/src/nf_modules/Kallisto/mapping_single.config @@ -0,0 +1,26 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $mapping_fastq { + container = "kallisto:0.44.0" + } + } + } + sge { + process{ + $mapping_fastq { + beforeScript = "module purge; module load Kallisto/0.44.0" + executor = "sge" + cpus = 4 + memory = "5GB" + time = "6h" + queueSize = 1000 + pollInterval = '60sec' + queue = 'h6-E5-2667v4deb128' + penv = 'openmp8' + } + } + } +} diff --git a/src/nf_modules/Kallisto/tests/mapping_single.nf b/src/nf_modules/Kallisto/mapping_single.nf similarity index 75% rename from src/nf_modules/Kallisto/tests/mapping_single.nf rename to src/nf_modules/Kallisto/mapping_single.nf index 2a46cbe6dc1274a2c46044f01a54e43c981126ad..97861e76faad7a266b279bb6fe8ac77964c94284 100644 --- a/src/nf_modules/Kallisto/tests/mapping_single.nf +++ b/src/nf_modules/Kallisto/mapping_single.nf @@ -11,6 +11,7 @@ log.info "sd read size: ${params.sd}" Channel .fromPath( params.fastq ) .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} .set { fastq_files } Channel .fromPath( params.index ) @@ -18,24 +19,24 @@ Channel .set { index_files } process mapping_fastq { - tag "$reads.baseName" + tag "$file_id" cpus 4 publishDir "results/mapping/quantification/", mode: 'copy' input: - file reads from fastq_files - file index from index_files.toList() + set file_id, file(reads) from fastq_files + file index from index_files.collect() output: file "*" into count_files script: """ -mkdir ${reads.baseName} +mkdir ${file_id} kallisto quant -i ${index} -t ${task.cpus} --single \ ---bias --bootstrap-samples 100 -o ${reads.baseName} \ +--bias --bootstrap-samples 100 -o ${file_id} \ -l ${params.mean} -s ${params.sd} \ -${reads} > ${reads.baseName}_kallisto_report.txt +${reads} &> 
${file_id}/kallisto_report.txt """ } diff --git a/src/nf_modules/Kallisto/tests/tests.sh b/src/nf_modules/Kallisto/tests.sh similarity index 50% rename from src/nf_modules/Kallisto/tests/tests.sh rename to src/nf_modules/Kallisto/tests.sh index f83a4cacb2caaa66490b00ba224205af13e92967..0f69fcc40a2cc72aba93094560b2a239d2d42a76 100755 --- a/src/nf_modules/Kallisto/tests/tests.sh +++ b/src/nf_modules/Kallisto/tests.sh @@ -1,16 +1,16 @@ -nextflow src/nf_modules/Kallisto/tests/index.nf \ - -c src/nf_modules/Kallisto/kallisto.config \ +./nextflow src/nf_modules/Kallisto/indexing.nf \ + -c src/nf_modules/Kallisto/indexing.config \ -profile docker \ --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" -nextflow src/nf_modules/Kallisto/tests/mapping_single.nf \ - -c src/nf_modules/Kallisto/kallisto.config \ +./nextflow src/nf_modules/Kallisto/mapping_single.nf \ + -c src/nf_modules/Kallisto/mapping_single.config \ -profile docker \ --index "results/mapping/index/tiny_v2.index" \ --fastq "data/tiny_dataset/fastq/tiny*_S.fastq" -nextflow src/nf_modules/Kallisto/tests/mapping_paired.nf \ - -c src/nf_modules/Kallisto/kallisto.config \ +./nextflow src/nf_modules/Kallisto/mapping_paired.nf \ + -c src/nf_modules/Kallisto/mapping_paired.config \ -profile docker \ --index "results/mapping/index/tiny_v2.index" \ --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq" diff --git a/src/nf_modules/MUSIC/peak_calling_single.config b/src/nf_modules/MUSIC/peak_calling_single.config new file mode 100644 index 0000000000000000000000000000000000000000..4685752be3d8e66da4e76aeffc82ce58d50389b3 --- /dev/null +++ b/src/nf_modules/MUSIC/peak_calling_single.config @@ -0,0 +1,30 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $compute_mappability { + container = "music:6613c53" + } + $music_preprocessing { + container = "music:6613c53" + } + $music_computation { + container = "music:6613c53" + } + } + } + sge { + process{ + $compute_mappability { + beforeScript =
"module purge; module load MUSIC/6613c53" + } + $music_preprocessing { + beforeScript = "module purge; module load MUSIC/6613c53" + } + $music_computation { + beforeScript = "module purge; module load MUSIC/6613c53" + } + } + } +} diff --git a/src/nf_modules/MUSIC/peak_calling_single.nf b/src/nf_modules/MUSIC/peak_calling_single.nf new file mode 100644 index 0000000000000000000000000000000000000000..be280394b80e6ddd9644da85f62e8d2be5d843ed --- /dev/null +++ b/src/nf_modules/MUSIC/peak_calling_single.nf @@ -0,0 +1,104 @@ +params.read_size = 100 +params.frag_size = 200 +params.step_l = 50 +params.min_l = 200 +params.max_l = 5000 +log.info "bam files : ${params.bam}" +log.info "index files : ${params.index}" +log.info "fasta files : ${params.fasta}" + +Channel + .fromPath( params.fasta ) + .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" } + .set { fasta_files } +Channel + .fromPath( params.bam ) + .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} + .set { bam_files } +Channel + .fromPath( params.index ) + .ifEmpty { error "Cannot find any index files matching: ${params.index}" } + .set { index_files } + +process compute_mappability { + tag "${fasta.baseName}" + + input: + file index from index_files.collect() + file fasta from fasta_files + + output: + file "*.bin" into mappability + file "temp/chr_ids.txt" into chr_ids + + script: + +""" +generate_multimappability_signal.csh ${fasta} ${params.read_size} ./ +bash temp_map_reads.csh +bash temp_process_mapping.csh +""" +} + +process music_preprocessing { + tag "${file_id}" + + input: + set file_id, file(bam) from bam_files + file chr_ids from chr_ids.collect() + + output: + set file_id, "preprocessed/*.tar" into preprocessed_bam_files + + script: + +""" +mkdir preprocessed +samtools view *.bam | \ +MUSIC -preprocess SAM stdin preprocessed/ +mkdir preprocessed/sorted +MUSIC -sort_reads preprocessed/ preprocessed/sorted/
+mkdir preprocessed/dedup +MUSIC -remove_duplicates ./preprocessed/sorted 2 preprocessed/dedup/ +cd preprocessed +tar -c -f ${file_id}.tar * +""" +} + +preprocessed_bam_files_control = Channel.create() +preprocessed_bam_files_chip = Channel.create() +preprocessed_bam_files.choice( + preprocessed_bam_files_control, + preprocessed_bam_files_chip ) { a -> a[0] =~ /.*control.*/ ? 0 : 1 } + +process music_computation { + tag "${file_id}" + publishDir "results/peak_calling/${file_id}", mode: 'copy' + + input: + set file_id, file(chip) from preprocessed_bam_files_chip /* ids not matching /.*control.*/; untarred into chip/ */ + set file_id_control, file(control) from preprocessed_bam_files_control.collect() /* ids matching /.*control.*/; untarred into control/ */ + file mapp from mappability.collect() + + output: + file "*" into music_output_forward + file "*.bed" into peaks_forward + + script: + +""" +mkdir mappability control chip +mv ${mapp} mappability/ +tar -xf ${control} -C control/ +tar -xf ${chip} -C chip/ + +MUSIC -get_per_win_p_vals_vs_FC -chip chip/ -control control/ \ + -l_win_step ${params.step_l} \ + -l_win_min ${params.min_l} -l_win_max ${params.max_l} +MUSIC -get_multiscale_punctate_ERs \ + -chip chip/ -control control/ -mapp mappability/ \ + -l_mapp ${params.read_size} -l_frag ${params.frag_size} -q_val 1 -l_p 0 +ls -l +""" +} diff --git a/src/nf_modules/MUSIC/tests.sh b/src/nf_modules/MUSIC/tests.sh new file mode 100755 index 0000000000000000000000000000000000000000..7c6ad078ad2c27cbb378f8cdd6c505be9527fdef --- /dev/null +++ b/src/nf_modules/MUSIC/tests.sh @@ -0,0 +1,8 @@ +cp results/training/bams/sBNLN18.bam results/training/bams/sBNLN18_control.bam +./nextflow src/nf_modules/MUSIC/peak_calling_single.nf \ + -c src/nf_modules/MUSIC/peak_calling_single.config \ + -profile docker \ + --fasta "results/training/fasta/*.fasta" \ + --bam "results/training/bams/s*.bam" \ + --index "results/training/mapping/index/*" \ + --read_size 50 --frag_size 300 diff --git a/src/nf_modules/MultiQC/multiqc_paired.config b/src/nf_modules/MultiQC/multiqc_paired.config new file mode 100644
index 0000000000000000000000000000000000000000..c1bda95e70a612c7caf4825930e740faedd1c62d --- /dev/null +++ b/src/nf_modules/MultiQC/multiqc_paired.config @@ -0,0 +1,38 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $fastqc_fastq { + container = "fastqc:0.11.5" + } + $multiqc { + container = "multiqc:1.0" + } + } + } + sge { + process{ + $fastqc_fastq { + beforeScript = "module purge; module load FastQC/0.11.5" + executor = "sge" + cpus = 1 + memory = "5GB" + time = "6h" + queueSize = 1000 + pollInterval = '60sec' + queue = 'monointeldeb128' + } + $multiqc { + beforeScript = "module purge; module load MultiQC/1.0" + executor = "sge" + cpus = 1 + memory = "5GB" + time = "6h" + queueSize = 1000 + pollInterval = '60sec' + queue = 'monointeldeb128' + } + } + } +} diff --git a/src/nf_modules/MultiQC/multiqc_paired.nf b/src/nf_modules/MultiQC/multiqc_paired.nf new file mode 100644 index 0000000000000000000000000000000000000000..b459a9bbc8ddd4c89cd51f164d3ef3a14c814841 --- /dev/null +++ b/src/nf_modules/MultiQC/multiqc_paired.nf @@ -0,0 +1,43 @@ +params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq" + +log.info "fastq files : ${params.fastq}" + +Channel + .fromFilePairs( params.fastq ) + .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } + .set { fastq_files } + +process fastqc_fastq { + tag "$pair_id" + publishDir "results/fastq/fastqc/", mode: 'copy' + + input: + set pair_id, file(reads) from fastq_files + + output: + file "*.{zip,html}" into fastqc_report + + script: +""" +fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \ +${reads[0]} ${reads[1]} +""" +} + +process multiqc { + tag "${report[0].baseName}" + publishDir "results/fastq/multiqc/", mode: 'copy' + cpus = 1 + + input: + file report from fastqc_report.collect() + + output: + file "*multiqc_*" into multiqc_report + + script: +""" +multiqc -f .
+""" +} + diff --git a/src/nf_modules/MultiQC/multiqc_single.config b/src/nf_modules/MultiQC/multiqc_single.config new file mode 100644 index 0000000000000000000000000000000000000000..c1bda95e70a612c7caf4825930e740faedd1c62d --- /dev/null +++ b/src/nf_modules/MultiQC/multiqc_single.config @@ -0,0 +1,38 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $fastqc_fastq { + container = "fastqc:0.11.5" + } + $multiqc { + container = "multiqc:1.0" + } + } + } + sge { + process{ + $fastqc_fastq { + beforeScript = "module purge; module load FastQC/0.11.5" + executor = "sge" + cpus = 1 + memory = "5GB" + time = "6h" + queueSize = 1000 + pollInterval = '60sec' + queue = 'monointeldeb128' + } + $multiqc { + beforeScript = "module purge; module load MultiQC/1.0" + executor = "sge" + cpus = 1 + memory = "5GB" + time = "6h" + queueSize = 1000 + pollInterval = '60sec' + queue = 'monointeldeb128' + } + } + } +} diff --git a/src/nf_modules/MultiQC/multiqc_single.nf b/src/nf_modules/MultiQC/multiqc_single.nf new file mode 100644 index 0000000000000000000000000000000000000000..ea1115b546f0776a4970e4a56fefcce5e3b90de9 --- /dev/null +++ b/src/nf_modules/MultiQC/multiqc_single.nf @@ -0,0 +1,44 @@ +params.fastq = "$baseDir/data/fastq/*.fastq" + +log.info "fastq files : ${params.fastq}" + +Channel + .fromPath( params.fastq ) + .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} + .set { fastq_files } + +process fastqc_fastq { + tag "$file_id" + publishDir "results/fastq/fastqc/", mode: 'copy' + cpus = 1 + + input: + set file_id, file(reads) from fastq_files + + output: + file "*.{zip,html}" into fastqc_report + + script: +""" +fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads} +""" +} + +process multiqc { + tag "${report[0].baseName}" + publishDir "results/fastq/multiqc/", mode: 'copy' + cpus = 1 + + input: + file report from fastqc_report.collect() + +
output: + file "*multiqc_*" into multiqc_report + + script: +""" +multiqc -f . +""" +} + diff --git a/src/nf_modules/MultiQC/tests.sh b/src/nf_modules/MultiQC/tests.sh new file mode 100755 index 0000000000000000000000000000000000000000..0e38a8e9e9710ec9e0d43a15432c8c8e96c0dfe0 --- /dev/null +++ b/src/nf_modules/MultiQC/tests.sh @@ -0,0 +1,9 @@ +./nextflow src/nf_modules/MultiQC/multiqc_paired.nf \ + -c src/nf_modules/MultiQC/multiqc_paired.config \ + -profile docker \ + --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" + +./nextflow src/nf_modules/MultiQC/multiqc_single.nf \ + -c src/nf_modules/MultiQC/multiqc_single.config \ + -profile docker \ + --fastq "data/tiny_dataset/fastq/tiny_S.fastq" diff --git a/src/nf_modules/RSEM/indexing.config b/src/nf_modules/RSEM/indexing.config new file mode 100644 index 0000000000000000000000000000000000000000..ddf93b6ec57c876681fc508a737b3287e20fdd61 --- /dev/null +++ b/src/nf_modules/RSEM/indexing.config @@ -0,0 +1,18 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $index_fasta { + container = "rsem:1.3.0" + } + } + } + sge { + process{ + $index_fasta { + beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7" + } + } + } +} diff --git a/src/nf_modules/RSEM/tests/index.nf b/src/nf_modules/RSEM/indexing.nf similarity index 100% rename from src/nf_modules/RSEM/tests/index.nf rename to src/nf_modules/RSEM/indexing.nf diff --git a/src/nf_modules/RSEM/rsem.config b/src/nf_modules/RSEM/quantification_paired.config similarity index 63% rename from src/nf_modules/RSEM/rsem.config rename to src/nf_modules/RSEM/quantification_paired.config index 3209b6bcd480b36b1f5e48a1055db2cc8fc93e93..344ab1e30497454826a5e45537f0e8182fb8dc63 100644 --- a/src/nf_modules/RSEM/rsem.config +++ b/src/nf_modules/RSEM/quantification_paired.config @@ -3,9 +3,6 @@ profiles { docker.temp = 'auto' docker.enabled = true process { - $index_fasta { - container = "rsem:1.3.0" - } $mapping_fastq { 
container = "rsem:1.3.0" } @@ -13,9 +10,6 @@ profiles { } sge { process{ - $index_fasta { - beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7" - } $mapping_fastq { beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7" } diff --git a/src/nf_modules/RSEM/tests/quantification_paired.nf b/src/nf_modules/RSEM/quantification_paired.nf similarity index 71% rename from src/nf_modules/RSEM/tests/quantification_paired.nf rename to src/nf_modules/RSEM/quantification_paired.nf index 75ae42f49524ac0b251a53700220680f0f8f13b4..a22950fd2b4a0548ea7a45bc0a0965992246047f 100644 --- a/src/nf_modules/RSEM/tests/quantification_paired.nf +++ b/src/nf_modules/RSEM/quantification_paired.nf @@ -26,14 +26,23 @@ process mapping_fastq { file "*" into counts_files script: -index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1] + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + } + } """ rsem-calculate-expression --bowtie2 \ --bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \ --bowtie2-sensitivity-level "very_sensitive" \ -output-genome-bam -p ${task.cpus} \ ---paired-end ${reads[0]} ${reads[1]} ${index_name} ${pair_id} \ -> ${pair_id}_rsem_bowtie2_report.txt +--paired-end ${reads[0]} ${reads[1]} ${index_id} ${pair_id} \ +2> ${pair_id}_rsem_bowtie2_report.txt + +if grep -q "Error" ${pair_id}_rsem_bowtie2_report.txt; then + exit 1 +fi """ } diff --git a/src/nf_modules/RSEM/quantification_single.config b/src/nf_modules/RSEM/quantification_single.config new file mode 100644 index 0000000000000000000000000000000000000000..344ab1e30497454826a5e45537f0e8182fb8dc63 --- /dev/null +++ b/src/nf_modules/RSEM/quantification_single.config @@ -0,0 +1,18 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $mapping_fastq { + container = "rsem:1.3.0" + } + } + } + sge { + process{ + 
$mapping_fastq { + beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7" + } + } + } +} diff --git a/src/nf_modules/RSEM/tests/quantification_single.nf b/src/nf_modules/RSEM/quantification_single.nf similarity index 67% rename from src/nf_modules/RSEM/tests/quantification_single.nf rename to src/nf_modules/RSEM/quantification_single.nf index 0fd26f0ce83f1c11f2eea62846fad566beac3e8c..d52b7f446049abac081b5a17f2202909f070e600 100644 --- a/src/nf_modules/RSEM/tests/quantification_single.nf +++ b/src/nf_modules/RSEM/quantification_single.nf @@ -1,6 +1,6 @@ params.fastq = "$baseDir/data/fastq/*.fastq" params.index = "$baseDir/data/index/*.index*" -params.mean = 300 +params.mean = 200 params.sd = 100 log.info "fastq files : ${params.fastq}" @@ -11,6 +11,7 @@ log.info "sd read size: ${params.sd}" Channel .fromPath( params.fastq ) .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} .set { fastq_files } Channel .fromPath( params.index ) @@ -18,27 +19,37 @@ Channel .set { index_files } process mapping_fastq { - tag "$reads.baseName" + tag "$file_id" cpus 4 publishDir "results/mapping/quantification/", mode: 'copy' input: - file reads from fastq_files + set file_id, file(reads) from fastq_files file index from index_files.toList() output: file "*" into count_files script: -index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1] + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + } + } """ +ls -l rsem-calculate-expression --bowtie2 \ --bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \ --bowtie2-sensitivity-level "very_sensitive" \ --fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \ --output-genome-bam -p ${task.cpus} \ -${reads} ${index_name} ${reads.baseName} \ -> ${reads.baseName}_rsem_bowtie2_report.txt +${reads} 
${index_id} ${file_id} \ +2> ${file_id}_rsem_bowtie2_report.txt + +if grep -q "Error" ${file_id}_rsem_bowtie2_report.txt; then + exit 1 +fi """ } diff --git a/src/nf_modules/RSEM/rsem.nf b/src/nf_modules/RSEM/rsem.nf deleted file mode 100644 index 54e6ad0cec2305207c274efe6b9187d53439be75..0000000000000000000000000000000000000000 --- a/src/nf_modules/RSEM/rsem.nf +++ /dev/null @@ -1,139 +0,0 @@ -/* -* RSEM : -* Imputs : fastq files -* Imputs : fasta files -* Output : bam files -*/ - -/* fasta indexing */ -params.fasta = "$baseDir/data/bam/*.fasta" -params.annotation = "$baseDir/data/bam/*.gff3" - -log.info "fasta files : ${params.fasta}" - -Channel - .fromPath( params.fasta ) - .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" } - .set { fasta_file } -Channel - .fromPath( params.annotation ) - .ifEmpty { error "Cannot find any annotation files matching: ${params.annotation}" } - .set { annotation_file } - -process index_fasta { - tag "$fasta.baseName" - cpus 4 - publishDir "results/mapping/index/", mode: 'copy' - - input: - file fasta from fasta_file - file annotation from annotation_file - - output: - file "*.index*" into index_files - - script: - def cmd_annotation = "--gff3 ${annotation}" - if(annotation ==~ /.*\.gtf$/){ - cmd_annotation = "--gtf ${annotation}" - } -""" -rsem-prepare-reference -p ${task.cpus} --bowtie2 \ ---bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \ -${cmd_annotation} ${fasta} ${fasta.baseName}.index > \ -${fasta.baseName}_rsem_bowtie2_report.txt -""" -} - - -/* -* for paired-end data -*/ -params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq" -params.index = "$baseDir/data/index/*.index.*" - -log.info "fastq files : ${params.fastq}" -log.info "index files : ${params.index}" - -Channel - .fromFilePairs( params.fastq ) - .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } - .set { fastq_files } -Channel - .fromPath( params.index ) - .ifEmpty { error "Cannot find any index files matching: 
${params.index}" } - .set { index_files } - -process mapping_fastq { - tag "$pair_id" - cpus 4 - publishDir "results/mapping/quantification/", mode: 'copy' - - input: - set pair_id, file(reads) from fastq_files - file index from index_files.toList() - - output: - file "*" into counts_files - - script: -index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1] -""" -rsem-calculate-expression --bowtie2 \ ---bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \ ---bowtie2-sensitivity-level "very_sensitive" \ --output-genome-bam -p ${task.cpus} \ ---paired-end ${reads[0]} ${reads[1]} ${index_name} ${pair_id} \ -> ${pair_id}_rsem_bowtie2_report.txt -""" -} - - - -/* -* for single-end data -*/ - -params.fastq = "$baseDir/data/fastq/*.fastq" -params.index = "$baseDir/data/index/*.index*" -params.mean = 300 -params.sd = 100 - -log.info "fastq files : ${params.fastq}" -log.info "index files : ${params.index}" -log.info "mean read size: ${params.mean}" -log.info "sd read size: ${params.sd}" - -Channel - .fromPath( params.fastq ) - .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } - .set { fastq_files } -Channel - .fromPath( params.index ) - .ifEmpty { error "Cannot find any index files matching: ${params.index}" } - .set { index_files } - -process mapping_fastq { - tag "$reads.baseName" - cpus 4 - publishDir "results/mapping/quantification/", mode: 'copy' - - input: - file reads from fastq_files - file index from index_files.toList() - - output: - file "*" into count_files - - script: -index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1] -""" -rsem-calculate-expression --bowtie2 \ ---bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \ ---bowtie2-sensitivity-level "very_sensitive" \ ---fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \ ---output-genome-bam -p ${task.cpus} \ -${reads} ${index_name} ${tagname} > ${tagname}_rsem_bowtie2_report.txt -""" -} - diff --git a/src/nf_modules/RSEM/tests/tests.sh 
b/src/nf_modules/RSEM/tests.sh similarity index 54% rename from src/nf_modules/RSEM/tests/tests.sh rename to src/nf_modules/RSEM/tests.sh index f7fcd064b93430badd67899831561121580e4709..7d56b790607b7cc1128262f6439cb2b4d3d012ef 100755 --- a/src/nf_modules/RSEM/tests/tests.sh +++ b/src/nf_modules/RSEM/tests.sh @@ -1,17 +1,17 @@ -nextflow src/nf_modules/RSEM/tests/index.nf \ - -c src/nf_modules/RSEM/rsem.config \ +./nextflow src/nf_modules/RSEM/indexing.nf \ + -c src/nf_modules/RSEM/indexing.config \ -profile docker \ --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \ --annotation "data/tiny_dataset/annot/tiny.gff" -nextflow src/nf_modules/RSEM/tests/quantification_single.nf \ - -c src/nf_modules/RSEM/rsem.config \ +./nextflow src/nf_modules/RSEM/quantification_single.nf \ + -c src/nf_modules/RSEM/quantification_single.config \ -profile docker \ --index "results/mapping/index/tiny_v2.index*" \ --fastq "data/tiny_dataset/fastq/tiny*_S.fastq" -nextflow src/nf_modules/RSEM/tests/quantification_paired.nf \ - -c src/nf_modules/RSEM/rsem.config \ +./nextflow src/nf_modules/RSEM/quantification_paired.nf \ + -c src/nf_modules/RSEM/quantification_paired.config \ -profile docker \ --index "results/mapping/index/tiny_v2.index*" \ --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq" diff --git a/src/nf_modules/SAMtools/filter_bams.config b/src/nf_modules/SAMtools/filter_bams.config new file mode 100644 index 0000000000000000000000000000000000000000..066684006e4a04daef5caf26e45ba74878bb4ebf --- /dev/null +++ b/src/nf_modules/SAMtools/filter_bams.config @@ -0,0 +1,18 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $filter_bam { + container = "samtools:1.7" + } + } + } + sge { + process{ + $filter_bam { + beforeScript = "module purge; module load SAMtools/1.7" + } + } + } +} diff --git a/src/nf_modules/SAMtools/tests/filter_bams.nf b/src/nf_modules/SAMtools/filter_bams.nf similarity index 67% rename from 
src/nf_modules/SAMtools/tests/filter_bams.nf rename to src/nf_modules/SAMtools/filter_bams.nf index 49021362d7bc97c950042ab163a5224a2eb28d02..0812a19b227049fd05b54f46cbc30b246da73127 100644 --- a/src/nf_modules/SAMtools/tests/filter_bams.nf +++ b/src/nf_modules/SAMtools/filter_bams.nf @@ -7,6 +7,7 @@ log.info "bed file : ${params.bed}" Channel .fromPath( params.bam ) .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} .set { bam_files } Channel .fromPath( params.bed ) @@ -14,18 +15,18 @@ Channel .set { bed_files } process filter_bam { - tag "$bam.baseName" + tag "$file_id" cpus 4 input: - file bam from bam_files + set file_id, file(bam) from bam_files file bed from bed_files output: - file "*_filtered.bam*" into filtered_bam_files + set file_id, "*_filtered.bam*" into filtered_bam_files script: """ -samtools view -@ ${task.cpus} -hb ${bam} -L ${bed} > ${bam.baseName}_filtered.bam +samtools view -@ ${task.cpus} -hb ${bam} -L ${bed} > ${file_id}_filtered.bam """ } diff --git a/src/nf_modules/SAMtools/index_bams.config b/src/nf_modules/SAMtools/index_bams.config new file mode 100644 index 0000000000000000000000000000000000000000..3b23601d9566ee96f034d14a798c6c8e08a6870f --- /dev/null +++ b/src/nf_modules/SAMtools/index_bams.config @@ -0,0 +1,18 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $index_bam { + container = "samtools:1.7" + } + } + } + sge { + process{ + $index_bam { + beforeScript = "module purge; module load SAMtools/1.7" + } + } + } +} diff --git a/src/nf_modules/SAMtools/tests/index_bams.nf b/src/nf_modules/SAMtools/index_bams.nf similarity index 63% rename from src/nf_modules/SAMtools/tests/index_bams.nf rename to src/nf_modules/SAMtools/index_bams.nf index bea5441c2c5946a705117c4422581c3e3eea6f02..489b0f4f71f39d1fdc5b7870547e9fd18a29f9af 100644 --- a/src/nf_modules/SAMtools/tests/index_bams.nf +++ b/src/nf_modules/SAMtools/index_bams.nf @@ 
-5,14 +5,18 @@ log.info "bams files : ${params.bam}" Channel .fromPath( params.bam ) .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} .set { bam_files } process index_bam { - tag "$bam.baseName" + tag "$file_id" + input: - file bam from bam_files + set file_id, file(bam) from bam_files + output: - file "*bam*" into indexed_bam_file + set file_id, "*.bam*" into indexed_bam_file + script: """ samtools index ${bam} diff --git a/src/nf_modules/SAMtools/samtools.config b/src/nf_modules/SAMtools/samtools.config deleted file mode 100644 index a10e3e578a16a66324e2c6989d054015af03e491..0000000000000000000000000000000000000000 --- a/src/nf_modules/SAMtools/samtools.config +++ /dev/null @@ -1,36 +0,0 @@ -profiles { - docker { - docker.temp = 'auto' - docker.enabled = true - process { - $sort_bam { - container = "samtools:1.7" - } - $index_bam { - container = "samtools:1.7" - } - $split_bam { - container = "samtools:1.7" - } - $filter_bam { - container = "samtools:1.7" - } - } - } - sge { - process{ - $trimming { - beforeScript = "module purge; module load SAMtools/1.7" - } - $index_bam { - beforeScript = "module purge; module load SAMtools/1.7" - } - $split_bam { - beforeScript = "module purge; module load SAMtools/1.7" - } - $filter_bam { - beforeScript = "module purge; module load SAMtools/1.7" - } - } - } -} diff --git a/src/nf_modules/SAMtools/samtools.nf b/src/nf_modules/SAMtools/samtools.nf deleted file mode 100644 index f178bb0d706a4b656bf40beca25bf4cea69e9103..0000000000000000000000000000000000000000 --- a/src/nf_modules/SAMtools/samtools.nf +++ /dev/null @@ -1,117 +0,0 @@ -/* -* SAMtools : -* Imputs : bam files -* Output : bam files -*/ - -/* bams sorting */ -params.bam = "$baseDir/data/bam/*.bam" - -log.info "bams files : ${params.bam}" - -Channel - .fromPath( params.bam ) - .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" } - .set { bam_files } - -process sort_bam { - 
tag "$bam.baseName" - cpus 4 - - input: - file bam from bam_files - - output: - file "*_sorted.bam" into sorted_bam_files - - script: -""" -samtools sort -@ ${task.cpus} -O BAM -o ${bam.baseName}_sorted.bam ${bam} -""" -} - -/* bams indexing */ - -params.bam = "$baseDir/data/bam/*.bam" - -log.info "bams files : ${params.bam}" - -Channel - .fromPath( params.bam ) - .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" } - .set { bam_files } - -process index_bam { - tag "$bam.baseName" - input: - file bam from bam_files - output: - file "*bam*" into indexed_bam_file - script: -""" -samtools index ${bam} -""" -} - - -/* bams spliting */ -params.bam = "$baseDir/data/bam/*.bam" - -log.info "bams files : ${params.bam}" - -Channel - .fromPath( params.bam ) - .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" } - .set { bam_files } - -process split_bam { - tag "$bam.baseName" - cpus 2 - - input: - file bam from bam_files - - output: - file "*_forward.bam*" into forward_bam_files - file "*_reverse.bam*" into reverse_bam_files - script: -""" -samtools view -hb -F 0x10 ${bam} > ${bam}_forward.bam & -samtools view -hb -f 0x10 ${bam} > ${bam}_reverse.bam -""" -} - - -/* bams filtering */ -params.bam = "$baseDir/data/bam/*.bam" -params.bed = "$baseDir/data/bam/*.bed" - -log.info "bams files : ${params.bam}" -log.info "bed file : ${params.bed}" - -Channel - .fromPath( params.bam ) - .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" } - .set { bam_files } -Channel - .fromPath( params.bed ) - .ifEmpty { error "Cannot find any bed file matching: ${params.bed}" } - .set { bed_files } - -process filter_bam { - tag "$bam.baseName" - cpus 4 - - input: - file bam from bam_files - file bed from bed_files - - output: - file "*_filtered.bam*" into filtered_bam_files - script: -""" -samtools view -@ ${task.cpus} -hb ${bam} -L ${bed} > ${bam.baseName}_filtered.bam -""" -} - - diff --git a/src/nf_modules/SAMtools/sort_bams.config 
b/src/nf_modules/SAMtools/sort_bams.config new file mode 100644 index 0000000000000000000000000000000000000000..d1a8c503ace81d53fa5ff9cd382a435ac710e0cf --- /dev/null +++ b/src/nf_modules/SAMtools/sort_bams.config @@ -0,0 +1,18 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $sort_bam { + container = "samtools:1.7" + } + } + } + sge { + process{ + $sort_bam { + beforeScript = "module purge; module load SAMtools/1.7" + } + } + } +} diff --git a/src/nf_modules/SAMtools/tests/sort_bams.nf b/src/nf_modules/SAMtools/sort_bams.nf similarity index 53% rename from src/nf_modules/SAMtools/tests/sort_bams.nf rename to src/nf_modules/SAMtools/sort_bams.nf index 79a7590519616f3949aeadb651228666c172d0df..ab5c7e5140989b83eebd25f7b4dbb206520416b6 100644 --- a/src/nf_modules/SAMtools/tests/sort_bams.nf +++ b/src/nf_modules/SAMtools/sort_bams.nf @@ -5,21 +5,22 @@ log.info "bams files : ${params.bam}" Channel .fromPath( params.bam ) .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} .set { bam_files } process sort_bam { - tag "$bams.baseName" + tag "$file_id" cpus 4 input: - file bam from bam_files + set file_id, file(bam) from bam_files output: - file "*_sorted.bam" into sorted_bam_files + set file_id, "*_sorted.bam" into sorted_bam_files script: """ -samtools sort -@ ${task.cpus} -O BAM -o ${bam.baseName}_sorted.bam ${bam} +samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam} """ } diff --git a/src/nf_modules/SAMtools/split_bams.config b/src/nf_modules/SAMtools/split_bams.config new file mode 100644 index 0000000000000000000000000000000000000000..28b548efd5177e7457bd642b0c78198f2b48acd9 --- /dev/null +++ b/src/nf_modules/SAMtools/split_bams.config @@ -0,0 +1,18 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $split_bam { + container = "samtools:1.7" + } + } + } + sge { + process{ + $split_bam { + beforeScript = 
"module purge; module load SAMtools/1.7" + } + } + } +} diff --git a/src/nf_modules/SAMtools/split_bams.nf b/src/nf_modules/SAMtools/split_bams.nf new file mode 100644 index 0000000000000000000000000000000000000000..f8ba6a50c7d7aecfb7c9c7b0fff8d4436cf055a4 --- /dev/null +++ b/src/nf_modules/SAMtools/split_bams.nf @@ -0,0 +1,29 @@ +params.bam = "$baseDir/data/bam/*.bam" + +log.info "bams files : ${params.bam}" + +Channel + .fromPath( params.bam ) + .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} + .set { bam_files } + +process split_bam { + tag "$file_id" + cpus 2 + + input: + set file_id, file(bam) from bam_files + + output: + set file_id, "*_forward.bam*" into forward_bam_files + set file_id, "*_reverse.bam*" into reverse_bam_files + script: +""" +samtools view -hb -F 0x10 ${bam} > ${file_id}_forward.bam & +samtools view -hb -f 0x10 ${bam} > ${file_id}_reverse.bam +# the forward-strand job runs in the background: wait for it before the task ends +wait +""" +} + diff --git a/src/nf_modules/SAMtools/tests.sh b/src/nf_modules/SAMtools/tests.sh new file mode 100755 index 0000000000000000000000000000000000000000..9b7ac5e47388ea99ecf137ddd6add0b2ad313149 --- /dev/null +++ b/src/nf_modules/SAMtools/tests.sh @@ -0,0 +1,20 @@ +./nextflow src/nf_modules/SAMtools/sort_bams.nf \ + -c src/nf_modules/SAMtools/sort_bams.config \ + -profile docker \ + --bam "data/tiny_dataset/map/tiny_v2.bam" + +./nextflow src/nf_modules/SAMtools/index_bams.nf \ + -c src/nf_modules/SAMtools/index_bams.config \ + -profile docker \ + --bam "data/tiny_dataset/map/tiny_v2.sort.bam" + +./nextflow src/nf_modules/SAMtools/split_bams.nf \ + -c src/nf_modules/SAMtools/split_bams.config \ + -profile docker \ + --bam "data/tiny_dataset/map/tiny_v2.bam" + +./nextflow src/nf_modules/SAMtools/filter_bams.nf \ + -c src/nf_modules/SAMtools/filter_bams.config \ + -profile docker \ + --bam "data/tiny_dataset/map/tiny_v2.bam" \ + --bed 
"data/tiny_dataset/OLD/2genes.bed" diff --git 
a/src/nf_modules/SAMtools/tests/split_bams.nf b/src/nf_modules/SAMtools/tests/split_bams.nf deleted file mode 100644 index edc20864de3c81dd4fc371d4455a62b53ed8a8f9..0000000000000000000000000000000000000000 --- a/src/nf_modules/SAMtools/tests/split_bams.nf +++ /dev/null @@ -1,26 +0,0 @@ -params.bam = "$baseDir/data/bam/*.bam" - -log.info "bams files : ${params.bam}" - -Channel - .fromPath( params.bam ) - .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" } - .set { bam_files } - -process split_bam { - tag "$bam.baseName" - cpus 2 - - input: - file bam from bam_files - - output: - file "*_forward.bam*" into forward_bam_files - file "*_reverse.bam*" into reverse_bam_files - script: -""" -samtools view -hb -F 0x10 ${bam} > ${bam}_forward.bam & -samtools view -hb -f 0x10 ${bam} > ${bam}_reverse.bam -""" -} - diff --git a/src/nf_modules/SAMtools/tests/tests.sh b/src/nf_modules/SAMtools/tests/tests.sh deleted file mode 100755 index e3e809982cdf68959603f5496e49f02f2493afd6..0000000000000000000000000000000000000000 --- a/src/nf_modules/SAMtools/tests/tests.sh +++ /dev/null @@ -1,20 +0,0 @@ -nextflow src/nf_modules/SAMtools/tests/sort_bams.nf \ - -c src/nf_modules/SAMtools/samtools.config \ - -profile docker \ - --bam "data/tiny_dataset/map/tiny_v2.bam" - -nextflow src/nf_modules/SAMtools/tests/index_bams.nf \ - -c src/nf_modules/SAMtools/samtools.config \ - -profile docker \ - --bam "data/tiny_dataset/map/tiny_v2.sort.bam" - -nextflow src/nf_modules/SAMtools/tests/split_bams.nf \ - -c src/nf_modules/SAMtools/samtools.config \ - -profile docker \ - --bam "data/tiny_dataset/map/tiny_v2.bam" - -nextflow src/nf_modules/SAMtools/tests/filter_bams.nf \ - -c src/nf_modules/SAMtools/samtools.config \ - -profile docker \ - --bam "data/tiny_dataset/map/tiny_v2.bam" \ - --bed "data/tiny_dataset/OLD/2genes.bed" diff --git a/src/nf_modules/SRAtoolkit/fastqdump.config b/src/nf_modules/SRAtoolkit/fastqdump.config new file mode 100644 index 
0000000000000000000000000000000000000000..128a4fef272aeb70993f5b614e0c34f901fc0c60 --- /dev/null +++ b/src/nf_modules/SRAtoolkit/fastqdump.config @@ -0,0 +1,25 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $fastq_dump { + container = "sratoolkit:2.8.2" + } + } + } + sge { + process{ + $fastq_dump { + beforeScript = "module purge; module load SRAtoolkit/2.8.2" + executor = "sge" + cpus = 1 + memory = "5GB" + time = "6h" + queueSize = 1000 + pollInterval = '60sec' + queue = 'monointeldeb128' + } + } + } +} diff --git a/src/nf_modules/SRAtoolkit/fastqdump.nf b/src/nf_modules/SRAtoolkit/fastqdump.nf new file mode 100644 index 0000000000000000000000000000000000000000..3dde59641dfa837398abb1e169704d42a57b9fff --- /dev/null +++ b/src/nf_modules/SRAtoolkit/fastqdump.nf @@ -0,0 +1,47 @@ +/* +* sra-tools : + +*/ + +/* fastq-dump +* Inputs : srr list +* Outputs : fastq files +*/ + +params.list_srr = "$baseDir/data/SRR/*.txt" + +log.info "downloading list srr : ${params.list_srr}" + +Channel + .fromPath( params.list_srr ) + .ifEmpty { error "Cannot find any SRR list files matching: ${params.list_srr}" } + .splitCsv() + .map { it -> it[0]} + .set { SRR } + +//the first column of each row of the list file is used as the SRR id + +process fastq_dump { + tag "$file_id" + publishDir "results/download/fastq/${file_id}/", mode: 'copy' + + input: + val file_id from SRR + + output: + set file_id, "*.fastq" into fastq + + script: +""" +#for testing, only spots 10000 to 20000 are downloaded (options -N 10000 -X 20000) +fastq-dump --split-files --defline-seq '@\$ac_\$si/\$ri' --defline-qual "+" -N 10000 -X 20000 ${file_id} +if [ -f ${file_id}_1.fastq ] +then + mv ${file_id}_1.fastq ${file_id}_R1.fastq +fi +if [ -f ${file_id}_2.fastq ] +then + mv ${file_id}_2.fastq ${file_id}_R2.fastq +fi +""" +} diff --git a/src/nf_modules/SRAtoolkit/list-srr.txt b/src/nf_modules/SRAtoolkit/list-srr.txt new file mode 100644 index 
0000000000000000000000000000000000000000..a58fc103ffe37a56f511aee117b26383b1e3f516 --- /dev/null +++ b/src/nf_modules/SRAtoolkit/list-srr.txt @@ -0,0 +1,6 @@ +ERR572281 +ERR572146 +ERR572201 +ERR638114 +ERR638115 +ERR638116 diff --git a/src/nf_modules/SRAtoolkit/tests.sh b/src/nf_modules/SRAtoolkit/tests.sh new file mode 100755 index 0000000000000000000000000000000000000000..d8575eb43181382f580699f5afd73f0ed69b9a2c --- /dev/null +++ b/src/nf_modules/SRAtoolkit/tests.sh @@ -0,0 +1,4 @@ +./nextflow src/nf_modules/SRAtoolkit/fastqdump.nf \ + -c src/nf_modules/SRAtoolkit/fastqdump.config \ + -profile docker \ + --list_srr "src/nf_modules/SRAtoolkit/list-srr.txt" diff --git a/src/nf_modules/UrQt/tests.sh b/src/nf_modules/UrQt/tests.sh new file mode 100755 index 0000000000000000000000000000000000000000..96f5845311afe29a8d5043bf7556b3d4331b045b --- /dev/null +++ b/src/nf_modules/UrQt/tests.sh @@ -0,0 +1,9 @@ +./nextflow src/nf_modules/UrQt/trimming_paired.nf \ + -c src/nf_modules/UrQt/trimming_paired.config \ + -profile docker \ + --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" + +./nextflow src/nf_modules/UrQt/trimming_single.nf \ + -c src/nf_modules/UrQt/trimming_single.config \ + -profile docker \ + --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" diff --git a/src/nf_modules/UrQt/tests/tests.sh b/src/nf_modules/UrQt/tests/tests.sh deleted file mode 100755 index ebad73150b861322459f118dd63c2e2fd81af1f3..0000000000000000000000000000000000000000 --- a/src/nf_modules/UrQt/tests/tests.sh +++ /dev/null @@ -1,9 +0,0 @@ -nextflow src/nf_modules/UrQt/tests/trimming_paired.nf \ - -c src/nf_modules/UrQt/urqt.config \ - -profile docker \ - --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" - -nextflow src/nf_modules/UrQt/tests/trimming_single.nf \ - -c src/nf_modules/UrQt/urqt.config \ - -profile docker \ - --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" diff --git a/src/nf_modules/UrQt/trimming_paired.config b/src/nf_modules/UrQt/trimming_paired.config new file mode 
100644 index 0000000000000000000000000000000000000000..46a86729f1367966225fcb37c5ef7f11070c7255 --- /dev/null +++ b/src/nf_modules/UrQt/trimming_paired.config @@ -0,0 +1,26 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $trimming { + container = "urqt:d62c1f8" + } + } + } + sge { + process{ + $trimming { + beforeScript = "module purge; module load UrQt/d62c1f8" + executor = "sge" + cpus = 4 + memory = "5GB" + time = "6h" + queueSize = 1000 + pollInterval = '60sec' + queue = 'h6-E5-2667v4deb128' + penv = 'openmp8' + } + } + } +} diff --git a/src/nf_modules/UrQt/tests/trimming_paired.nf b/src/nf_modules/UrQt/trimming_paired.nf similarity index 100% rename from src/nf_modules/UrQt/tests/trimming_paired.nf rename to src/nf_modules/UrQt/trimming_paired.nf diff --git a/src/nf_modules/UrQt/trimming_single.config b/src/nf_modules/UrQt/trimming_single.config new file mode 100644 index 0000000000000000000000000000000000000000..46a86729f1367966225fcb37c5ef7f11070c7255 --- /dev/null +++ b/src/nf_modules/UrQt/trimming_single.config @@ -0,0 +1,26 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $trimming { + container = "urqt:d62c1f8" + } + } + } + sge { + process{ + $trimming { + beforeScript = "module purge; module load UrQt/d62c1f8" + executor = "sge" + cpus = 4 + memory = "5GB" + time = "6h" + queueSize = 1000 + pollInterval = '60sec' + queue = 'h6-E5-2667v4deb128' + penv = 'openmp8' + } + } + } +} diff --git a/src/nf_modules/UrQt/tests/trimming_single.nf b/src/nf_modules/UrQt/trimming_single.nf similarity index 59% rename from src/nf_modules/UrQt/tests/trimming_single.nf rename to src/nf_modules/UrQt/trimming_single.nf index 3160f3b6c4cf075a78ec6e7155f2c18559660ad3..5eb1e84c57b26417e6794af8bbbe03dc5377c7c7 100644 --- a/src/nf_modules/UrQt/tests/trimming_single.nf +++ b/src/nf_modules/UrQt/trimming_single.nf @@ -5,24 +5,25 @@ log.info "fastq files : ${params.fastq}" Channel .fromPath( params.fastq 
) .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} .set { fastq_files } process trimming { - tag "$reads.baseName" + tag "$file_id" cpus 4 input: - file reads from fastq_files + set file_id, file(reads) from fastq_files output: - file "*_trim.fastq.gz" into fastq_files_trim + set file_id, "*_trim.fastq.gz" into fastq_files_trim script: """ UrQt --t 20 --m ${task.cpus} --gz \ --in ${reads} \ - --out ${reads.baseName}_trim.fastq.gz \ - > ${reads.baseName}_trimming_report.txt + --out ${file_id}_trim.fastq.gz \ + > ${file_id}_trimming_report.txt """ } diff --git a/src/nf_modules/UrQt/urqt.nf b/src/nf_modules/UrQt/urqt.nf deleted file mode 100644 index d24033632011796fb1bb3d27185cf62a737444a9..0000000000000000000000000000000000000000 --- a/src/nf_modules/UrQt/urqt.nf +++ /dev/null @@ -1,74 +0,0 @@ -/* -* urqt : -* Imputs : fastq files -* Output : fastq files -*/ -/* quality trimming */ - -/* -* for paired-end data -*/ - -params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq" - -log.info "fastq files : ${params.fastq}" - -Channel - .fromFilePairs( params.fastq ) - .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } - .set { fastq_files } - -process trimming { - tag "${reads}" - cpus 4 - publishDir "results/fastq/trimming/", mode: 'copy' - - input: - set pair_id, file(reads) from fastq_files - - output: - set pair_id, "*_trim_R{1,2}.fastq.gz" into fastq_files_trim - - script: -""" -UrQt --t 20 --m ${task.cpus} --gz \ ---in ${reads[0]} --inpair ${reads[1]} \ ---out ${pair_id}_trim_R1.fastq.gz --outpair ${pair_id}_trim_R2.fastq.gz \ -> ${pair_id}_trimming_report.txt -""" -} - -/* -* for single-end data -*/ - -params.fastq = "$baseDir/data/fastq/*.fastq" - -log.info "fastq files : ${params.fastq}" - -Channel - .fromPath( params.fastq ) - .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } - .set { fastq_files } - -process trimming { - tag 
"$reads.baseName" - cpus 4 - publishDir "results/fastq/trimming/", mode: 'copy' - - input: - file reads from fastq_files - - output: - file "*_trim.fastq.gz" into fastq_files_trim - - script: -""" - -UrQt --t 20 --m ${task.cpus} --gz \ ---in ${reads} \ ---out ${reads.baseName}_trim.fastq.gz \ -> ${reads.baseName}_trimming_report.txt -""" -} - diff --git a/src/nf_modules/cutadapt/cutadapt.config b/src/nf_modules/cutadapt/adaptor_removal_paired.config similarity index 50% rename from src/nf_modules/cutadapt/cutadapt.config rename to src/nf_modules/cutadapt/adaptor_removal_paired.config index 07efa9be0c8808c0cf730fb5d0dcb7ae8d351b3f..aa1a372b694db02c9d290a0dccf3c67e14f1c5f7 100644 --- a/src/nf_modules/cutadapt/cutadapt.config +++ b/src/nf_modules/cutadapt/adaptor_removal_paired.config @@ -24,30 +24,3 @@ profiles { } } } - -profiles { - docker { - docker.temp = 'auto' - docker.enabled = true - process { - $trimming { - container = "cutadapt:1.14" - } - } - } - sge { - process{ - $trimming { - beforeScript = "module purge; module load cutadapt/1.14" - executor = "sge" - cpus = 1 - memory = "5GB" - time = "6h" - queueSize = 1000 - pollInterval = '60sec' - queue = 'h6-E5-2667v4deb128' - penv = 'openmp8' - } - } - } -} diff --git a/src/nf_modules/cutadapt/tests/adaptor_removal_paired.nf b/src/nf_modules/cutadapt/adaptor_removal_paired.nf similarity index 100% rename from src/nf_modules/cutadapt/tests/adaptor_removal_paired.nf rename to src/nf_modules/cutadapt/adaptor_removal_paired.nf diff --git a/src/nf_modules/cutadapt/adaptor_removal_single.config b/src/nf_modules/cutadapt/adaptor_removal_single.config new file mode 100644 index 0000000000000000000000000000000000000000..aa1a372b694db02c9d290a0dccf3c67e14f1c5f7 --- /dev/null +++ b/src/nf_modules/cutadapt/adaptor_removal_single.config @@ -0,0 +1,26 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $adaptor_removal { + container = "cutadapt:1.14" + } + } + } + sge { + process{ + 
$adaptor_removal { + beforeScript = "module purge; module load cutadapt/1.14" + executor = "sge" + cpus = 1 + memory = "5GB" + time = "6h" + queueSize = 1000 + pollInterval = '60sec' + queue = 'h6-E5-2667v4deb128' + penv = 'openmp8' + } + } + } +} diff --git a/src/nf_modules/cutadapt/tests/adaptor_removal_single.nf b/src/nf_modules/cutadapt/adaptor_removal_single.nf similarity index 55% rename from src/nf_modules/cutadapt/tests/adaptor_removal_single.nf rename to src/nf_modules/cutadapt/adaptor_removal_single.nf index dc889a0e26d12e90f4913ee2043f60e6f930df4d..26f3cd7602bf242f718a027bad253ba2fe1e8d8c 100644 --- a/src/nf_modules/cutadapt/tests/adaptor_removal_single.nf +++ b/src/nf_modules/cutadapt/adaptor_removal_single.nf @@ -3,22 +3,23 @@ log.info "fastq files : ${params.fastq}" Channel .fromPath( params.fastq ) .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} .set { fastq_files } process adaptor_removal { - tag "$reads.baseName" + tag "$file_id" input: - file reads from fastq_files + set file_id, file(reads) from fastq_files output: - file "*_cut.fastq.gz" into fastq_files_cut + set file_id, "*_cut.fastq.gz" into fastq_files_cut script: """ cutadapt -a AGATCGGAAGAG -g CTCTTCCGATCT\ - -o ${reads.baseName}_cut.fastq.gz \ - ${reads} > ${reads.baseName}_report.txt + -o ${file_id}_cut.fastq.gz \ + ${reads} > ${file_id}_report.txt """ } diff --git a/src/nf_modules/cutadapt/cutadapt.nf b/src/nf_modules/cutadapt/cutadapt.nf deleted file mode 100644 index 204a9e5a5e34ecef5b47e1892bc0ac9e0f6a1760..0000000000000000000000000000000000000000 --- a/src/nf_modules/cutadapt/cutadapt.nf +++ /dev/null @@ -1,134 +0,0 @@ -/* -* cutadapt : -* Imputs : fastq files -* Output : fastq files -*/ - -/* Illumina adaptor removal */ - -/* -* for paired-end data -*/ - -params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq" - -log.info "fastq files : ${params.fastq}" - -Channel - .fromFilePairs( params.fastq ) - 
.ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } - .set { fastq_files } - -process adaptor_removal { - tag "$pair_id" - publishDir "results/fastq/adaptor_removal/", mode: 'copy' - - input: - set pair_id, file(reads) from fastq_files - - output: - set pair_id, "*_cut_R{1,2}.fastq.gz" into fastq_files_cut - - script: - """ - cutadapt -a AGATCGGAAGAG -g CTCTTCCGATCT -A AGATCGGAAGAG -G CTCTTCCGATCT \ - -o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_cut_R2.fastq.gz \ - ${reads[0]} ${reads[1]} > ${pair_id}_report.txt - """ -} - -/* -* for single-end data -*/ - -params.fastq = "$baseDir/data/fastq/*.fastq" - -log.info "fastq files : ${params.fastq}" - -Channel - .fromPath( params.fastq ) - .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } - .set { fastq_files } - -process adaptor_removal { - tag "$reads.baseName" - publishDir "results/fastq/adaptor_removal/", mode: 'copy' - - input: - file reads from fastq_files - - output: - file "*_cut.fastq.gz" into fastq_files_cut - - script: - """ - cutadapt -a AGATCGGAAGAG -g CTCTTCCGATCT\ - -o ${reads.baseName}_cut.fastq.gz \ - ${reads} > ${reads.baseName}_report.txt - """ -} - -/* quality trimming */ - -/* -* for paired-end data -*/ - -params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq" - -log.info "fastq files : ${params.fastq}" - -Channel - .fromFilePairs( params.fastq ) - .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } - .set { fastq_files } - -process trimming { - tag "$pair_id" - publishDir "results/fastq/trimming/", mode: 'copy' - - input: - set pair_id, file(reads) from fastq_files - - output: - set pair_id, "*_trim_R{1,2}.fastq.gz" into fastq_files_trim - - script: - """ - cutadapt -q 20,20 \ - -o ${pair_id}_trim_R1.fastq.gz -p ${pair_id}_trim_R2.fastq.gz \ - ${reads[0]} ${reads[1]} > ${pair_id}_report.txt - """ -} - -/* -* for single-end data -*/ - -params.fastq = "$baseDir/data/fastq/*.fastq" - -log.info "fastq files : ${params.fastq}" - -Channel - 
.fromPath( params.fastq ) - .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } - .set { fastq_files } - -process trimming { - tag "$reads.baseName" - publishDir "results/fastq/trimming/", mode: 'copy' - - input: - file reads from fastq_files - - output: - file "*_trim.fastq.gz" into fastq_files_trim - - script: - """ - cutadapt -q 20,20 \ - -o ${reads.baseName}_trim.fastq.gz \ - ${reads} > ${reads.baseName}_report.txt - """ -} - diff --git a/src/nf_modules/cutadapt/tests.sh b/src/nf_modules/cutadapt/tests.sh new file mode 100755 index 0000000000000000000000000000000000000000..75089624c4c790df6f4153b2b52048dbfd2d61ad --- /dev/null +++ b/src/nf_modules/cutadapt/tests.sh @@ -0,0 +1,19 @@ +./nextflow src/nf_modules/cutadapt/adaptor_removal_paired.nf \ + -c src/nf_modules/cutadapt/adaptor_removal_paired.config \ + -profile docker \ + --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" + +./nextflow src/nf_modules/cutadapt/adaptor_removal_single.nf \ + -c src/nf_modules/cutadapt/adaptor_removal_single.config \ + -profile docker \ + --fastq "data/tiny_dataset/fastq/tiny*_S.fastq" + +./nextflow src/nf_modules/cutadapt/trimming_paired.nf \ + -c src/nf_modules/cutadapt/trimming_paired.config \ + -profile docker \ + --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" + +./nextflow src/nf_modules/cutadapt/trimming_single.nf \ + -c src/nf_modules/cutadapt/trimming_single.config \ + -profile docker \ + --fastq "data/tiny_dataset/fastq/tiny*_S.fastq" diff --git a/src/nf_modules/cutadapt/tests/tests.sh b/src/nf_modules/cutadapt/tests/tests.sh deleted file mode 100755 index a684410cf8234c3b5172679ea9de8198decf02e8..0000000000000000000000000000000000000000 --- a/src/nf_modules/cutadapt/tests/tests.sh +++ /dev/null @@ -1,19 +0,0 @@ -nextflow src/nf_modules/cutadapt/tests/adaptor_removal_paired.nf \ - -c src/nf_modules/cutadapt/cutadapt.config \ - -profile docker \ - --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" - -nextflow 
src/nf_modules/cutadapt/tests/adaptor_removal_single.nf \ - -c src/nf_modules/cutadapt/cutadapt.config \ - -profile docker \ - --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" - -nextflow src/nf_modules/cutadapt/tests/trimming_paired.nf \ - -c src/nf_modules/cutadapt/cutadapt.config \ - -profile docker \ - --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" - -nextflow src/nf_modules/cutadapt/tests/trimming_single.nf \ - -c src/nf_modules/cutadapt/cutadapt.config \ - -profile docker \ - --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" diff --git a/src/nf_modules/cutadapt/trimming_paired.config b/src/nf_modules/cutadapt/trimming_paired.config new file mode 100644 index 0000000000000000000000000000000000000000..be03e9d728f08bf863f6909e4673f3c0bef810be --- /dev/null +++ b/src/nf_modules/cutadapt/trimming_paired.config @@ -0,0 +1,26 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $trimming { + container = "cutadapt:1.14" + } + } + } + sge { + process{ + $trimming { + beforeScript = "module purge; module load cutadapt/1.14" + executor = "sge" + cpus = 1 + memory = "5GB" + time = "6h" + queueSize = 1000 + pollInterval = '60sec' + queue = 'h6-E5-2667v4deb128' + penv = 'openmp8' + } + } + } +} diff --git a/src/nf_modules/cutadapt/tests/trimming_paired.nf b/src/nf_modules/cutadapt/trimming_paired.nf similarity index 100% rename from src/nf_modules/cutadapt/tests/trimming_paired.nf rename to src/nf_modules/cutadapt/trimming_paired.nf diff --git a/src/nf_modules/cutadapt/trimming_single.config b/src/nf_modules/cutadapt/trimming_single.config new file mode 100644 index 0000000000000000000000000000000000000000..be03e9d728f08bf863f6909e4673f3c0bef810be --- /dev/null +++ b/src/nf_modules/cutadapt/trimming_single.config @@ -0,0 +1,26 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $trimming { + container = "cutadapt:1.14" + } + } + } + sge { + process{ + $trimming { + beforeScript = "module purge; 
module load cutadapt/1.14" + executor = "sge" + cpus = 1 + memory = "5GB" + time = "6h" + queueSize = 1000 + pollInterval = '60sec' + queue = 'h6-E5-2667v4deb128' + penv = 'openmp8' + } + } + } +} diff --git a/src/nf_modules/cutadapt/tests/trimming_single.nf b/src/nf_modules/cutadapt/trimming_single.nf similarity index 52% rename from src/nf_modules/cutadapt/tests/trimming_single.nf rename to src/nf_modules/cutadapt/trimming_single.nf index c2dd2627da7d3b989b556c479ca86a43897e4898..9b3764dbed4dd51bfb361b56b4f5fc737b18c270 100644 --- a/src/nf_modules/cutadapt/tests/trimming_single.nf +++ b/src/nf_modules/cutadapt/trimming_single.nf @@ -3,22 +3,23 @@ log.info "fastq files : ${params.fastq}" Channel .fromPath( params.fastq ) .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} .set { fastq_files } process trimming { - tag "$reads.baseName" + tag "$file_id" input: - file reads from fastq_files + set file_id, file(reads) from fastq_files output: - file "*_trim.fastq.gz" into fastq_files_cut + set file_id, "*_trim.fastq.gz" into fastq_files_cut script: """ cutadapt -q 20,20 \ - -o ${reads.baseName}_trim.fastq.gz \ - ${reads} > ${reads.baseName}_report.txt + -o ${file_id}_trim.fastq.gz \ + ${reads} > ${file_id}_report.txt """ } diff --git a/src/sge_modules b/src/sge_modules index 03a80f96cfe966f0ac855f0ac12a0b39b9ca2064..1d6cfb91449b187b05a3a78df9a06ff3baaf5558 160000 --- a/src/sge_modules +++ b/src/sge_modules @@ -1 +1 @@ -Subproject commit 03a80f96cfe966f0ac855f0ac12a0b39b9ca2064 +Subproject commit 1d6cfb91449b187b05a3a78df9a06ff3baaf5558 diff --git a/src/training_dataset.config b/src/training_dataset.config new file mode 100644 index 0000000000000000000000000000000000000000..00a471c6c4a356b9fd843e7185b6c440ea37bfcb --- /dev/null +++ b/src/training_dataset.config @@ -0,0 +1,94 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $build_synthetic_bed { + 
container = "bedtools:2.25.0" + } + $fasta_from_bed { + container = "bedtools:2.25.0" + } + $index_fasta { + container = "bowtie2:2.3.4.1" + } + $mapping_fastq_paired { + container = "bowtie2:2.3.4.1" + } + $bam_2_fastq_paired { + container = "samtools:1.7" + } + $sort_bam_paired { + container = "samtools:1.7" + } + $index_bam_paired { + container = "samtools:1.7" + } + $mapping_fastq_single { + container = "bowtie2:2.3.4.1" + } + $bam_2_fastq_single { + container = "samtools:1.7" + } + $sort_bam_single { + container = "samtools:1.7" + } + $index_bam_single { + container = "samtools:1.7" + } + } + } + sge { + process{ + $build_synthetic_bed { + beforeScript = "module purge; module load BEDtools/2.25.0" + executor = "sge" + cpus = 1 + memory = "5GB" + time = "6h" + queueSize = 1000 + pollInterval = '60sec' + queue = 'h6-E5-2667v4deb128' + penv = 'openmp8' + } + $fasta_from_bed { + beforeScript = "module purge; module load BEDtools/2.25.0" + executor = "sge" + cpus = 1 + memory = "5GB" + time = "6h" + queueSize = 1000 + pollInterval = '60sec' + queue = 'h6-E5-2667v4deb128' + penv = 'openmp8' + } + $index_fasta { + beforeScript = "module purge; module load Bowtie2/2.3.4.1" + } + $mapping_fastq_paired { + beforeScript = "module purge; module load SAMtools/1.7; module load Bowtie2/2.3.4.1" + } + $bam_2_fastq_paired { + beforeScript = "module purge; module load SAMtools/1.7" + } + $sort_bam_paired { + beforeScript = "module purge; module load SAMtools/1.7" + } + $index_bam_paired { + beforeScript = "module purge; module load SAMtools/1.7" + } + $mapping_fastq_single { + beforeScript = "module purge; module load SAMtools/1.7; module load Bowtie2/2.3.4.1" + } + $bam_2_fastq_single { + beforeScript = "module purge; module load SAMtools/1.7" + } + $sort_bam_single { + beforeScript = "module purge; module load SAMtools/1.7" + } + $index_bam_single { + beforeScript = "module purge; module load SAMtools/1.7" + } + } + } +} diff --git a/src/training_dataset.nf 
b/src/training_dataset.nf
new file mode 100644
index 0000000000000000000000000000000000000000..c6f48e882a82eaa7437a409c7500c7045224893e
--- /dev/null
+++ b/src/training_dataset.nf
@@ -0,0 +1,332 @@
+/*
+small pipeline to build a training dataset from whole genome data
+
+input:
+- fasta
+- fastq
+- chromosome
+- start position
+- stop position
+
+output:
+- sort fasta
+- sort fastq
+
+example for paired-end data:
+./nextflow src/training_dataset.nf -c src/training_dataset.config --fasta "data/genome.fa" --fastq_paired "data/*_R{1,2}.fastq.gz" --chromosome "X" --start 5305683 --stop 5333928 -resume
+
+example for single-end data:
+./nextflow src/training_dataset.nf -c src/training_dataset.config --fasta "data/genome.fa" --fastq_single "data/*_R1.fastq.gz" --chromosome "X" --start 5305683 --stop 5333928 -resume
+
+*/
+
+params.fastq_paired = ""
+params.fastq_single = ""
+
+log.info "fasta files : ${params.fasta}"
+log.info "fastq paired files : ${params.fastq_paired}"
+log.info "fastq single files : ${params.fastq_single}"
+log.info "chromosome : ${params.chromosome}"
+log.info "start position : ${params.start}"
+log.info "stop position : ${params.stop}"
+
+
+// reference genome from which the region of interest is extracted
+Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+  .set { fasta_file }
+
+
+// write a one-line BED file describing the chromosome:start-stop region
+process build_synthetic_bed {
+  tag "${chromosome}:${start}-${stop}"
+  cpus 4
+
+  input:
+  val chromosome from params.chromosome
+  val start from params.start
+  val stop from params.stop
+
+  output:
+  file "*.bed" into bed_files
+
+  script:
+"""
+echo "${chromosome}\t${start}\t${stop}" > synthetic.bed
+"""
+}
+
+// extract the sequence of the BED region from the genome with bedtools
+process fasta_from_bed {
+  tag "${fasta.baseName}"
+  cpus 4
+  publishDir "results/training/fasta/", mode: 'copy'
+
+  input:
+  file fasta from fasta_file
+  file bed from bed_files
+
+  output:
+  file "*.fasta" into fasta_files_extracted
+
+  script:
+"""
+bedtools getfasta \
+-fi ${fasta} -bed ${bed} -fo s${fasta.baseName}.fasta
+"""
+}
+
+// build the bowtie2 index of the extracted sequence
+process index_fasta {
+  tag "$fasta.baseName"
+  cpus 4
+  publishDir "results/training/mapping/index/", mode: 'copy'
+
+  input:
+  file fasta from fasta_files_extracted
+
+  output:
+  file "*.index*" into index_files
+  file "*_report.txt" into indexing_report
+
+  script:
+"""
+bowtie2-build --threads ${task.cpus} ${fasta} ${fasta.baseName}.index &> ${fasta.baseName}_bowtie2_report.txt
+
+if grep -q "Error" ${fasta.baseName}_bowtie2_report.txt; then
+  exit 1
+fi
+"""
+}
+
+// gather the index files once into a value channel: a queue channel can feed
+// only one process, while both mapping processes below need the whole index
+index_files
+  .collect()
+  .set { index_files_collected }
+
+if ( params.fastq_paired != "" ) {
+  Channel
+    .fromFilePairs( params.fastq_paired )
+    .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq_paired}" }
+    .set { fastq_files_paired }
+
+  // map the read pairs on the extracted sequence and convert the output to BAM
+  process mapping_fastq_paired {
+    tag "$pair_id"
+    cpus 4
+
+    input:
+    set pair_id, file(reads) from fastq_files_paired
+    file index from index_files_collected
+
+    output:
+    set pair_id, "*.bam" into bam_files_paired
+    file "*_report.txt" into mapping_report_paired
+
+    script:
+    // bowtie2 -x expects the index basename: strip the .1.bt2 suffix
+    index_id = index[0]
+    for (index_file in index) {
+      if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+      }
+    }
+    """
+    bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \
+    -1 ${reads[0]} -2 ${reads[1]} 2> \
+    ${pair_id}_bowtie2_report.txt | \
+    samtools view -Sb - > ${pair_id}.bam
+
+    if grep -q "Error" ${pair_id}_bowtie2_report.txt; then
+      exit 1
+    fi
+    """
+  }
+
+  bam_files_paired.into{ bam_files_paired_fa; bam_files_paired_ba}
+
+  // write the mapped reads (-F 0x4) back to a pair of fastq files
+  process bam_2_fastq_paired {
+    tag "$file_id"
+    publishDir "results/training/fastq/", mode: 'copy'
+
+    input:
+    set file_id, file(bam) from bam_files_paired_fa
+
+    output:
+    set file_id, "*.fastq" into fastq_files_extracted_paired
+
+    script:
+    """
+    samtools fastq -1 s${file_id}_R1.fastq -2 s${file_id}_R2.fastq -F 0x4 ${bam}
+    """
+  }
+
+  // drop the unmapped reads (-F 0x4) from the BAM file
+  process filter_bam_paired {
+    tag "$file_id"
+    cpus 4
+
+    input:
+    set file_id, file(bam) from bam_files_paired_ba
+
+    output:
+    set file_id, "*.bam" into filtered_bam_files_paired
+
+    script:
+    """
+    samtools view -@ ${task.cpus} -hb ${bam} -F 0x4 > f${file_id}.bam
+    """
+  }
+
+  // sort the filtered BAM file
+  process sort_bam_paired {
+    tag "$file_id"
+    publishDir "results/training/bams/", mode: 'copy'
+    cpus 4
+
+    input:
+    set file_id, file(bam) from filtered_bam_files_paired
+
+    output:
+    set file_id, "*.bam" into sorted_bam_files_paired
+
+    script:
+    """
+    samtools sort -@ ${task.cpus} -O BAM -o s${file_id}.bam ${bam}
+    """
+  }
+
+  // index the sorted BAM file
+  process index_bam_paired {
+    tag "$file_id"
+    publishDir "results/training/bams/", mode: 'copy'
+
+    input:
+    set file_id, file(bam) from sorted_bam_files_paired
+
+    output:
+    set file_id, "*.bam*" into indexed_bam_file_paired
+
+    script:
+    """
+    samtools index ${bam}
+    """
+  }
+}
+
+
+if ( params.fastq_single != "" ) {
+  Channel
+    .fromPath( params.fastq_single )
+    .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq_single}" }
+    .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
+    .set { fastq_files_single }
+
+  // map the single-end reads on the extracted sequence and convert to BAM
+  process mapping_fastq_single {
+    tag "$file_id"
+    cpus 4
+
+    input:
+    set file_id, file(reads) from fastq_files_single
+    file index from index_files_collected
+
+    output:
+    set file_id, "*.bam" into bam_files_single
+    file "*_report.txt" into mapping_report_single
+
+    script:
+    // bowtie2 -x expects the index basename: strip the .1.bt2 suffix
+    index_id = index[0]
+    for (index_file in index) {
+      if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+      }
+    }
+    """
+    bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \
+    -U ${reads} 2> \
+    ${file_id}_bowtie2_report.txt | \
+    samtools view -Sb - > ${file_id}.bam
+
+    if grep -q "Error" ${file_id}_bowtie2_report.txt; then
+      exit 1
+    fi
+    """
+  }
+
+  bam_files_single.into{ bam_files_single_fa; bam_files_single_ba}
+
+  // write the mapped reads (-F 0x4) back to a fastq file
+  process bam_2_fastq_single {
+    tag "$file_id"
+    publishDir "results/training/fastq/", mode: 'copy'
+
+    input:
+    set file_id, file(bam) from bam_files_single_fa
+
+    output:
+    set file_id, "*.fastq" into fastq_files_extracted_single
+
+    script:
+    """
+    samtools fastq -0 s${file_id}.fastq -F 0x4 ${bam}
+    """
+  }
+
+  // drop the unmapped reads (-F 0x4) from the BAM file
+  process filter_bam_single {
+    tag "$file_id"
+    cpus 4
+
+    input:
+    set file_id, file(bam) from bam_files_single_ba
+
+    output:
+    set file_id, "*.bam" into filtered_bam_files_single
+
+    script:
+    """
+    samtools view -@ ${task.cpus} -hb ${bam} -F 0x4 > f${file_id}.bam
+    """
+  }
+
+  // sort the filtered BAM file
+  process sort_bam_single {
+    tag "$file_id"
+    publishDir "results/training/bams/", mode: 'copy'
+    cpus 4
+
+    input:
+    set file_id, file(bam) from filtered_bam_files_single
+
+    output:
+    set file_id, "*.bam" into sorted_bam_files_single
+
+    script:
+    """
+    samtools sort -@ ${task.cpus} -O BAM -o s${file_id}.bam ${bam}
+    """
+  }
+
+  // index the sorted BAM file
+  process index_bam_single {
+    tag "$file_id"
+    publishDir "results/training/bams/", mode: 'copy'
+
+    input:
+    set file_id, file(bam) from sorted_bam_files_single
+
+    output:
+    set file_id, "*.bam*" into indexed_bam_file_single
+
+    script:
+    """
+    samtools index ${bam}
+    """
+  }
+}
+