diff --git a/doc/.gitkeep b/doc/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/doc/doublet_rate.pdf b/doc/doublet_rate.pdf new file mode 100644 index 0000000000000000000000000000000000000000..387b007e4b40b214b2d3bd1abf6ad6c0c132223b Binary files /dev/null and b/doc/doublet_rate.pdf differ diff --git a/doc/doublet_rate.png b/doc/doublet_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..387b007e4b40b214b2d3bd1abf6ad6c0c132223b Binary files /dev/null and b/doc/doublet_rate.png differ diff --git a/nextflow b/nextflow new file mode 100755 index 0000000000000000000000000000000000000000..224341c9849e6dd31040a3904945cfb65a780d40 --- /dev/null +++ b/nextflow @@ -0,0 +1,472 @@ +#!/usr/bin/env bash +# +# Copyright 2020-2021, Seqera Labs +# Copyright 2013-2019, Centre for Genomic Regulation (CRG) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[[ "$NXF_DEBUG" == 'x' ]] && set -x +NXF_VER=${NXF_VER:-'21.10.6'} +NXF_ORG=${NXF_ORG:-'nextflow-io'} +NXF_HOME=${NXF_HOME:-$HOME/.nextflow} +NXF_PROT=${NXF_PROT:-'https'} +NXF_BASE=${NXF_BASE:-$NXF_PROT://www.nextflow.io/releases} +NXF_TEMP=${NXF_TEMP:-$TMPDIR} +NXF_DIST=${NXF_DIST:-$NXF_HOME/framework} +NXF_CLI="$0 $@" +NXF_CLI_OPTS=${NXF_CLI_OPTS:-} + +export NXF_CLI +export NXF_ORG +export NXF_HOME + +if [[ $TERM && $TERM != 'dumb' ]]; then +if command -v tput &>/dev/null; then +GREEN=$(tput setaf 2; tput bold) +YELLOW=$(tput setaf 3) +RED=$(tput setaf 1) +NORMAL=$(tput sgr0) +fi +fi + +function echo_red() { + >&2 echo -e "$RED$*$NORMAL" +} + +function echo_green() { + echo -e "$GREEN$*$NORMAL" +} + +function echo_yellow() { + >&2 echo -e "$YELLOW$*$NORMAL" +} + +function die() { + echo_red "$*" + exit 1 +} + +function get_abs_filename() { + echo "$(cd "$(dirname "$1")" && pwd)/$(basename "$1")" +} + +function get() { + if command -v curl &>/dev/null; then + GET="curl -fsSL '$1' -o '$2'" + elif command -v wget &>/dev/null; then + GET="wget -q '$1' -O '$2'" + else + echo_red "ERROR: Cannot find 'curl' nor 'wget' utility -- please install one of them" + exit 1 + fi + + printf "Downloading nextflow dependencies. It may require a few seconds, please wait .. " + eval $GET; status=$? + printf "\r\033[K" + if [ $status -ne 0 ]; then + echo_red "ERROR: Cannot download nextflow required file -- make sure you can connect to the internet" + echo "" + echo "Alternatively you can try to download this file:" + echo " $1" + echo "" + echo "and save it as:" + echo " ${3:-$2}" + echo "" + exit 1 + fi +} + +function make_temp() { + local base=${NXF_TEMP:=$PWD} + if [ "$(uname)" = 'Darwin' ]; then mktemp "${base}/nxf-tmp.XXXXXX" || exit $? + else mktemp -t nxf-tmp.XXXXXX -p "${base}" || exit $? + fi +} + +function resolve_link() { + [[ ! 
-f $1 ]] && exit 1
+ if command -v realpath &>/dev/null; then
+ realpath "$1"
+ elif command -v readlink &>/dev/null; then
+ local target="$1"
+ cd "$(dirname "$target")"; target="$(basename "$target")"
+ while [ -L "$target" ]; do
+ target="$(readlink "$target")"
+ cd "$(dirname "$target")"; target="$(basename "$target")"
+ done
+ echo "$(cd "$(dirname "$target")"; pwd -P)/$target"
+ else
+ echo_yellow "WARN: Neither \`realpath\` nor \`readlink\` command can be found"
+ exit 1
+ fi
+}
+
+function current_ver() {
+ [[ $NXF_EDGE == 1 ]] && printf 'edge' || printf 'latest'
+}
+
+function install() {
+ local tmpfile=$(make_temp)
+ local version=$(set +u; [[ $NXF_VER ]] && printf "v$NXF_VER" || current_ver)
+ local action="a=${2:-default}"
+ get "$NXF_BASE/$version/nextflow?$action" "$tmpfile" "$1" || exit $?
+ mv "$tmpfile" "$1" || exit $?
+ chmod +x "$1" || exit $?
+ bash "$1" -download || exit $?
+ echo ''
+ echo -e $'Nextflow installation completed. Please note:'
+ echo -e $'- the executable file `nextflow` has been created in the folder:' $(dirname $1)
+ if [[ ! "$PATH" =~ (^|:)"$(dirname $1)"(:|$) ]]; then
+ echo -e $'- you may complete the installation by moving it to a directory in your $PATH'
+ fi
+ echo ''
+}
+
+function launch_nextflow() {
+ # the launch command line
+ local cmdline=()
+ # remove leading and trailing double-quotes
+ for x in "${launcher[@]}"; do
+ x="${x%\"}"
+ x="${x#\"}"
+ cmdline+=("$x")
+ done
+
+ if [[ $NXF_MPIRUN ]]; then
+ local rank=''
+ [[ $SLURM_PROCID ]] && rank=$SLURM_PROCID
+ [[ $OMPI_COMM_WORLD_RANK ]] && rank=$OMPI_COMM_WORLD_RANK
+ if [[ ! $rank ]]; then
+ echo_red 'It looks like you are not running in an MPI enabled environment -- cannot find the `$OMPI_COMM_WORLD_RANK` or `$SLURM_PROCID` variable';
+ exit 1;
+ fi
+ if [[ $SLURM_CPUS_PER_TASK && $SLURM_MEM_PER_CPU ]]; then
+ export NXF_CLUSTER_MAXCPUS=$SLURM_CPUS_PER_TASK
+ export NXF_CLUSTER_MAXMEMORY="$(($SLURM_MEM_PER_CPU*$SLURM_CPUS_PER_TASK))MB"
+ fi
+ if [[ $rank == 0 ]]; then
+ # sleep a few seconds in order to wait for the worker daemons to bootstrap
+ sleep ${NXF_SLEEP:-10}
+ export NXF_EXECUTOR='ignite'
+ export NXF_CLUSTER_SHUTDOWNONCOMPLETE='true'
+ else
+ args=(-log .nextflow_node_${rank}.log node ignite)
+ fi
+ # start in daemon mode
+ elif [[ "$bg" ]]; then
+ local pid_file="${NXF_PID_FILE:-.nextflow.pid}"
+ cmdline+=("${args[@]}")
+ exec "${cmdline[@]}" &
+ disown
+ echo $! > "$pid_file"
+ exit 0
+ fi
+
+ cmdline+=("${args[@]}")
+ exec "${cmdline[@]}"
+ exit 1
+}
+
+# check self-install
+if [ "$0" = "bash" ] || [ "$0" = "/bin/bash" ]; then
+ if [ -d nextflow ]; then
+ echo 'Please note:'
+ echo "- The install procedure needs to create a file named 'nextflow' in this folder, but a directory with this name already exists."
+ echo "- Please rename/delete that directory, or execute the Nextflow install procedure in another folder."
+ echo ''
+ exit 1
+ fi
+ install "$PWD/nextflow" install
+ exit 0
+fi
+
+
+# parse the command line
+bg=''
+dockerize=''
+declare -a jvmopts=()
+declare -a args=("$@")
+declare -a commands=(clone config drop help history info ls pull run view node console kuberun)
+# $NXF_CLI_OPTS allows passing arbitrary cli opts via the environment
+# note: do not wrap with quotes because the variable can be used to specify more than one option separated by blanks
+[ "$NXF_CLI_OPTS" ] && args+=($NXF_CLI_OPTS)
+
+cmd=''
+while [[ $# != 0 ]]; do
+ case $1 in
+ -D*)
+ if [[ ! "$cmd" ]]; then
+ jvmopts+=("$1")
+ fi
+ ;;
+ -d|-dockerize)
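+ # when -dockerize is given before any command (and we are not already inside
+ # the container, flagged by the /.nextflow/dockerized marker file), the
+ # launcher re-runs this same command line inside the nextflow/nextflow image
+ # -- see the `docker run` block below
+ if [[ ! "$cmd" && ! 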
-f /.nextflow/dockerized ]]; then + dockerize=1 + fi + ;; + -bg) + if [[ ! -f /.nextflow/dockerized ]]; then + bg=1 + fi + ;; + -download) + if [[ ! "$cmd" ]]; then + rm -rf "$NXF_DIST/$NXF_VER" || exit $? + bash "$0" -version || exit $? + exit 0 + fi + ;; + -self-update|self-update) + if [[ ! "$cmd" ]]; then + [[ -z $NXF_EDGE && $NXF_VER = *-edge ]] && NXF_EDGE=1 + unset NXF_VER + install "$0" update + exit 0 + fi + ;; + -process.executor|-executor.name) + if [[ $2 && $2 == 'ignite' ]]; then + [ ! $NXF_MODE ] && NXF_MODE='ignite'; shift; + fi + ;; + -with-mpi) + [ ! $NXF_MODE ] && NXF_MODE='ignite' + NXF_MPIRUN='true' + ;; + *) + [[ $1 && $1 != -* && ! "$cmd" && ${commands[*]} =~ $1 ]] && cmd=$1 + ;; + esac + shift +done + +NXF_DOCKER_OPTS=${NXF_DOCKER_OPTS:=''} +if [[ "$dockerize" ]]; then + if [[ "$bg" ]]; then detach='--detach '; else detach=''; fi + NXF_ASSETS=${NXF_ASSETS:-${NXF_HOME:-$HOME/.nextflow}/assets} + mkdir -p "$NXF_ASSETS" + exec docker run $detach --rm --net host \ + -e NXF_ANSI_LOG=false \ + -e USER -e HOME -e NXF_ASSETS=$NXF_ASSETS -e NXF_USRMAP=$(id -u) -e NXF_DOCKER_OPTS='-u $(id -u)' \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v $HOME:$HOME:ro,Z -v $NXF_ASSETS:$NXF_ASSETS:Z -v $PWD:$PWD:Z -w $PWD $NXF_DOCKER_OPTS \ + nextflow/nextflow:$NXF_VER nextflow "${args[@]}" + exit 1 +fi + +CAPSULE_LOG=${CAPSULE_LOG:=''} +CAPSULE_RESET=${CAPSULE_RESET:=''} +CAPSULE_CACHE_DIR=${CAPSULE_CACHE_DIR:="$NXF_HOME/capsule"} + +NXF_PACK=one +NXF_MODE=${NXF_MODE:-''} +NXF_JAR=${NXF_JAR:-nextflow-$NXF_VER-$NXF_PACK.jar} +NXF_BIN=${NXF_BIN:-$NXF_DIST/$NXF_VER/$NXF_JAR} +NXF_PATH=$(dirname "$NXF_BIN") +NXF_URL=${NXF_URL:-$NXF_BASE/v$NXF_VER/$NXF_JAR} +NXF_GRAB=${NXF_GRAB:-''} +NXF_CLASSPATH=${NXF_CLASSPATH:-''} +NXF_MPIRUN=${NXF_MPIRUN:=''} +NXF_HOST=${HOSTNAME:-localhost} +[[ $NXF_LAUNCHER ]] || NXF_LAUNCHER=${NXF_HOME}/tmp/launcher/nextflow-${NXF_PACK}_${NXF_VER}/${NXF_HOST} + +if [[ $NXF_MODE == ignite ]]; then + # Fix JDK bug when there's a limit on the OS virtual memory + # https://bugs.openjdk.java.net/browse/JDK-8044054 + # https://issues.apache.org/jira/browse/HADOOP-7154 + export MALLOC_ARENA_MAX=4 +fi + +# Determine the path to this file +if [[ $NXF_PACK = all ]]; then + NXF_BIN=$(which "$0" 2>/dev/null) + [ $? -gt 0 -a -f "$0" ] && NXF_BIN="./$0" +fi + +# use nextflow custom java home path +if [[ "$NXF_JAVA_HOME" ]]; then + JAVA_HOME="$NXF_JAVA_HOME" + unset JAVA_CMD +fi +# Determine the Java command to use to start the JVM. +if [ ! -x "$JAVA_CMD" ] ; then + if [ -d "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVA_CMD="$JAVA_HOME/jre/sh/java" + else + JAVA_CMD="$JAVA_HOME/bin/java" + fi + elif [ -x /usr/libexec/java_home ]; then + JAVA_CMD="$(/usr/libexec/java_home -v 1.8+)/bin/java" + else + JAVA_CMD="$(which java)" || JAVA_CMD=java + fi +fi + +# Retrieve the java version from a NF local file +JAVA_KEY="$NXF_HOME/tmp/ver/$(resolve_link "$JAVA_CMD" | sed 's@/@.@g')" +if [ -f "$JAVA_KEY" ]; then + JAVA_VER="$(cat "$JAVA_KEY")" +else + JAVA_VER="$("$JAVA_CMD" $NXF_OPTS -version 2>&1)" + if [ $? -ne 0 ]; then + echo_red "${JAVA_VER:-Failed to launch the Java virtual machine}" + echo_yellow "NOTE: Nextflow is trying to use the Java VM defined by the following environment variables:\n JAVA_CMD: $JAVA_CMD\n NXF_OPTS: $NXF_OPTS\n" + exit 1 + fi + JAVA_VER=$(echo "$JAVA_VER" | awk '/version/ {gsub(/"/, "", $3); print $3}') + # check NF version + if [[ ! 
$NXF_VER =~ ([0-9]+)\.([0-9]+)\.([0-9].*) ]]; then
+ echo_red "Not a valid Nextflow version: $NXF_VER"
+ exit 1
+ fi
+ major=${BASH_REMATCH[1]}
+ minor=${BASH_REMATCH[2]}
+ # legacy version - Java 7/8 only
+ if [ $major -eq 0 ] && [ $minor -lt 26 ]; then
+ version_check="^(1.7|1.8)"
+ version_message="Java 7 or 8"
+ else
+ version_check="^(1.8|9|10|11|12|13|14|15|16|17)"
+ version_message="Java 8 or later (up to 17)"
+ fi
+ if [[ ! $JAVA_VER =~ $version_check ]]; then
+ echo_red "ERROR: Cannot find Java or it is the wrong version -- please make sure that $version_message is installed"
+ if [[ "$NXF_JAVA_HOME" ]]; then
+ echo_yellow "NOTE: Nextflow is trying to use the Java VM defined by the following environment variables:\n JAVA_CMD: $JAVA_CMD\n NXF_JAVA_HOME: $NXF_JAVA_HOME\n"
+ else
+ echo_yellow "NOTE: Nextflow is trying to use the Java VM defined by the following environment variables:\n JAVA_CMD: $JAVA_CMD\n JAVA_HOME: $JAVA_HOME\n"
+ fi
+ exit 1
+ fi
+ if [[ ! $JAVA_VER =~ ^(1.8|9|10|11|12|13|14|15|16|17) ]]; then
+ echo_yellow "NOTE: Nextflow is not tested with Java $JAVA_VER -- the use of version 8 up to 17 is recommended\n"
+ elif [[ ! $JAVA_VER =~ ^(1.8|9|10|11) && $NXF_MODE == ignite ]]; then
+ echo_yellow "WARN: Apache Ignite executor is not tested with Java $JAVA_VER -- the use of version 8 up to 11 is recommended\n"
+ fi
+ mkdir -p $(dirname "$JAVA_KEY")
+ # cache the detected java version for subsequent runs
+ [[ $JAVA_VER ]] && echo $JAVA_VER > "$JAVA_KEY"
+fi
+
+# Verify nextflow jar is available
+if [ ! -f "$NXF_BIN" ]; then
+ [ -f "$NXF_PATH" ] && rm "$NXF_PATH"
+ mkdir -p "$NXF_PATH" || exit $?
+ tmpfile=$(make_temp)
+ get "$NXF_URL" "$tmpfile" "$NXF_BIN"
+ mv "$tmpfile" "$NXF_BIN"
+fi
+
+COLUMNS=${COLUMNS:-`tty -s && tput cols 2>/dev/null || true`}
+declare -a JAVA_OPTS=()
+JAVA_OPTS+=(-Dfile.encoding=UTF-8 -Dcapsule.trampoline -Dcapsule.java.cmd="$JAVA_CMD")
+if [[ $cmd == console ]]; then bg=1;
+else JAVA_OPTS+=(-Djava.awt.headless=true)
+fi
+
+[[ "$JAVA_HOME" ]] && JAVA_OPTS+=(-Dcapsule.java.home="$JAVA_HOME")
+[[ "$CAPSULE_LOG" ]] && JAVA_OPTS+=(-Dcapsule.log=$CAPSULE_LOG)
+[[ "$CAPSULE_RESET" ]] && JAVA_OPTS+=(-Dcapsule.reset=true)
+[[ "$cmd" != "run" && "$cmd" != "node" ]] && JAVA_OPTS+=(-XX:+TieredCompilation -XX:TieredStopAtLevel=1 -Dcom.sun.security.enableAIAcaIssuers=true)
+[[ "$NXF_OPTS" ]] && JAVA_OPTS+=($NXF_OPTS)
+[[ "$NXF_CLASSPATH" ]] && export NXF_CLASSPATH
+[[ "$NXF_GRAB" ]] && export NXF_GRAB
+[[ "$COLUMNS" ]] && export COLUMNS
+[[ "$NXF_TEMP" ]] && JAVA_OPTS+=(-Djava.io.tmpdir="$NXF_TEMP")
+[[ "${jvmopts[@]}" ]] && JAVA_OPTS+=("${jvmopts[@]}")
+export JAVA_CMD
+export CAPSULE_CACHE_DIR
+export NXF_PLUGINS_DIR
+export NXF_PLUGINS_MODE
+export NXF_PLUGINS_DEFAULT
+export NXF_PACK
+
+# look up an `md5` command
+if hash md5sum 2>/dev/null; then MD5=md5sum;
+elif hash gmd5sum 2>/dev/null; then MD5=gmd5sum;
+elif hash md5 2>/dev/null; then MD5=md5;
+else MD5=''
+fi
+
+# when no md5 command is available, fall back on default execution
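+# the classpath returned by the capsule is normally cached in a file keyed by
+# env_md5 (defined below); without an md5 tool, or when CAPSULE_RESET is set,
+# the capsule is bootstrapped from scratch on every run
+if [ ! 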
"$MD5" ] || [ "$CAPSULE_RESET" ]; then + launcher=($("$JAVA_CMD" "${JAVA_OPTS[@]}" -jar "$NXF_BIN")) + launch_nextflow + exit 1 +fi + +# creates a md5 unique for the given variables +env_md5() { +cat <<EOF | $MD5 | cut -f1 -d' ' +$JAVA_CMD +$JAVA_VER +${JAVA_OPTS[@]} +$NXF_HOME +$NXF_VER +$NXF_OPTS +$NXF_GRAB +$NXF_CLASSPATH +EOF +} + +# checked if a cached classpath file exists and it newer that the nextflow boot jar file +if [[ -f /.nextflow/dockerized ]]; then + LAUNCH_FILE=/.nextflow/launch-classpath +else + LAUNCH_FILE="${NXF_LAUNCHER}/classpath-$(env_md5)" +fi +if [ -s "$LAUNCH_FILE" ] && [ "$LAUNCH_FILE" -nt "$NXF_BIN" ]; then + declare -a launcher="($(cat "$LAUNCH_FILE"))" +else + # otherwise run the capsule and get the result classpath in the 'launcher' and save it to a file + cli=($("$JAVA_CMD" "${JAVA_OPTS[@]}" -jar "$NXF_BIN")) + [[ $? -ne 0 ]] && echo_red 'Unable to initialize nextflow environment' && exit 1 + + if [[ "$JAVA_VER" =~ ^(9|10|11|12|13|14|15|16|17) ]]; then + launcher=("${cli[@]:0:1}") + launcher+=(--add-opens=java.base/java.lang=ALL-UNNAMED) + launcher+=(--add-opens=java.base/java.io=ALL-UNNAMED) + launcher+=(--add-opens=java.base/java.nio=ALL-UNNAMED) + launcher+=(--add-opens=java.base/java.net=ALL-UNNAMED) + launcher+=(--add-opens=java.base/java.util=ALL-UNNAMED) + launcher+=(--add-opens=java.base/java.util.concurrent.locks=ALL-UNNAMED) + launcher+=(--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED) + launcher+=(--add-opens=java.base/java.nio.file.spi=ALL-UNNAMED) + launcher+=(--add-opens=java.base/sun.nio.ch=ALL-UNNAMED) + launcher+=(--add-opens=java.base/sun.nio.fs=ALL-UNNAMED) + launcher+=(--add-opens=java.base/sun.net.www.protocol.http=ALL-UNNAMED) + launcher+=(--add-opens=java.base/sun.net.www.protocol.https=ALL-UNNAMED) + launcher+=(--add-opens=java.base/sun.net.www.protocol.ftp=ALL-UNNAMED) + launcher+=(--add-opens=java.base/sun.net.www.protocol.file=ALL-UNNAMED) + launcher+=(--add-opens=java.base/jdk.internal.misc=ALL-UNNAMED) + launcher+=("${cli[@]:1}") + else + launcher=("${cli[@]}") + fi + + # Don't show errors if the LAUNCH_FILE can't be created + if mkdir -p "${NXF_LAUNCHER}" 2>/dev/null; then + STR='' + for x in "${launcher[@]}"; do + [[ "$x" != "\"-Duser.dir=$PWD\"" ]] && STR+="$x " + done + printf "$STR">"$LAUNCH_FILE" + else + echo_yellow "Warning: Couldn't create cached classpath folder: $NXF_LAUNCHER -- Maybe NXF_HOME is not writable?" + fi + +fi + +# finally run it +launch_nextflow diff --git a/src/.docker_modules/alntools/dd96682/Dockerfile b/src/.docker_modules/alntools/dd96682/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..8bd4d05acee40478d3d0f9a79bc6dbd1c0789bd5 --- /dev/null +++ b/src/.docker_modules/alntools/dd96682/Dockerfile @@ -0,0 +1,22 @@ +FROM python:3.9-buster as build +MAINTAINER Laurent Modolo + +ENV ALNTOOLS_VERSION=dd96682 +ENV PACKAGES git \ + ca-certificates \ + gawk \ + procps + +RUN apt-get update \ + && apt-get install -y --no-install-recommends ${PACKAGES}\ + && apt-get clean \ + && git clone https://github.com/churchill-lab/alntools.git \ + && cd alntools \ + && git checkout ${ALNTOOLS_VERSION} \ + && python setup.py install \ + && cd .. 
\ + && rm -R alntools \ + && pip install six \ + && apt-get autoremove --purge -y git ca-certificates + +CMD ["bash"] \ No newline at end of file diff --git a/src/.docker_modules/alntools/dd96682/docker_init.sh b/src/.docker_modules/alntools/dd96682/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..48190c462975649ace8430a6e0769cf742611b42 --- /dev/null +++ b/src/.docker_modules/alntools/dd96682/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/alntools:dd96682 +docker build src/.docker_modules/alntools/dd96682 -t 'lbmc/alntools:dd96682' +docker push lbmc/alntools:dd96682 diff --git a/src/.docker_modules/bamutils/1.0.14/Dockerfile b/src/.docker_modules/bamutils/1.0.14/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..9a116c6d442d99a997f1d9f5ada95a46e5366ab1 --- /dev/null +++ b/src/.docker_modules/bamutils/1.0.14/Dockerfile @@ -0,0 +1,22 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV BAMUTILS_VERSION=1.0.14 +ENV PACKAGES git \ + build-essential \ + g++ \ + libssl-dev \ + zlib1g-dev + +RUN apt-get update && apt-get -y install ${PACKAGES} + +RUN git clone https://github.com/statgen/libStatGen && \ +cd libStatGen && \ +git checkout fae4fca874b3b78bf9b61c0 && \ +make && \ +cd ../ && \ +git clone https://github.com/statgen/bamUtil && \ +cd bamUtil && \ +git checkout v${BAMUTILS_VERSION} && \ +make && \ +make install diff --git a/src/.docker_modules/bamutils/1.0.14/docker_init.sh b/src/.docker_modules/bamutils/1.0.14/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..2d89e9ccd542b1a848d74231068cff94bfe92239 --- /dev/null +++ b/src/.docker_modules/bamutils/1.0.14/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/bamutils:1.0.14 +docker build src/.docker_modules/bamutils/1.0.14 -t 'lbmc/bamutils:1.0.14' +docker push lbmc/bamutils:1.0.14 diff --git a/src/.docker_modules/bcftools/1.7/Dockerfile b/src/.docker_modules/bcftools/1.7/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..b602f187b41a8d54429b06ca45822482292ed68a --- /dev/null +++ b/src/.docker_modules/bcftools/1.7/Dockerfile @@ -0,0 +1,9 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV BCFTOOLS_VERSION=1.7 +ENV PACKAGES bcftools=${BCFTOOLS_VERSION}* + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean diff --git a/src/.docker_modules/bcftools/1.7/docker_init.sh b/src/.docker_modules/bcftools/1.7/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..c2bf925159aeb2708d742ff891ff96b5d40bf05a --- /dev/null +++ b/src/.docker_modules/bcftools/1.7/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/bcftools:1.7 +docker build src/.docker_modules/bcftools/1.7 -t 'lbmc/bcftools:1.7' +docker push lbmc/bcftools:1.7 diff --git a/src/.docker_modules/bedops/2.4.39/Dockerfile b/src/.docker_modules/bedops/2.4.39/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..2586c39b0c8f6fc0be6f145b0e72419c9c62f300 --- /dev/null +++ b/src/.docker_modules/bedops/2.4.39/Dockerfile @@ -0,0 +1,2 @@ +FROM quay.io/biocontainers/bedops:2.4.39--hc9558a2_0 +MAINTAINER Laurent Modolo diff --git a/src/.docker_modules/bedops/2.4.39/docker_init.sh b/src/.docker_modules/bedops/2.4.39/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..a50d06b132b1d69bf8e7d18c55fe05b4a5a15b7b --- /dev/null +++ b/src/.docker_modules/bedops/2.4.39/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker 
pull lbmc/bedops:2.4.39 +docker build src/.docker_modules/bedops/2.4.39 -t 'lbmc/bedops:2.4.39' +docker push lbmc/bedops:2.4.39 diff --git a/src/.docker_modules/bedtools/2.25.0/Dockerfile b/src/.docker_modules/bedtools/2.25.0/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..73be9c631be18e86b98ba3098275a269b4171650 --- /dev/null +++ b/src/.docker_modules/bedtools/2.25.0/Dockerfile @@ -0,0 +1,9 @@ +FROM ubuntu:16.04 +MAINTAINER Laurent Modolo + +ENV BEDTOOLS_VERSION=2.25.0 +ENV PACKAGES bedtools=${BEDTOOLS_VERSION}* + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean diff --git a/src/.docker_modules/bedtools/2.25.0/docker_init.sh b/src/.docker_modules/bedtools/2.25.0/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..e35c4d6aa13c4fd78c4797d68150471af81ef94a --- /dev/null +++ b/src/.docker_modules/bedtools/2.25.0/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/bedtools:2.25.0 +docker build src/.docker_modules/bedtools/2.25.0 -t 'lbmc/bedtools:2.25.0' +docker push lbmc/bedtools:2.25.0 diff --git a/src/.docker_modules/bedtools/2.30.0/Dockerfile b/src/.docker_modules/bedtools/2.30.0/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..e41c1f0f50e6e6c561bbdbfc059bd535bc8ad172 --- /dev/null +++ b/src/.docker_modules/bedtools/2.30.0/Dockerfile @@ -0,0 +1 @@ +FROM quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_1 \ No newline at end of file diff --git a/src/.docker_modules/bedtools/2.30.0/docker_init.sh b/src/.docker_modules/bedtools/2.30.0/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..b23fa27ee4e668610dca9da740fa53dbdc18e3db --- /dev/null +++ b/src/.docker_modules/bedtools/2.30.0/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/bedtools:2.30.0 +docker build src/.docker_modules/bedtools/2.30.0 -t 'lbmc/bedtools:2.30.0' +docker push lbmc/bedtools:2.30.0 diff --git a/src/.docker_modules/bioawk/1.0/Dockerfile b/src/.docker_modules/bioawk/1.0/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..f7ca9803e60926ed90bce0abfe4cf7af90d72672 --- /dev/null +++ b/src/.docker_modules/bioawk/1.0/Dockerfile @@ -0,0 +1,21 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV BIOAWK_VERSION=1.0 +ENV PACKAGES git=1:2.17* \ + build-essential=12.4* \ + ca-certificates=20180409 \ + zlib1g-dev=1:1.2.11* \ + byacc + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN git clone https://github.com/lh3/bioawk.git && \ + cd bioawk && \ + git checkout tags/v${BIOAWK_VERSION} && \ + make && \ + cd .. 
&& \ + mv bioawk/bioawk /usr/bin/ && \ + rm -Rf bioawk diff --git a/src/.docker_modules/bioawk/1.0/docker_init.sh b/src/.docker_modules/bioawk/1.0/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..8e6d7444062e7368f074f0b80386060e0f0b1a07 --- /dev/null +++ b/src/.docker_modules/bioawk/1.0/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/bioawk:1.0 +docker build src/.docker_modules/bioawk/1.0 -t 'lbmc/bioawk:1.0' +docker push lbmc/bioawk:1.0 diff --git a/src/.docker_modules/bowtie/1.2.2/Dockerfile b/src/.docker_modules/bowtie/1.2.2/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..128e68acde94e20c03130a0a3551978231f7a9cd --- /dev/null +++ b/src/.docker_modules/bowtie/1.2.2/Dockerfile @@ -0,0 +1,12 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV BOWTIE_VERSION=1.2.2 +ENV SAMTOOLS_VERSION=1.7 +ENV PACKAGES bowtie=${BOWTIE_VERSION}* \ + samtools=${SAMTOOLS_VERSION}* + + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean diff --git a/src/.docker_modules/bowtie/1.2.2/docker_init.sh b/src/.docker_modules/bowtie/1.2.2/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..814a311d967eaf943fd1fa864c8c137ce724c812 --- /dev/null +++ b/src/.docker_modules/bowtie/1.2.2/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/bowtie:1.2.2 +docker build src/.docker_modules/bowtie/1.2.2 -t 'lbmc/bowtie:1.2.2' +docker push lbmc/bowtie:1.2.2 diff --git a/src/.docker_modules/bowtie2/2.3.4.1/Dockerfile b/src/.docker_modules/bowtie2/2.3.4.1/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..0f4ac75e48b5390714a22765c93939b72650b47e --- /dev/null +++ b/src/.docker_modules/bowtie2/2.3.4.1/Dockerfile @@ -0,0 +1,12 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV BOWTIE2_VERSION=2.3.4.1 +ENV SAMTOOLS_VERSION=1.7 +ENV PACKAGES bowtie2=${BOWTIE2_VERSION}* \ + samtools=${SAMTOOLS_VERSION}* \ + perl=5.26.1* + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean diff --git a/src/.docker_modules/bowtie2/2.3.4.1/docker_init.sh b/src/.docker_modules/bowtie2/2.3.4.1/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..bdb93e1663ee77e81a65020a0f25a8df182f9245 --- /dev/null +++ b/src/.docker_modules/bowtie2/2.3.4.1/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/bowtie2:2.3.4.1 +docker build src/.docker_modules/bowtie2/2.3.4.1 -t 'lbmc/bowtie2:2.3.4.1' +docker push lbmc/bowtie2:2.3.4.1 diff --git a/src/.docker_modules/bwa/0.7.17/Dockerfile b/src/.docker_modules/bwa/0.7.17/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..b5b5859a8f29564c5f260a094890223d4f04ebda --- /dev/null +++ b/src/.docker_modules/bwa/0.7.17/Dockerfile @@ -0,0 +1,20 @@ +FROM lbmc/samtools:1.11 +MAINTAINER Laurent Modolo + +ENV BWA_VERSION=0.7.17 +ENV SAMBLASTER_VERSION=0.1.24 + +ENV PACKAGES curl \ + g++ + +RUN apk update && \ + apk add ${PACKAGES} + +RUN curl -k -L https://github.com/lh3/bwa/releases/download/v${BWA_VERSION}/bwa-${BWA_VERSION}.tar.bz2 -o bwa-v${BWA_VERSION}.tar.bz2 && \ +tar xjf bwa-v${BWA_VERSION}.tar.bz2 && \ +cd bwa-${BWA_VERSION}/ && \ +make && \ +cp bwa /usr/bin && \ +cd .. 
&& \ +rm -R bwa-${BWA_VERSION}/ + diff --git a/src/.docker_modules/bwa/0.7.17/docker_init.sh b/src/.docker_modules/bwa/0.7.17/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..3cabcbd9adcfdc35d6746a3f56534170a2d1d63a --- /dev/null +++ b/src/.docker_modules/bwa/0.7.17/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/bwa:0.7.17 +docker build src/.docker_modules/bwa/0.7.17 -t 'lbmc/bwa:0.7.17' +docker push lbmc/bwa:0.7.17 diff --git a/src/.docker_modules/canu/1.6/Dockerfile b/src/.docker_modules/canu/1.6/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..66a55ef7ef5a6cc4ed048ee1a2494afd9877aa43 --- /dev/null +++ b/src/.docker_modules/canu/1.6/Dockerfile @@ -0,0 +1,9 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV CANU_VERSION=1.6 +ENV PACKAGES canu=${CANU_VERSION}* + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean diff --git a/src/.docker_modules/canu/1.6/docker_init.sh b/src/.docker_modules/canu/1.6/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..b1afabb6dedba67dc9a9537ea570a9c5c62da28f --- /dev/null +++ b/src/.docker_modules/canu/1.6/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/canu:1.6 +docker build src/.docker_modules/canu/1.6 -t 'lbmc/canu:1.6' +docker push lbmc/canu:1.6 diff --git a/src/.docker_modules/cellphonedb/3.0.0/Dockerfile b/src/.docker_modules/cellphonedb/3.0.0/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..18e7cf712d67b5a4cf8998915212d236e38d8491 --- /dev/null +++ b/src/.docker_modules/cellphonedb/3.0.0/Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.7-slim + +ENV RPY2_CFFI_MODE=ABI + +RUN mkdir /app /docker +COPY requirements.txt /app +COPY requirements-web.txt /app +WORKDIR /app + +RUN pip install -r requirements.txt +RUN pip install -r requirements-web.txt +RUN pip install cellphonedb==3.0.0 + + +COPY . 
/app +COPY ./docker /docker +RUN mkdir -p /var/log/uwsgi +RUN touch /var/log/uwsgi/cellphonedb.log +RUN chmod +x /app + +CMD ["/docker/run-system.sh"] diff --git a/src/.docker_modules/cellphonedb/3.0.0/cellphonedb.ini b/src/.docker_modules/cellphonedb/3.0.0/cellphonedb.ini new file mode 100644 index 0000000000000000000000000000000000000000..ea75bcab10bd3fe4b64250f48715b524b224442e --- /dev/null +++ b/src/.docker_modules/cellphonedb/3.0.0/cellphonedb.ini @@ -0,0 +1,14 @@ +[uwsgi] +module = wsgi:app + +master = true +processes = 4 + + +socket = 0.0.0.0:5000 +chmod-socket = 660 +vacuum = true + +die-on-term = true + +stats = /tmp/stats.socket \ No newline at end of file diff --git a/src/.docker_modules/cellphonedb/3.0.0/docker/postgres/Dockerfile b/src/.docker_modules/cellphonedb/3.0.0/docker/postgres/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..25b936ad249c9f0fd6003c503e7856c3a66e3435 --- /dev/null +++ b/src/.docker_modules/cellphonedb/3.0.0/docker/postgres/Dockerfile @@ -0,0 +1,5 @@ +FROM postgres:9.6 + +ENV POSTGRES_PASSWORD root +ENV POSTGRES_USER root +ENV POSTGRES_DB cellphonedb \ No newline at end of file diff --git a/src/.docker_modules/cellphonedb/3.0.0/docker/run-system.sh b/src/.docker_modules/cellphonedb/3.0.0/docker/run-system.sh new file mode 100755 index 0000000000000000000000000000000000000000..9b8cd443032f5a201190956c9ff1031689931842 --- /dev/null +++ b/src/.docker_modules/cellphonedb/3.0.0/docker/run-system.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +exec uwsgi --ini cellphonedb.ini --log-master diff --git a/src/.docker_modules/cellphonedb/3.0.0/requirements-web.txt b/src/.docker_modules/cellphonedb/3.0.0/requirements-web.txt new file mode 100644 index 0000000000000000000000000000000000000000..fa15fd0b3898ac21f75fa9ed2e1b9c5b0f88a282 --- /dev/null +++ b/src/.docker_modules/cellphonedb/3.0.0/requirements-web.txt @@ -0,0 +1,6 @@ +flask>=1.1,<1.1.99 +Flask-RESTful>=0.3,<0.3.99 +Flask-Testing>=0.8,<0.8.99 +pika>=0.12,<0.12.99 +boto3>=1.7,<1.7.99 +uWSGI>=2.0,<2.0.99 diff --git a/src/.docker_modules/cellphonedb/3.0.0/requirements.txt b/src/.docker_modules/cellphonedb/3.0.0/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..2f7e9961e0a4bd1e3748baaaf151d84bb785b50a --- /dev/null +++ b/src/.docker_modules/cellphonedb/3.0.0/requirements.txt @@ -0,0 +1,18 @@ +click>=7.0,<8.0 +pandas>=1.0.0,<=1.1.4 +numpy>=1.16.5,<=1.19.5 +numpy-groupies +scikit-learn==0.22 +flask>=1.0,<2.0.0 +Flask-RESTful>=0.3 +Flask-Testing>=0.7 +SQLAlchemy>=1.3,<1.4 +PyYAML>=5.4,<6.0 +requests>=2.25,<3.0 +pika>=1.0.0,<2.0 +boto3>=1.15,<2.0 +geosketch==0.3 +rpy2>=3.4.0,<4.0 +tqdm>=4.3,<5.0 +h5py<3.0.0 +anndata>=0.7,<=0.75 diff --git a/src/.docker_modules/crossmap/0.4.1/Dockerfile b/src/.docker_modules/crossmap/0.4.1/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..bc988bb81d9f6fb83699092f8c68c5b476489a94 --- /dev/null +++ b/src/.docker_modules/crossmap/0.4.1/Dockerfile @@ -0,0 +1,2 @@ +FROM quay.io/biocontainers/crossmap:0.4.1--pyh5ca1d4c_0 +MAINTAINER Laurent Modolo diff --git a/src/.docker_modules/crossmap/0.4.1/docker_init.sh b/src/.docker_modules/crossmap/0.4.1/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..8bf250e04c90965f0be29ff62eb3bd54c08e81cc --- /dev/null +++ b/src/.docker_modules/crossmap/0.4.1/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/crossmap:0.4.1 +docker build src/.docker_modules/crossmap/0.4.1/ -t 'lbmc/crossmap:0.4.1' +docker push lbmc/crossmap:0.4.1 diff 
--git a/src/.docker_modules/cutadapt/1.14/Dockerfile b/src/.docker_modules/cutadapt/1.14/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..35d23b7b235d3ad1e611af9287c92d9147f8e440 --- /dev/null +++ b/src/.docker_modules/cutadapt/1.14/Dockerfile @@ -0,0 +1,14 @@ +FROM alpine:3.8 +MAINTAINER Laurent Modolo + +ENV CUTADAPT_VERSION=1.14 +ENV PACKAGES bash \ + python3 \ + python3-dev \ + musl-dev \ + gcc + +RUN apk update && \ + apk add ${PACKAGES} + +RUN pip3 install cutadapt==${CUTADAPT_VERSION} diff --git a/src/.docker_modules/cutadapt/1.14/docker_init.sh b/src/.docker_modules/cutadapt/1.14/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..1ba18cb47af7cf8a8c9d4d0fee001f8e2d5747b1 --- /dev/null +++ b/src/.docker_modules/cutadapt/1.14/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/cutadapt:1.14 +docker build src/.docker_modules/cutadapt/1.14 -t 'lbmc/cutadapt:1.14' +docker push lbmc/cutadapt:1.14 diff --git a/src/.docker_modules/cutadapt/1.15/Dockerfile b/src/.docker_modules/cutadapt/1.15/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..96bbd20ded423656458885b2522348711a838e97 --- /dev/null +++ b/src/.docker_modules/cutadapt/1.15/Dockerfile @@ -0,0 +1,14 @@ +FROM alpine:3.8 +MAINTAINER Laurent Modolo + +ENV CUTADAPT_VERSION=1.15 +ENV PACKAGES bash \ + python3 \ + python3-dev \ + musl-dev \ + gcc + +RUN apk update && \ + apk add ${PACKAGES} + +RUN pip3 install cutadapt==${CUTADAPT_VERSION} diff --git a/src/.docker_modules/cutadapt/1.15/docker_init.sh b/src/.docker_modules/cutadapt/1.15/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..49303006414d8a1ab61bda8da49b850824dde551 --- /dev/null +++ b/src/.docker_modules/cutadapt/1.15/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/cutadapt:1.15 +docker build src/.docker_modules/cutadapt/1.15 -t 'lbmc/cutadapt:1.15' +docker push lbmc/cutadapt:1.15 diff --git a/src/.docker_modules/cutadapt/2.1/Dockerfile b/src/.docker_modules/cutadapt/2.1/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..cb394b2f6b4c6f1256b71e2d62520e714b411b99 --- /dev/null +++ b/src/.docker_modules/cutadapt/2.1/Dockerfile @@ -0,0 +1,17 @@ +FROM alpine:3.8 +MAINTAINER Laurent Modolo + +ENV CUTADAPT_VERSION=2.1 +ENV PACKAGES bash \ + python3 \ + python3-dev \ + musl-dev \ + gcc + +RUN apk update && \ + apk add ${PACKAGES} + +RUN pip3 install --upgrade pip && \ + pip3 install cutadapt==${CUTADAPT_VERSION} + + diff --git a/src/.docker_modules/cutadapt/2.1/docker_init.sh b/src/.docker_modules/cutadapt/2.1/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..cda255f0f22841d3f9cdf61480a053d47c948071 --- /dev/null +++ b/src/.docker_modules/cutadapt/2.1/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/cutadapt:2.1 +docker build src/.docker_modules/cutadapt/2.1 -t 'lbmc/cutadapt:2.1' +docker push lbmc/cutadapt:2.1 diff --git a/src/.docker_modules/deeptools/3.0.2/Dockerfile b/src/.docker_modules/deeptools/3.0.2/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..1d42cc9362faca77200117770ccb13dba10a1fe0 --- /dev/null +++ b/src/.docker_modules/deeptools/3.0.2/Dockerfile @@ -0,0 +1,18 @@ +FROM debian:stretch +MAINTAINER Laurent Modolo + +ENV DEEPTOOLS_VERSION=3.0.2 +ENV PACKAGES build-essential \ + python3-pip \ + python3-setuptools \ + python3-dev \ + python3-wheel \ + zlib1g-dev \ + procps \ + libcurl4-gnutls-dev + +RUN apt-get update && \ + 
apt-get install -y --no-install-recommends ${PACKAGES} && \
+ apt-get clean
+
+RUN pip3 install deeptools==${DEEPTOOLS_VERSION}
diff --git a/src/.docker_modules/deeptools/3.0.2/docker_init.sh b/src/.docker_modules/deeptools/3.0.2/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..33959edcd7627e94d34d890d875d6cbe0fced74f --- /dev/null +++ b/src/.docker_modules/deeptools/3.0.2/docker_init.sh @@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/deeptools:3.0.2
+docker build src/.docker_modules/deeptools/3.0.2 -t 'lbmc/deeptools:3.0.2'
+docker push lbmc/deeptools:3.0.2
diff --git a/src/.docker_modules/deeptools/3.1.1/Dockerfile b/src/.docker_modules/deeptools/3.1.1/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..2e84a709cd0078765070d284e5119e236b251283 --- /dev/null +++ b/src/.docker_modules/deeptools/3.1.1/Dockerfile @@ -0,0 +1,19 @@
+FROM debian:stretch
+MAINTAINER Laurent Modolo
+
+ENV DEEPTOOLS_VERSION=3.1.1
+ENV PACKAGES build-essential \
+ python3-pip \
+ python3-setuptools \
+ python3-dev \
+ python3-wheel \
+ zlib1g-dev \
+ libcurl4-gnutls-dev \
+ procps
+
+
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends ${PACKAGES} && \
+ apt-get clean
+
+RUN pip3 install deeptools==${DEEPTOOLS_VERSION}
diff --git a/src/.docker_modules/deeptools/3.1.1/docker_init.sh b/src/.docker_modules/deeptools/3.1.1/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..06e63a90199385965a012175fe3f448f75539ba4 --- /dev/null +++ b/src/.docker_modules/deeptools/3.1.1/docker_init.sh @@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/deeptools:3.1.1
+docker build src/.docker_modules/deeptools/3.1.1 -t 'lbmc/deeptools:3.1.1'
+docker push lbmc/deeptools:3.1.1
diff --git a/src/.docker_modules/deeptools/3.5.0/Dockerfile b/src/.docker_modules/deeptools/3.5.0/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..4680f9494d6c3fca32b27b49f9ee85838f6bb161 --- /dev/null +++ b/src/.docker_modules/deeptools/3.5.0/Dockerfile @@ -0,0 +1,15 @@
+FROM python:3.9-slim
+MAINTAINER Laurent Modolo
+
+ENV DEEPTOOLS_VERSION=3.5.0
+RUN apt-get update -qq \
+ && apt-get install --no-install-recommends --yes \
+ build-essential \
+ zlib1g-dev \
+ libbz2-dev \
+ liblzma-dev \
+ libcurl4-gnutls-dev \
+ libssl-dev \
+ libncurses5-dev \
+ procps
+RUN pip3 install deeptools==${DEEPTOOLS_VERSION}
diff --git a/src/.docker_modules/deeptools/3.5.0/docker_init.sh b/src/.docker_modules/deeptools/3.5.0/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..47b9e608149fac2739fc1200170d98981e6c4f78 --- /dev/null +++ b/src/.docker_modules/deeptools/3.5.0/docker_init.sh @@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/deeptools:3.5.0
+docker build src/.docker_modules/deeptools/3.5.0 -t 'lbmc/deeptools:3.5.0'
+docker push lbmc/deeptools:3.5.0
diff --git a/src/.docker_modules/deeptools/3.5.1/Dockerfile b/src/.docker_modules/deeptools/3.5.1/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..389e85418fe5edda166a1cc976f83d04601c8113 --- /dev/null +++ b/src/.docker_modules/deeptools/3.5.1/Dockerfile @@ -0,0 +1,17 @@
+FROM python:3.8-slim
+MAINTAINER Laurent Modolo
+
+ENV DEEPTOOLS_VERSION=3.5.1
+RUN apt-get update -qq \
+ && apt-get install --no-install-recommends --yes \
+ build-essential \
+ zlib1g-dev \
+ libbz2-dev \
+ liblzma-dev \
+ libcurl4-gnutls-dev \
+ libssl-dev \
+ libncurses5-dev \
+ libcurl4 \
+ libc6 \
+ procps
+RUN pip3 install pysam deeptools==${DEEPTOOLS_VERSION}
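+# optional smoke test (a suggestion, not part of the automated build):
+# docker run --rm lbmc/deeptools:3.5.1 bamCoverage --version
diff 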
--git a/src/.docker_modules/deeptools/3.5.1/docker_init.sh b/src/.docker_modules/deeptools/3.5.1/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..3fc070449234f36f67db65d18436fad9d0f404a7 --- /dev/null +++ b/src/.docker_modules/deeptools/3.5.1/docker_init.sh @@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/deeptools:3.5.1
+docker build src/.docker_modules/deeptools/3.5.1 -t 'lbmc/deeptools:3.5.1'
+docker push lbmc/deeptools:3.5.1
diff --git a/src/.docker_modules/docker_push.sh b/src/.docker_modules/docker_push.sh new file mode 100755 index 0000000000000000000000000000000000000000..32e0abc1fac52ceb410b03200b80eea96e9b181a --- /dev/null +++ b/src/.docker_modules/docker_push.sh @@ -0,0 +1,2 @@
+#!/bin/sh
+fd "Dockerfile" src/.docker_modules | perl -pe 's|.*docker_modules/(.*)/(.*)/Dockerfile|\1:\2|g' | awk '{system("docker push lbmc/"$0)}'
diff --git a/src/.docker_modules/docker_update.sh b/src/.docker_modules/docker_update.sh new file mode 100644 index 0000000000000000000000000000000000000000..39a729bf3b000647df396663343ac7ef64c30f63 --- /dev/null +++ b/src/.docker_modules/docker_update.sh @@ -0,0 +1,2 @@
+#!/bin/sh
+fd "Dockerfile" src/.docker_modules | perl -pe 's|.*docker_modules/(.*)/(.*)/Dockerfile|\1:\2|g' | awk '{system("docker tag "$0" lbmc/" $0)}'
diff --git a/src/.docker_modules/emase-zero/0.3.1/Dockerfile b/src/.docker_modules/emase-zero/0.3.1/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..ac82286db088ea6363fef626d164abe43831f6c4 --- /dev/null +++ b/src/.docker_modules/emase-zero/0.3.1/Dockerfile @@ -0,0 +1,35 @@
+FROM debian:buster as build
+MAINTAINER Laurent Modolo
+
+ENV EMASEZERO_VERSION=0.3.1
+ENV PACKAGES build-essential \
+ ca-certificates \
+ zlib1g-dev \
+ git
+
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends ${PACKAGES} && \
+ apt-get clean
+
+RUN git clone https://github.com/churchill-lab/emase-zero.git \
+ && cd emase-zero/src/ \
+ && make \
+ && mv emase-zero /usr/local/bin/ \
+ && ldd /usr/local/bin/emase-zero
+# linux-vdso.so.1 (0x00007ffe8e35f000)
+# libz.so.1 => /lib/x86_64-linux-gnu/libz.so.1 (0x00007fbd358b5000)
+# libstdc++.so.6 => /usr/lib/x86_64-linux-gnu/libstdc++.so.6 (0x00007fbd35731000)
+# libm.so.6 => /lib/x86_64-linux-gnu/libm.so.6 (0x00007fbd355ae000)
+# libgcc_s.so.1 => /lib/x86_64-linux-gnu/libgcc_s.so.1 (0x00007fbd35594000)
+# libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007fbd353d3000)
+# /lib64/ld-linux-x86-64.so.2 (0x00007fbd35af7000)
+
+FROM debian:buster-slim
+
+COPY --from=build /usr/local/bin/emase-zero /usr/local/bin/
+COPY --from=build /lib/x86_64-linux-gnu/libz.so.1 /lib/x86_64-linux-gnu/
+COPY --from=build /lib/x86_64-linux-gnu/libm.so.6 /lib/x86_64-linux-gnu/
+COPY --from=build /lib/x86_64-linux-gnu/libgcc_s.so.1 /lib/x86_64-linux-gnu/
+
+RUN apt-get update && \
+ apt-get install -y procps bash
diff --git a/src/.docker_modules/emase-zero/0.3.1/docker_init.sh b/src/.docker_modules/emase-zero/0.3.1/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..cb295bd192aca5d48e4dae5c44729b13a43650cc --- /dev/null +++ b/src/.docker_modules/emase-zero/0.3.1/docker_init.sh @@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/emase-zero:0.3.1
+docker build src/.docker_modules/emase-zero/0.3.1 -t 'lbmc/emase-zero:0.3.1'
+docker push lbmc/emase-zero:0.3.1
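+# every docker_init.sh follows this pull/build/push pattern: the initial pull
+# warms the local layer cache (it may fail for a brand new module; plain sh
+# carries on), the build then reuses unchanged layers before the push
diff --git a/src/.docker_modules/emase/0.10.16/Dockerfile b/src/.docker_modules/emase/0.10.16/Dockerfile new file mode 100644 index 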
0000000000000000000000000000000000000000..6b41883f520fd53d874ffe3c68905d9166f04156 --- /dev/null +++ b/src/.docker_modules/emase/0.10.16/Dockerfile @@ -0,0 +1,15 @@
+FROM python:2.7
+MAINTAINER Laurent Modolo
+
+ENV EMASE_VERSION=0.10.16
+
+RUN apt-get update \
+ && apt-get install -y python-h5py \
+ && pip install numpy==1.8.2 \
+ && pip install numexpr==2.3.1 \
+ && pip install "cython>=0.13" \
+ && pip install tables==3.1.0 \
+ && pip install emase==${EMASE_VERSION}
+
+CMD ["bash"]
+
diff --git a/src/.docker_modules/emase/0.10.16/docker_init.sh b/src/.docker_modules/emase/0.10.16/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..1a0b7d71a5ce9673675452f9ae6c1520b412963d --- /dev/null +++ b/src/.docker_modules/emase/0.10.16/docker_init.sh @@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/emase:0.10.16
+docker build src/.docker_modules/emase/0.10.16 -t 'lbmc/emase:0.10.16'
+docker push lbmc/emase:0.10.16
diff --git a/src/.docker_modules/fastp/0.19.7/Dockerfile b/src/.docker_modules/fastp/0.19.7/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..234995a7b43807c45e673f6fc64677123cdfd67f --- /dev/null +++ b/src/.docker_modules/fastp/0.19.7/Dockerfile @@ -0,0 +1,2 @@
+FROM quay.io/biocontainers/fastp:0.19.7--hdbcaa40_0
+MAINTAINER Laurent Modolo
diff --git a/src/.docker_modules/fastp/0.19.7/docker_init.sh b/src/.docker_modules/fastp/0.19.7/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..1085915c2cfd5caf2599275d8ad50a909704d728 --- /dev/null +++ b/src/.docker_modules/fastp/0.19.7/docker_init.sh @@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/fastp:0.19.7
+docker build src/.docker_modules/fastp/0.19.7 -t 'lbmc/fastp:0.19.7'
+docker push lbmc/fastp:0.19.7
diff --git a/src/.docker_modules/fastp/0.20.1/Dockerfile b/src/.docker_modules/fastp/0.20.1/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..0461d5ea260b3e894af982a27d1dc8d3b2860c16 --- /dev/null +++ b/src/.docker_modules/fastp/0.20.1/Dockerfile @@ -0,0 +1,2 @@
+FROM quay.io/biocontainers/fastp:0.20.1--h8b12597_0
+MAINTAINER Laurent Modolo
diff --git a/src/.docker_modules/fastp/0.20.1/docker_init.sh b/src/.docker_modules/fastp/0.20.1/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..2b1f3bee40fb05504488fe026ff39811f9fef47d --- /dev/null +++ b/src/.docker_modules/fastp/0.20.1/docker_init.sh @@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/fastp:0.20.1
+docker build src/.docker_modules/fastp/0.20.1 -t 'lbmc/fastp:0.20.1'
+docker push lbmc/fastp:0.20.1
diff --git a/src/.docker_modules/fastqc/0.11.5/Dockerfile b/src/.docker_modules/fastqc/0.11.5/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..999edf6a3bb5ad548289a0de1740899802ed335d --- /dev/null +++ b/src/.docker_modules/fastqc/0.11.5/Dockerfile @@ -0,0 +1,10 @@
+FROM ubuntu:18.04
+MAINTAINER Laurent Modolo
+
+ENV FASTQC_VERSION=0.11.5
+ENV PACKAGES fastqc=${FASTQC_VERSION}* \
+ perl=5.26*
+
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends ${PACKAGES} && \
+ apt-get clean
diff --git a/src/.docker_modules/fastqc/0.11.5/docker_init.sh b/src/.docker_modules/fastqc/0.11.5/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..6b82ff40580dc34b3594278ef2f9c46d36f73560 --- /dev/null +++ b/src/.docker_modules/fastqc/0.11.5/docker_init.sh @@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/fastqc:0.11.5
+docker build src/.docker_modules/fastqc/0.11.5 -t 'lbmc/fastqc:0.11.5'
+docker push 
lbmc/fastqc:0.11.5
diff --git a/src/.docker_modules/file_handle/0.1.1/Dockerfile b/src/.docker_modules/file_handle/0.1.1/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..0557983d5d16c686c577b5eee0ee1308086760fb --- /dev/null +++ b/src/.docker_modules/file_handle/0.1.1/Dockerfile @@ -0,0 +1,17 @@
+FROM ubuntu:18.04
+MAINTAINER Laurent Modolo
+
+ENV FILE_HANDLE_VERSION 0.1.1
+ENV PACKAGES git=1:2.17.0* \
+ ca-certificates=20180409
+
+
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends ${PACKAGES} && \
+ apt-get clean
+
+RUN git clone https://github.com/LBMC/file_handle.git && \
+ cd file_handle && \
+ git checkout tags/v${FILE_HANDLE_VERSION} && \
+ cp src/file_handle.py /usr/bin/file_handle.py && \
+ chmod +x /usr/bin/file_handle.py
diff --git a/src/.docker_modules/file_handle/0.1.1/docker_init.sh b/src/.docker_modules/file_handle/0.1.1/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..0f1cf512532dc8d72490b9ebb174d90418f4c640 --- /dev/null +++ b/src/.docker_modules/file_handle/0.1.1/docker_init.sh @@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/file_handle:0.1.1
+docker build src/.docker_modules/file_handle/0.1.1 -t 'lbmc/file_handle:0.1.1'
+docker push lbmc/file_handle:0.1.1
diff --git a/src/.docker_modules/flexi_splitter/1.0.2/Dockerfile b/src/.docker_modules/flexi_splitter/1.0.2/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..49f5f7091902703740fbaac084bdebfa1e596fb9 --- /dev/null +++ b/src/.docker_modules/flexi_splitter/1.0.2/Dockerfile @@ -0,0 +1,13 @@
+FROM python:3.9-slim
+MAINTAINER Laurent Modolo
+
+ENV FLEXI_SPLITTER_VERSION=1.0.2
+RUN apt-get update -qq \
+ && apt-get install --no-install-recommends --yes \
+ build-essential \
+ procps
+RUN pip3 install flexi-splitter==${FLEXI_SPLITTER_VERSION}
+RUN apt-get remove --yes \
+ build-essential
+
+CMD [ "bash" ] \ No newline at end of file
diff --git a/src/.docker_modules/flexi_splitter/1.0.2/docker_init.sh b/src/.docker_modules/flexi_splitter/1.0.2/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..2159a29d67fcc2dd6d5b01994da8c667708638a7 --- /dev/null +++ b/src/.docker_modules/flexi_splitter/1.0.2/docker_init.sh @@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/flexi_splitter:1.0.2
+docker build src/.docker_modules/flexi_splitter/1.0.2 -t 'lbmc/flexi_splitter:1.0.2'
+docker push lbmc/flexi_splitter:1.0.2
diff --git a/src/.docker_modules/freebayes/1.3.2/Dockerfile b/src/.docker_modules/freebayes/1.3.2/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..f696522d1663186e96949ff9a28f79847f048a77 --- /dev/null +++ b/src/.docker_modules/freebayes/1.3.2/Dockerfile @@ -0,0 +1,2 @@
+FROM quay.io/biocontainers/freebayes:1.3.2--py36h89e4507_1
+MAINTAINER Laurent Modolo
diff --git a/src/.docker_modules/freebayes/1.3.2/docker_init.sh b/src/.docker_modules/freebayes/1.3.2/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..3dee0714d779f3df172fb007962dcf5219f7cc3f --- /dev/null +++ b/src/.docker_modules/freebayes/1.3.2/docker_init.sh @@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/freebayes:1.3.2
+docker build src/.docker_modules/freebayes/1.3.2/ -t 'lbmc/freebayes:1.3.2'
+docker push lbmc/freebayes:1.3.2
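+# this image simply repackages the upstream biocontainers build (see the FROM
+# line above); the equivalent manual re-tag would be:
+# docker pull quay.io/biocontainers/freebayes:1.3.2--py36h89e4507_1
+# docker tag quay.io/biocontainers/freebayes:1.3.2--py36h89e4507_1 lbmc/freebayes:1.3.2
diff --git a/src/.docker_modules/g2gtools/0.2.7/Dockerfile b/src/.docker_modules/g2gtools/0.2.7/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..6268b9a83d5fbd548e94384a92eeb548d5bc46a2 --- /dev/null +++ 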
b/src/.docker_modules/g2gtools/0.2.7/Dockerfile @@ -0,0 +1,12 @@ +FROM jcrist/alpine-conda:4.6.8 + +RUN /opt/conda/bin/conda config --add channels r \ + && /opt/conda/bin/conda config --add channels bioconda \ + && /opt/conda/bin/conda install --yes \ + -c kbchoi g2gtools pycparser setuptools\ + nomkl \ + && /opt/conda/bin/conda clean -afy \ + && find /opt/conda/ -follow -type f -name '*.a' -delete \ + && find /opt/conda/ -follow -type f -name '*.pyc' -delete \ + && find /opt/conda/ -follow -type f -name '*.js.map' -delete + diff --git a/src/.docker_modules/g2gtools/0.2.7/docker_init.sh b/src/.docker_modules/g2gtools/0.2.7/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..2da2d65ca6ecc154956f98dd2bc980f70311c41c --- /dev/null +++ b/src/.docker_modules/g2gtools/0.2.7/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/g2gtools:0.2.7 +docker build src/.docker_modules/g2gtools/0.2.7 -t 'lbmc/g2gtools:0.2.7' +docker push lbmc/g2gtools:0.2.7 diff --git a/src/.docker_modules/g2gtools/0.2.8/Dockerfile b/src/.docker_modules/g2gtools/0.2.8/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..2163f345c06c5478ca72fe8890cece852f0e78a6 --- /dev/null +++ b/src/.docker_modules/g2gtools/0.2.8/Dockerfile @@ -0,0 +1,16 @@ +FROM python:3.9-slim +MAINTAINER Laurent Modolo + +ENV G2GTOOLS_VERSION=0.2.8 + +RUN apt update \ + && apt install -y wget build-essential zlib1g-dev libbz2-dev liblzma-dev procps \ + && wget https://github.com/churchill-lab/g2gtools/archive/v${G2GTOOLS_VERSION}.tar.gz \ + && tar -xvf v${G2GTOOLS_VERSION}.tar.gz \ + && cd g2gtools-${G2GTOOLS_VERSION} \ + && pip install numpy \ + && pip install pysam \ + && make install + + +CMD ["bash"] diff --git a/src/.docker_modules/g2gtools/0.2.8/docker_init.sh b/src/.docker_modules/g2gtools/0.2.8/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..99cbd49ff63c77fb957b0193c4596029257b2de7 --- /dev/null +++ b/src/.docker_modules/g2gtools/0.2.8/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/g2gtools:0.2.8 +docker build src/.docker_modules/g2gtools/0.2.8 -t 'lbmc/g2gtools:0.2.8' +docker push lbmc/g2gtools:0.2.8 diff --git a/src/.docker_modules/gatk/3.8.0/Dockerfile b/src/.docker_modules/gatk/3.8.0/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..af8e30448ea39b16a558d7d4bd8f1fb7ce4b9b2d --- /dev/null +++ b/src/.docker_modules/gatk/3.8.0/Dockerfile @@ -0,0 +1,8 @@ +FROM broadinstitute/gatk3:3.8-0 +MAINTAINER Laurent Modolo + +ENV GATK_VERSION=3.8.0 + +RUN echo "#\!/bin/sh\njava -jar /usr/GenomeAnalysisTK.jar \$@" > /bin/gatk3 +RUN chmod +x /bin/gatk3 +RUN sed -i 's/java/java -Xmx128g/g' /bin/gatk3 diff --git a/src/.docker_modules/gatk/3.8.0/docker_init.sh b/src/.docker_modules/gatk/3.8.0/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..1188be23f86622450582486167064c6829913394 --- /dev/null +++ b/src/.docker_modules/gatk/3.8.0/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/gatk:3.8.0 +docker build src/.docker_modules/gatk/3.8.0 -t 'lbmc/gatk:3.8.0' +docker push lbmc/gatk:3.8.0 diff --git a/src/.docker_modules/gatk/4.0.8.1/Dockerfile b/src/.docker_modules/gatk/4.0.8.1/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..aceded338897254a31dca826413b63cffd933532 --- /dev/null +++ b/src/.docker_modules/gatk/4.0.8.1/Dockerfile @@ -0,0 +1,6 @@ +FROM broadinstitute/gatk:4.0.8.1 +MAINTAINER Laurent Modolo + +ENV GATK_VERSION=4.0.8.1 + +RUN cp gatk 
/usr/bin/ diff --git a/src/.docker_modules/gatk/4.0.8.1/docker_init.sh b/src/.docker_modules/gatk/4.0.8.1/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..ddfd8ee0205fa9e9af20878ec561821fc4173057 --- /dev/null +++ b/src/.docker_modules/gatk/4.0.8.1/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/gatk:4.0.8.1 +docker build src/.docker_modules/gatk/4.0.8.1 -t 'lbmc/gatk:4.0.8.1' +docker push lbmc/gatk:4.0.8.1 diff --git a/src/.docker_modules/gffread/0.11.8/Dockerfile b/src/.docker_modules/gffread/0.11.8/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..cbb5f60b09e834d05a50d9b4e2d4975577700673 --- /dev/null +++ b/src/.docker_modules/gffread/0.11.8/Dockerfile @@ -0,0 +1,16 @@ +FROM alpine:3.12 +MAINTAINER Laurent Modolo + +ENV GFFREAD_VERSION=0.11.8 +ENV PACKAGES make \ + g++ \ + bash \ + perl + +RUN apk update && \ + apk add ${PACKAGES} && \ +wget http://ccb.jhu.edu/software/stringtie/dl/gffread-${GFFREAD_VERSION}.tar.gz && \ +tar -xvf gffread-${GFFREAD_VERSION}.tar.gz && \ +cd gffread-${GFFREAD_VERSION}/ && \ +make && \ +cp gffread /usr/bin/ diff --git a/src/.docker_modules/gffread/0.11.8/docker_init.sh b/src/.docker_modules/gffread/0.11.8/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..44c18612cbc9b8d5c093d980848bfc03d1b2f1e6 --- /dev/null +++ b/src/.docker_modules/gffread/0.11.8/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/gffread:0.11.8 +docker build src/.docker_modules/gffread/0.11.8 -t 'lbmc/gffread:0.11.8' +docker push lbmc/gffread:0.11.8 diff --git a/src/.docker_modules/gffread/0.12.2/Dockerfile b/src/.docker_modules/gffread/0.12.2/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..a60a75facd1d5a641231b946181a12ca9a6172c8 --- /dev/null +++ b/src/.docker_modules/gffread/0.12.2/Dockerfile @@ -0,0 +1,16 @@ +FROM alpine:3.12 +MAINTAINER Laurent Modolo + +ENV GFFREAD_VERSION=0.12.2 +ENV PACKAGES make \ + g++ \ + bash \ + perl + +RUN apk update && \ + apk add ${PACKAGES} && \ +wget http://ccb.jhu.edu/software/stringtie/dl/gffread-${GFFREAD_VERSION}.tar.gz && \ +tar -xvf gffread-${GFFREAD_VERSION}.tar.gz && \ +cd gffread-${GFFREAD_VERSION}/ && \ +make && \ +cp gffread /usr/bin/ diff --git a/src/.docker_modules/gffread/0.12.2/docker_init.sh b/src/.docker_modules/gffread/0.12.2/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..72debe46652d5e66be7cee9067d33083f28e04ee --- /dev/null +++ b/src/.docker_modules/gffread/0.12.2/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/gffread:0.12.2 +docker build src/.docker_modules/gffread/0.12.2 -t 'lbmc/gffread:0.12.2' +docker push lbmc/gffread:0.12.2 diff --git a/src/.docker_modules/hisat2/2.0.0/Dockerfile b/src/.docker_modules/hisat2/2.0.0/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..6da58508779ea3adfa22c26f672040e8f588b85c --- /dev/null +++ b/src/.docker_modules/hisat2/2.0.0/Dockerfile @@ -0,0 +1,20 @@ +FROM lbmc/samtools:1.7 +MAINTAINER Nicolas Fontrodona + +ENV HISAT2_VERSION=2.0.0 +ENV PACKAGES curl \ + zip \ + g++ \ + perl \ + python + +RUN apk update && \ + apk add ${PACKAGES} + +RUN curl -k -L http://ccb.jhu.edu/software/hisat2/downloads/hisat2-${HISAT2_VERSION}-beta-source.zip -o hisat2_linux-v${HISAT2_VERSION}.zip && \ +unzip hisat2_linux-v${HISAT2_VERSION}.zip && \ +cd hisat2-${HISAT2_VERSION}-beta && \ +make && \ +cp hisat2 /usr/bin && \ +cp hisat2-* /usr/bin && \ +rm -Rf hisat2-${HISAT2_VERSION}-beta diff 
--git a/src/.docker_modules/hisat2/2.0.0/docker_init.sh b/src/.docker_modules/hisat2/2.0.0/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..8bfb16363342039e3fff7057259a8e835c2a8c6d --- /dev/null +++ b/src/.docker_modules/hisat2/2.0.0/docker_init.sh @@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/hisat2:2.0.0
+docker build src/.docker_modules/hisat2/2.0.0 -t 'lbmc/hisat2:2.0.0'
+docker push lbmc/hisat2:2.0.0
diff --git a/src/.docker_modules/hisat2/2.1.0/Dockerfile b/src/.docker_modules/hisat2/2.1.0/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..0fc7dacd5984006ac3d0dfebe00dfc42ed050cbd --- /dev/null +++ b/src/.docker_modules/hisat2/2.1.0/Dockerfile @@ -0,0 +1,20 @@
+FROM lbmc/samtools:1.7
+MAINTAINER Nicolas Fontrodona
+
+ENV HISAT2_VERSION=2.1.0
+ENV PACKAGES curl \
+ zip \
+ g++ \
+ perl \
+ python
+
+RUN apk update && \
+ apk add ${PACKAGES}
+
+RUN curl -k -L http://ccb.jhu.edu/software/hisat2/dl/hisat2-${HISAT2_VERSION}-source.zip -o hisat2_linux-v${HISAT2_VERSION}.zip && \
+unzip hisat2_linux-v${HISAT2_VERSION}.zip && \
+cd hisat2-${HISAT2_VERSION} && \
+make && \
+cp hisat2 /usr/bin && \
+cp hisat2-* /usr/bin && \
+rm -Rf hisat2-${HISAT2_VERSION}
diff --git a/src/.docker_modules/hisat2/2.1.0/docker_init.sh b/src/.docker_modules/hisat2/2.1.0/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..55fb191ab23cbe7615f70ba5488a227b0b69580a --- /dev/null +++ b/src/.docker_modules/hisat2/2.1.0/docker_init.sh @@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/hisat2:2.1.0
+docker build src/.docker_modules/hisat2/2.1.0 -t 'lbmc/hisat2:2.1.0'
+docker push lbmc/hisat2:2.1.0
diff --git a/src/.docker_modules/htseq/0.11.2/Dockerfile b/src/.docker_modules/htseq/0.11.2/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..821d7760af2350605617aea694a92ff2470a9cb7 --- /dev/null +++ b/src/.docker_modules/htseq/0.11.2/Dockerfile @@ -0,0 +1,17 @@
+FROM ubuntu:18.04
+MAINTAINER Laurent Modolo
+
+ENV HTSEQ_VERSION=0.11.2
+ENV PACKAGES build-essential=12.4* \
+ python3-pip=9.0.1* \
+ python3-setuptools=39.0.1* \
+ python3-dev=3.6.7* \
+ python3-wheel=0.30.0*
+
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends ${PACKAGES} && \
+ apt-get clean
+
+RUN pip3 install numpy==1.14.3
+RUN pip3 install pysam==0.15.0
+RUN pip3 install HTSeq==${HTSEQ_VERSION}
diff --git a/src/.docker_modules/htseq/0.11.2/docker_init.sh b/src/.docker_modules/htseq/0.11.2/docker_init.sh new file mode 100644 index 0000000000000000000000000000000000000000..b46f01de71ede2fa99923f609cc98d96994a6bf6 --- /dev/null +++ b/src/.docker_modules/htseq/0.11.2/docker_init.sh @@ -0,0 +1,2 @@
+#!/bin/sh
+docker build src/.docker_modules/htseq/0.11.2 -t 'lbmc/htseq:0.11.2'
diff --git a/src/.docker_modules/htseq/0.13.5/Dockerfile b/src/.docker_modules/htseq/0.13.5/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..68347cf4a63723c11b3a7e4d01817c6ef8d18f79 --- /dev/null +++ b/src/.docker_modules/htseq/0.13.5/Dockerfile @@ -0,0 +1,2 @@
+FROM quay.io/biocontainers/htseq:0.13.5--py39h70b41aa_1
+MAINTAINER Laurent Modolo
diff --git a/src/.docker_modules/htseq/0.13.5/docker_init.sh b/src/.docker_modules/htseq/0.13.5/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..ca03fe9af2bc8f9f2bb9d70efc02a0c01e40cfde --- /dev/null +++ b/src/.docker_modules/htseq/0.13.5/docker_init.sh @@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/htseq:0.13.5
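+# after the push, the install can be sanity-checked (assuming `python` is on
+# the PATH of this biocontainers-based image):
+# docker run --rm lbmc/htseq:0.13.5 python -c 'import HTSeq; print(HTSeq.__version__)'
+docker build src/.docker_modules/htseq/0.13.5 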
-t 'lbmc/htseq:0.13.5' +docker push lbmc/htseq:0.13.5 diff --git a/src/.docker_modules/htseq/0.8.0/Dockerfile b/src/.docker_modules/htseq/0.8.0/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..492d91a697b945a3acd3aa3bd0ebb09590fb3de5 --- /dev/null +++ b/src/.docker_modules/htseq/0.8.0/Dockerfile @@ -0,0 +1,18 @@ + +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV HTSEQ_VERSION=0.8.0 +ENV PACKAGES build-essential \ + python3-pip \ + python3-setuptools \ + python3-dev \ + python3-wheel + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN pip3 install numpy==1.14.3 +RUN pip3 install pysam==0.15.0 +RUN pip3 install HTSeq==${HTSEQ_VERSION} diff --git a/src/.docker_modules/htseq/0.8.0/docker_init.sh b/src/.docker_modules/htseq/0.8.0/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..e322517cf457f8a8a9041da975a7851caf2ab4ef --- /dev/null +++ b/src/.docker_modules/htseq/0.8.0/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/htseq:0.8.0 +docker build src/.docker_modules/htseq/0.8.0 -t 'lbmc/htseq:0.8.0' +docker push lbmc/htseq:0.8.0 diff --git a/src/.docker_modules/kallisto/0.43.1/Dockerfile b/src/.docker_modules/kallisto/0.43.1/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..68a731a7fad98ed9c423909797d259baf2ea66eb --- /dev/null +++ b/src/.docker_modules/kallisto/0.43.1/Dockerfile @@ -0,0 +1,15 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV KALLISTO_VERSION=0.43.1 +ENV PACKAGES curl=7.58.0* \ + ca-certificates=20180409 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN curl -k -L https://github.com/pachterlab/kallisto/releases/download/v${KALLISTO_VERSION}/kallisto_linux-v${KALLISTO_VERSION}.tar.gz -o kallisto_linux-v${KALLISTO_VERSION}.tar.gz && \ +tar xzf kallisto_linux-v${KALLISTO_VERSION}.tar.gz && \ +cp kallisto_linux-v${KALLISTO_VERSION}/kallisto /usr/bin && \ +rm -Rf kallisto_linux-v${KALLISTO_VERSION}* diff --git a/src/.docker_modules/kallisto/0.43.1/docker_init.sh b/src/.docker_modules/kallisto/0.43.1/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..b93c004d24d291b3c92bab0ca7d9ae7c7131cf7a --- /dev/null +++ b/src/.docker_modules/kallisto/0.43.1/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/kallisto:0.43.1 +docker build src/.docker_modules/kallisto/0.43.1 -t 'lbmc/kallisto:0.43.1' +docker push lbmc/kallisto:0.43.1 diff --git a/src/.docker_modules/kallisto/0.44.0/Dockerfile b/src/.docker_modules/kallisto/0.44.0/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..9c4dd013e34bf76c4dcd408622eebccbe3e52396 --- /dev/null +++ b/src/.docker_modules/kallisto/0.44.0/Dockerfile @@ -0,0 +1,15 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV KALLISTO_VERSION=0.44.0 +ENV PACKAGES curl=7.58.0* \ + ca-certificates=20180409 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN curl -k -L https://github.com/pachterlab/kallisto/releases/download/v${KALLISTO_VERSION}/kallisto_linux-v${KALLISTO_VERSION}.tar.gz -o kallisto_linux-v${KALLISTO_VERSION}.tar.gz && \ +tar xzf kallisto_linux-v${KALLISTO_VERSION}.tar.gz && \ +cp kallisto_linux-v${KALLISTO_VERSION}/kallisto /usr/bin && \ +rm -Rf kallisto_linux-v${KALLISTO_VERSION}* diff --git a/src/.docker_modules/kallisto/0.44.0/docker_init.sh b/src/.docker_modules/kallisto/0.44.0/docker_init.sh 
new file mode 100755 index 0000000000000000000000000000000000000000..4fa79008a07f4e9a4afe6c8bb20fb8ca60b98858 --- /dev/null +++ b/src/.docker_modules/kallisto/0.44.0/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/kallisto:0.44.0 +docker build src/.docker_modules/kallisto/0.44.0 -t 'lbmc/kallisto:0.44.0' +docker push lbmc/kallisto:0.44.0 diff --git a/src/.docker_modules/kallistobustools/0.24.4/Dockerfile b/src/.docker_modules/kallistobustools/0.24.4/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..b3bb53556f54e2e47510dd89d79e273ea2f3723f --- /dev/null +++ b/src/.docker_modules/kallistobustools/0.24.4/Dockerfile @@ -0,0 +1,9 @@ +FROM python:3.7 +MAINTAINER Laurent Modolo + +ENV KB_VERSION=0.24.4 + +RUN pip3 install kb_python==${KB_VERSION} +ENTRYPOINT ["/bin/sh"] +RUN ln -s /usr/local/lib/python3.7/site-packages/kb_python/bins/linux/bustools/bustools /usr/local/bin/ +RUN ln -s /usr/local/lib/python3.7/site-packages/kb_python/bins/linux/kallisto/kallisto /usr/local/bin/ diff --git a/src/.docker_modules/kallistobustools/0.24.4/docker_init.sh b/src/.docker_modules/kallistobustools/0.24.4/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..216b302fea20598e923a7ab21d492235042f1738 --- /dev/null +++ b/src/.docker_modules/kallistobustools/0.24.4/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/kallistobustools:0.24.4 +docker build src/.docker_modules/kallistobustools/0.24.4 -t 'lbmc/kallistobustools:0.24.4' +docker push lbmc/kallistobustools:0.24.4 diff --git a/src/.docker_modules/kallistobustools/0.39.3/Dockerfile b/src/.docker_modules/kallistobustools/0.39.3/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..1649aeedd22d978ce3591e3b2c227abf1e3aa2be --- /dev/null +++ b/src/.docker_modules/kallistobustools/0.39.3/Dockerfile @@ -0,0 +1,31 @@ +FROM python:3.8-alpine +MAINTAINER Laurent Modolo + +ENV B_VERSION=0.39.3 +ENV K_VERSION=0.46.1 + +RUN apk add --update --no-cache bash musl-dev linux-headers g++ cmake make build-base hdf5 hdf5-dev zlib-dev autoconf bash && \ +wget https://github.com/BUStools/bustools/archive/v${B_VERSION}.tar.gz && \ +tar xvf v${B_VERSION}.tar.gz && \ +cd bustools-${B_VERSION} && \ +mkdir build && \ +cd build && \ +cmake .. && \ +sed -i -e 's/"Common\.hpp"/"Common\.hpp"\n#include <cmath>/g' ../src/bustools_whitelist.h && \ +sed -i 's/pow/std::pow/g' ../src/bustools_whitelist.cpp && \ +make && \ +make install && \ +wget https://github.com/pachterlab/kallisto/archive/v${K_VERSION}.tar.gz && \ +tar xvf v${K_VERSION}.tar.gz && \ +cd kallisto-${K_VERSION} && \ +mkdir build && \ +cd build && \ +cmake .. 
&& \ +make && \ +make install && \ +wget https://github.com/BUStools/getting_started/releases/download/getting_started/t2g.py && \ +chmod +x t2g.py && \ +mv t2g.py /usr/local/bin/ && \ +rm -R kallisto* bustools* v${K_VERSION}.tar.gz v${B_VERSION}.tar.gz + +CMD ["sh"] diff --git a/src/.docker_modules/kallistobustools/0.39.3/docker_init.sh b/src/.docker_modules/kallistobustools/0.39.3/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..5cdd8c44773d7eb1f21bf5f500e3556f978ecdf9 --- /dev/null +++ b/src/.docker_modules/kallistobustools/0.39.3/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/kallistobustools:0.39.3 +docker build src/.docker_modules/kallistobustools/0.39.3 -t 'lbmc/kallistobustools:0.39.3' +docker push lbmc/kallistobustools:0.39.3 diff --git a/src/.docker_modules/kb/0.26.0/Dockerfile b/src/.docker_modules/kb/0.26.0/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..374646355b3f3f3895e8450c70d49eec1688c280 --- /dev/null +++ b/src/.docker_modules/kb/0.26.0/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.9-slim + +ENV KB_VERSION="0.26.0" + +RUN apt update && apt install -y procps && pip3 install kb-python==${KB_VERSION} + +COPY t2g.py /usr/bin/ +COPY fix_t2g.py /usr/bin/ + +RUN chmod +x /usr/bin/t2g.py +RUN chmod +x /usr/bin/fix_t2g.py + +CMD [ "bash" ] diff --git a/src/.docker_modules/kb/0.26.0/docker_init.sh b/src/.docker_modules/kb/0.26.0/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..347c10e4aa1ac05d130b860b10024b016fdc4eef --- /dev/null +++ b/src/.docker_modules/kb/0.26.0/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/kb:0.26.0 +docker build src/.docker_modules/kb/0.26.0 -t 'lbmc/kb:0.26.0' +docker push lbmc/kb:0.26.0 diff --git a/src/.docker_modules/kb/0.26.0/fix_t2g.py b/src/.docker_modules/kb/0.26.0/fix_t2g.py new file mode 100644 index 0000000000000000000000000000000000000000..a6b4619b5a17ad3fd4351918eb77c13b8c106f94 --- /dev/null +++ b/src/.docker_modules/kb/0.26.0/fix_t2g.py @@ -0,0 +1,64 @@ +#!/usr/local/bin/python +import os +import re +import gzip +import argparse + + +def validate_file(f): + if not os.path.exists(f): + # Argparse uses the ArgumentTypeError to give a rejection message like: + # error: argument input: x does not exist + raise argparse.ArgumentTypeError("{0} does not exist".format(f)) + return f + + +def t2g_line(transcript, gene): + return str(transcript) + "\t" + str(gene) + "\n" + + +def build_t2g_re(): + return re.compile("([A-Z]+[0-9]+)\.\S+\s([A-Z]+[0-9]+)\.\S+") + + +def get_t2g(line, t2g_re): + return t2g_re.match(line) + + +def get_t2g_line(line, t2g_re): + t2g_id = get_t2g(line, t2g_re) + return {'transcript_id': t2g_id, 'gene_id': t2g_id} + + +def write_t2g_line(t2g, line, t2g_re): + results = get_t2g_line(line, t2g_re) + if results['transcript_id']: + t2g.write( + t2g_line( + results['transcript_id'].group(1), + results['gene_id'].group(2) + ) + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="create transcript to genes file from a gtf file." 
+ ) + parser.add_argument( + "-f", "--t2g", dest="t2g", required=True, type=validate_file, + help="t2g file", metavar="FILE" + ) + args = parser.parse_args() + t2g_re = build_t2g_re() + + try: + with gzip.open(args.t2g, "rb") as gtf: + with open("fix_t2g.txt", "w") as t2g: + for line in gtf: + write_t2g_line(t2g, line.decode(), t2g_re) + except gzip.BadGzipFile: + with open(args.t2g, "r") as gtf: + with open("fix_t2g.txt", "w") as t2g: + for line in gtf: + write_t2g_line(t2g, str(line), t2g_re) diff --git a/src/.docker_modules/kb/0.26.0/t2g.py b/src/.docker_modules/kb/0.26.0/t2g.py new file mode 100755 index 0000000000000000000000000000000000000000..b99e74e6c2c3d9574ce54008bc58143c14b229a5 --- /dev/null +++ b/src/.docker_modules/kb/0.26.0/t2g.py @@ -0,0 +1,75 @@ +#!/usr/local/bin/python +import os +import re +import gzip +import argparse + + +def validate_file(f): + if not os.path.exists(f): + # Argparse uses the ArgumentTypeError to give a rejection message like: + # error: argument input: x does not exist + raise argparse.ArgumentTypeError("{0} does not exist".format(f)) + return f + + +def t2g_line(transcript, gene): + return str(transcript) + "\t" + str(gene) + "\n" + + +def build_gene_re(): + return re.compile(".*gene_id\s+\"(\S+)\";.*") + + +def build_transcript_re(): + return re.compile(".*transcript_id\s+\"(\S+)\";.*") + + +def get_gene(line, gene_re): + return gene_re.match(line) + + +def get_transcript(line, transcript_re): + return transcript_re.match(line) + + +def gtf_line(line, transcript_re, gene_re): + transcript_id = get_transcript(line, transcript_re) + gene_id = get_gene(line, gene_re) + return {'transcript_id': transcript_id, 'gene_id': gene_id} + + +def write_t2g_line(t2g, line, transcript_re, gene_re): + results = gtf_line(line, transcript_re, gene_re) + if results['transcript_id']: + t2g.write( + t2g_line( + results['transcript_id'].group(1), + results['gene_id'].group(1) + ) + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="create transcript to genes file from a gtf file."
+ ) + parser.add_argument( + "-g", "--gtf", dest="gtf", required=True, type=validate_file, + help="gtf file", metavar="FILE" + ) + args = parser.parse_args() + gene_re = build_gene_re() + transcript_re = build_transcript_re() + + try: + with gzip.open(args.gtf, "rb") as gtf: + with open("t2g_dup.txt", "w") as t2g: + for line in gtf: + write_t2g_line(t2g, str(line), transcript_re, gene_re) + except gzip.BadGzipFile: + with open(args.gtf, "r") as gtf: + with open("t2g_dup.txt", "w") as t2g: + for line in gtf: + write_t2g_line(t2g, str(line), transcript_re, gene_re) + diff --git a/src/.docker_modules/kb/0.26.3/Dockerfile b/src/.docker_modules/kb/0.26.3/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..5cd35dfae7d474b25223ebeca80fe6adada91db3 --- /dev/null +++ b/src/.docker_modules/kb/0.26.3/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.9-slim + +ENV KB_VERSION="0.26.3" + +RUN apt update && apt install -y procps make gcc zlib1g-dev libbz2-dev libcurl4 liblzma-dev \ + && pip3 install pysam anndata h5py Jinja2 loompy nbconvert nbformat ngs-tools numpy pandas plotly scanpy scikit-learn tqdm \ + && pip3 install kb-python==${KB_VERSION} gffutils + +COPY t2g.py /usr/bin/ + +RUN chmod +x /usr/bin/t2g.py + +CMD [ "bash" ] diff --git a/src/.docker_modules/kb/0.26.3/docker_init.sh b/src/.docker_modules/kb/0.26.3/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..0294274a2518ef8622027bb56b49c4baded4a9d8 --- /dev/null +++ b/src/.docker_modules/kb/0.26.3/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/kb:0.26.3 +docker build src/.docker_modules/kb/0.26.3 -t 'lbmc/kb:0.26.3' +docker push lbmc/kb:0.26.3 diff --git a/src/.docker_modules/kb/0.26.3/t2g.py b/src/.docker_modules/kb/0.26.3/t2g.py new file mode 100755 index 0000000000000000000000000000000000000000..f9f0b45dc89b385c3ed52dc252f8f09eb3bc8c74 --- /dev/null +++ b/src/.docker_modules/kb/0.26.3/t2g.py @@ -0,0 +1,47 @@ +#!/usr/local/bin/python +import os +import gffutils +import argparse + + +def validate_file(f): + if not os.path.exists(f): + # Argparse uses the ArgumentTypeError to give a rejection message like: + # error: argument input: x does not exist + raise argparse.ArgumentTypeError("{0} does not exist".format(f)) + return f + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="create transcript to genes file from a gtf file." 
+ ) + parser.add_argument( + "-g", "--gtf", dest="gtf", required=True, type=validate_file, + help="gtf file", metavar="FILE" + ) + args = parser.parse_args() + + db = gffutils.create_db( + args.gtf, + dbfn=":memory:", + force=True, + merge_strategy="merge", + disable_infer_transcripts=False, + disable_infer_genes=False + ) + with open("t2g.txt", "w") as t2g: + for gene in db.all_features(): + for transcript in db.children( + gene, featuretype='transcript', order_by='start' + ): + t2g_line = str(transcript["transcript_id"][0]) + \ + "\t" + \ + str(gene["gene_id"][0]) + t2g_line = t2g_line.split("\t") + t2g.write( + str(t2g_line[0].split(".")[0]) + + "\t" + + str(t2g_line[1].split(".")[0]) + + "\n" + ) diff --git a/src/.docker_modules/last/1060/Dockerfile b/src/.docker_modules/last/1060/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..4193a11f30d443b870a188bebffc33c56038d336 --- /dev/null +++ b/src/.docker_modules/last/1060/Dockerfile @@ -0,0 +1,25 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV LAST_VERSION=1060 +ENV PACKAGES curl=7.58.0* \ + unzip \ + make=4.1* \ + g++ \ + zlib1g-dev=1:1.2.11* \ + ca-certificates=20180409 \ + build-essential=12.4* \ + python + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN curl -k -L http://last.cbrc.jp/last-${LAST_VERSION}.zip -o last-${LAST_VERSION}.zip && \ +unzip last-${LAST_VERSION}.zip && \ +cd last-${LAST_VERSION} && \ +make && \ +cp src/last* /usr/bin/ && \ +cp scripts/* /usr/bin/ && \ +cd .. && \ +rm -Rf last-${LAST_VERSION} diff --git a/src/.docker_modules/last/1060/docker_init.sh b/src/.docker_modules/last/1060/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..0e8393fb88f803e9648cc1185f10c3198daf5976 --- /dev/null +++ b/src/.docker_modules/last/1060/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/last:1060 +docker build src/.docker_modules/last/1060/ -t 'lbmc/last:1060' +docker push lbmc/last:1060 diff --git a/src/.docker_modules/liftover/357/Dockerfile b/src/.docker_modules/liftover/357/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..973a1dc03c9cc8c73209f13f1fecf84906b8be7b --- /dev/null +++ b/src/.docker_modules/liftover/357/Dockerfile @@ -0,0 +1,2 @@ +FROM quay.io/biocontainers/ucsc-liftover:357--1 +MAINTAINER Laurent Modolo diff --git a/src/.docker_modules/liftover/357/docker_init.sh b/src/.docker_modules/liftover/357/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..68bd90585292fb30242e3d2cdd94e3538d277f6f --- /dev/null +++ b/src/.docker_modules/liftover/357/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/liftover:357 +docker build src/.docker_modules/liftover/357/ -t 'lbmc/liftover:357' +docker push lbmc/liftover:357 diff --git a/src/.docker_modules/macs2/2.1.2/Dockerfile b/src/.docker_modules/macs2/2.1.2/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..71979a50044cd8ce9c52f1d413db269088d352dd --- /dev/null +++ b/src/.docker_modules/macs2/2.1.2/Dockerfile @@ -0,0 +1,2 @@ +FROM quay.io/biocontainers/macs2:2.1.2--py27r351h14c3975_1 +MAINTAINER Laurent Modolo diff --git a/src/.docker_modules/macs2/2.1.2/docker_init.sh b/src/.docker_modules/macs2/2.1.2/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..8dc7b2483a1aa91f1f637e26812469f861b68f0e --- /dev/null +++ b/src/.docker_modules/macs2/2.1.2/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull 
lbmc/macs2:2.1.2 +docker build src/.docker_modules/macs2/2.1.2 -t 'lbmc/macs2:2.1.2' +docker push lbmc/macs2:2.1.2 diff --git a/src/.docker_modules/macs3/3.0.0a6/Dockerfile b/src/.docker_modules/macs3/3.0.0a6/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..375b8f631fe40f9e05f94400d15057f303713438 --- /dev/null +++ b/src/.docker_modules/macs3/3.0.0a6/Dockerfile @@ -0,0 +1,15 @@ +FROM python:3.9-slim +MAINTAINER Laurent Modolo + +ENV MACS3_VERSION=3.0.0a6 +RUN apt-get update -qq \ + && apt-get install --no-install-recommends --yes \ + build-essential \ + zlib1g-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + libssl-dev \ + libncurses5-dev \ + procps +RUN pip install macs3==${MACS3_VERSION} diff --git a/src/.docker_modules/macs3/3.0.0a6/docker_init.sh b/src/.docker_modules/macs3/3.0.0a6/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..3c830318a39076b8f2ca4dc2e7442c8c046a320a --- /dev/null +++ b/src/.docker_modules/macs3/3.0.0a6/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/macs3:3.0.0a6 +docker build src/.docker_modules/macs3/3.0.0a6 -t 'lbmc/macs3:3.0.0a6' +docker push lbmc/macs3:3.0.0a6 diff --git a/src/.docker_modules/minimap2/2.17/Dockerfile b/src/.docker_modules/minimap2/2.17/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..1f5b05c02b573cc8d7392c53213cc78cbb6abeb5 --- /dev/null +++ b/src/.docker_modules/minimap2/2.17/Dockerfile @@ -0,0 +1,25 @@ +FROM quay.io/biocontainers/samtools:0.1.18--hfb9b9cc_10 AS samtools +# /usr/local/bin/samtools +# / # ldd /usr/local/bin/samtools +# /lib64/ld-linux-x86-64.so.2 (0x7efddcdcc000) +# libncurses.so.6 => /usr/local/bin/../lib/libncurses.so.6 (0x7efddcfad000) +# libtinfo.so.6 => /usr/local/bin/../lib/libtinfo.so.6 (0x7efddcf6f000) +# libm.so.6 => /lib64/ld-linux-x86-64.so.2 (0x7efddcdcc000) +# libz.so.1 => /usr/local/bin/../lib/libz.so.1 (0x7efddcf55000) +# libc.so.6 => /lib64/ld-linux-x86-64.so.2 (0x7efddcdcc000) + +FROM quay.io/biocontainers/minimap2:2.17--hed695b0_3 +MAINTAINER Laurent Modolo +ENV MINIMAP2_VERSION=2.17 + +COPY --from=samtools /usr/local/bin/samtools /usr/local/bin/ +COPY --from=samtools /usr/local//lib/libncurses.so.6 /usr/local/lib/ +COPY --from=samtools /usr/local//lib/libtinfo.so.6 /usr/local/lib/ + +# /usr/local/bin/minimap2 +# / # ldd /usr/local/bin/minimap2 +# /lib64/ld-linux-x86-64.so.2 (0x7fe14f5a8000) +# libm.so.6 => /lib64/ld-linux-x86-64.so.2 (0x7fe14f5a8000) +# libz.so.1 => /usr/local/bin/../lib/libz.so.1 (0x7fe14f7c4000) +# libpthread.so.0 => /lib64/ld-linux-x86-64.so.2 (0x7fe14f5a8000) +# libc.so.6 => /lib64/ld-linux-x86-64.so.2 (0x7fe14f5a8000) \ No newline at end of file diff --git a/src/.docker_modules/minimap2/2.17/docker_init.sh b/src/.docker_modules/minimap2/2.17/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..773f0cf6d1ec3f29c3e60e4b1fa359d28223e601 --- /dev/null +++ b/src/.docker_modules/minimap2/2.17/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/minimap2:2.17 +docker build src/.docker_modules/minimap2/2.17 -t 'lbmc/minimap2:2.17' +docker push lbmc/minimap2:2.17 diff --git a/src/.docker_modules/multiqc/1.0/Dockerfile b/src/.docker_modules/multiqc/1.0/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..7acf600c99d9f3500bf015d4b79e3675ac5d7789 --- /dev/null +++ b/src/.docker_modules/multiqc/1.0/Dockerfile @@ -0,0 +1,25 @@ +FROM debian:stretch +MAINTAINER Laurent Modolo + +ENV MULTIQC_VERSION=1.0 +ENV PACKAGES 
build-essential \ + python3-pip \ + python3-setuptools \ + python3-dev \ + python3-wheel \ + procps \ + locales + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN locale-gen en_US.UTF-8 +ENV LC_ALL=en_US.utf-8 +ENV LANG=en_US.utf-8 +ENV LC_ALL=C.UTF-8 +ENV LANG=C.UTF-8 + + +RUN pip3 install multiqc==${MULTIQC_VERSION} + diff --git a/src/.docker_modules/multiqc/1.0/docker_init.sh b/src/.docker_modules/multiqc/1.0/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..1b45ce3e7d6a58c98cf34f7614603dec9ad525fc --- /dev/null +++ b/src/.docker_modules/multiqc/1.0/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/multiqc:1.0 +docker build src/.docker_modules/multiqc/1.0 -t 'lbmc/multiqc:1.0' +docker push lbmc/multiqc:1.0 diff --git a/src/.docker_modules/multiqc/1.7/Dockerfile b/src/.docker_modules/multiqc/1.7/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..ea71b1ad8227b9456863b7b00fc5d818db090288 --- /dev/null +++ b/src/.docker_modules/multiqc/1.7/Dockerfile @@ -0,0 +1,25 @@ +FROM debian:stretch +MAINTAINER Laurent Modolo + +ENV MULTIQC_VERSION=1.7 +ENV PACKAGES build-essential \ + python3-pip \ + python3-setuptools \ + python3-dev \ + python3-wheel \ + procps \ + locales + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN locale-gen en_US.UTF-8 +ENV LC_ALL=en_US.utf-8 +ENV LANG=en_US.utf-8 +ENV LC_ALL=C.UTF-8 +ENV LANG=C.UTF-8 + + +RUN pip3 install multiqc==${MULTIQC_VERSION} + diff --git a/src/.docker_modules/multiqc/1.7/docker_init.sh b/src/.docker_modules/multiqc/1.7/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..e091f04a2752d2fcf2901f580ff8f001c8589df6 --- /dev/null +++ b/src/.docker_modules/multiqc/1.7/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/multiqc:1.7 +docker build src/.docker_modules/multiqc/1.7 -t 'lbmc/multiqc:1.7' +docker push lbmc/multiqc:1.7 diff --git a/src/.docker_modules/multiqc/1.9/Dockerfile b/src/.docker_modules/multiqc/1.9/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..3e82ccb436f841876ef71c73f4e6a611454bd7f2 --- /dev/null +++ b/src/.docker_modules/multiqc/1.9/Dockerfile @@ -0,0 +1,25 @@ +FROM debian:stretch +MAINTAINER Laurent Modolo + +ENV MULTIQC_VERSION=1.9 +ENV PACKAGES build-essential \ + python3-pip \ + python3-setuptools \ + python3-dev \ + python3-wheel \ + procps \ + locales + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN locale-gen en_US.UTF-8 +ENV LC_ALL=en_US.utf-8 +ENV LANG=en_US.utf-8 +ENV LC_ALL=C.UTF-8 +ENV LANG=C.UTF-8 + + +RUN pip3 install multiqc==${MULTIQC_VERSION} + diff --git a/src/.docker_modules/multiqc/1.9/docker_init.sh b/src/.docker_modules/multiqc/1.9/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..dcb2897242cb084d9bc851e2274c21aca99f00c2 --- /dev/null +++ b/src/.docker_modules/multiqc/1.9/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/multiqc:1.9 +docker build src/.docker_modules/multiqc/1.9 -t 'lbmc/multiqc:1.9' +docker push lbmc/multiqc:1.9 diff --git a/src/.docker_modules/music/6613c53/Dockerfile b/src/.docker_modules/music/6613c53/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..aa47eb572ca48736457bb0f60b0857bbe6c5d9df --- /dev/null +++ b/src/.docker_modules/music/6613c53/Dockerfile @@ -0,0 +1,38 @@ +FROM alpine:3.8 
+MAINTAINER Laurent Modolo + +ENV MUSIC_VERSION=6613c53 +ENV SAMTOOLS_VERSION=1.7 +ENV PACKAGES g++ \ +bash \ +pcre-dev \ +openssl-dev \ +build-base \ +bzip2-dev \ +xz-dev \ +git \ +curl + +RUN apk update && \ + apk add ${PACKAGES} +RUN curl -L -o samtools-${SAMTOOLS_VERSION}.tar.bz2 \ + http://jaist.dl.sourceforge.net/project/samtools/samtools/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2 \ + && tar jxvf samtools-${SAMTOOLS_VERSION}.tar.bz2 \ + && cd samtools-${SAMTOOLS_VERSION}/ \ + && ./configure --without-curses \ + && make \ + && make install + +RUN git clone https://github.com/gersteinlab/MUSIC.git && \ + cd MUSIC && \ + git checkout ${MUSIC_VERSION} && \ + make clean && \ + make && \ + cd .. && \ + mv MUSIC/bin/MUSIC /usr/bin/ && \ + mv MUSIC/bin/generate_multimappability_signal.csh /usr/bin/ && \ + mv MUSIC/bin/run_MUSIC.csh /usr/bin/ && \ + rm -Rf MUSIC + +RUN chmod +x /usr/bin/* + diff --git a/src/.docker_modules/music/6613c53/docker_init.sh b/src/.docker_modules/music/6613c53/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..20e327a97a09ced0c55b16d6a780f35a09e1c881 --- /dev/null +++ b/src/.docker_modules/music/6613c53/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/music:6613c53 +docker build src/.docker_modules/music/6613c53 -t 'lbmc/music:6613c53' +docker push lbmc/music:6613c53 diff --git a/src/.docker_modules/pandoc/2.11/Dockerfile b/src/.docker_modules/pandoc/2.11/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..f62b84b960032314a2fb43daedd10d6b539a0c91 --- /dev/null +++ b/src/.docker_modules/pandoc/2.11/Dockerfile @@ -0,0 +1,8 @@ +FROM alpine:3.13 +MAINTAINER Laurent Modolo + +ENV PANDOC_VERSION=2.11 + +RUN echo "https://dl-cdn.alpinelinux.org/alpine/edge/testing" >> /etc/apk/repositories \ + && apk update \ + && apk add pandoc~=${PANDOC_VERSION} make diff --git a/src/.docker_modules/pandoc/2.11/docker_init.sh b/src/.docker_modules/pandoc/2.11/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..3bbc7b6adc58f5f9fb2f8c554bbdfea9724cb4b2 --- /dev/null +++ b/src/.docker_modules/pandoc/2.11/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/pandoc:2.11 +docker build src/.docker_modules/pandoc/2.11 -t 'lbmc/pandoc:2.11' +docker push lbmc/pandoc:2.11 diff --git a/src/.docker_modules/picard/2.18.11/Dockerfile b/src/.docker_modules/picard/2.18.11/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..6a358e55bf36a4be4842335f47491ec8ac3a4ced --- /dev/null +++ b/src/.docker_modules/picard/2.18.11/Dockerfile @@ -0,0 +1,19 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV PICARD_VERSION=2.18.11 +ENV PACKAGES default-jre=2:1.1* \ + curl=7.58.0* \ + ca-certificates=20180409 + + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN curl -k -L https://github.com/broadinstitute/picard/releases/download/${PICARD_VERSION}/picard.jar -o picard.jar && \ + mkdir -p /usr/share/java/ && \ + mv picard.jar /usr/share/java/ + +COPY PicardCommandLine /usr/bin/ +RUN chmod +x /usr/bin/PicardCommandLine diff --git a/src/.docker_modules/picard/2.18.11/PicardCommandLine b/src/.docker_modules/picard/2.18.11/PicardCommandLine new file mode 100644 index 0000000000000000000000000000000000000000..c8eebf8345906cae9ef2e63409500857de479f09 --- /dev/null +++ b/src/.docker_modules/picard/2.18.11/PicardCommandLine @@ -0,0 +1,15 @@ +#!/bin/sh +set -eu +PRG="$(basename -- "$0")" +case "$PRG" in 
picard-tools) + echo 1>&2 'Warning: picard-tools is deprecated and should be replaced by PicardCommandLine' + ;; +PicardCommandLine) + ;; +*) + set -- "$PRG" "$@" + ;; +esac + +exec java -Xmx40g ${JAVA_OPTIONS-} -jar /usr/share/java/picard.jar "$@" diff --git a/src/.docker_modules/picard/2.18.11/docker_init.sh b/src/.docker_modules/picard/2.18.11/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..82c4cf7d3bdf581587fa7a3345cff9cb465158ae --- /dev/null +++ b/src/.docker_modules/picard/2.18.11/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/picard:2.18.11 +docker build src/.docker_modules/picard/2.18.11 -t 'lbmc/picard:2.18.11' +docker push lbmc/picard:2.18.11 diff --git a/src/.docker_modules/pigz/2.4/Dockerfile b/src/.docker_modules/pigz/2.4/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..75f9e2fcfd9b5394b45fbfa4541861e6e4ab43e1 --- /dev/null +++ b/src/.docker_modules/pigz/2.4/Dockerfile @@ -0,0 +1,9 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV PIGZ_VERSION=2.4 +ENV PACKAGES pigz=${PIGZ_VERSION}* + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean diff --git a/src/.docker_modules/pigz/2.4/docker_init.sh b/src/.docker_modules/pigz/2.4/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..38d7347d72e9345ca69f54c9d8ea2ac3ec0ebbb8 --- /dev/null +++ b/src/.docker_modules/pigz/2.4/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/pigz:2.4 +docker build src/.docker_modules/pigz/2.4 -t 'lbmc/pigz:2.4' +docker push lbmc/pigz:2.4 diff --git a/src/.docker_modules/python/3.8/Dockerfile b/src/.docker_modules/python/3.8/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..20b4edd3aaa75374695afb4ca34c7be889e01933 --- /dev/null +++ b/src/.docker_modules/python/3.8/Dockerfile @@ -0,0 +1,2 @@ +FROM python:3.8.0a3-stretch +MAINTAINER Remi Seraphin diff --git a/src/.docker_modules/python/3.8/docker_init.sh b/src/.docker_modules/python/3.8/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..9a1c9b8b04f56586c6fadda16add7af4f66c3454 --- /dev/null +++ b/src/.docker_modules/python/3.8/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/python:3.8 +docker build src/.docker_modules/python/3.8 -t 'lbmc/python:3.8' +docker push lbmc/python:3.8 diff --git a/src/.docker_modules/r-base/3.5.3/Dockerfile b/src/.docker_modules/r-base/3.5.3/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..7c937de7a166d84c0bf75fe4a17fb4b162d61391 --- /dev/null +++ b/src/.docker_modules/r-base/3.5.3/Dockerfile @@ -0,0 +1,7 @@ +FROM rocker/r-ver:3.5.3 +MAINTAINER Remi Seraphin + +RUN apt update && \ +apt install -y libssl-dev libcurl4-openssl-dev libxml2-dev zlib1g-dev git procps +RUN R -e "install.packages('tidyverse'); \ + install.packages('ggplot2')" diff --git a/src/.docker_modules/r-base/3.5.3/docker_init.sh b/src/.docker_modules/r-base/3.5.3/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..f62473c54ae30f85f36293fb74b42fc255062a46 --- /dev/null +++ b/src/.docker_modules/r-base/3.5.3/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/r:3.5.3 +docker build src/.docker_modules/r-base/3.5.3 -t 'lbmc/r:3.5.3' +docker push lbmc/r:3.5.3 diff --git a/src/.docker_modules/r-base/3.6.2/Dockerfile b/src/.docker_modules/r-base/3.6.2/Dockerfile new file mode 100644 index
0000000000000000000000000000000000000000..fd4dcf7867019a2ecb73241fd0d6d39c7196fec1 --- /dev/null +++ b/src/.docker_modules/r-base/3.6.2/Dockerfile @@ -0,0 +1,37 @@ +FROM alpine:3.11.5 +MAINTAINER Laurent Modolo + +ENV R_PKGS R=~3.6.2 \ + R-mathlib=~3.6.2 \ + R-dev=~3.6.2 + +ENV R_DEPS g++ \ + libxml2-dev \ + make \ + cmake \ + linux-headers \ + cairo-dev \ + libxmu-dev \ + pango-dev \ + perl \ + tiff-dev \ + gcc \ + gfortran \ + icu-dev \ + libjpeg-turbo \ + libpng-dev \ + openblas-dev \ + pcre-dev \ + readline-dev \ + xz-dev \ + zlib-dev \ + bzip2-dev \ + curl-dev \ + libexecinfo-dev \ + git + +RUN echo "http://ftp.acc.umu.se/mirror/alpinelinux.org/v3.11/main" > /etc/apk/repositories && \ + echo "http://ftp.acc.umu.se/mirror/alpinelinux.org/v3.11/community" >> /etc/apk/repositories && \ + apk add --update --no-cache ${R_PKGS} ${R_DEPS} + +CMD ["R", "--no-save"] diff --git a/src/.docker_modules/r-base/3.6.2/docker_init.sh b/src/.docker_modules/r-base/3.6.2/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..d1e6e8183e95ba787d244ae17f13d0f6e97eefba --- /dev/null +++ b/src/.docker_modules/r-base/3.6.2/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/r-base:3.6.2 +docker build src/.docker_modules/r-base/3.6.2 -t 'lbmc/r-base:3.6.2' +docker push lbmc/r-base:3.6.2 diff --git a/src/.docker_modules/r-base/4.0.0/Dockerfile b/src/.docker_modules/r-base/4.0.0/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..6bfa98a516d2cf1ca66d07ffc8749ad1c57240d4 --- /dev/null +++ b/src/.docker_modules/r-base/4.0.0/Dockerfile @@ -0,0 +1,38 @@ +FROM alpine:3.12.0 +MAINTAINER Laurent Modolo + +ENV R_PKGS R=~4.0.0 \ + R-mathlib=~4.0.0 \ + R-dev=~4.0.0 + +ENV R_DEPS g++ \ + libxml2-dev \ + make \ + cmake \ + linux-headers \ + cairo-dev \ + libxmu-dev \ + pango-dev \ + perl \ + tiff-dev \ + gcc \ + gfortran \ + icu-dev \ + libjpeg-turbo \ + libpng-dev \ + openblas-dev \ + pcre-dev \ + readline-dev \ + xz-dev \ + zlib-dev \ + bzip2-dev \ + curl-dev \ + libexecinfo-dev \ + git + +RUN echo "http://ftp.acc.umu.se/mirror/alpinelinux.org/v3.11/main" > /etc/apk/repositories && \ + echo "http://ftp.acc.umu.se/mirror/alpinelinux.org/v3.11/community" >> /etc/apk/repositories && \ + sed -i -e 's/v[[:digit:]]\..*\//edge\//g' /etc/apk/repositories && \ + apk add --update --no-cache ${R_PKGS} ${R_DEPS} + +CMD ["R", "--no-save"] diff --git a/src/.docker_modules/r-base/4.0.0/docker_init.sh b/src/.docker_modules/r-base/4.0.0/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..fe24f44d1733d3a3cce32eb516ddcd7d8ae50930 --- /dev/null +++ b/src/.docker_modules/r-base/4.0.0/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/r-base:4.0.0 +docker build src/.docker_modules/r-base/4.0.0 -t 'lbmc/r-base:4.0.0' +docker push lbmc/r-base:4.0.0 diff --git a/src/.docker_modules/r-base/4.0.2/Dockerfile b/src/.docker_modules/r-base/4.0.2/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..97e111459c9b685fa6abd6b18fbdf880ffe81bc9 --- /dev/null +++ b/src/.docker_modules/r-base/4.0.2/Dockerfile @@ -0,0 +1,35 @@ +FROM alpine:3.12.0 +MAINTAINER Laurent Modolo + +ENV R_PKGS R=~4.0.2 \ + R-mathlib=~4.0.2 \ + R-dev=~4.0.2 \ + R-doc=~4.0.2 + +ENV R_DEPS g++ \ + libxml2-dev \ + make \ + cmake \ + linux-headers \ + cairo-dev \ + libxmu-dev \ + pango-dev \ + perl \ + tiff-dev \ + icu-dev \ + libjpeg-turbo \ + pcre-dev \ + readline-dev \ + libexecinfo-dev \ + file \ + ttf-linux-libertine \ + git \ + openssl \ + bash + +RUN echo
"http://ftp.acc.umu.se/mirror/alpinelinux.org/v3.11/main" > /etc/apk/repositories \ +&& echo "http://ftp.acc.umu.se/mirror/alpinelinux.org/v3.11/community" >> /etc/apk/repositories \ +&& sed -i -e 's/v[[:digit:]]\..*\//edge\//g' /etc/apk/repositories \ +&& apk add --update --no-cache ${R_PKGS} ${R_DEPS} + +CMD ["R", "--no-save"] diff --git a/src/.docker_modules/r-base/4.0.2/docker_init.sh b/src/.docker_modules/r-base/4.0.2/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..d07371190e4360bb9ebca95c0cb16eef8b88e32d --- /dev/null +++ b/src/.docker_modules/r-base/4.0.2/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/r-base:4.0.2 +docker build src/.docker_modules/r-base/4.0.2 -t 'lbmc/r-base:4.0.2' +docker push lbmc/r-base:4.0.2 diff --git a/src/.docker_modules/r-base/4.0.3/Dockerfile b/src/.docker_modules/r-base/4.0.3/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..58f752ff581aca0270beebd74e4c9abf8d0e7e8a --- /dev/null +++ b/src/.docker_modules/r-base/4.0.3/Dockerfile @@ -0,0 +1,36 @@ +FROM alpine:3.13.1 +MAINTAINER Lauret Modolo + +ENV R_PKGS R=~4.0.3 \ + R-mathlib=~4.0.3 \ + R-dev=~4.0.3 \ + R-doc=~4.0.3 + +ENV R_DEPS g++ \ + libxml2-dev \ + make \ + cmake \ + linux-headers \ + cairo-dev \ + libxmu-dev \ + pango-dev \ + perl \ + tiff-dev \ + icu-dev \ + libjpeg-turbo \ + pcre-dev \ + readline-dev \ + libexecinfo-dev \ + file \ + ttf-linux-libertine \ + git \ + openssl \ + autoconf \ + automake \ + libuv \ + http-parser \ + tzdata \ + libgit2-dev \ + bash + +RUN apk add --update --no-cache ${R_PKGS} ${R_DEPS} diff --git a/src/.docker_modules/r-base/4.0.3/docker_init.sh b/src/.docker_modules/r-base/4.0.3/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..2b4e97048e502f00ec3447bbced8d9f53d529c6c --- /dev/null +++ b/src/.docker_modules/r-base/4.0.3/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/r-base:4.0.3 +docker build src/.docker_modules/r-base/4.0.3 -t 'lbmc/r-base:4.0.3' +docker push lbmc/r-base:4.0.3 diff --git a/src/.docker_modules/rfiltration/Dockerfile b/src/.docker_modules/rfiltration/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..db17dbe19014069ab917da50d6d10fff572ef6c7 --- /dev/null +++ b/src/.docker_modules/rfiltration/Dockerfile @@ -0,0 +1,26 @@ +##### BASE IMAGE ##### +FROM satijalab/seurat:4.0.4 + +MAINTAINER Maxime LEPETIT + + + +RUN apt-get install procps + + +RUN R --no-echo --no-restore --no-save -e "remotes::install_github('chris-mcginnis-ucsf/DoubletFinder')" + +RUN R --no-echo --no-restore --no-save -e "BiocManager::install('DropletUtils',version='3.13')" + +RUN R --no-echo --no-restore --no-save -e "install.packages('tidyverse',dependency=TRUE, repos='http://cran.rstudio.com/')" + +RUN R --no-echo --no-restore --no-save -e "install.packages('PCDimension', version ='1.1.11' , repos='http://cran.rstudio.com/')" + +COPY function.R usr/src/ + +COPY ranalysis_part1.R usr/src/ + + +COPY ranalysis_part2.R usr/src/ + +RUN chmod a+x usr/src/* diff --git a/src/.docker_modules/rfiltration/docker_init.sh b/src/.docker_modules/rfiltration/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..a8093d2e89c0257c2803c05cb14bfd29acb854b9 --- /dev/null +++ b/src/.docker_modules/rfiltration/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull mlepetit/rfiltration +docker build src/.docker_modules/mlepetit/rfiltration -t 'mlepetit/rfiltration' +docker push mlepetit/rfiltration diff --git 
a/src/.docker_modules/ribotricer/1.3.2/Dockerfile b/src/.docker_modules/ribotricer/1.3.2/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..451a68a1fdf14d8e517f6aad5504f516993cfa1f --- /dev/null +++ b/src/.docker_modules/ribotricer/1.3.2/Dockerfile @@ -0,0 +1,13 @@ +FROM ubuntu:20.04 +MAINTAINER Emmanuel Labaronne + +ENV RIBOTRICER_VERSION=1.3.2 +ENV PACKAGES python3\ + python3-dev\ + python3-pip + +RUN apt-get update && \ + apt-get install -y ${PACKAGES} && \ + apt-get clean + +RUN pip3 install ribotricer==${RIBOTRICER_VERSION} diff --git a/src/.docker_modules/ribotricer/1.3.2/docker_init.sh b/src/.docker_modules/ribotricer/1.3.2/docker_init.sh new file mode 100644 index 0000000000000000000000000000000000000000..71311abf3f5168669979e0494ffe3e1f344da9dd --- /dev/null +++ b/src/.docker_modules/ribotricer/1.3.2/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/ribotricer:1.3.2 +docker build src/.docker_modules/ribotricer/1.3.2 -t 'lbmc/ribotricer:1.3.2' +docker push lbmc/ribotricer:1.3.2 diff --git a/src/.docker_modules/rsem/1.3.0/Dockerfile b/src/.docker_modules/rsem/1.3.0/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..337521c0496db33c93c20c8fd4d756efcba603a8 --- /dev/null +++ b/src/.docker_modules/rsem/1.3.0/Dockerfile @@ -0,0 +1,24 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV RSEM_VERSION=1.3.0 +ENV BOWTIE2_VERSION=2.3.4.1 +ENV SAMTOOLS_VERSION=1.7 +ENV PACKAGES git=1:2.17* \ + build-essential=12.4* \ + ca-certificates=20180409 \ + zlib1g-dev=1:1.2.11* \ + bowtie2=${BOWTIE2_VERSION}* \ + samtools=${SAMTOOLS_VERSION}* + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN git clone https://github.com/deweylab/RSEM.git RSEM_${RSEM_VERSION} && \ + cd RSEM_${RSEM_VERSION} && \ + git checkout tags/v${RSEM_VERSION} && \ + make && \ + cd .. 
&& \ + mv RSEM_${RSEM_VERSION}/rsem-* RSEM_${RSEM_VERSION}/rsem_* RSEM_${RSEM_VERSION}/convert-* RSEM_${RSEM_VERSION}/extract-* /usr/bin/ && \ + rm -Rf RSEM_${RSEM_VERSION} diff --git a/src/.docker_modules/rsem/1.3.0/docker_init.sh b/src/.docker_modules/rsem/1.3.0/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..aadcb4d8ce01353c3510a6a649d640121865bf8d --- /dev/null +++ b/src/.docker_modules/rsem/1.3.0/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/rsem:1.3.0 +docker build src/.docker_modules/rsem/1.3.0 -t 'lbmc/rsem:1.3.0' +docker push lbmc/rsem:1.3.0 diff --git a/src/.docker_modules/sabre/039a55e/Dockerfile b/src/.docker_modules/sabre/039a55e/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..f769d43f51f93c949411beea29a540ce2f01a030 --- /dev/null +++ b/src/.docker_modules/sabre/039a55e/Dockerfile @@ -0,0 +1,20 @@ +FROM alpine:3.12.0 +MAINTAINER Laurent Modolo + +ENV SABRE_VERSION=039a55e + +ENV SABRE_DEPS g++ bash make zlib-dev git + +RUN echo "http://ftp.acc.umu.se/mirror/alpinelinux.org/v3.11/main" > /etc/apk/repositories \ +&& echo "http://ftp.acc.umu.se/mirror/alpinelinux.org/v3.11/community" >> /etc/apk/repositories \ +&& sed -i -e 's/v[[:digit:]]\..*\//edge\//g' /etc/apk/repositories \ +&& apk add --update --no-cache ${SABRE_DEPS} \ +&& git clone https://github.com/najoshi/sabre.git \ +&& cd sabre \ +&& git checkout $SABRE_VERSION \ +&& make \ +&& mv sabre /usr/bin \ +&& chmod +x /usr/bin/sabre + + +CMD ["bash"] diff --git a/src/.docker_modules/sabre/039a55e/docker_init.sh b/src/.docker_modules/sabre/039a55e/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..fc0f318f612a582b7a56691d27cd5454b2b3370b --- /dev/null +++ b/src/.docker_modules/sabre/039a55e/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/sabre:039a55e +docker build src/.docker_modules/sabre/039a55e -t 'lbmc/sabre:039a55e' +docker push lbmc/sabre:039a55e diff --git a/src/.docker_modules/salmon/0.8.2/Dockerfile b/src/.docker_modules/salmon/0.8.2/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..31bb28ecce7ec5e5e70baf591ec418a1258bd2dd --- /dev/null +++ b/src/.docker_modules/salmon/0.8.2/Dockerfile @@ -0,0 +1,15 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV SALMON_VERSION=0.8.2 +ENV PACKAGES curl=7.58.0* \ + ca-certificates=20180409 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN curl -k -L https://github.com/COMBINE-lab/salmon/releases/download/v${SALMON_VERSION}/Salmon-${SALMON_VERSION}_linux_x86_64.tar.gz -o Salmon-${SALMON_VERSION}_linux_x86_64.tar.gz && \ +tar xzf Salmon-${SALMON_VERSION}_linux_x86_64.tar.gz && \ +mv Salmon-${SALMON_VERSION}_linux_x86_64/bin/* /usr/bin/ && \ +rm -Rf Salmon-${SALMON_VERSION}* diff --git a/src/.docker_modules/salmon/0.8.2/docker_init.sh b/src/.docker_modules/salmon/0.8.2/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..f44850b49c43ae852f1ef93b88a09f301169f780 --- /dev/null +++ b/src/.docker_modules/salmon/0.8.2/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/salmon:0.8.2 +docker build src/.docker_modules/salmon/0.8.2 -t 'lbmc/salmon:0.8.2' +docker push lbmc/salmon:0.8.2 diff --git a/src/.docker_modules/sambamba/0.6.7/Dockerfile b/src/.docker_modules/sambamba/0.6.7/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..5858a176917fc301e165ba18a6d69c34a1bf786a --- /dev/null +++
b/src/.docker_modules/sambamba/0.6.7/Dockerfile @@ -0,0 +1,17 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV SAMBAMBA_VERSION=0.6.7 +ENV PACKAGES curl=7.58.0* \ + ca-certificates=20180409 \ + build-essential=12.4* \ + zlib1g-dev=1:1.2.11* + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN curl -k -L https://github.com/biod/sambamba/releases/download/v${SAMBAMBA_VERSION}/sambamba_v${SAMBAMBA_VERSION}_linux.tar.bz2 -o sambamba_v${SAMBAMBA_VERSION}_linux.tar.bz2 && \ +tar xvjf sambamba_v${SAMBAMBA_VERSION}_linux.tar.bz2 && \ +mv sambamba /usr/bin/ && \ +rm -R sambamba_v${SAMBAMBA_VERSION}_linux* diff --git a/src/.docker_modules/sambamba/0.6.7/docker_init.sh b/src/.docker_modules/sambamba/0.6.7/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..ccedf316633c21653bde1312e1ccd5376b95fafe --- /dev/null +++ b/src/.docker_modules/sambamba/0.6.7/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/sambamba:0.6.7 +docker build src/.docker_modules/sambamba/0.6.7 -t 'lbmc/sambamba:0.6.7' +docker push lbmc/sambamba:0.6.7 diff --git a/src/.docker_modules/sambamba/0.6.9/Dockerfile b/src/.docker_modules/sambamba/0.6.9/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..f45a65412dba77df471e7cd8ee5c79bd2439f1d9 --- /dev/null +++ b/src/.docker_modules/sambamba/0.6.9/Dockerfile @@ -0,0 +1,17 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV SAMBAMBA_VERSION=0.6.9 +ENV PACKAGES curl=7.58.0* \ + ca-certificates=20180409 \ + build-essential=12.4* \ + zlib1g-dev=1:1.2.11* + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN curl -k -L https://github.com/biod/sambamba/releases/download/v${SAMBAMBA_VERSION}/sambamba-${SAMBAMBA_VERSION}-linux-static.gz -o sambamba_v${SAMBAMBA_VERSION}_linux.gz && \ +gunzip sambamba_v${SAMBAMBA_VERSION}_linux.gz && \ +mv sambamba_v${SAMBAMBA_VERSION}_linux /usr/bin/sambamba && \ +chmod +x /usr/bin/sambamba diff --git a/src/.docker_modules/sambamba/0.6.9/docker_init.sh b/src/.docker_modules/sambamba/0.6.9/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..9525b17e688d739198a1421f641c2281b45ade9a --- /dev/null +++ b/src/.docker_modules/sambamba/0.6.9/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/sambamba:0.6.9 +docker build src/.docker_modules/sambamba/0.6.9 -t 'lbmc/sambamba:0.6.9' +docker push lbmc/sambamba:0.6.9 diff --git a/src/.docker_modules/samblaster/0.1.24/Dockerfile b/src/.docker_modules/samblaster/0.1.24/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..dcba024b879e8ef7720e35873d9e8101116891c6 --- /dev/null +++ b/src/.docker_modules/samblaster/0.1.24/Dockerfile @@ -0,0 +1,22 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV SAMBLASTER_VERSION=0.1.24 +ENV SAMTOOLS_VERSION=1.7 +ENV PACKAGES curl=7.58.0* \ + samtools=${SAMTOOLS_VERSION}* \ + ca-certificates=20180409 \ + build-essential=12.4* \ + zlib1g-dev=1:1.2.11* + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN curl -k -L https://github.com/GregoryFaust/samblaster/releases/download/v.${SAMBLASTER_VERSION}/samblaster-v.${SAMBLASTER_VERSION}.tar.gz -o samblaster-v.${SAMBLASTER_VERSION}.tar.gz && \ +tar xvf samblaster-v.${SAMBLASTER_VERSION}.tar.gz && \ +cd samblaster-v.${SAMBLASTER_VERSION}/ && \ +make && \ +cp samblaster /usr/bin && \ +cd ..
&& \ +rm -R samblaster-v.${SAMBLASTER_VERSION}/ diff --git a/src/.docker_modules/samblaster/0.1.24/docker_init.sh b/src/.docker_modules/samblaster/0.1.24/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..0fec5a0782d348935647212a430f9c1efe7d4367 --- /dev/null +++ b/src/.docker_modules/samblaster/0.1.24/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/samblaster:0.1.24 +docker build src/.docker_modules/samblaster/0.1.24 -t 'lbmc/samblaster:0.1.24' +docker push lbmc/samblaster:0.1.24 diff --git a/src/.docker_modules/samtools/1.11/Dockerfile b/src/.docker_modules/samtools/1.11/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..054c3e6e421ee3056d43556d6435a23775934ba3 --- /dev/null +++ b/src/.docker_modules/samtools/1.11/Dockerfile @@ -0,0 +1,27 @@ +FROM alpine:3.8 +MAINTAINER Laurent Modolo + +ENV SAMTOOLS_VERSION=1.11 +ENV PACKAGES git \ + make \ + gcc \ + musl-dev \ + zlib-dev \ + ncurses-dev \ + bzip2-dev \ + xz-dev \ + curl-dev \ + bash + +RUN apk update && \ + apk add ${PACKAGES} + +RUN git clone https://github.com/samtools/htslib.git && \ +cd htslib && \ +git checkout ${SAMTOOLS_VERSION} && \ +cd .. && \ +git clone https://github.com/samtools/samtools.git && \ +cd samtools && \ +git checkout ${SAMTOOLS_VERSION} && \ +make && \ +cp samtools /usr/bin/ diff --git a/src/.docker_modules/samtools/1.11/docker_init.sh b/src/.docker_modules/samtools/1.11/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..e5cf9c2896e0679b9124bdb4e38f852184f993f6 --- /dev/null +++ b/src/.docker_modules/samtools/1.11/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/samtools:1.11 +docker build src/.docker_modules/samtools/1.11 -t 'lbmc/samtools:1.11' +docker push lbmc/samtools:1.11 diff --git a/src/.docker_modules/samtools/1.7/Dockerfile b/src/.docker_modules/samtools/1.7/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..212101a98a9e01783e95d2db93c43d359d5f4d3a --- /dev/null +++ b/src/.docker_modules/samtools/1.7/Dockerfile @@ -0,0 +1,26 @@ +FROM alpine:3.8 +MAINTAINER Laurent Modolo + +ENV SAMTOOLS_VERSION=1.7 +ENV PACKAGES git \ + make \ + gcc \ + musl-dev \ + zlib-dev \ + ncurses-dev \ + bzip2-dev \ + xz-dev \ + bash + +RUN apk update && \ + apk add ${PACKAGES} + +RUN git clone https://github.com/samtools/htslib.git && \ +cd htslib && \ +git checkout ${SAMTOOLS_VERSION} && \ +cd .. 
&& \ +git clone https://github.com/samtools/samtools.git && \ +cd samtools && \ +git checkout ${SAMTOOLS_VERSION} && \ +make && \ +cp samtools /usr/bin/ diff --git a/src/.docker_modules/samtools/1.7/docker_init.sh b/src/.docker_modules/samtools/1.7/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..83c510a9e6fe22e1c28eac9bed5e44d1c707da15 --- /dev/null +++ b/src/.docker_modules/samtools/1.7/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/samtools:1.7 +docker build src/.docker_modules/samtools/1.7 -t 'lbmc/samtools:1.7' +docker push lbmc/samtools:1.7 diff --git a/src/.docker_modules/sanity/Dockerfile b/src/.docker_modules/sanity/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..99f9545ea137d6a7450983b9719301c7e7edf578 --- /dev/null +++ b/src/.docker_modules/sanity/Dockerfile @@ -0,0 +1,30 @@ +##### BASE IMAGE ##### +FROM ubuntu:18.04 + +##### METADATA ##### +LABEL base.image="ubuntu:18.04" +LABEL version="1" +LABEL software="Sanity" +LABEL software.version="1.0" +LABEL software.description="Sanity" +LABEL software.website="" +LABEL software.documentation="https://github.com/jmbreda/Sanity" + +##### VARIABLES ##### + +# Use variables for convenient updates/re-usability +ENV SOFTWARE_VERSION 1.0 + +RUN apt-get update \ + && apt-get install -y tzdata \ + && ln -fs /usr/share/zoneinfo/Europe/Berlin /etc/localtime \ + && dpkg-reconfigure --frontend noninteractive tzdata \ + && apt-get install --yes git make g++ libgomp1 \ + && git clone https://github.com/jmbreda/Sanity.git \ + && cd Sanity \ + && cd src \ + && make \ + && cp ../bin/Sanity /usr/bin \ + && cd ../../ \ + && rm -rf Sanity \ + && apt-get remove --purge --yes git make g++ diff --git a/src/.docker_modules/sanity/docker_init.sh b/src/.docker_modules/sanity/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..876fecd80a1d5eaf08ce74e8596960b999768480 --- /dev/null +++ b/src/.docker_modules/sanity/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/sanity +docker build src/.docker_modules/sanity -t 'lbmc/sanity' +docker push lbmc/sanity diff --git a/src/.docker_modules/sratoolkit/2.8.2/Dockerfile b/src/.docker_modules/sratoolkit/2.8.2/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..55622ef79ad427763d9f8e6e1523d405906835c9 --- /dev/null +++ b/src/.docker_modules/sratoolkit/2.8.2/Dockerfile @@ -0,0 +1,9 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV SRATOOLKIT_VERSION=2.8.2 +ENV PACKAGES sra-toolkit=${SRATOOLKIT_VERSION}* + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean diff --git a/src/.docker_modules/sratoolkit/2.8.2/docker_init.sh b/src/.docker_modules/sratoolkit/2.8.2/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..ce040fcc1b3ed4f7041d01421e7a2031d983ef6f --- /dev/null +++ b/src/.docker_modules/sratoolkit/2.8.2/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/sratoolkit:2.8.2 +docker build src/.docker_modules/sratoolkit/2.8.2 -t 'lbmc/sratoolkit:2.8.2' +docker push lbmc/sratoolkit:2.8.2 diff --git a/src/.docker_modules/star/2.5.3/Dockerfile b/src/.docker_modules/star/2.5.3/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..a045a2352dd59dc8de04e07b233a0399b891cde8 --- /dev/null +++ b/src/.docker_modules/star/2.5.3/Dockerfile @@ -0,0 +1,2 @@ +FROM quay.io/biocontainers/star:2.7.3a--0 +MAINTAINER Laurent Modolo diff --git
a/src/.docker_modules/star/2.5.3/docker_init.sh b/src/.docker_modules/star/2.5.3/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..50beecfcc7fcb7a9b1943a418651cafb55851495 --- /dev/null +++ b/src/.docker_modules/star/2.5.3/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/star:2.5.3 +docker build src/.docker_modules/star/2.5.3/ -t 'lbmc/star:2.5.3' +docker push lbmc/star:2.5.3 diff --git a/src/.docker_modules/star/2.7.3a/Dockerfile b/src/.docker_modules/star/2.7.3a/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..a045a2352dd59dc8de04e07b233a0399b891cde8 --- /dev/null +++ b/src/.docker_modules/star/2.7.3a/Dockerfile @@ -0,0 +1,2 @@ +FROM quay.io/biocontainers/star:2.7.3a--0 +MAINTAINER Laurent Modolo diff --git a/src/.docker_modules/star/2.7.3a/docker_init.sh b/src/.docker_modules/star/2.7.3a/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..50beecfcc7fcb7a9b1943a418651cafb55851495 --- /dev/null +++ b/src/.docker_modules/star/2.7.3a/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/star:2.7.3a +docker build src/.docker_modules/star/2.7.3a/ -t 'lbmc/star:2.7.3a' +docker push lbmc/star:2.7.3a diff --git a/src/.docker_modules/subread/1.6.4/Dockerfile b/src/.docker_modules/subread/1.6.4/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..97d5870ad4d7a59ec0e8f38300d8e86651cc2e8b --- /dev/null +++ b/src/.docker_modules/subread/1.6.4/Dockerfile @@ -0,0 +1,2 @@ +FROM quay.io/biocontainers/subread:1.6.4--h84994c4_1 +MAINTAINER Laurent Modolo diff --git a/src/.docker_modules/subread/1.6.4/docker_init.sh b/src/.docker_modules/subread/1.6.4/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..0dd51ca0dbc45ab1b2c237c1a43c670f14dd184a --- /dev/null +++ b/src/.docker_modules/subread/1.6.4/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/subread:1.6.4 +docker build src/.docker_modules/subread/1.6.4 -t 'lbmc/subread:1.6.4' +docker push lbmc/subread:1.6.4 diff --git a/src/.docker_modules/tophat/2.1.1/Dockerfile b/src/.docker_modules/tophat/2.1.1/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..3c34d4641239ebfa7cbb65a716e436f2e0c099ee --- /dev/null +++ b/src/.docker_modules/tophat/2.1.1/Dockerfile @@ -0,0 +1,11 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV TOPHAT_VERSION=2.1.1 +ENV PACKAGES tophat=${TOPHAT_VERSION}*\ + bowtie=1.2.2*\ + libsys-hostname-long-perl + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean diff --git a/src/.docker_modules/tophat/2.1.1/docker_init.sh b/src/.docker_modules/tophat/2.1.1/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..67151131596b2c2dda5e5cc7beadc69dcd64aa6c --- /dev/null +++ b/src/.docker_modules/tophat/2.1.1/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/tophat:2.1.1 +docker build src/.docker_modules/tophat/2.1.1 -t 'lbmc/tophat:2.1.1' +docker push lbmc/tophat:2.1.1 diff --git a/src/.docker_modules/trimmomatic/0.36/Dockerfile b/src/.docker_modules/trimmomatic/0.36/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..8e04269a05b905fa41c17fe86338bce8f75b33c0 --- /dev/null +++ b/src/.docker_modules/trimmomatic/0.36/Dockerfile @@ -0,0 +1,9 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV TRIMMOMATIC_VERSION=0.36 +ENV PACKAGES trimmomatic=${TRIMMOMATIC_VERSION}* + +RUN apt-get update && \ + apt-get install -y
--no-install-recommends ${PACKAGES} && \ + apt-get clean diff --git a/src/.docker_modules/trimmomatic/0.36/docker_init.sh b/src/.docker_modules/trimmomatic/0.36/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..f054581bde67aff212a04284c5f463f8a6e4ab75 --- /dev/null +++ b/src/.docker_modules/trimmomatic/0.36/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/trimmomatic:0.36 +docker build src/.docker_modules/trimmomatic/0.36 -t 'lbmc/trimmomatic:0.36' +docker push lbmc/trimmomatic:0.36 diff --git a/src/.docker_modules/ucsc/375/Dockerfile b/src/.docker_modules/ucsc/375/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..9365a974bdc86f80897ad97ca8d13436b3bb364d --- /dev/null +++ b/src/.docker_modules/ucsc/375/Dockerfile @@ -0,0 +1,27 @@ +FROM debian:jessie +MAINTAINER Laurent Modolo + +ENV PACKAGES apt-utils \ + curl \ + build-essential \ + libssl-dev \ + libpng-dev \ + uuid-dev \ + libmysqlclient-dev \ + procps \ + rsync + + +RUN apt-get update && \ + apt-get install -y ${PACKAGES} + +ENV UCSC_VERSION=375 + +RUN curl -k -L http://hgdownload.soe.ucsc.edu/admin/exe/userApps.v${UCSC_VERSION}.src.tgz -o userApps.v${UCSC_VERSION}.src.tgz &&\ +tar xvf userApps.v${UCSC_VERSION}.src.tgz &&\ +cd userApps/ && \ +make &&\ +cd .. &&\ +mv userApps/bin/* /usr/bin/ &&\ +rm -R userApps.v${UCSC_VERSION}.src.tgz &&\ +rm -R userApps diff --git a/src/.docker_modules/ucsc/375/docker_init.sh b/src/.docker_modules/ucsc/375/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..f0cc90565cc1f5583eb0c4303976300f695500e0 --- /dev/null +++ b/src/.docker_modules/ucsc/375/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/ucsc:375 +docker build src/.docker_modules/ucsc/375/ -t 'lbmc/ucsc:375' +docker push lbmc/ucsc:375 diff --git a/src/.docker_modules/ucsc/400/Dockerfile b/src/.docker_modules/ucsc/400/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..6409f862f0c1b51d853597cd4404bb55f03137c2 --- /dev/null +++ b/src/.docker_modules/ucsc/400/Dockerfile @@ -0,0 +1,27 @@ +FROM debian:jessie +MAINTAINER Laurent Modolo + +ENV PACKAGES apt-utils \ + curl \ + build-essential \ + libssl-dev \ + libpng-dev \ + uuid-dev \ + libmysqlclient-dev \ + procps \ + rsync + + +RUN apt-get update && \ + apt-get install -y ${PACKAGES} + +ENV UCSC_VERSION=400 + +RUN curl -k -L http://hgdownload.soe.ucsc.edu/admin/exe/userApps.v${UCSC_VERSION}.src.tgz -o userApps.v${UCSC_VERSION}.src.tgz &&\ +tar xvf userApps.v${UCSC_VERSION}.src.tgz &&\ +cd userApps/ && \ +make &&\ +cd .. 
&&\
+mv userApps/bin/* /usr/bin/ &&\
+rm -R userApps.v${UCSC_VERSION}.src.tgz &&\
+rm -R userApps
diff --git a/src/.docker_modules/ucsc/400/docker_init.sh b/src/.docker_modules/ucsc/400/docker_init.sh
new file mode 100755
index 0000000000000000000000000000000000000000..83c2161652164d7ccecaf82b4ca25babde445599
--- /dev/null
+++ b/src/.docker_modules/ucsc/400/docker_init.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/ucsc:400
+docker build src/.docker_modules/ucsc/400/ -t 'lbmc/ucsc:400'
+docker push lbmc/ucsc:400
diff --git a/src/.docker_modules/ucsc/407/Dockerfile b/src/.docker_modules/ucsc/407/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..1499bdb1d58e48a64ee1a7dee550444527b7c82e
--- /dev/null
+++ b/src/.docker_modules/ucsc/407/Dockerfile
@@ -0,0 +1,27 @@
+FROM debian:jessie
+MAINTAINER Laurent Modolo
+
+ENV PACKAGES apt-utils \
+             curl \
+             build-essential \
+             libssl-dev \
+             libpng-dev \
+             uuid-dev \
+             libmysqlclient-dev \
+             procps \
+             rsync
+
+
+RUN apt-get update && \
+    apt-get install -y ${PACKAGES}
+
+ENV UCSC_VERSION=407
+
+RUN curl -k -L http://hgdownload.soe.ucsc.edu/admin/exe/userApps.v${UCSC_VERSION}.src.tgz -o userApps.v${UCSC_VERSION}.src.tgz &&\
+tar xvf userApps.v${UCSC_VERSION}.src.tgz &&\
+cd userApps/ && \
+make &&\
+cd .. &&\
+mv userApps/bin/* /usr/bin/ &&\
+rm -R userApps.v${UCSC_VERSION}.src.tgz &&\
+rm -R userApps
diff --git a/src/.docker_modules/ucsc/407/docker_init.sh b/src/.docker_modules/ucsc/407/docker_init.sh
new file mode 100755
index 0000000000000000000000000000000000000000..1f092a8f48aa56e22b30716949337871950795a2
--- /dev/null
+++ b/src/.docker_modules/ucsc/407/docker_init.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/ucsc:407
+docker build src/.docker_modules/ucsc/407/ -t 'lbmc/ucsc:407'
+docker push lbmc/ucsc:407
diff --git a/src/.docker_modules/umi_tools/0.5.4/Dockerfile b/src/.docker_modules/umi_tools/0.5.4/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..81c4cc8c1529187fd8c34f5106f05e2ba6b55be6
--- /dev/null
+++ b/src/.docker_modules/umi_tools/0.5.4/Dockerfile
@@ -0,0 +1,2 @@
+FROM quay.io/biocontainers/umi_tools:0.5.4--py27hdd9f355_1
+MAINTAINER Rémi SERAPHIN
diff --git a/src/.docker_modules/umi_tools/0.5.4/docker_init.sh b/src/.docker_modules/umi_tools/0.5.4/docker_init.sh
new file mode 100755
index 0000000000000000000000000000000000000000..200e9c066fe98de8262a48eea0f615b064ff90a4
--- /dev/null
+++ b/src/.docker_modules/umi_tools/0.5.4/docker_init.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/umi_tools:0.5.4
+docker build src/.docker_modules/umi_tools/0.5.4/ -t 'lbmc/umi_tools:0.5.4'
+docker push lbmc/umi_tools:0.5.4
diff --git a/src/.docker_modules/umi_tools/1.0.0/Dockerfile b/src/.docker_modules/umi_tools/1.0.0/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..d82cc9a6c7dc0877522149912b09899f7879ede9
--- /dev/null
+++ b/src/.docker_modules/umi_tools/1.0.0/Dockerfile
@@ -0,0 +1,25 @@
+FROM debian:stretch
+MAINTAINER Rémi SERAPHIN
+
+ENV UMI_TOOLS_VERSION=1.0.0
+ENV PACKAGES="bash \
+    python3 \
+    python3-dev \
+    python3-pip \
+    procps \
+    zlib1g-dev \
+    zlib1g \
+    bzip2 \
+    libbz2-dev \
+    liblzma-dev \
+    libcurl4-openssl-dev \
+    make \
+    gcc"
+
+RUN apt-get update && \
+apt-get install -y --no-install-recommends ${PACKAGES} && \
+apt-get clean
+
+RUN pip3 install setuptools
+RUN pip3 install umi_tools==${UMI_TOOLS_VERSION}
+
diff --git a/src/.docker_modules/umi_tools/1.0.0/docker_init.sh b/src/.docker_modules/umi_tools/1.0.0/docker_init.sh
new file mode
100755 index 0000000000000000000000000000000000000000..200e9c066fe98de8262a48eea0f615b064ff90a4 --- /dev/null +++ b/src/.docker_modules/umi_tools/1.0.0/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/umi_tools:1.0.0 +docker build src/.docker_modules/umi_tools/1.0.0/ -t 'lbmc/umi_tools:1.0.0' +docker push lbmc/umi_tools:1.0.0 diff --git a/src/.docker_modules/urqt/d62c1f8/Dockerfile b/src/.docker_modules/urqt/d62c1f8/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..9b54a7eb91ad3767532c8703fe25d6cad29e27cb --- /dev/null +++ b/src/.docker_modules/urqt/d62c1f8/Dockerfile @@ -0,0 +1,21 @@ +FROM ubuntu:18.04 +MAINTAINER Laurent Modolo + +ENV URQT_VERSION=d62c1f8 +ENV PACKAGES git=1:2.17* \ + build-essential=12.4* \ + ca-certificates=20180409 \ + procps \ + zlib1g-dev=1:1.2.11* + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + apt-get clean + +RUN git clone https://github.com/l-modolo/UrQt.git && \ + cd UrQt && \ + git checkout ${URQT_VERSION} && \ + make && \ + cd .. && \ + mv UrQt/UrQt /usr/bin/ && \ + rm -Rf UrQt diff --git a/src/.docker_modules/urqt/d62c1f8/docker_init.sh b/src/.docker_modules/urqt/d62c1f8/docker_init.sh new file mode 100755 index 0000000000000000000000000000000000000000..bb3fb4f882ec4f93e4cec643e035fb7d2d7a4963 --- /dev/null +++ b/src/.docker_modules/urqt/d62c1f8/docker_init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker pull lbmc/urqt:d62c1f8 +docker build src/.docker_modules/urqt/d62c1f8 -t 'lbmc/urqt:d62c1f8' +docker push lbmc/urqt:d62c1f8 diff --git a/src/Rscript/function.R b/src/Rscript/function.R new file mode 100755 index 0000000000000000000000000000000000000000..8eac6424361c107394a3541683749da7263116fb --- /dev/null +++ b/src/Rscript/function.R @@ -0,0 +1,612 @@ +suppressPackageStartupMessages({ + +library(DropletUtils) +library(Matrix) +library(tidyverse) +library(DoubletFinder) +library(ggplot2) +library(PCDimension) +library(viridis) +library(scales) +library(Seurat) +library(cluster) +library(dplyr) +}) + +custom_colors <- list() + +colors_dutch <- c( + '#FFC312','#C4E538','#12CBC4','#FDA7DF','#ED4C67', + '#F79F1F','#A3CB38','#1289A7','#D980FA','#B53471', + '#EE5A24','#009432','#0652DD','#9980FA','#833471', + '#EA2027','#006266','#1B1464','#5758BB','#6F1E51' +) + +colors_spanish <- c( + '#40407a','#706fd3','#f7f1e3','#34ace0','#33d9b2', + '#2c2c54','#474787','#aaa69d','#227093','#218c74', + '#ff5252','#ff793f','#d1ccc0','#ffb142','#ffda79', + '#b33939','#cd6133','#84817a','#cc8e35','#ccae62' +) + +custom_colors$discrete <- c(colors_dutch, colors_spanish) + + + + +reset_par <- function(){ + op <- structure(list(xlog = FALSE, ylog = FALSE, adj = 0.5, ann = TRUE, + ask = FALSE, bg = "transparent", bty = "o", cex = 1, + cex.axis = 1, cex.lab = 1, cex.main = 1.2, cex.sub = 1, + col = "black", col.axis = "black", col.lab = "black", + col.main = "black", col.sub = "black", crt = 0, err = 0L, + family = "", fg = "black", fig = c(0, 1, 0, 1), + fin = c(6.99999895833333, 6.99999895833333), font = 1L, + font.axis = 1L, font.lab = 1L, font.main = 2L, + font.sub = 1L, lab = c(5L, 5L, 7L), las = 0L, + lend = "round", lheight = 1, ljoin = "round", lmitre = 10, + lty = "solid", lwd = 1, mai = c(1.02, 0.82, 0.82, 0.42), + mar = c(5.1, 4.1, 4.1, 2.1), mex = 1, mfcol = c(1L, 1L), + mfg = c(1L, 1L, 1L,1L), mfrow = c(1L, 1L), + mgp = c(3, 1, 0), mkh = 0.001, new = FALSE, + oma = c(0, 0, 0, 0), omd = c(0, 1, 0, 1), + omi = c(0, 0, 0,0), pch = 1L, + pin = c(5.75999895833333, 5.15999895833333), + plt = 
c(0.117142874574832, 0.939999991071427, + 0.145714307397962, 0.882857125425167), + ps = 12L, pty = "m", smo = 1, srt = 0, tck = NA_real_, + tcl = -0.5, usr = c(0.568, 1.432, 0.568, 1.432), + xaxp = c(0.6, 1.4, 4), xaxs = "r", xaxt = "s", + xpd = FALSE, yaxp = c(0.6, 1.4, 4), yaxs = "r", + yaxt = "s", ylbias = 0.2), + .Names = c("xlog", "ylog", "adj", "ann", "ask", "bg", + "bty", "cex", "cex.axis", "cex.lab", "cex.main", "cex.sub", + "col", "col.axis", "col.lab", "col.main", "col.sub", "crt", + "err", "family", "fg", "fig", "fin", "font", "font.axis", + "font.lab", "font.main", "font.sub", "lab", "las", "lend", + "lheight", "ljoin", "lmitre", "lty", "lwd", "mai", "mar", + "mex", "mfcol", "mfg", "mfrow", "mgp", "mkh", "new", "oma", + "omd", "omi", "pch", "pin", "plt", "ps", "pty", "smo", + "srt", "tck", "tcl", "usr", "xaxp", "xaxs", "xaxt", "xpd", + "yaxp", "yaxs", "yaxt", "ylbias")) + par(op) +} + + +read_count_output <- function(dir) { + name="spliced" + dir <- normalizePath(dir, mustWork = TRUE) +m <- readMM(paste0(dir, "/","counts_unfiltered","/", name, ".mtx")) + m <- Matrix::t(m) + m <- as(m, "dgCMatrix") + # The matrix read has cells in rows + ge <- ".genes.txt" + genes <- readLines(file(paste0(dir, "/","counts_unfiltered","/" ,name, ge))) + barcodes <- readLines(file(paste0(dir, "/","counts_unfiltered","/", name, ".barcodes.txt"))) + colnames(m) <- barcodes + rownames(m) <- genes + return(m) +} + + + + + + +#' Knee plot for filtering empty droplets +#' +#' Visualizes the inflection point to filter empty droplets. This function plots +#' different datasets with a different color. Facets can be added after calling +#' this function with `facet_*` functions. Will be added to the next release +#' version of BUSpaRse. +#' +#' @param bc_rank A `DataFrame` output from `DropletUtil::barcodeRanks`. +#' @return A ggplot2 object. 
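+#'
+#' @examples
+#' # minimal usage sketch (not run): assumes `m` is the gene x barcode matrix
+#' # returned by read_count_output() above
+#' \dontrun{
+#' bc_rank <- DropletUtils::barcodeRanks(m, lower = 10)
+#' knee_plot(bc_rank)
+#' }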
+knee_plot <- function(bc_rank) {
+  knee_plt <- tibble(rank = bc_rank[["rank"]],
+                     total = bc_rank[["total"]]) %>%
+    distinct() %>%
+    dplyr::filter(total > 0)
+  annot <- tibble(inflection = metadata(bc_rank)[["inflection"]],
+                  rank_cutoff = max(bc_rank$rank[bc_rank$total > metadata(bc_rank)[["inflection"]]]))
+  p <- ggplot(knee_plt, aes(total, rank)) +
+    geom_line() +
+    geom_hline(aes(yintercept = rank_cutoff), data = annot, linetype = 2) +
+    geom_vline(aes(xintercept = inflection), data = annot, linetype = 2) +
+    scale_x_log10() +
+    scale_y_log10() +
+    labs(y = "Barcode Rank", x = "Total UMIs")
+  return(p)
+}
+
+
+
+
+### Function to visualize the knee plot and remove empty droplets
+
+
+RemoveEmptyDrop <- function(sample){
+
+  tot_counts <- Matrix::colSums(sample)
+  summary(tot_counts)
+  ### barcodeRanks is provided by the DropletUtils library
+  bc_rank <- barcodeRanks(sample, lower = 10)
+
+  ## Visualize the empty droplets
+  jpeg("Kneeplot.jpeg")
+  show(knee_plot(bc_rank))
+  dev.off()
+
+
+  ## Filter out the empty droplets
+  sample <- sample[, tot_counts > metadata(bc_rank)$inflection]
+
+
+  return(sample)
+
+} #end function
+
+### Function to load t2g.txt and match Ensembl identifiers with gene symbols
+MatchGeneSymbol <- function(dirt2g,sample){
+
+  ## Needs the tidyverse package
+
+  tr <- read_tsv(dirt2g, col_names = c("transcript", "gene", "gene_symbol"))
+  tr2g <- tr %>% select(-transcript) %>% distinct()
+
+
+  rownames(sample) <- tr2g$gene_symbol[match(rownames(sample), tr2g$gene)]
+
+
+  return(sample)
+
+} #end function
+
+
+
+
+## Function that creates a Seurat object from the filtered count matrix
+Create_Seurat_Object <- function(sample){
+
+  ## Needs Seurat
+
+
+  seu_obj <- CreateSeuratObject(sample, min.cells = 3, min.features = 300,verbose=FALSE)
+  show(dim(seu_obj))
+
+
+  return(seu_obj)
+
+} #end function
+
+Calc_QC_metrics <- function(seu_obj){
+
+  seu_obj[["percent.mt"]] <- PercentageFeatureSet(seu_obj, pattern = "^MT-")
+  seu_obj[["percent.ribo"]] <- PercentageFeatureSet(seu_obj, pattern = "^RP[SL]")
+
+  jpeg("QC_metrics.jpeg")
+  show(VlnPlot(seu_obj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3))
+  dev.off()
+
+  return(seu_obj)
+
+} #end function
+## Function to subset the Seurat object and remove low-quality libraries,
+## based on the mitochondrial percentage and a minimum number of genes per cell
+
+RemoveLowQuality <- function(seu_obj,nFeatureRNA,percent_mt){
+
+
+
+  seu_obj <- subset(seu_obj, subset = nFeature_RNA > nFeatureRNA & percent.mt < percent_mt)
+
+  show(dim(seu_obj))
+
+
+  return(seu_obj)
+
+} #end function
+
+
+
+
+PreRemoveDoublet <- function(seu_obj_tmp){
+
+
+
+  # DoubletFinder requires these 5 preprocessing steps to have run first
+
+
+  seu_obj_tmp <- NormalizeData(seu_obj_tmp)
+  seu_obj_tmp <- FindVariableFeatures(seu_obj_tmp, selection.method = "vst", nfeatures = 2000)
+  seu_obj_tmp <- ScaleData(seu_obj_tmp)
+  seu_obj_tmp <- RunPCA(seu_obj_tmp)
+  seu_obj_tmp <- RunUMAP(seu_obj_tmp, dims = 1:10)
+  return(seu_obj_tmp)
+}
+
+
+
+
+Run_doublet_finder <- function(seu_obj,nexp){
+
+  ## optimise the pK value
+  print("Run Doublet finder opti")
+  sweep.res.list_kidney <- paramSweep_v3(seu_obj, PCs = 1:10, sct = FALSE)
+  sweep.stats_kidney <- summarizeSweep(sweep.res.list_kidney, GT = FALSE)
+  bcmvn_kidney <- find.pK(sweep.stats_kidney)
+
+
+  pK_opt_tmp <- filter(bcmvn_kidney,BCmetric == max(BCmetric))
+  pK_opt <- as.numeric(as.character(t(pK_opt_tmp$pK)))
+
+
+  print(paste0("Optimum pK :",pK_opt))
+
+  N_exp <- round(ncol(seu_obj) * nexp) # expected number of doublets: the fraction nexp of all cells
+
+  #Run doublet finder
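+  # doubletFinder_v3 arguments, for reference: pN is the proportion of
+  # artificial doublets generated, pK the PC neighbourhood size used for the
+  # pANN metric, and nExp the number of doublets expected in the dataset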
+  seu_obj <- doubletFinder_v3(seu_obj, pN = 0.25, pK = pK_opt, nExp = N_exp, PCs = 1:10)
+  #seu_obj <- doubletFinder_v3(seu_obj, pN = 0.25, pK = 0.09, nExp = N_exp, PCs = 1:10)
+
+  DF.name = colnames(seu_obj@meta.data)[grepl("DF.classification", colnames(seu_obj@meta.data))]
+
+
+  jpeg("Doublet_detection.jpeg")
+  show(DimPlot(seu_obj, group.by = DF.name,cols = c("blue","red")) + NoAxes()+ggtitle("Doublet detection"))
+  dev.off()
+
+  print("Remove Doublets")
+  show(paste0( sum( seu_obj@meta.data[, DF.name] == "Doublet"), " doublets detected/removed"))
+  seu_obj = seu_obj[, seu_obj@meta.data[, DF.name] == "Singlet"]
+
+  return(seu_obj)
+
+} #end function
+
+### Function to remove features with fewer than 2 total counts
+RemoveEmptyFeature <- function(seu_obj){
+
+
+
+  seu_obj <- seu_obj[rowSums(seu_obj) > 1,]
+
+  return(seu_obj)
+
+} #end function
+
+
+
+save_RDS <- function(seu_obj,name){
+
+  path <- paste0(name,".Rds")
+  saveRDS(seu_obj,file=path)
+
+
+
+} #end function
+
+
+### Function to extract raw data for Sanity normalization
+Extract_data <- function(seu_obj){
+
+
+  df_tmp <- as.data.frame(GetAssayData(object = seu_obj, slot = "counts"))
+  df_tmp <- cbind(GeneID = rownames(df_tmp), df_tmp)
+  rownames(df_tmp) <- NULL
+
+  name <- "counts_filtered_doublet"
+  path <- paste0( name, ".txt")
+  write.table(df_tmp,path,sep = '\t', row.names = F, col.names=T, quote = F)
+
+  return(seu_obj)
+
+} #end function
+
+
+
+
+read_RDS <- function(path_seu_obj){
+
+  path_seu_obj <- normalizePath(path_seu_obj, mustWork = TRUE)
+
+
+  seu_obj <- readRDS(path_seu_obj)
+
+  return(seu_obj)
+
+} #end function
+
+
+
+### Load Sanity's log-normalised output, bring it back to linear scale and
+### rescale by the median library size before storing it in the "data" slot
+SetSanityMatrix <- function(seu_obj,norm_mtx){
+
+  norm_mtx <- normalizePath(norm_mtx, mustWork = TRUE)
+
+
+  df_norm_tmp <- Matrix(exp(as.matrix(read.table(norm_mtx,row.names = 1, header = T)) ),sparse = TRUE)
+  M <- median(colSums(as.data.frame(GetAssayData(object = seu_obj, slot = "counts"))))
+
+
+  df_norm_tmp = df_norm_tmp*M
+
+  seu_obj <- SetAssayData(object = seu_obj, slot = "data", new.data = df_norm_tmp)
+
+
+
+  return(seu_obj)
+
+} #end function
+
+
+VariabeFeature <- function(seu_obj){
+
+  seu_obj <- FindVariableFeatures(seu_obj, selection.method = "vst", nfeatures = 2000,verbose=FALSE)
+
+  # Identify the 10 most highly variable genes
+  top10 <- head(VariableFeatures(seu_obj), 10)
+
+  # plot variable features with and without labels
+  plot1 <- VariableFeaturePlot(seu_obj)
+  plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
+  jpeg("Variable_feature.jpg", width = 350, height = 350)
+  show(plot2)
+  dev.off()
+
+  return(seu_obj)
+
+} #end function
+
+
+
+
+evplot = function(ev) {
+  # Broken stick model (MacArthur 1957)
+  n = length(ev)
+  bsm = data.frame(j=seq(1:n), p=0)
+  bsm$p[1] = 1/n
+  for (i in 2:n) bsm$p[i] = bsm$p[i-1] + (1/(n + 1 - i))
+  bsm$p = 100*bsm$p/n
+  # Plot eigenvalues and % of variation for each axis
+  op = par(mfrow=c(2,1),omi=c(0.1,0.3,0.1,0.1), mar=c(1, 1, 1, 1))
+  barplot(ev, main="Eigenvalues", col="bisque", las=2)
+  abline(h=mean(ev), col="red")
+  legend("topright", "Average eigenvalue", lwd=1, col=2, bty="n",cex = 1.5)
+  barplot(t(cbind(100*ev/sum(ev), bsm$p[n:1])), beside=TRUE,
+          main="% variation", col=c("bisque",2), las=2)
+  legend("topright", c("% eigenvalue", "Broken stick model"),
+         pch=15, col=c("bisque",2), bty="n",cex = 1.5)
+  par(op)
+}
+
+ChooseComponent = function(seu_obj) {
+
+
+  df_ev <- (seu_obj@reductions[["pca"]]@stdev)^2
+
+  jpeg("Choose_component.jpg", width = 350, height = 350)
+  show(evplot(df_ev))
+  dev.off()
+
+}
+RunCenterScalePCA <- function(seu_obj){
+
+
+
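+  # note: scaling is applied to all genes here (not only the 2000 variable
+  # features) so that any gene can be visualised on a comparable scale later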
+  all.genes <- rownames(seu_obj)
+  seu_obj <- ScaleData(seu_obj, features = all.genes)
+
+  seu_obj <- RunPCA(seu_obj, features = VariableFeatures(object = seu_obj),rev.pca=TRUE)
+  return(seu_obj)
+
+} #end function
+
+VizFeatureCP <- function(seu_obj,cp){
+  jpeg("VizDimPca.jpg", width = 350, height = 350)
+  show(VizDimLoadings(seu_obj, dims = 1:cp, reduction = "pca"))
+  dev.off()
+  seu_obj <- FindNeighbors(seu_obj, dims = 1:cp)
+  return(seu_obj)
+
+} #end function
+
+# calculates the convex hull of (J,pen)
+# kv : coordinates of the convex hull
+
+conv.hull <- function(J,pen){
+
+  K = length(J)
+  k = 1
+  kv = c()
+  pv = c()
+
+  while (k<K){
+    pk = (J[(k+1):K]-J[k]) / (pen[k]-pen[(k+1):K])
+    dm = which.max(pk)
+    kv = c(kv,k)
+    k = k + dm
+  }
+
+  kv = c(kv,K)
+  return(kv=kv)
+
+} #end function
+
+
+
+
+mean_silhouette <- function(object){
+
+  mean_silhouette_score = c()
+  j = 0
+  K = c()
+
+  object <- FindClusters(object, resolution = 0.01,verbose = FALSE)
+
+  object <- RunUMAP(object, dims = 1:4,verbose = FALSE)
+  distance_matrix <- dist(Embeddings(object[['umap']])[, 1:2])
+  for (rr in seq(0.01,1,by=0.01)){
+    table_silhouette = c()
+    silhouette = c()
+    clusters = c()
+    object@meta.data[["seurat_clusters"]] = c()
+    j = j+1
+    show(j)
+
+    object <- FindClusters(object, resolution = rr,verbose = FALSE)
+    K[j] = length(unique(Idents(object)))
+
+
+    clusters <- object@meta.data[["seurat_clusters"]]
+
+    table_silhouette <- silhouette(as.numeric(clusters), dist = distance_matrix)
+
+    silhouette <- table_silhouette
+
+    object@meta.data[["seurat_clusters"]] <- silhouette[,3]
+    mean_silhouette_score[j] <- mean(object@meta.data[["seurat_clusters"]])
+
+  }
+
+  x = seq(0.01,1,by=0.01)
+  ch = conv.hull(J = -mean_silhouette_score, pen = seq(0.01,1,by=0.01))
+  ch2 = conv.hull(J = -K, pen = seq(0.01,1,by=0.01))
+  par(mar=c(5,5,5,5)+0.1, las=1)
+
+  jpeg("silhouette_res_cluster.jpg", width = 600, height = 350)
+  show(plot.new())
+  show(plot.window(xlim=range(x), ylim=range(K)))
+  show(lines.default(x, K, col="red"))
+
+  show(axis(1,at = x))
+  show(axis(2,at=seq(min(K),max(K),by=1)))
+  show(box())
+
+  show(plot.window(xlim=range(x), ylim=range(mean_silhouette_score)))
+  show(lines(x,mean_silhouette_score, col="blue"))
+  show(lines(x[ch],mean_silhouette_score[ch],type="l",col=2))
+  show(lines(x[ch],mean_silhouette_score[ch],type="b",col=2))
+
+  show(axis(4))
+  show(mtext("Mean Silhouette score", side=4, line=3, cex.lab=1,las=3))
+
+  # add a main title and bottom and left axis labels
+  show(title( xlab="Resolution",ylab="Clusters"))
+  dev.off()
+
+
+  x[ch]
+  mean_silhouette_score[ch]
+
+}
+
+
+GraphClusteringLouvain <- function(seu_obj,res,cp){
+  resolution <- as.numeric(res)
+  principal_component <- as.numeric(cp)
+  seu_obj <- FindNeighbors(seu_obj, dims = 1:principal_component)
+  seu_obj <- FindClusters(seu_obj, resolution = resolution)
+  return(seu_obj)
+
+} #end function
+
+
+
+SilPlot <- function(seu_obj,res,cp){
+  ## Initialization
+  resolution <- as.numeric(res)
+  principal_component <- as.numeric(cp)
+  mean_silhouette_score = c()
+  K = c()
+  table_silhouette = c()
+  silhouette = c()
+  clusters = c()
+
+  ## Compute the silhouette score, with res = clustering resolution and cp = number of principal components
+  seu_obj@meta.data[["seurat_clusters"]] = c()
+  seu_obj <- FindClusters(seu_obj, resolution = resolution,verbose = FALSE)
+  seu_obj <- RunUMAP(seu_obj, dims = 1:principal_component,verbose = FALSE)
+  distance_matrix <- dist(Embeddings(seu_obj[['umap']])[, 1:2])
+  clusters <- seu_obj@meta.data[["seurat_clusters"]]
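+  # silhouette() is provided by the `cluster` package loaded in the header; it
+  # scores each cell against its assigned cluster using the 2D UMAP distances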
+  table_silhouette <- silhouette(as.numeric(clusters), dist = distance_matrix)
+  silhouette <- table_silhouette
+
+  seu_obj@meta.data[["silhouette_score"]] <- silhouette[,3]
+
+  mean_silhouette_score <- mean(seu_obj@meta.data[["silhouette_score"]],na.rm=TRUE)
+
+  #Compute a UMAP with the silhouette score of each individual cell
+  jpeg("silhouette_score_umap.jpg", width = 600, height = 350)
+  show(FeaturePlot(seu_obj, features = c("silhouette_score"),label = TRUE,label.size = 6 , pt.size = 0.5,repel = TRUE,cols = c("green", "red"))+ ggtitle(' Silhouette index'))
+  dev.off()
+
+
+  #Create the silhouette plot
+  p <- seu_obj@meta.data %>%
+    mutate(barcode = rownames(.)) %>%
+    arrange(seurat_clusters,-silhouette_score) %>%
+    mutate(barcode = factor(barcode, levels = barcode)) %>%
+    ggplot() +
+    geom_col(aes(barcode, silhouette_score, fill = seurat_clusters), show.legend = FALSE) +
+    geom_hline(yintercept = mean_silhouette_score, color = 'red', linetype = 'dashed') +
+    scale_x_discrete(name = 'Cells') +
+    scale_y_continuous(name = 'Silhouette score') +
+    scale_fill_manual(values = custom_colors$discrete) +
+    theme_bw() +
+    theme(axis.title.x = element_blank(),axis.text.x = element_blank(), axis.ticks.x = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank() )
+
+
+  jpeg("silhouette_plot.jpg", width = 600, height = 350)
+
+  show(p)
+  dev.off()
+  ## Compute the % of cells with a negative silhouette score
+  N_negative = sum(seu_obj@meta.data[["silhouette_score"]]<0)
+  N_tot = sum(seu_obj@meta.data[["silhouette_score"]]!=0)
+  percent_negative_sil = (N_negative/N_tot)*100
+  paste0( percent_negative_sil, " % Cells with negative score")
+
+
+} #end function
+
+
+
+
+
+
+
+UMAPViz <- function(seu_obj,cp){
+  principal_component <- as.numeric(cp)
+
+  seu_obj <- RunUMAP(seu_obj, dims = 1:principal_component,verbose=FALSE)
+
+
+  # note that you can set `label = TRUE` or use the LabelClusters function to help label
+  # individual clusters
+  jpeg("UMAP.jpg", width = 600, height = 350)
+  show(DimPlot(seu_obj, reduction = "umap",label = TRUE,label.size = 6 ,cols = custom_colors$discrete)+ theme(legend.text=element_text(size=15)))
+  dev.off()
+
+
+  return(seu_obj)
+
+} #end function
+
+
+
+
+extrac_cpdb <- function(seu_obj){
+
+  seu_obj@meta.data$seurat_annotations <- Idents(seu_obj)
+  write.table(as.matrix(seu_obj@assays$RNA@data), 'cellphonedb_count.txt', sep='\t', quote=F)
+  meta_data <- cbind(rownames(seu_obj@meta.data), seu_obj@meta.data[,'seurat_annotations', drop=F])
+  meta_data <- as.matrix(meta_data)
+  meta_data[is.na(meta_data)] = "Unknown" # the cell type must not be NA
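+  # usage note (an assumption, depending on the installed CellPhoneDB version):
+  # the count and meta files produced here are the CellPhoneDB v2 inputs, e.g.
+  #   cellphonedb method statistical_analysis cellphonedb_meta.txt cellphonedb_count.txt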
+
+  write.table(meta_data, 'cellphonedb_meta.txt', sep='\t', quote=F, row.names=F)
+
+
+
+
+} #end function
+
+
+
+MarkerGene <- function(seu_obj){
+
+  VlnPlot(seu_obj, features = c("CHGA","CHGB","CD68","CD3D","CD3E","CD3G","CD19","MS4A1","TEK","KDR","PDGFRB","RGS5","S100B","POMC","PRL","SOX2"))
+
+
+} #end function
+
diff --git a/src/Rscript/scQualityControlR.R b/src/Rscript/scQualityControlR.R
new file mode 100755
index 0000000000000000000000000000000000000000..0d151b6e39a0e4055c27dd35fdca0f304742b14a
--- /dev/null
+++ b/src/Rscript/scQualityControlR.R
@@ -0,0 +1,53 @@
+#!/usr/bin/Rscript
+
+source("/scratch/Bio/mlepetit/nextflow/src/function.R")
+
+
+args = commandArgs(trailingOnly=TRUE)
+
+dir = as.character(args[1])
+t2g_file = as.character(args[2])
+percent_mt = as.numeric(args[3])
+nfeature = as.numeric(args[4])
+
+n_doublet_exp = as.numeric(args[5])
+
+
+
+print("Read KB Output")
+
+tumor = read_count_output(dir)
+
+print("Remove empty droplets")
+tumor = RemoveEmptyDrop(tumor)
+
+print("Match Ensembl ID / Gene symbol")
+tumor = MatchGeneSymbol(t2g_file,tumor)
+
+print("Create Seurat Object")
+seu_object_tumor <- Create_Seurat_Object(tumor)
+rm(tumor)
+
+print("Compute QC metrics")
+seu_object_tumor <- Calc_QC_metrics(seu_object_tumor)
+
+print("Remove low quality libraries")
+seu_object_tumor <- RemoveLowQuality(seu_object_tumor,nfeature,percent_mt)
+
+print("Run optimization for pK value")
+seu_object_tumor <- PreRemoveDoublet(seu_object_tumor)
+
+print("Run DoubletFinder")
+seu_object_tumor <- Run_doublet_finder(seu_object_tumor,n_doublet_exp)
+
+print("Remove empty features")
+seu_object_tumor <- RemoveEmptyFeature(seu_object_tumor)
+dim(seu_object_tumor)
+
+print("Save Object")
+name_obj = "Seurat_filtered_obj"
+save_RDS(seu_object_tumor,name_obj)
+
+print("Extract Count Data")
+Extract_data(seu_object_tumor)
+
diff --git a/src/Rscript/scVizualisationR.R b/src/Rscript/scVizualisationR.R
new file mode 100755
index 0000000000000000000000000000000000000000..a830dcf8d155f6ce98013864c8c355b001f93d0f
--- /dev/null
+++ b/src/Rscript/scVizualisationR.R
@@ -0,0 +1,47 @@
+#!/usr/bin/Rscript
+
+source("/scratch/Bio/mlepetit/nextflow/src/function.R")
+
+args = commandArgs(trailingOnly=TRUE)
+
+norm_matrix = args[1]
+rds_obj = args[2]
+cp = args[3]
+res = args[4]
+
+print("Load seurat object")
+seu_obj <- read_RDS(rds_obj)
+
+print("Set normalized matrix in the seurat object")
+seu_obj <- SetSanityMatrix(seu_obj,norm_matrix)
+
+print("Select Highly Variable Features")
+seu_obj <- VariabeFeature(seu_obj)
+
+print("Center, scale and run PCA")
+seu_obj <- RunCenterScalePCA(seu_obj)
+
+print("Choose component")
+ChooseComponent(seu_obj)
+
+
+print("Viz Feature")
+seu_obj <- VizFeatureCP(seu_obj,cp)
+
+print("Compute silhouette and number of clusters as a function of resolution")
+mean_silhouette(seu_obj)
+
+print("KNN Graph Clustering and Louvain optimisation")
+seu_obj <- GraphClusteringLouvain(seu_obj,res,cp)
+
+print("Create Silhouette plot")
+SilPlot(seu_obj,res,cp)
+
+print("Compute UMAP")
+seu_obj <- UMAPViz(seu_obj,cp)
+
+
+print("Save Object")
+name = "Seurat_fin_obj"
+save_RDS(seu_obj,name)
+
diff --git a/src/in2p3.pbs b/src/in2p3.pbs
new file mode 100644
index 0000000000000000000000000000000000000000..7f929a7efa4aca6eb47b4114a97eed0912d2bdbb
--- /dev/null
+++ b/src/in2p3.pbs
@@ -0,0 +1,31 @@
+#!/usr/local/bin/bash -l
+#####################################
+# job script example with GE options
+#####################################
+#$ -q demon
+#$ -l demon=1
+#$ -P P_lbmc
+#$ -N nf_pipe
+#$ -o /sps/lbmc/lmodolo/logs/     # change to your username !
+#$ -e /sps/lbmc/lmodolo/logs/     # change to your username !
+#$ -r n                           # relaunch y/n
+#$ -M laurent.modolo@ens-lyon.fr  # change to your mail !
+#$ -m be      ## send an email when the job starts and ends
+#$ -l os=cl7  ## choose OS
+#$ -l sps=1   ## access the /sps directory
+#####################################
+
+NF_VERSION=19.10
+NF=/pbs/throng/lbmc/cl7/nextflow/${NF_VERSION}/nextflow
+# change to your username
+SCRATCH=/sps/lbmc/lmodolo/
+# change to your project / pipeline !
+PIPELINE=${SCRATCH}/nextflow/src/training_dataset.nf
+# change to your project / pipeline !
+CONFIG=${SCRATCH}/nextflow/src/training_dataset.config
+
+${NF} ${PIPELINE} -c ${CONFIG} -profile ccin2p3 \
+--fasta "${SCRATCH}/nextflow/data/tiny_dataset/fasta/tiny_v2.fasta" \
+--fastq_single "${SCRATCH}/nextflow/data/tiny_dataset/fastq/tiny2_S.fastq.gz" \
+--chromosome "X" --start 5305683 --stop 5333928 -resume \
+-w "${SCRATCH}"
diff --git a/src/install_nextflow.sh b/src/install_nextflow.sh
new file mode 100755
index 0000000000000000000000000000000000000000..272c24929f856970e80f8cd8739fea8754ae9f6c
--- /dev/null
+++ b/src/install_nextflow.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+java -version
+curl -s https://get.nextflow.io | bash
diff --git a/src/launch_analysis.nf b/src/launch_analysis.nf
new file mode 100644
index 0000000000000000000000000000000000000000..324cfc7a48c775a3fd8367de39e82ae2ecaeba77
--- /dev/null
+++ b/src/launch_analysis.nf
@@ -0,0 +1,105 @@
+nextflow.enable.dsl=2
+
+
+
+
+params.fasta = "/Xnfs/lbmcdb/common/Genomes/human/GRCh38/EnsEMBL/transindex/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
+params.fastq = ""
+params.gtf = "/Xnfs/lbmcdb/common/Genomes/human/GRCh38/EnsEMBL/transindex/Homo_sapiens.GRCh38.99.gtf.gz"
+params.transcript_to_gene = ""
+params.whitelist = "/scratch/Bio/mlepetit/nextflow/data/whithelist/10x_V3_barcode_whitelist.txt"
+params.config = ""
+params.mit = 25
+params.nexp = 0.023
+params.cp = 10
+params.reso = 0.01
+params.skip = false
+params.nfeature = 500
+
+
+
+include {tr2g;index_fasta_velocity_default;count_velocity } from "./nf_modules/kb/main.nf"
+include {rfiltr } from "./nf_modules/rfiltration/main.nf" addParams(mit: params.mit, nfeature: params.nfeature, nexp: params.nexp)
+include {normalization_sanity } from "./nf_modules/sanity/main.nf"
+include {rviz } from "./nf_modules/rvisualization/main.nf" addParams(cp: params.cp, reso: params.reso)
+include { fastp } from "./nf_modules/fastp/main.nf" addParams( qualified_quality_phred: 30)
+
+log.info "fastq files (--fastq): ${params.fastq}"
+log.info "fasta file (--fasta): ${params.fasta}"
+log.info "gtf file (--gtf): ${params.gtf}"
+log.info "transcript_to_gene file (--transcript_to_gene): ${params.transcript_to_gene}"
+log.info "whitelist file (--whitelist): ${params.whitelist}"
+log.info "config file (--config): ${params.config}"
+log.info "Mitochondrial cutoff (--mit): ${params.mit}"
+log.info "Minimum number of features per cell cutoff (--nfeature): ${params.nfeature}"
+log.info "N doublet expect (--nexp): ${params.nexp}"
+log.info "Principal Component for analysis (--cp): ${params.cp}"
+log.info "Resolution for clustering (--reso): ${params.reso}"
+
+
+channel
+  .fromFilePairs( params.fastq, size: -1)
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files }
+
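+// the reference channels below are wrapped as [simpleName, file] tuples so
+// that downstream processes receive a matching identifier with every file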
+channel + .fromPath( params.fasta ) + .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" } + .map { it -> [it.simpleName, it]} + .set { fasta_files } + +channel + .fromPath( params.gtf ) + .ifEmpty { error "Cannot find any gtf files matching: ${params.gtf}" } + .map { it -> [it.simpleName, it]} + .set { gtf_files } + + +if (params.whitelist == "") { + channel.empty() + .set { whitelist_files } +} else { + channel + .fromPath( params.whitelist ) + .map { it -> [it.simpleName, it]} + .set { whitelist_files } +} + +if (params.config == "") { + channel.empty() + .set { config_files } +} else { + channel + .fromPath( params.config ) + .map { it -> [it.simpleName, it]} + .set { config_files } +} + + + +workflow { +switch(params.skip) { + case true: + fastp(fastq_files) + tr2g(gtf_files) + index_fasta_velocity_default(fasta_files, gtf_files,tr2g.out.t2g) + count_velocity(index_fasta_velocity_default.out.index,fastp.out.fastq,index_fasta_velocity_default.out.t2g, whitelist_files,config_files) + rfiltr(count_velocity.out.counts,tr2g.out.t2g) + normalization_sanity(rfiltr.out.raw_filtered_mtx) + break; + + default: +fastp(fastq_files) +tr2g(gtf_files) + index_fasta_velocity_default(fasta_files, gtf_files,tr2g.out.t2g) + count_velocity(index_fasta_velocity_default.out.index,fastp.out.fastq,index_fasta_velocity_default.out.t2g, whitelist_files,config_files) + rfiltr(count_velocity.out.counts,tr2g.out.t2g) + normalization_sanity(rfiltr.out.raw_filtered_mtx) + rviz(normalization_sanity.out.normalize_filtered_mtx,rfiltr.out.seu_obj) + break; + } +} + + + + diff --git a/src/nextflow.config b/src/nextflow.config new file mode 100644 index 0000000000000000000000000000000000000000..ed5ef2293df3c247425c7bf3e31a864e7498a5b2 --- /dev/null +++ b/src/nextflow.config @@ -0,0 +1,176 @@ +nextflowVersion = '>=20' + +manifest { + homePage = 'https://gitbio.ens-lyon.fr/LBMC/nextflow' + description = 'pipeline to ' + mainScript = 'main.nf' + version = '0.0.0' +} + +report { + enabled = true + file = "$baseDir/../results/report.html" +} + +profiles { + docker { + docker.temp = "auto" + docker.enabled = true + process { + errorStrategy = 'finish' + memory = '16GB' + withLabel: big_mem_mono_cpus { + cpus = 1 + } + withLabel: big_mem_multi_cpus { + cpus = 4 + } + withLabel: small_mem_mono_cpus { + cpus = 1 + memory = '2GB' + } + withLabel: small_mem_multi_cpus { + cpus = 4 + memory = '2GB' + } + } + } + podman { + podman.enabled = true + process { + errorStrategy = 'finish' + memory = '16GB' + withLabel: big_mem_mono_cpus { + cpus = 1 + } + withLabel: big_mem_multi_cpus { + cpus = 4 + } + withLabel: small_mem_mono_cpus { + cpus = 1 + memory = '2GB' + } + withLabel: small_mem_multi_cpus { + cpus = 4 + memory = '2GB' + } + } + } + singularity { + singularity.enabled = true + singularity.cacheDir = "./bin/" + process { + errorStrategy = 'finish' + memory = '16GB' + withLabel: big_mem_mono_cpus { + cpus = 1 + } + withLabel: big_mem_multi_cpus { + cpus = 4 + } + withLabel: small_mem_mono_cpus { + cpus = 1 + memory = '2GB' + } + withLabel: small_mem_multi_cpus { + cpus = 4 + memory = '2GB' + } + } + } + psmn { + singularity.enabled = true + singularity.cacheDir = "/Xnfs/abc/singularity/" + singularity.runOptions = "--bind /Xnfs,/scratch" + process{ + maxRetries = 1 + withLabel: big_mem_mono_cpus { + executor = "sge" + clusterOptions = "-cwd -V" + cpus = 1 + memory = "128GB" + time = "12h" + queue = "monointeldeb128,monointeldeb192" + } + withLabel: big_mem_multi_cpus { + executor = "sge" + clusterOptions = 
"-cwd -V" + cpus = 32 + memory = "192GB" + time = "24h" + queue = "CLG*,SLG*,Epyc*" + penv = "openmp32" + + } + withLabel: small_mem_mono_cpus { + executor = "sge" + clusterOptions = "-cwd -V" + cpus = 1 + memory = "16GB" + time = "12h" + queue = "monointeldeb128,monointeldeb192" + } + withLabel: small_mem_multi_cpus { + executor = "sge" + clusterOptions = "-cwd -V" + cpus = 32 + memory = "16GB" + time = "24h" + queue = "CLG*,SLG*,Epyc*" + penv = "openmp32" + + } + } + } + ccin2p3 { + singularity.enabled = true + singularity.cacheDir = "$baseDir/.singularity_in2p3/" + singularity.runOptions = "--bind /pbs,/sps,/scratch" + process{ + errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' } + maxRetries = 3 + withLabel: big_mem_mono_cpus { + scratch = true + stageInMode = "copy" + stageOutMode = "rsync" + executor = "sge" + clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n" + cpus = 1 + memory = "8GB" + queue = "huge" + } + withLabel: big_mem_multi_cpus { + container = "lbmc/urqt:d62c1f8" + scratch = true + stageInMode = "copy" + stageOutMode = "rsync" + executor = "sge" + clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n" + cpus = 1 + memory = "8GB" + queue = "huge" + } + withLabel: small_mem_mono_cpus { + scratch = true + stageInMode = "copy" + stageOutMode = "rsync" + executor = "sge" + clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n" + cpus = 1 + memory = "8GB" + queue = "huge" + } + withLabel: small_mem_multi_cpus { + container = "lbmc/urqt:d62c1f8" + scratch = true + stageInMode = "copy" + stageOutMode = "rsync" + executor = "sge" + clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n" + cpus = 1 + memory = "8GB" + queue = "huge" + } + } + } +} diff --git a/src/nextflow_src/example_chipseq.nf b/src/nextflow_src/example_chipseq.nf new file mode 100644 index 0000000000000000000000000000000000000000..454617c9c6fa21b9cdd2c8fd378bbcd7c60ea42e --- /dev/null +++ b/src/nextflow_src/example_chipseq.nf @@ -0,0 +1,123 @@ + +nextflow.enable.dsl=2 + +include { + fastp +} from './nf_modules/fastp/main' + +workflow csv_parsing { + if (params.csv_path.size() > 0) { + log.info "loading local csv files" + Channel + .fromPath(params.csv_path, checkIfExists: true) + .ifEmpty { error + log.error """ + ============================================================= + WARNING! No csv input file precised. + Use '--csv_path <file.csv>' + Or '--help' for more informations + ============================================================= + """ + } + .splitCsv(header: true, sep: ";", strip: true) + .flatMap{ + it -> [ + [(it.IP + it.WCE).md5(), "IP", "w", file(it.IP)], + [(it.IP + it.WCE).md5(), "WCE", "w", file(it.WCE)] + ] + } + .map{ it -> + if (it[1] instanceof List){ + it + } else { + [it[0], [it[1]], it[2], it[3], [it[4]]] + } + } + .map{ + it -> + if (it[1].size() == 2){ // if data are paired_end + [ + "index": it[0], + "group": ref_order(it), + "ip": it[2], + "type": it[3], + "id": read_order(it)[0], + "file": read_order(it) + ] + } else { + [ + "index": it[0], + "group": it[1].simpleName, + "ip": it[2], + "type": it[3], + "id": it[4].simpleName, + "file": [it[4].simpleName, it[4]] + ] + } + } + .set{input_csv} + } else { + log.info "loading remotes SRA csv files" + Channel + .fromPath(params.csv_sra, checkIfExists: true) + .ifEmpty { error + log.error """ + ============================================================= + WARNING! No csv input file precised. 
+          Use '--csv_path <file.csv>' or
+          Use '--csv_SRA <file.csv>'
+          Or '--help' for more information
+          =============================================================
+        """
+      }
+      .splitCsv(header: true, sep: ";", strip: true)
+      .flatMap{
+        it -> [
+          [[it.IP_w + it.WCE_w + it.IP_m + it.WCE_m], it.IP_w, "IP", "w", it.IP_w],
+          [[it.IP_w + it.WCE_w + it.IP_m + it.WCE_m], it.IP_w, "WCE", "w", it.WCE_w],
+          [[it.IP_w + it.WCE_w + it.IP_m + it.WCE_m], it.IP_w, "IP", "m", it.IP_m],
+          [[it.IP_w + it.WCE_w + it.IP_m + it.WCE_m], it.IP_w, "WCE", "m", it.WCE_m]
+        ]
+      }
+      .map{
+        it ->
+        if (it[1].size() == 2){ // if data are paired_end
+          [
+            "index": (
+              it[0][0][0].simpleName +
+              it[0][0][1].simpleName +
+              it[0][0][2].simpleName +
+              it[0][0][3].simpleName
+            ).md5(),
+            "group": it[1][0].simpleName,
+            "ip": it[2],
+            "type": it[3],
+            "id": it[4][0].simpleName[0..-4],
+            "file": [it[4][0].simpleName[0..-4], it[4]]
+          ]
+        } else {
+          [
+            "index": (
+              it[0][0].simpleName +
+              it[0][1].simpleName +
+              it[0][2].simpleName +
+              it[0][3].simpleName
+            ).md5(),
+            "group": it[1].simpleName,
+            "ip": it[2],
+            "type": it[3],
+            "id": it[4].simpleName,
+            "file": [it[4].simpleName, it[4]]
+          ]
+        }
+      }
+      .set{input_csv}
+  }
+  emit:
+  input_csv
+}
+
+
+workflow {
+
+}
\ No newline at end of file
diff --git a/src/nextflow_src/example_marseq.nf b/src/nextflow_src/example_marseq.nf
new file mode 100644
index 0000000000000000000000000000000000000000..821ebd99d45e9810b2a5fc3546e720d417666750
--- /dev/null
+++ b/src/nextflow_src/example_marseq.nf
@@ -0,0 +1,87 @@
+nextflow.enable.dsl=2
+
+/*
+Testing pipeline for marseq scRNASeq analysis
+*/
+
+include { adaptor_removal} from "./nf_modules/cutadapt/main.nf"
+include {
+  index_fasta;
+  count;
+  index_fasta_velocity;
+  count_velocity
+} from "./nf_modules/kb/main.nf" addParams(
+  kb_protocol: "marsseq",
+  count_out: "quantification/",
+  count_velocity_out: "quantification_velocity/"
+)
+
+params.fasta = "http://ftp.ensembl.org/pub/release-94/fasta/gallus_gallus/dna/Gallus_gallus.Gallus_gallus-5.0.dna.toplevel.fa.gz"
+params.fastq = "data/CF42_45/*/*R{1,2}.fastq.gz"
+params.gtf = "http://ftp.ensembl.org/pub/release-94/gtf/gallus_gallus/Gallus_gallus.Gallus_gallus-5.0.94.gtf.gz"
+params.transcript_to_gene = ""
+params.whitelist = "data/expected_whitelist.txt"
+params.config = "data/marseq_flexi_splitter.yaml"
+params.workflow_type = "classic"
+
+log.info "fastq files (--fastq): ${params.fastq}"
+log.info "fasta file (--fasta): ${params.fasta}"
+log.info "gtf file (--gtf): ${params.gtf}"
+log.info "transcript_to_gene file (--transcript_to_gene): ${params.transcript_to_gene}"
+log.info "whitelist file (--whitelist): ${params.whitelist}"
+log.info "config file (--config): ${params.config}"
+
+channel
+  .fromFilePairs( params.fastq, size: -1)
+  .set { fastq_files }
+channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+  .map { it -> [it.simpleName, it]}
+  .set { fasta_files }
+channel
+  .fromPath( params.gtf )
+  .ifEmpty { error "Cannot find any gtf files matching: ${params.gtf}" }
+  .map { it -> [it.simpleName, it]}
+  .set { gtf_files }
+if (params.whitelist == "") {
+  channel.empty()
+    .set { whitelist_files }
+} else {
+  channel
+    .fromPath( params.whitelist )
+    .map { it -> [it.simpleName, it]}
+    .set { whitelist_files }
+}
+channel
+  .fromPath( params.config )
+  .ifEmpty { error "Cannot find any config files matching: ${params.config}" }
+  .map { it -> [it.simpleName, it]}
+  .set { config_files }
+
+workflow {
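+  // flow of this example: remove adapters, then quantify with kb; with
+  // params.workflow_type == "classic" plain gene counts are produced, any
+  // other value runs the RNA-velocity variant (spliced/unspliced matrices)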
+  adaptor_removal(fastq_files)
+  if (params.workflow_type == "classic") {
+    index_fasta(
+      fasta_files,
+      gtf_files
+    )
+    count(
+      index_fasta.out.index,
+      adaptor_removal.out.fastq,
+      index_fasta.out.t2g, whitelist_files,config_files
+    )
+  } else {
+    index_fasta_velocity(
+      fasta_files,
+      gtf_files
+    )
+    count_velocity(
+      index_fasta_velocity.out.index,
+      adaptor_removal.out.fastq,
+      index_fasta_velocity.out.t2g,
+      whitelist_files,
+      config_files
+    )
+  }
+}
diff --git a/src/nextflow_src/example_variant_calling.nf b/src/nextflow_src/example_variant_calling.nf
new file mode 100644
index 0000000000000000000000000000000000000000..5d793ed4898ac89753b2188f252403f903182043
--- /dev/null
+++ b/src/nextflow_src/example_variant_calling.nf
@@ -0,0 +1,36 @@
+nextflow.enable.dsl=2
+
+/*
+Testing pipeline for variant calling analysis
+*/
+
+include {
+  mapping;
+} from "./nf_modules/bwa/main.nf"
+
+include {
+  sort_bam;
+} from "./nf_modules/samtools/main.nf"
+
+include {
+  germline_cohort_data_variant_calling;
+} from "./nf_modules/gatk4/main.nf" addParams(
+  variant_calling_out: "vcf/",
+)
+
+params.fastq = ""
+params.fasta = ""
+
+channel
+  .fromFilePairs( params.fastq, size: -1)
+  .set { fastq_files }
+channel
+  .fromPath( params.fasta )
+  .map { it -> [it.simpleName, it]}
+  .set { fasta_files }
+
+workflow {
+  mapping(fasta_files, fastq_files)
+  sort_bam(mapping.out.bam)
+  germline_cohort_data_variant_calling(sort_bam.out.bam, fasta_files)
+}
diff --git a/src/nextflow_src/fasta_sampler.nf b/src/nextflow_src/fasta_sampler.nf
new file mode 100644
index 0000000000000000000000000000000000000000..d1200ed496c77756cde525835f581b71b2528990
--- /dev/null
+++ b/src/nextflow_src/fasta_sampler.nf
@@ -0,0 +1,18 @@
+Channel
+  .fromPath( "data/tiny_dataset/fasta/*.fasta" )
+  .set { fasta_file }
+
+process sample_fasta {
+  publishDir "results/sampling/", mode: 'copy'
+
+  input:
+file fasta from fasta_file
+
+  output:
+file "*_sample.fasta" into fasta_sample
+
+  script:
+"""
+head ${fasta} > ${fasta.baseName}_sample.fasta
+"""
+}
diff --git a/src/nextflow_src/filtr.nf b/src/nextflow_src/filtr.nf
new file mode 100644
index 0000000000000000000000000000000000000000..b941237d9b25fa748c2289df57334ba571df5d68
--- /dev/null
+++ b/src/nextflow_src/filtr.nf
@@ -0,0 +1,34 @@
+nextflow.enable.dsl=2
+
+params.spliced_mtx = "results/Gon_M1_S1_R"
+params.t2g = "results/Gon_M1_S1_R/t2g.txt"
+params.mit = 25
+params.nexp = 0.023
+
+log.info "Spliced matrix (--spliced_mtx): ${params.spliced_mtx}"
+log.info "t2g file (--t2g): ${params.t2g}"
+log.info "Mitochondrial cutoff (--mit): ${params.mit}"
+log.info "N doublet expect (--nexp): ${params.nexp}"
+
+
+channel
+  .fromPath( params.spliced_mtx )
+  .ifEmpty { error "Cannot find any Spliced matrix files matching: ${params.spliced_mtx}" }
+  .map { it -> [it.simpleName, it]}
+  .set { spliced_mtx }
+
+channel
+  .fromPath( params.t2g )
+  .ifEmpty { error "Cannot find any t2g files matching: ${params.t2g}" }
+  .map { it -> [it.simpleName, it]}
+  .set { t2g }
+
+
+include {rfiltr } from "./nf_modules/rfiltration/main.nf" addParams(mit: params.mit, nexp: params.nexp)
+
+workflow {
+
+  rfiltr(spliced_mtx,t2g)
+
+}
+
diff --git a/src/nextflow_src/solution_RNASeq.nf b/src/nextflow_src/solution_RNASeq.nf
new file mode 100644
index 0000000000000000000000000000000000000000..23adf0998d2c6d2dfc60697ab140b8b0ebb16ba0
--- /dev/null
+++ b/src/nextflow_src/solution_RNASeq.nf
@@ -0,0 +1,34 @@
+nextflow.enable.dsl=2
+
+include { fastp } from "./nf_modules/fastp/main.nf"
+include { fasta_from_bed } from
"./nf_modules/bedtools/main.nf" +include { index_fasta; mapping_fastq } from './nf_modules/kallisto/main.nf' addParams(mapping_fastq_out: "quantification/") + + +params.fastq = "data/fastq/*_{1,2}.fastq" + +log.info "fastq files: ${params.fastq}" +log.info "fasta file : ${params.fasta}" +log.info "bed file : ${params.bed}" + +channel + .fromFilePairs( params.fastq, size: -1) + .set { fastq_files } + +channel + .fromPath( params.fasta ) + .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" } + .map { it -> [it.simpleName, it]} + .set { fasta_files } +channel + .fromPath( params.bed ) + .ifEmpty { error "Cannot find any bed files matching: ${params.bed}" } + .map { it -> [it.simpleName, it]} + .set { bed_files } + +workflow { + fastp(fastq_files) + fasta_from_bed(fasta_files, bed_files) + index_fasta(fasta_from_bed.out.fasta) + mapping_fastq(index_fasta.out.index.collect(), fastp.out.fastq) +} diff --git a/src/nextflow_src/test_pipeline.nf b/src/nextflow_src/test_pipeline.nf new file mode 100644 index 0000000000000000000000000000000000000000..6f59a2c6b8eabee0cf296913f2ea27ac691e9db6 --- /dev/null +++ b/src/nextflow_src/test_pipeline.nf @@ -0,0 +1,81 @@ +nextflow.enable.dsl=2 + + +params.fasta = "/Xnfs/lbmcdb/common/Genomes/human/GRCh38/EnsEMBL/transindex/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz" +params.fastq = "" +params.gtf = "/Xnfs/lbmcdb/common/Genomes/human/GRCh38/EnsEMBL/transindex/Homo_sapiens.GRCh38.99.gtf.gz" +params.transcript_to_gene = "" +params.whitelist = "/Xnfs/abc/whitelist/10x_V3_barcode_whitelist.txt.gz" +params.config = "" +params.mit=25 +params.nexp=0.023 +params.cp=4 +params.reso=0.01 + +include {tr2g;index_fasta_velocity_default;count_velocity } from "./nf_modules/kb/main.nf" +include {rfiltr } from "./nf_modules/rfiltration/main.nf" addParams(mit: params.mit, nexp: params.nexp) +include {normalization_sanity } from "./nf_modules/sanity/main.nf" +include {rviz } from "./nf_modules/rvisualization/main.nf" addParams(cp: params.cp, reso: params.reso) + + + +log.info "fastq files (--fastq): ${params.fastq}" +log.info "fasta file (--fasta): ${params.fasta}" +log.info "gtf file (--gtf): ${params.gtf}" +log.info "transcript_to_gene file (--transcript_to_gene): ${params.transcript_to_gene}" +log.info "whitelist file (--whitelist): ${params.whitelist}" +log.info "config file (--config): ${params.config}" +log.info "Mitochondrial cutoff (--mit): ${params.mit}" +log.info "N doublet expect (--nexp): ${params.nexp}" +log.info "Principal Component for analysis (--cp): ${params.cp}" +log.info "Resolution for clustering (--reso): ${params.reso}" + +channel + .fromFilePairs( params.fastq, size: -1) + .set { fastq_files } + +channel + .fromPath( params.fasta ) + .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" } + .map { it -> [it.simpleName, it]} + .set { fasta_files } + +channel + .fromPath( params.gtf ) + .ifEmpty { error "Cannot find any gtf files matching: ${params.gtf}" } + .map { it -> [it.simpleName, it]} + .set { gtf_files } + + +if (params.whitelist == "") { + channel.empty() + .set { whitelist_files } +} else { + channel + .fromPath( params.whitelist ) + .map { it -> [it.simpleName, it]} + .set { whitelist_files } +} + +if (params.config == "") { + channel.empty() + .set { config_files } +} else { + channel + .fromPath( params.config ) + .map { it -> [it.simpleName, it]} + .set { config_files } +} + + + +workflow { + tr2g(gtf_files) + index_fasta_velocity_default(fasta_files, gtf_files,tr2g.out.t2g) + 
+  count_velocity(index_fasta_velocity_default.out.index,fastq_files,index_fasta_velocity_default.out.t2g, whitelist_files,config_files)
+
+  rfiltr(count_velocity.out.counts,tr2g.out.t2g)
+  normalization_sanity(rfiltr.out.raw_filtered_mtx)
+  rviz(normalization_sanity.out.normalize_filtered_mtx,rfiltr.out.seu_obj)
+}
+
diff --git a/src/nextflow_src/test_rcluviz.nf b/src/nextflow_src/test_rcluviz.nf
new file mode 100644
index 0000000000000000000000000000000000000000..4b379875d5a39540d6b6ebd1ebd3f6e32edd292c
--- /dev/null
+++ b/src/nextflow_src/test_rcluviz.nf
@@ -0,0 +1,34 @@
+nextflow.enable.dsl=2
+
+params.norm_mtx = ""
+params.seu_obj = ""
+params.cp = 4
+params.reso = 0.01
+
+log.info "Normalized matrix (--norm_mtx): ${params.norm_mtx}"
+log.info "Seurat object file (--seu_obj): ${params.seu_obj}"
+log.info "Principal Component for analysis (--cp): ${params.cp}"
+log.info "Resolution for clustering (--reso): ${params.reso}"
+
+channel
+  .fromPath( params.norm_mtx )
+  .ifEmpty { error "Cannot find any Spliced matrix files matching: ${params.norm_mtx}" }
+  .map { it -> [it.simpleName, it]}
+  .set { norm_mtx }
+
+channel
+  .fromPath( params.seu_obj )
+  .ifEmpty { error "Cannot find any t2g files matching: ${params.seu_obj}" }
+  .map { it -> [it.simpleName, it]}
+  .set { seu_obj }
+
+
+include {rviz } from "./nf_modules/rvisualization/main.nf" addParams(cp: params.cp, reso: params.reso)
+
+workflow {
+
+  rviz(norm_mtx,seu_obj)
+
+}
+
+
diff --git a/src/nextflow_src/test_rpip.nf b/src/nextflow_src/test_rpip.nf
new file mode 100644
index 0000000000000000000000000000000000000000..01b3fed88cef45f84b1da46b6d3b8dbac41fc1e8
--- /dev/null
+++ b/src/nextflow_src/test_rpip.nf
@@ -0,0 +1,45 @@
+nextflow.enable.dsl=2
+
+
+params.spliced_mtx = "results/Gon_M1_S1_R"
+params.t2g = "/scratch/Bio/mlepetit/nextflow/results/Gon_M1_S1_R/t2g.txt"
+params.mit = 25
+params.nexp = 0.023
+params.cp = 4
+params.reso = 0.01
+
+log.info "Spliced matrix (--spliced_mtx): ${params.spliced_mtx}"
+log.info "t2g file (--t2g): ${params.t2g}"
+log.info "Mitochondrial cutoff (--mit): ${params.mit}"
+log.info "N doublet expect (--nexp): ${params.nexp}"
+log.info "Principal Component for analysis (--cp): ${params.cp}"
+log.info "Resolution for clustering (--reso): ${params.reso}"
+
+channel
+  .fromPath( params.spliced_mtx )
+  .ifEmpty { error "Cannot find any Spliced matrix files matching: ${params.spliced_mtx}" }
+  .map { it -> [it.simpleName, it]}
+  .set { spliced_mtx }
+
+channel
+  .fromPath( params.t2g )
+  .ifEmpty { error "Cannot find any t2g files matching: ${params.t2g}" }
+  .map { it -> [it.simpleName, it]}
+  .set { t2g }
+
+
+
+include {rfiltr } from "./nf_modules/rfiltration/main.nf" addParams(mit: params.mit, nexp: params.nexp)
+include {normalization_sanity } from "./nf_modules/sanity/main.nf"
+include {rviz } from "./nf_modules/rvisualization/main.nf" addParams(cp: params.cp, reso: params.reso)
+
+
+workflow {
+
+  rfiltr(spliced_mtx,t2g)
+  normalization_sanity(rfiltr.out.raw_filtered_mtx)
+  rviz(normalization_sanity.out.normalize_filtered_mtx,rfiltr.out.seu_obj)
+
+
+}
+
diff --git a/src/nextflow_src/test_sanity.nf b/src/nextflow_src/test_sanity.nf
new file mode 100644
index 0000000000000000000000000000000000000000..1a7a64108b0c78c8ebfc12bdbd59e5cbef747f44
--- /dev/null
+++ b/src/nextflow_src/test_sanity.nf
@@ -0,0 +1,27 @@
+nextflow.enable.dsl=2
+
+params.raw_filtered_mtx = ""
+
+log.info "Raw filtered matrix (--raw_filtered_mtx): ${params.raw_filtered_mtx}"
+
+
+channel
+  .fromPath( params.raw_filtered_mtx )
+  .ifEmpty { error "Cannot find any Spliced matrix files matching: ${params.raw_filtered_mtx}" }
+  .map { it -> [it.simpleName, it]}
+  .set { raw_filtered_mtx }
+
+
+
+include {normalization_sanity } from "./nf_modules/sanity/main.nf"
+
+
+workflow {
+
+  normalization_sanity(raw_filtered_mtx)
+
+}
+
+
+
+
diff --git a/src/nextflow_src/test_skip_pipeline.nf b/src/nextflow_src/test_skip_pipeline.nf
new file mode 100644
index 0000000000000000000000000000000000000000..f96a6626536e6526064e35f4b19f66ed0b339e2a
--- /dev/null
+++ b/src/nextflow_src/test_skip_pipeline.nf
@@ -0,0 +1,104 @@
+nextflow.enable.dsl=2
+
+
+
+
+params.fasta = "/Xnfs/lbmcdb/common/Genomes/human/GRCh38/EnsEMBL/transindex/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
+params.fastq = ""
+params.gtf = "/Xnfs/lbmcdb/common/Genomes/human/GRCh38/EnsEMBL/transindex/Homo_sapiens.GRCh38.99.gtf.gz"
+params.transcript_to_gene = ""
+params.whitelist = "/Xnfs/abc/whitelist/10x_V3_barcode_whitelist.txt.gz"
+params.config = ""
+params.mit = 25
+params.nexp = 0.023
+params.cp = 4
+params.reso = 0.01
+params.skip = false
+params.nfeature = 500
+
+
+
+include {tr2g;index_fasta_velocity_default;count_velocity } from "./nf_modules/kb/main.nf"
+include {rfiltr } from "./nf_modules/rfiltration/main.nf" addParams(mit: params.mit, nfeature: params.nfeature, nexp: params.nexp)
+include {normalization_sanity } from "./nf_modules/sanity/main.nf"
+include {rviz } from "./nf_modules/rvisualization/main.nf" addParams(cp: params.cp, reso: params.reso)
+include { fastp } from "./nf_modules/fastp/main.nf" addParams(qualified_quality_phred:30)
+
+log.info "fastq files (--fastq): ${params.fastq}"
+log.info "fasta file (--fasta): ${params.fasta}"
+log.info "gtf file (--gtf): ${params.gtf}"
+log.info "transcript_to_gene file (--transcript_to_gene): ${params.transcript_to_gene}"
+log.info "whitelist file (--whitelist): ${params.whitelist}"
+log.info "config file (--config): ${params.config}"
+log.info "Mitochondrial cutoff (--mit): ${params.mit}"
+log.info "Minimum number of features per cell cutoff (--nfeature): ${params.nfeature}"
+log.info "N doublet expect (--nexp): ${params.nexp}"
+log.info "Principal Component for analysis (--cp): ${params.cp}"
+log.info "Resolution for clustering (--reso): ${params.reso}"
+
+
+channel
+  .fromFilePairs( params.fastq, size: -1)
+  .set { fastq_files }
+
+channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+  .map { it -> [it.simpleName, it]}
+  .set { fasta_files }
+
+channel
+  .fromPath( params.gtf )
+  .ifEmpty { error "Cannot find any gtf files matching: ${params.gtf}" }
+  .map { it -> [it.simpleName, it]}
+  .set { gtf_files }
+
+
+if (params.whitelist == "") {
+  channel.empty()
+    .set { whitelist_files }
+} else {
+  channel
+    .fromPath( params.whitelist )
+    .map { it -> [it.simpleName, it]}
+    .set { whitelist_files }
+}
+
+if (params.config == "") {
+  channel.empty()
+    .set { config_files }
+} else {
+  channel
+    .fromPath( params.config )
+    .map { it -> [it.simpleName, it]}
+    .set { config_files }
+}
+
+
+
+workflow {
+switch(params.skip) {
+  case true:
+    fastp(fastq_files)
+    tr2g(gtf_files)
+    index_fasta_velocity_default(fasta_files, gtf_files,tr2g.out.t2g)
+    count_velocity(index_fasta_velocity_default.out.index,fastp.out.fastq,index_fasta_velocity_default.out.t2g, whitelist_files,config_files)
+    rfiltr(count_velocity.out.counts,tr2g.out.t2g)
+    normalization_sanity(rfiltr.out.raw_filtered_mtx)
+    break;
+
+  default:
+    fastp(fastq_files)
+    tr2g(gtf_files)
+    index_fasta_velocity_default(fasta_files,
gtf_files,tr2g.out.t2g) + count_velocity(index_fasta_velocity_default.out.index,fastp.out.fastq,index_fasta_velocity_default.out.t2g, whitelist_files,config_files) + rfiltr(count_velocity.out.counts,tr2g.out.t2g) + normalization_sanity(rfiltr.out.raw_filtered_mtx) + rviz(normalization_sanity.out.normalize_filtered_mtx,rfiltr.out.seu_obj) + break; + } +} + + + + diff --git a/src/nextflow_src/training_dataset.config b/src/nextflow_src/training_dataset.config new file mode 100644 index 0000000000000000000000000000000000000000..734a7410c3f3d2425a98b2f1462faee929dc68e1 --- /dev/null +++ b/src/nextflow_src/training_dataset.config @@ -0,0 +1,370 @@ +profiles { + docker { + docker.temp = "auto" + docker.enabled = true + process { + withName: build_synthetic_bed { + container = "lbmc/bedtools:2.25.0" + cpus = 1 + } + withName: fasta_from_bed { + container = "lbmc/bedtools:2.25.0" + cpus = 1 + } + withName: index_fasta { + container = "lbmc/bowtie2:2.3.4.1" + cpus = 4 + } + withName: mapping_fastq_paired { + container = "lbmc/bowtie2:2.3.4.1" + cpus = 4 + } + withName: bam_2_fastq_paired { + container = "lbmc/samtools:1.7" + cpus = 4 + } + withName: filter_bam_paired { + container = "lbmc/samtools:1.7" + cpus = 4 + } + withName: sort_bam_paired { + container = "lbmc/samtools:1.7" + cpus = 4 + } + withName: index_bam_paired { + container = "lbmc/samtools:1.7" + cpus = 4 + } + withName: mapping_fastq_single { + container = "lbmc/bowtie2:2.3.4.1" + cpus = 4 + } + withName: bam_2_fastq_single { + container = "lbmc/samtools:1.7" + cpus = 4 + } + withName: filter_bam_single { + container = "lbmc/samtools:1.7" + cpus = 4 + } + withName: sort_bam_single { + container = "lbmc/samtools:1.7" + cpus = 4 + } + withName: index_bam_single { + container = "lbmc/samtools:1.7" + cpus = 4 + } + } + } + singularity { + singularity.enabled = true + singularity.cacheDir = "./bin/" + process { + withName: build_synthetic_bed { + container = "lbmc/bedtools:2.25.0" + cpus = 1 + } + withName: fasta_from_bed { + container = "lbmc/bedtools:2.25.0" + cpus = 1 + } + withName: index_fasta { + container = "lbmc/bowtie2:2.3.4.1" + cpus = 4 + } + withName: mapping_fastq_single { + container = "lbmc/bowtie2:2.3.4.1" + cpus = 4 + } + withName: mapping_fastq_paired { + container = "lbmc/bowtie2:2.3.4.1" + cpus = 4 + } + withName: bam_2_fastq_paired { + container = "lbmc/samtools:1.7" + cpus = 4 + } + withName: filter_bam_paired { + container = "lbmc/samtools:1.7" + cpus = 4 + } + withName: sort_bam_paired { + container = "lbmc/samtools:1.7" + cpus = 4 + } + withName: index_bam_paired { + container = "lbmc/samtools:1.7" + cpus = 4 + } + withName: bam_2_fastq_single { + container = "lbmc/samtools:1.7" + cpus = 4 + } + withName: filter_bam_single { + container = "lbmc/samtools:1.7" + cpus = 4 + } + withName: sort_bam_single { + container = "lbmc/samtools:1.7" + cpus = 4 + } + withName: index_bam_single { + container = "lbmc/samtools:1.7" + cpus = 4 + } + } + } + psmn { + process{ + withName: build_synthetic_bed { + beforeScript = "source $baseDir/.conda_psmn.sh" + conda = "$baseDir/.conda_envs/bedtools_2.25.0" + executor = "sge" + clusterOptions = "-m e -cwd -V" + cpus = 1 + memory = "20GB" + time = "12h" + queue = "monointeldeb128" + } + withName: fasta_from_bed { + beforeScript = "source $baseDir/.conda_psmn.sh" + conda = "$baseDir/.conda_envs/bedtools_2.25.0" + executor = "sge" + clusterOptions = "-m e -cwd -V" + cpus = 1 + memory = "20GB" + time = "12h" + queue = "monointeldeb128" + } + withName: index_fasta { + beforeScript = "source 
$baseDir/.conda_psmn.sh" + conda = "$baseDir/.conda_envs/bowtie2_2.3.4.1" + executor = "sge" + clusterOptions = "-m e -cwd -V" + cpus = 32 + memory = "20GB" + time = "12h" + queue = "CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D" + penv = "openmp32" + } + withName: mapping_fastq_paired { + beforeScript = "source $baseDir/.conda_psmn.sh" + conda = "$baseDir/.conda_envs/bowtie2_2.3.4.1" + executor = "sge" + clusterOptions = "-m e -cwd -V" + cpus = 32 + memory = "30GB" + time = "24h" + queue = "CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D" + penv = "openmp32" + } + withName: bam_2_fastq_paired { + beforeScript = "source $baseDir/.conda_psmn.sh" + conda = "$baseDir/.conda_envs/samtools_1.7" + executor = "sge" + clusterOptions = "-m e -cwd -V" + cpus = 32 + memory = "30GB" + time = "24h" + queue = "CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D" + penv = "openmp32" + } + withName: sort_bam_paired { + beforeScript = "source $baseDir/.conda_psmn.sh" + conda = "$baseDir/.conda_envs/samtools_1.7" + executor = "sge" + clusterOptions = "-m e -cwd -V" + cpus = 32 + memory = "30GB" + time = "24h" + queue = "CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D" + penv = "openmp32" + } + withName: index_bam_paired { + beforeScript = "source $baseDir/.conda_psmn.sh" + conda = "$baseDir/.conda_envs/samtools_1.7" + executor = "sge" + clusterOptions = "-m e -cwd -V" + cpus = 32 + memory = "30GB" + time = "24h" + queue = "CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D" + penv = "openmp32" + } + withName: mapping_fastq_single { + beforeScript = "source $baseDir/.conda_psmn.sh" + conda = "$baseDir/.conda_envs/bowtie2_2.3.4.1" + executor = "sge" + clusterOptions = "-m e -cwd -V" + cpus = 32 + memory = "30GB" + time = "24h" + queue = "CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D" + penv = "openmp32" + } + withName: bam_2_fastq_single { + beforeScript = "source $baseDir/.conda_psmn.sh" + conda = "$baseDir/.conda_envs/samtools_1.7" + executor = "sge" + clusterOptions = "-m e -cwd -V" + cpus = 32 + memory = "30GB" + time = "24h" + queue = "CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D" + penv = "openmp32" + } + withName: sort_bam_single { + beforeScript = "source $baseDir/.conda_psmn.sh" + conda = "$baseDir/.conda_envs/samtools_1.7" + executor = "sge" + clusterOptions = "-m e -cwd -V" + cpus = 32 + memory = "30GB" + time = "24h" + queue = "CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D" + penv = "openmp32" + } + withName: index_bam_single { + beforeScript = "source $baseDir/.conda_psmn.sh" + conda = "$baseDir/.conda_envs/samtools_1.7" + executor = "sge" + clusterOptions = "-m e -cwd -V" + cpus = 32 + 
memory = "30GB" + time = "24h" + queue = "CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D" + penv = "openmp32" + } + } + } + ccin2p3 { + singularity.enabled = true + singularity.cacheDir = "$baseDir/.singularity_in2p3/" + singularity.runOptions = "--bind /pbs,/sps,/scratch" + process{ + withName: fasta_from_bed { + container = "lbmc/bedtools:2.25.0" + scratch = true + stageInMode = "copy" + stageOutMode = "rsync" + executor = "sge" + clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n" + cpus = 1 + queue = "huge" + } + } + process{ + withName: build_synthetic_bed { + container = "lbmc/bedtools:2.25.0" + scratch = true + stageInMode = "copy" + stageOutMode = "rsync" + executor = "sge" + clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n" + cpus = 1 + queue = "huge" + } + withName: fasta_from_bed { + container = "lbmc/bedtools:2.25.0" + scratch = true + stageInMode = "copy" + stageOutMode = "rsync" + executor = "sge" + clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n" + cpus = 1 + queue = "huge" + } + withName: index_fasta { + container = "lbmc/bowtie2:2.3.4.1" + scratch = true + stageInMode = "copy" + stageOutMode = "rsync" + executor = "sge" + clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n" + cpus = 1 + queue = "huge" + } + withName: mapping_fastq_paired { + container = "lbmc/bowtie2:2.3.4.1" + scratch = true + stageInMode = "copy" + stageOutMode = "rsync" + executor = "sge" + clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n" + cpus = 1 + queue = "huge" + } + withName: bam_2_fastq_paired { + container = "lbmc/samtools:1.7" + scratch = true + stageInMode = "copy" + stageOutMode = "rsync" + executor = "sge" + clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n" + cpus = 1 + queue = "huge" + } + withName: sort_bam_paired { + container = "lbmc/samtools:1.7" + scratch = true + stageInMode = "copy" + stageOutMode = "rsync" + executor = "sge" + clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n" + cpus = 1 + queue = "huge" + } + withName: index_bam_paired { + container = "lbmc/samtools:1.7" + scratch = true + stageInMode = "copy" + stageOutMode = "rsync" + executor = "sge" + clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n" + cpus = 1 + queue = "huge" + } + withName: mapping_fastq_single { + container = "lbmc/bowtie2:2.3.4.1" + scratch = true + stageInMode = "copy" + stageOutMode = "rsync" + executor = "sge" + clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n" + cpus = 1 + queue = "huge" + } + withName: bam_2_fastq_single { + container = "lbmc/samtools:1.7" + scratch = true + stageInMode = "copy" + stageOutMode = "rsync" + executor = "sge" + clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n" + cpus = 1 + queue = "huge" + } + withName: sort_bam_single { + container = "lbmc/samtools:1.7" + scratch = true + stageInMode = "copy" + stageOutMode = "rsync" + executor = "sge" + clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n" + cpus = 1 + queue = "huge" + } + withName: index_bam_single { + container = "lbmc/samtools:1.7" + scratch = true + stageInMode = "copy" + stageOutMode = "rsync" + executor = "sge" + clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n" + cpus = 1 + queue = "huge" + } + } + } +} diff --git a/src/nextflow_src/training_dataset.nf b/src/nextflow_src/training_dataset.nf new file mode 100644 index 0000000000000000000000000000000000000000..c6f48e882a82eaa7437a409c7500c7045224893e --- /dev/null +++ b/src/nextflow_src/training_dataset.nf @@ -0,0 +1,308 @@ 
+/*
+small pipeline to build a training dataset from whole genome data
+
+input:
+- fasta
+- fastq
+- chromosome
+- start position
+- stop position
+
+output:
+- the fasta of the selected region
+- the fastq of the reads mapping to this region
+
+example for paired-end data:
+./nextflow src/training_dataset.nf -c src/training_dataset.config --fasta "data/genome.fa" --fastq_paired "data/*_R{1,2}.fastq.gz" --chromosome "X" --start 5305683 --stop 5333928 -resume
+
+example for single-end data:
+./nextflow src/training_dataset.nf -c src/training_dataset.config --fasta "data/genome.fa" --fastq_single "data/*_R1.fastq.gz" --chromosome "X" --start 5305683 --stop 5333928 -resume
+
+*/
+
+params.fastq_paired = ""
+params.fastq_single = ""
+
+log.info "fasta files : ${params.fasta}"
+log.info "fastq paired files : ${params.fastq_paired}"
+log.info "fastq single files : ${params.fastq_single}"
+log.info "chromosome : ${params.chromosome}"
+log.info "start position : ${params.start}"
+log.info "stop position : ${params.stop}"
+
+
+Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+  .set { fasta_file }
+
+
+process build_synthetic_bed {
+  tag "${chromosome}:${start}-${stop}"
+  cpus 4
+
+  input:
+    val chromosome from params.chromosome
+    val start from params.start
+    val stop from params.stop
+
+  output:
+    file "*.bed" into bed_files
+
+  script:
+"""
+echo "${chromosome}\t${start}\t${stop}" > synthetic.bed
+"""
+}
+
+process fasta_from_bed {
+  tag "${fasta.baseName}"
+  cpus 4
+  publishDir "results/training/fasta/", mode: 'copy'
+
+  input:
+    file fasta from fasta_file
+    file bed from bed_files
+    val chromosome from params.chromosome
+
+  output:
+    file "*.fasta" into fasta_files_extracted
+
+  script:
+"""
+bedtools getfasta \
+-fi ${fasta} -bed ${bed} -fo s${fasta.baseName}.fasta
+"""
+}
+
+process index_fasta {
+  tag "$fasta.baseName"
+  cpus 4
+  publishDir "results/training/mapping/index/", mode: 'copy'
+
+  input:
+    file fasta from fasta_files_extracted
+
+  output:
+    file "*.index*" into index_files
+    file "*_report.txt" into indexing_report
+
+  script:
+"""
+bowtie2-build --threads ${task.cpus} ${fasta} ${fasta.baseName}.index &> ${fasta.baseName}_bowtie2_report.txt
+
+if grep -q "Error" ${fasta.baseName}_bowtie2_report.txt; then
+  exit 1
+fi
+"""
+}
+
+if ( params.fastq_paired != "" ) {
+  Channel
+    .fromFilePairs( params.fastq_paired )
+    .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq_paired}" }
+    .set { fastq_files_paired }
+
+  process mapping_fastq_paired {
+    tag "$pair_id"
+    cpus 4
+
+    input:
+      set pair_id, file(reads) from fastq_files_paired
+      file index from index_files.collect()
+
+    output:
+      set pair_id, "*.bam" into bam_files_paired
+      file "*_report.txt" into mapping_report
+
+    script:
+    index_id = index[0]
+    for (index_file in index) {
+      if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+      }
+    }
+    """
+    bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \
+    -1 ${reads[0]} -2 ${reads[1]} 2> \
+    ${pair_id}_bowtie2_report.txt | \
+    samtools view -Sb - > ${pair_id}.bam
+
+    if grep -q "Error" ${pair_id}_bowtie2_report.txt; then
+      exit 1
+    fi
+    """
+  }
+
+  bam_files_paired.into{ bam_files_paired_fa; bam_files_paired_ba}
+
+  process bam_2_fastq_paired {
+    tag "$file_id"
+    publishDir "results/training/fastq/", mode: 'copy'
+
+    input:
+      set file_id, file(bam) from bam_files_paired_fa
+
+    output:
+      set file_id, "*.fastq" into fastq_files_extracted
+    script:
+    """
+    samtools fastq -1 
s${file_id}_R1.fastq -2 s${file_id}_R2.fastq -F 0x4 ${bam} + """ + } + + process filter_bam_paired { + tag "$file_id" + cpus 4 + + input: + set file_id, file(bam) from bam_files_paired_ba + file bed from bed_files + + output: + set file_id, "*.bam" into filtered_bam_files_paired + script: + """ + samtools view -@ ${task.cpus} -hb ${bam} -F 0x4 > f${file_id}.bam + """ + } + + process sort_bam_paired { + tag "$file_id" + publishDir "results/training/bams/", mode: 'copy' + cpus 4 + + input: + set file_id, file(bam) from filtered_bam_files_paired + + output: + set file_id, "*.bam" into sorted_bam_files_paired + + script: + """ + samtools sort -@ ${task.cpus} -O BAM -o s${file_id}.bam ${bam} + """ + } + + process index_bam_paired { + tag "$file_id" + publishDir "results/training/bams/", mode: 'copy' + + input: + set file_id, file(bam) from sorted_bam_files_paired + + output: + set file_id, "*.bam*" into indexed_bam_file_paired + + script: + """ + samtools index ${bam} + """ + } +} + + +if ( params.fastq_single != "" ) { + Channel + .fromPath( params.fastq_single ) + .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq_single}" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} + .set { fastq_files_single } + + process mapping_fastq_single { + tag "$file_id" + cpus 4 + + input: + set file_id, file(reads) from fastq_files_single + file index from index_files.collect() + + output: + set file_id, "*.bam" into bam_files_single + file "*_report.txt" into mapping_report + + script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + } + } + """ + bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \ + -U ${reads} 2> \ + ${file_id}_bowtie2_report.txt | \ + samtools view -Sb - > ${file_id}.bam + + if grep -q "Error" ${file_id}_bowtie2_report.txt; then + exit 1 + fi + """ + } + + bam_files_single.into{ bam_files_single_fa; bam_files_single_ba} + + process bam_2_fastq_single { + tag "$file_id" + + input: + set file_id, file(bam) from bam_files_single_fa + + output: + set file_id, "*.fastq" into fastq_files_extracted + script: + """ + samtools fastq -0 s${file_id}.fastq -F 0x4 ${bam} + """ + } + + process filter_bam_single { + tag "$file_id" + cpus 4 + + input: + set file_id, file(bam) from bam_files_single_ba + file bed from bed_files + + output: + set file_id, "*.bam" into filtered_bam_files_single + script: + """ + samtools view -@ ${task.cpus} -hb ${bam} -F 0x4 > f${file_id}.bam + """ + } + + process sort_bam_single { + tag "$file_id" + publishDir "results/training/bams/", mode: 'copy' + cpus 4 + + input: + set file_id, file(bam) from filtered_bam_files_single + + output: + set file_id, "*.bam" into sorted_bam_files_single + + script: + """ + samtools sort -@ ${task.cpus} -O BAM -o s${file_id}.bam ${bam} + """ + } + + process index_bam_single { + tag "$file_id" + publishDir "results/training/bams/", mode: 'copy' + + input: + set file_id, file(bam) from sorted_bam_files_single + + output: + set file_id, "*.bam*" into indexed_bam_file_single + + script: + """ + samtools index ${bam} + """ + } +} + diff --git a/src/nf_modules/agat/main.nf b/src/nf_modules/agat/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..e2d832e72b97340f1a0811dde6358bd73cb2e606 --- /dev/null +++ b/src/nf_modules/agat/main.nf @@ -0,0 +1,46 @@ +version = "0.8.0" +container_url = "lbmc/agat:${version}" + +params.gff_to_bed = "" +params.gff_to_bed_out = "" 
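+// Usage sketch (illustrative, not part of the pipeline): every module in
+// src/nf_modules follows the same convention: `params.<process>` holds extra
+// command-line options and `params.<process>_out`, when non-empty, publishes
+// the results under `results/`. A hypothetical caller of this module could
+// look like this (file names are placeholders):
+//
+//   include { gff_to_bed } from "./nf_modules/agat/main.nf" addParams(gff_to_bed_out: "agat/")
+//   workflow {
+//     gff_to_bed(Channel.fromPath("annotation.gff.gz").map{ ["annot", it] })
+//   }
+//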
+process gff_to_bed {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.gff_to_bed_out != "") {
+    publishDir "results/${params.gff_to_bed_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(gff)
+  output:
+    tuple val(file_id), path("*.bed"), emit: bed
+
+  script:
+"""
+zcat ${gff} > ${gff.baseName}.gff
+agat_convert_sp_gff2bed.pl ${params.gff_to_bed} --gff ${gff.baseName}.gff -o ${gff.simpleName}.bed
+"""
+}
+
+params.gff_to_gtf = ""
+params.gff_to_gtf_out = ""
+process gff_to_gtf {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.gff_to_gtf_out != "") {
+    publishDir "results/${params.gff_to_gtf_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(gff)
+  output:
+    tuple val(file_id), path("*.gtf"), emit: gtf
+
+  script:
+"""
+zcat ${gff} > ${gff.baseName}.gff
+agat_convert_sp_gff2gtf.pl ${params.gff_to_gtf} --gff ${gff.baseName}.gff -o ${gff.simpleName}.gtf
+"""
+}
\ No newline at end of file
diff --git a/src/nf_modules/alntools/main.nf b/src/nf_modules/alntools/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..19ee7b096f8f8aa40f7c57602caee4060f17227a
--- /dev/null
+++ b/src/nf_modules/alntools/main.nf
@@ -0,0 +1,65 @@
+version = "dd96682"
+container_url = "lbmc/alntools:${version}"
+
+params.bam2ec = ""
+params.bam2ec_out = ""
+process bam2ec {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.bam2ec_out != "") {
+    publishDir "results/${params.bam2ec_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam), path(bam_idx)
+    tuple val(transcripts_lengths_id), path(transcripts_lengths)
+
+  output:
+    tuple val(file_id), path("${bam.simpleName}.bin"), emit: bin
+    tuple val(transcripts_lengths_id), path("${transcripts_lengths}"), emit: tsv
+    tuple val(file_id), path("${bam.simpleName}_bam2ec_report.txt"), emit: report
+
+  script:
+"""
+mkdir tmp
+alntools bam2ec \
+  -c 1 ${params.bam2ec} \
+  -d ./tmp \
+  -t ${transcripts_lengths} \
+  -v \
+  ${bam} ${bam.simpleName}.bin &> \
+  ${bam.simpleName}_bam2ec_report.txt
+"""
+}
+
+params.gtf_to_transcripts_lengths = ""
+params.gtf_to_transcripts_lengths_out = ""
+process gtf_to_transcripts_lengths {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.gtf_to_transcripts_lengths_out != "") {
+    publishDir "results/${params.gtf_to_transcripts_lengths_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(gtf)
+
+  output:
+    tuple val(file_id), path("${gtf.simpleName}_transcripts_lengths.tsv"), emit: tsv
+
+  script:
+"""
+awk -F"[\\t;]" '
+\$3=="exon" {
+    ID=gensub(/transcript_id \\"(.*)\\"/, "\\\\1", "g", \$11);
+    LEN[ID]+=\$5-\$4+1;
+  }
+END{
+  for(i in LEN)
+    {print i"\\t"LEN[i]}
+  }
+' ${gtf} > ${gtf.simpleName}_transcripts_lengths.tsv
+"""
+}
diff --git a/src/nf_modules/beagle/main.nf b/src/nf_modules/beagle/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..bc0a54b21d941d358d047334c29d67f4b55be16b
--- /dev/null
+++ b/src/nf_modules/beagle/main.nf
@@ -0,0 +1,23 @@
+version = "5.1_24Aug19.3e8--hdfd78af_1"
+container_url = "quay.io/biocontainers/beagle:${version}"
+
+params.phasing = ""
+process phasing {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(vcf)
+    tuple val(ref_id), path(ref_vcf)
+
+  output:
+    tuple val(file_id), path("*.vcf.gz"), emit: vcf
+
+  script:
+"""
+beagle nthread=${task.cpus} \
+  ${params.phasing} \
+  gtgl=${vcf} \
+  ref=${ref_vcf} \
+  out=${vcf.simpleName}_phased
+"""
+}
diff --git a/src/nf_modules/bedtools/main.nf b/src/nf_modules/bedtools/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..9400abf4e55bcf57f52e896b5f1d41e1a8fe8bfa
--- /dev/null
+++ b/src/nf_modules/bedtools/main.nf
@@ -0,0 +1,121 @@
+version = "2.25.0"
+container_url = "lbmc/bedtools:${version}"
+
+params.fasta_from_bed = "-name"
+params.fasta_from_bed_out = ""
+process fasta_from_bed {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.fasta_from_bed_out != "") {
+    publishDir "results/${params.fasta_from_bed_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(fasta_id), path(fasta)
+    tuple val(file_id), path(bed)
+
+  output:
+    tuple val(file_id), path("*_extracted.fasta"), emit: fasta
+
+  script:
+"""
+bedtools getfasta ${params.fasta_from_bed} \
+-fi ${fasta} -bed ${bed} -fo ${bed.baseName}_extracted.fasta
+"""
+}
+
+params.merge_bed = ""
+params.merge_bed_out = ""
+process merge_bed {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.merge_bed_out != "") {
+    publishDir "results/${params.merge_bed_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bed)
+
+  output:
+    tuple val(file_id), path("*_merged.bed"), emit: bed
+
+  script:
+"""
+bedtools merge ${params.merge_bed} -i ${bed} > ${bed[0].simpleName}_merged.bed
+"""
+}
+
+params.bam_to_fastq_singleend = ""
+params.bam_to_fastq_singleend_out = ""
+process bam_to_fastq_singleend {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${bam_id}"
+  if (params.bam_to_fastq_singleend_out != "") {
+    publishDir "results/${params.bam_to_fastq_singleend_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(bam_id), path(bam)
+
+  output:
+    tuple val(bam_id), path("*.fastq"), emit: fastq
+
+  script:
+"""
+bedtools bamtofastq \
+  ${params.bam_to_fastq_singleend} \
+  -i ${bam} -fq ${bam.baseName}.fastq
+"""
+}
+
+params.bam_to_fastq_pairedend = ""
+params.bam_to_fastq_pairedend_out = ""
+process bam_to_fastq_pairedend {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${bam_id}"
+  if (params.bam_to_fastq_pairedend_out != "") {
+    publishDir "results/${params.bam_to_fastq_pairedend_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(bam_id), path(bam)
+
+  output:
+    tuple val(bam_id), path("*.fastq"), emit: fastq
+
+  script:
+"""
+bedtools bamtofastq \
+  ${params.bam_to_fastq_pairedend} \
+  -i ${bam} -fq ${bam.baseName}_R1.fastq -fq2 ${bam.baseName}_R2.fastq
+"""
+}
+
+params.bam_to_bedgraph = ""
+params.bam_to_bedgraph_out = ""
+process bam_to_bedgraph {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${bam_id}"
+  if (params.bam_to_bedgraph_out != "") {
+    publishDir "results/${params.bam_to_bedgraph_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(bam_id), path(bam)
+
+  output:
+    tuple val(bam_id), path("*.bg"), emit: bedgraph
+
+  script:
+"""
+bedtools genomecov \
+  ${params.bam_to_bedgraph} \
+  -ibam ${bam} \
+  -bg > ${bam.simpleName}.bg
+"""
+}
diff --git a/src/nf_modules/bioawk/main.nf b/src/nf_modules/bioawk/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..eaa5a4a2e50a1e9244f99ae6cc9fb10401bfb5a9
--- /dev/null
+++ b/src/nf_modules/bioawk/main.nf
@@ -0,0 +1,24 @@
+version = "1.0"
+container_url = "lbmc/bioawk:${version}"
+
+params.fasta_to_transcripts_lengths = ""
+params.fasta_to_transcripts_lengths_out = ""
+process fasta_to_transcripts_lengths {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.fasta_to_transcripts_lengths_out != "") {
+    publishDir "results/${params.fasta_to_transcripts_lengths_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fasta)
+
+  output:
+    tuple val(file_id), path("${fasta.simpleName}_transcripts_lengths.tsv"), emit: tsv
+
+  script:
+"""
+bioawk -c fastx '{print(\$name" "length(\$seq))}' ${fasta} > ${fasta.simpleName}_transcripts_lengths.tsv
+"""
+}
\ No newline at end of file
diff --git a/src/nf_modules/bioconvert/main.nf b/src/nf_modules/bioconvert/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..884ebb9ee693b27fca4cda7724ca3145476ec60f
--- /dev/null
+++ b/src/nf_modules/bioconvert/main.nf
@@ -0,0 +1,46 @@
+version = "0.4.0"
+container_url = "lbmc/bioconvert:${version}"
+params.bigwig_to_wig = ""
+params.bigwig_to_wig_out = ""
+process bigwig_to_wig {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.bigwig_to_wig_out != "") {
+    publishDir "results/${params.bigwig_to_wig_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bw)
+
+  output:
+    tuple val(file_id), path("*.wig"), emit: wig
+
+  script:
+"""
+bioconvert bigwig2wiggle ${bw} ${bw.simpleName}.wig
+"""
+}
+
+params.bigwig2_to_wig2 = ""
+params.bigwig2_to_wig2_out = ""
+process bigwig2_to_wig2 {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.bigwig2_to_wig2_out != "") {
+    publishDir "results/${params.bigwig2_to_wig2_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bw_a), path(bw_b)
+
+  output:
+    tuple val(file_id), path("${bw_a.simpleName}.wig"), path("${bw_b.simpleName}.wig"), emit: wig
+
+  script:
+"""
+bioconvert bigwig2wiggle ${bw_a} ${bw_a.simpleName}.wig
+bioconvert bigwig2wiggle ${bw_b} ${bw_b.simpleName}.wig
+"""
+}
\ No newline at end of file
diff --git a/src/nf_modules/bowtie/main.nf b/src/nf_modules/bowtie/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..a841fc36195dabcf2bb238431680f9e0677aa701
--- /dev/null
+++ b/src/nf_modules/bowtie/main.nf
@@ -0,0 +1,168 @@
+version = "1.2.2"
+container_url = "lbmc/bowtie:${version}"
+
+params.index_fasta = ""
+params.index_fasta_out = ""
+process index_fasta {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fasta)
+
+  output:
+    tuple val(file_id), path("*.index*"), emit: index
+    tuple val(file_id), path("*_report.txt"), emit: report
+
+  script:
+"""
+bowtie-build --threads ${task.cpus} \
+  ${params.index_fasta} \
+  -f ${fasta} ${fasta.baseName}.index &> \
+  ${fasta.baseName}_bowtie_index_report.txt
+
+if grep -q "Error" ${fasta.baseName}_bowtie_index_report.txt; then
+  exit 1
+fi
+"""
+}
+
+// bowtie (v1) has no --very-sensitive preset, so no default preset is passed
+params.mapping_fastq = ""
+params.mapping_fastq_out = ""
+process mapping_fastq {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_prefix"
+  if (params.mapping_fastq_out != "") {
+    publishDir "results/${params.mapping_fastq_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(index_id), path(index)
+    tuple val(file_id), path(reads)
+
+  output:
+    tuple val(file_id), path("*.bam"), emit: bam
+    path "*_report.txt", emit: report
+
+  script:
+  if (file_id instanceof List){
+    file_prefix = file_id[0]
+  } else {
+    file_prefix = file_id
+  }
+  // bowtie (v1) index files end in .ebwt, not .bt2
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.ebwt/ && !(index_file =~ /.*\.rev\.1\.ebwt/)) {
+      index_id = ( index_file =~ /(.*)\.1\.ebwt/)[0][1]
+    }
+  }
+  if (reads.size() == 2)
+  """
+  # -v specifies the max number of mismatches, -k the number of matches
+  # reported per read
+  bowtie --best -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \
+    ${params.mapping_fastq} \
+    -1 ${reads[0]} -2 ${reads[1]} 2> \
+    ${file_prefix}_bowtie_report_tmp.txt | \
+    samtools view -Sb - > ${file_prefix}.bam
+
+  if grep -q "Error" ${file_prefix}_bowtie_report_tmp.txt; then
+    exit 1
+  fi
+  tail -n 19 ${file_prefix}_bowtie_report_tmp.txt > \
+    ${file_prefix}_bowtie_mapping_report.txt
+  """
+  else
+  """
+  bowtie --best -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \
+    ${params.mapping_fastq} \
+    -q ${reads} 2> \
+    ${file_prefix}_bowtie_report_tmp.txt | \
+    samtools view -Sb - > ${file_prefix}.bam
+
+  if grep -q "Error" ${file_prefix}_bowtie_report_tmp.txt; then
+    exit 1
+  fi
+  tail -n 19 ${file_prefix}_bowtie_report_tmp.txt > \
+    ${file_prefix}_bowtie_mapping_report.txt
+  """
+}
+
+params.mapping_fastq_pairedend = ""
+process mapping_fastq_pairedend {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$pair_id"
+
+  input:
+    path index
+    tuple val(pair_id), path(reads)
+
+  output:
+    tuple val(pair_id), path("*.bam"), emit: bam
+    path "*_report.txt", emit: report
+
+  script:
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.ebwt/ && !(index_file =~ /.*\.rev\.1\.ebwt/)) {
+      index_id = ( index_file =~ /(.*)\.1\.ebwt/)[0][1]
+    }
+  }
+"""
+# -v specifies the max number of mismatches, -k the number of matches reported
+# per read
+bowtie --best -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \
+  ${params.mapping_fastq_pairedend} \
+  -1 ${reads[0]} -2 ${reads[1]} 2> \
+  ${pair_id}_bowtie_report_tmp.txt | \
+  samtools view -Sb - > ${pair_id}.bam
+
+if grep -q "Error" ${pair_id}_bowtie_report_tmp.txt; then
+  exit 1
+fi
+tail -n 19 ${pair_id}_bowtie_report_tmp.txt > \
+  ${pair_id}_bowtie_mapping_report.txt
+"""
+}
+
+params.mapping_fastq_singleend = ""
+process mapping_fastq_singleend {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+
+  input:
+    path index
+    tuple val(file_id), path(reads)
+
+  output:
+    tuple val(file_id), path("*.bam"), emit: bam
+    path "*_report.txt", emit: report
+
+  script:
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.ebwt/ && !(index_file =~ /.*\.rev\.1\.ebwt/)) {
+      index_id = ( index_file =~ /(.*)\.1\.ebwt/)[0][1]
+    }
+  }
+"""
+bowtie --best -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \
+  ${params.mapping_fastq_singleend} \
+  -q ${reads} 2> \
+  ${file_id}_bowtie_report_tmp.txt | \
+  samtools view -Sb - > ${file_id}.bam
+
+if grep -q "Error" ${file_id}_bowtie_report_tmp.txt; then
+  exit 1
+fi
+tail -n 19 ${file_id}_bowtie_report_tmp.txt > \
+  ${file_id}_bowtie_mapping_report.txt
+"""
+}
diff --git a/src/nf_modules/bowtie2/main.nf b/src/nf_modules/bowtie2/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..3a0fc967f381ee84f40cd7a4ac3887bcb32d70ed
--- /dev/null
+++ b/src/nf_modules/bowtie2/main.nf
@@ -0,0 +1,102 @@
+version = "2.3.4.1"
+container_url = "lbmc/bowtie2:${version}"
+
+params.index_fasta = ""
+params.index_fasta_out = ""
+process index_fasta {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fasta)
+
+  output:
+    tuple val(file_id), path("*.bt2"), emit: index
+    tuple val(file_id), path("*_report.txt"), emit: report
+
+  script:
+"""
+bowtie2-build --threads ${task.cpus} \
+  ${fasta} \
+  ${fasta.simpleName} &> \
${fasta.simpleName}_bowtie2_index_report.txt + +if grep -q "Error" ${fasta.simpleName}_bowtie2_index_report.txt; then + exit 1 +fi +""" +} + +params.mapping_fastq = "--very-sensitive" +params.mapping_fastq_out = "" +process mapping_fastq { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.mapping_fastq_out != "") { + publishDir "results/${params.mapping_fastq_out}", mode: 'copy' + } + + input: + tuple val(index_id), path(index) + tuple val(file_id), path(reads) + + output: + tuple val(file_id), path("*.bam"), emit: bam + path "*_report.txt", emit: report + + script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + } + } + switch(file_id) { + case {it instanceof List}: + file_prefix = file_id[0] + break + case {it instanceof Map}: + file_prefix = file_id.values()[0] + break + default: + file_prefix = file_id + break + } + + if (reads.size() == 2) + """ + bowtie2 ${params.mapping_fastq} \ + -p ${task.cpus} \ + -x ${index_id} \ + -1 ${reads[0]} \ + -2 ${reads[1]} 2> \ + ${file_prefix}_bowtie2_mapping_report_tmp.txt | \ + samtools view -Sb - > ${file_prefix}.bam + + if grep -q "Error" ${file_prefix}_bowtie2_mapping_report_tmp.txt; then + exit 1 + fi + tail -n 19 ${file_prefix}_bowtie2_mapping_report_tmp.txt > \ + ${file_prefix}_bowtie2_mapping_report.txt + """ + else + """ + bowtie2 ${params.mapping_fastq} \ + -p ${task.cpus} \ + -x ${index_id} \ + -U ${reads} 2> \ + ${file_prefix}_bowtie2_mapping_report_tmp.txt | \ + samtools view -Sb - > ${file_prefix}.bam + + if grep -q "Error" ${file_prefix}_bowtie2_mapping_report_tmp.txt; then + exit 1 + fi + tail -n 19 ${file_prefix}_bowtie2_mapping_report_tmp.txt > \ + ${file_prefix}_bowtie2_mapping_report.txt + """ +} diff --git a/src/nf_modules/bwa/main.nf b/src/nf_modules/bwa/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..0490b082a4d71d8cef7aa68d44ccf96d2ea41d89 --- /dev/null +++ b/src/nf_modules/bwa/main.nf @@ -0,0 +1,96 @@ +version = "0.7.17" +container_url = "lbmc/bwa:${version}" + + +workflow mapping { + take: + fasta + fastq + main: + index_fasta(fasta) + mapping_fastq(index_fasta.out.index.collect(), fastq) + emit: + bam = mapping_fastq.out.bam + report = mapping_fastq.out.report +} + + +params.index_fasta = "" +params.index_fasta_out = "" +process index_fasta { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.index_fasta_out != "") { + publishDir "results/${params.index_fasta_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(fasta) + + output: + tuple val(file_id), path("${fasta.simpleName}.*"), emit: index + tuple val(file_id), path("*_bwa_report.txt"), emit: report + + script: +""" +bwa index ${params.index_fasta} -p ${fasta.simpleName} ${fasta} \ +&> ${fasta.simpleName}_bwa_report.txt +""" +} + + +params.mapping_fastq = "" +params.mapping_fastq_out = "" +process mapping_fastq { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.mapping_fastq_out != "") { + publishDir "results/${params.mapping_fastq_out}", mode: 'copy' + } + + input: + tuple val(index_id), path(index) + tuple val(file_id), path(reads) + + output: + tuple val(file_id), path("*.bam"), emit: bam + tuple val(file_id), path("${file_prefix}_bwa_report.txt"), emit: report + + script: + if (file_id instanceof List){ + library = file_id[0] + file_prefix = file_id[0] + } else if (file_id 
instanceof Map) {
+    library = file_id.values()[0]
+    file_prefix = file_id.values()[0]
+    if (file_id.containsKey('library')) {
+      library = file_id.library
+      file_prefix = file_id.id
+    }
+  } else {
+    library = file_id
+    file_prefix = file_id
+  }
+  bwa_mem_R = "@RG\\tID:${library}\\tSM:${library}\\tLB:lib_${library}\\tPL:illumina"
+  if (reads.size() == 2)
+"""
+bwa mem -t ${task.cpus} \
+${params.mapping_fastq} \
+-R '${bwa_mem_R}' \
+${index[0].baseName} ${reads[0]} ${reads[1]} 2> \
+  ${file_prefix}_bwa_report.txt | \
+  samtools view -@ ${task.cpus} -Sb - > ${file_prefix}.bam
+"""
+  else
+"""
+bwa mem -t ${task.cpus} \
+${params.mapping_fastq} \
+-R '${bwa_mem_R}' \
+${index[0].baseName} ${reads} 2> \
+  ${file_prefix}_bwa_report.txt | \
+  samtools view -@ ${task.cpus} -Sb - > ${file_prefix}.bam
+"""
+}
+
diff --git a/src/nf_modules/cellphonedb/main.nf b/src/nf_modules/cellphonedb/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..d41f70343f29221460cd05ab1addfa93c54b93e8
--- /dev/null
+++ b/src/nf_modules/cellphonedb/main.nf
@@ -0,0 +1,118 @@
+version = "3.0.0"
+container_url = "mlepetit/cellphonedb:latest"
+
+params.cellphonedb = ""
+params.cellphonedb_out = ""
+params.pval = ""
+params.thres = ""
+params.iterations = ""
+params.gene_id = ""
+
+workflow cellphone_statistical_analysis {
+  take:
+    meta
+    counts
+
+  main:
+    cpdb_methods_stats(meta, counts)
+    cpdb_plot_dot_plot(cpdb_methods_stats.out.means, cpdb_methods_stats.out.pvalues)
+    cpdb_plot_heatmap(meta, cpdb_methods_stats.out.pvalues)
+
+  emit:
+    means = cpdb_methods_stats.out.means
+    pvalues = cpdb_methods_stats.out.pvalues
+    deconvoluted = cpdb_methods_stats.out.deconvoluted
+    significant_means = cpdb_methods_stats.out.significant_means
+    dot_plot = cpdb_plot_dot_plot.out.dot_plot
+    heatmap = cpdb_plot_heatmap.out.heatmap
+    heatmap_log = cpdb_plot_heatmap.out.heatmap_log
+    count_network = cpdb_plot_heatmap.out.count_network
+    interactions_count = cpdb_plot_heatmap.out.interactions_count
+}
+
+process cpdb_methods_stats {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  if (params.cellphonedb_out != "") {
+    publishDir "results/${params.cellphonedb_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(id_mtx), path(meta)
+    tuple val(id_counts), path(counts)
+
+  output:
+    tuple val(id_mtx), path("out/means.txt"), emit: means
+    tuple val(id_mtx), path("out/pvalues.txt"), emit: pvalues
+    tuple val(id_mtx), path("out/deconvoluted.txt"), emit: deconvoluted
+    tuple val(id_mtx), path("out/significant_means.txt"), emit: significant_means
+
+  script:
+"""
+cellphonedb method statistical_analysis ${meta} ${counts} \
+  --counts-data ${params.gene_id} \
+  --threads ${task.cpus} \
+  --iterations ${params.iterations} \
+  --pvalue ${params.pval} \
+  --result-precision 10 \
+  --threshold ${params.thres}
+"""
+}
+
+process cpdb_plot_dot_plot {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  if (params.cellphonedb_out != "") {
+    publishDir "results/${params.cellphonedb_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(id_mtx), path(means)
+    tuple val(id_pvals), path(pvalues)
+
+  output:
+    tuple val(id_mtx), path("out/plot.pdf"), emit: dot_plot
+
+  script:
+"""
+mkdir ./out
+cellphonedb plot dot_plot --means-path ${means} --pvalues-path ${pvalues}
+"""
+}
+
+process cpdb_plot_heatmap {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  if (params.cellphonedb_out != "") {
+    publishDir "results/${params.cellphonedb_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(id_meta), path(meta)
+    tuple val(id_mtx), path(pvalues)
+
+  output:
+    tuple val(id_mtx), path("out/heatmap_count.pdf"), emit: heatmap
+    tuple val(id_mtx), path("out/heatmap_log_count.pdf"), emit: heatmap_log
+    tuple val(id_mtx), path("out/count_network.txt"), emit: count_network
+    tuple val(id_mtx), path("out/interaction_count.txt"), emit: interactions_count
+
+  script:
+"""
+mkdir ./out
+cellphonedb plot heatmap_plot --pvalues-path ${pvalues} --pvalue ${params.pval} ${meta}
+"""
+}
diff --git a/src/nf_modules/cutadapt/main.nf b/src/nf_modules/cutadapt/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..7cac589e23669969ea100ecff00e842d6ae5c910
--- /dev/null
+++ b/src/nf_modules/cutadapt/main.nf
@@ -0,0 +1,79 @@
+version = "2.1"
+container_url = "lbmc/cutadapt:${version}"
+
+params.adapter_3_prim = "AGATCGGAAGAG"
+params.adapter_5_prim = "CTCTTCCGATCT"
+params.adaptor_removal = "-a ${params.adapter_3_prim} -g ${params.adapter_5_prim} -A ${params.adapter_3_prim} -G ${params.adapter_5_prim}"
+params.adaptor_removal_out = ""
+process adaptor_removal {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.adaptor_removal_out != "") {
+    publishDir "results/${params.adaptor_removal_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(reads)
+
+  output:
+    tuple val(file_id), path("*_cut_*"), emit: fastq
+    path "*_report.txt", emit: report
+
+  script:
+  if (file_id instanceof List){
+    file_prefix = file_id[0]
+  } else {
+    file_prefix = file_id
+  }
+  if (reads.size() == 2)
+  """
+  cutadapt ${params.adaptor_removal} \
+  -o ${file_prefix}_cut_R1.fastq.gz -p ${file_prefix}_cut_R2.fastq.gz \
+  ${reads[0]} ${reads[1]} > ${file_prefix}_report.txt
+  """
+  else
+  """
+  cutadapt ${params.adaptor_removal} \
+  -o ${file_prefix}_cut.fastq.gz \
+  ${reads} > ${file_prefix}_report.txt
+  """
+}
+
+params.trim_quality = "20"
+params.trimming = "-q ${params.trim_quality},${params.trim_quality}"
+params.trimming_out = ""
+process trimming {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.trimming_out != "") {
+    publishDir "results/${params.trimming_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(reads)
+
+  output:
+    tuple val(file_id), path("*_trim_*"), emit: fastq
+    path "*_report.txt", emit: report
+
+  script:
+  if (file_id instanceof List){
+    file_prefix = file_id[0]
+  } else {
+    file_prefix = file_id
+  }
+  if (reads.size() == 2)
+  """
+  cutadapt ${params.trimming} \
+  -o ${file_prefix}_trim_R1.fastq.gz -p ${file_prefix}_trim_R2.fastq.gz \
+  ${reads[0]} ${reads[1]} > ${file_prefix}_report.txt
+  """
+  else
+  """
+  cutadapt ${params.trimming} \
+  -o ${file_prefix}_trim.fastq.gz \
+  ${reads} > ${file_prefix}_report.txt
+  """
+}
diff --git a/src/nf_modules/danpos/main.nf b/src/nf_modules/danpos/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..81d8b836f015ef21991d217e2c214099bd925ba7
--- /dev/null
+++ b/src/nf_modules/danpos/main.nf
@@ -0,0 +1,446 @@
+version = "v2.2.2_cv3"
+container_url = "biocontainers/danpos:${version}"
+
+// bigwig2_to_wig2 is also included under an alias so that the *vs* workflows
+// below can invoke it twice (once per input) within the same workflow
+include {
+  bigwig2_to_wig2;
+  bigwig2_to_wig2 as bigwig2_to_wig2_b;
+  bigwig_to_wig;
+  wig_to_bedgraph;
+  wig2_to_bedgraph2
+} from "./../ucsc/main.nf"
+
+params.dpos = "--smooth_width 0 -n N "
+params.dpos_out = ""
+
+workflow dpos_bam_bg {
+  take:
+    fasta
+    fastq
+    bam
+
+  main:
+    dpos_bam(fastq, bam)
+    wig2_to_bedgraph2(fasta, dpos_bam.out.wig)
+
+  emit:
+    bg = wig2_to_bedgraph2.out.bg
+    wig = dpos_bam.out.wig
+    bed = dpos_bam.out.bed
+}
+
+process dpos_bam {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.dpos_out != "") {
+    publishDir "results/${params.dpos_out}", mode: 'copy', overwrite: true
+  }
+
+  input:
+    val fastq
+    tuple val(file_id), path(bam_ip), path(bam_wce)
+
+  output:
+    tuple val(file_id), path("${file_prefix}/${bam_ip.simpleName}*.wig"), path("${file_prefix}/${bam_wce.simpleName}*.wig"), emit: wig
+    tuple val(file_id), path("${file_prefix}/*.positions.bed"), emit: bed
+
+  script:
+  switch(file_id) {
+    case {it instanceof List}:
+      file_prefix = file_id[0]
+      break
+    case {it instanceof Map}:
+      file_prefix = file_id.values()[0]
+      break
+    default:
+      file_prefix = file_id
+      break
+  }
+
+  m = 0
+  if (fastq[1].size() == 2){
+    m = 1
+  }
+"""
+danpos.py dpos -m ${m} \
+  ${params.dpos} \
+  -b ${bam_wce} \
+  -o ${file_prefix} \
+  ${bam_ip}
+mv ${file_prefix}/pooled/* ${file_prefix}/
+rm -R ${file_prefix}/pooled
+awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2-1, \$3, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${bam_ip.simpleName}.bgsub.positions.xls > ${file_prefix}/${bam_ip.simpleName}.bgsub.positions.bed
+"""
+}
+
+workflow dpos_bw {
+  take:
+    fasta
+    fastq
+    bw
+  main:
+    bigwig2_to_wig2(bw)
+    dpos_wig(fastq, bigwig2_to_wig2.out.wig)
+    wig_to_bedgraph(fasta, bigwig2_to_wig2.out.wig)
+
+  emit:
+    bg = wig_to_bedgraph.out.bg
+    wig = bigwig2_to_wig2.out.wig
+    bed = dpos_wig.out.bed
+}
+
+process dpos_wig {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.dpos_out != "") {
+    publishDir "results/${params.dpos_out}", mode: 'copy', overwrite: true
+  }
+
+  input:
+    val fastq
+    tuple val(file_id), path(wig_ip), path(wig_wce)
+
+  output:
+    tuple val(file_id), path("${file_prefix}/*.positions.bed"), emit: bed
+    tuple val(file_id), path("${file_prefix}/${wig_ip.simpleName}*.wig"), path("${file_prefix}/${wig_wce.simpleName}*.wig"), emit: wig
+
+  script:
+  switch(file_id) {
+    case {it instanceof List}:
+      file_prefix = file_id[0]
+      break
+    case {it instanceof Map}:
+      file_prefix = file_id.values()[0]
+      break
+    default:
+      file_prefix = file_id
+      break
+  }
+
+  m = 0
+  if (fastq[1].size() == 2){
+    m = 1
+  }
+"""
+danpos.py dpos -m ${m} \
+  ${params.dpos} \
+  -b ${wig_wce} \
+  -o ${file_prefix} \
+  ${wig_ip}
+mv ${file_prefix}/pooled/* ${file_prefix}/
+rm -R ${file_prefix}/pooled
+awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2-1, \$3, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${wig_ip.simpleName}.positions.xls > ${file_prefix}/${wig_ip.simpleName}.positions.bed
+"""
+}
+
+workflow dpos_bw_no_b {
+  take:
+    fasta
+    fastq
+    bw
+  main:
+    bigwig_to_wig(bw)
+    dpos_wig_no_b(fastq, bigwig_to_wig.out.wig)
+    wig_to_bedgraph(fasta, bigwig_to_wig.out.wig)
+
+  emit:
+    bg = wig_to_bedgraph.out.bg
+    wig = bigwig_to_wig.out.wig
+    bed = dpos_wig_no_b.out.bed
+}
+
+process dpos_wig_no_b {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.dpos_out != "") {
+    publishDir "results/${params.dpos_out}", mode: 'copy', overwrite: true
+  }
+
+  input:
+    val fastq
+    tuple val(file_id), path(wig_ip)
+
+  output:
+    tuple val(file_id), path("${file_prefix}/*.positions.bed"), emit: bed
+
+  script:
+  switch(file_id) {
+    case {it instanceof List}:
+      file_prefix = file_id[0]
+      break
+    case {it instanceof Map}:
+      file_prefix = file_id.values()[0]
+      break
+    default:
+      file_prefix = file_id
+      break
+  }
+
+  m = 0
+  if (fastq[1].size() == 2){
+    m = 1
+  }
+"""
+danpos.py dpos -m ${m} \
+  ${params.dpos} \
+  -o ${file_prefix} \
+  ${wig_ip}
+mv ${file_prefix}/pooled/* ${file_prefix}/
+rm -R ${file_prefix}/pooled
+awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2-1, \$3, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${wig_ip.simpleName}.positions.xls > ${file_prefix}/${wig_ip.simpleName}.positions.bed
+"""
+}
+
+workflow dwig_bwvsbw {
+  take:
+    fasta
+    fastq
+    bw_a
+    bw_b
+  main:
+    dpos_wigvswig(
+      fastq,
+      bigwig2_to_wig2(bw_a),
+      bigwig2_to_wig2_b(bw_b),
+    )
+    wig_to_bedgraph(fasta, dpos_wigvswig.out.wig)
+
+  emit:
+    bg = wig_to_bedgraph.out.bg
+    wig = dpos_wigvswig.out.wig
+    bed = dpos_wigvswig.out.bed
+}
+
+process dpos_wigvswig {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id_a"
+  if (params.dpos_out != "") {
+    publishDir "results/${params.dpos_out}", mode: 'copy', overwrite: true
+  }
+
+  input:
+    val fastq
+    tuple val(file_id_a), path(wig_ip_a)
+    tuple val(file_id_b), path(wig_ip_b)
+
+  output:
+    tuple val(file_id_a), path("${file_prefix}/${wig_ip_a.simpleName}*.wig"), emit: wig
+    tuple val(file_id_a), path("${file_prefix}/*.positions.bed"), emit: bed
+
+  script:
+  switch(file_id_a) {
+    case {it instanceof List}:
+      file_prefix = file_id_a[0]
+      break
+    case {it instanceof Map}:
+      file_prefix = file_id_a.values()[0]
+      break
+    default:
+      file_prefix = file_id_a
+      break
+  }
+
+  m = 0
+  if (fastq[1].size() == 2){
+    m = 1
+  }
+"""
+danpos.py dpos -m ${m} \
+  ${params.dpos} \
+  -b ${wig_ip_a},${wig_ip_b} \
+  -o ${file_prefix} \
+  ${wig_ip_a}:${wig_ip_b}
+mv ${file_prefix}/pooled/* ${file_prefix}/
+rm -R ${file_prefix}/pooled
+awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2-1, \$3, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${wig_ip_a.simpleName}.positions.xls > ${file_prefix}/${wig_ip_a.simpleName}.positions.bed
+"""
+}
+
+params.dpeak = "--smooth_width 0 -n N "
+params.dpeak_out = ""
+
+process dpeak_bam {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.dpeak_out != "") {
+    publishDir "results/${params.dpeak_out}", mode: 'copy', overwrite: true
+  }
+
+  input:
+    val fastq
+    tuple val(file_id), path(bam_ip), path(bam_wce)
+
+  output:
+    tuple val(file_id), path("${file_prefix}/${bam_ip.simpleName}*.wig"), path("${file_prefix}/${bam_wce.simpleName}*.wig"), emit: wig
+    tuple val(file_id), path("${file_prefix}/*.positions.bed"), path("${file_prefix}/*.summit.bed"), emit: bed
+
+  script:
+  switch(file_id) {
+    case {it instanceof List}:
+      file_prefix = file_id[0]
+      break
+    case {it instanceof Map}:
+      file_prefix = file_id.values()[0]
+      break
+    default:
+      file_prefix = file_id
+      break
+  }
+
+  m = 0
+  if (fastq[1].size() == 2){
+    m = 1
+  }
+"""
+danpos.py dpeak -m ${m} \
+  ${params.dpeak} \
+  -b ${bam_wce} \
+  -o ${file_prefix} \
+  ${bam_ip}
+mv ${file_prefix}/pooled/* ${file_prefix}/
+rm -R ${file_prefix}/pooled
+awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2-1, \$3, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${bam_ip.simpleName}.bgsub.peaks.xls > ${file_prefix}/${bam_ip.simpleName}.bgsub.positions.bed
+awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$4-1, \$4, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${bam_ip.simpleName}.bgsub.peaks.xls > ${file_prefix}/${bam_ip.simpleName}.bgsub.positions.summit.bed
+"""
+}
+
+workflow dpeak_bw {
+  take:
+    fasta
+    fastq
+    bw
+  main:
+    dpeak_wig(fastq, bigwig2_to_wig2(bw))
+    wig2_to_bedgraph2(fasta, dpeak_wig.out.wig)
+
+  emit:
+    bg = wig2_to_bedgraph2.out.bg
+    wig = dpeak_wig.out.wig
+    bed = dpeak_wig.out.bed
+}
+
+
+process dpeak_wig {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.dpeak_out != "") {
+    publishDir "results/${params.dpeak_out}", mode: 'copy', overwrite: true
+  }
+
+  input:
+    val fastq
+    tuple val(file_id), path(wig_ip), path(wig_wce)
+
+  output:
+    tuple val(file_id), path("${file_prefix}/${wig_ip.simpleName}.bgsub.wig"), path("${file_prefix}/${wig_wce.simpleName}.wig"), emit: wig
+    tuple val(file_id), path("${file_prefix}/*.positions.bed"), path("${file_prefix}/*.summit.bed"), emit: bed
+
+  script:
+  switch(file_id) {
+    case {it instanceof List}:
+      file_prefix = file_id[0]
+      break
+    case {it instanceof Map}:
+      file_prefix = file_id.values()[0]
+      break
+    default:
+      file_prefix = file_id
+      break
+  }
+
+  m = 0
+  if (fastq[1].size() == 2){
+    m = 1
+  }
+"""
+danpos.py dpeak -m ${m} \
+  ${params.dpeak} \
+  -b ${wig_wce} \
+  -o ${file_prefix} \
+  ${wig_ip}
+mv ${file_prefix}/pooled/* ${file_prefix}/
+rm -R ${file_prefix}/pooled
+awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2-1, \$3, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${wig_ip.simpleName}.bgsub.peaks.xls > ${file_prefix}/${wig_ip.simpleName}.bgsub.positions.bed
+awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$4-1, \$4, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${wig_ip.simpleName}.bgsub.peaks.xls > ${file_prefix}/${wig_ip.simpleName}.bgsub.positions.summit.bed
+"""
+}
+
+workflow dpeak_bwvsbw {
+  take:
+    fasta
+    fastq
+    bw_a
+    bw_b
+  main:
+    dpeak_wigvswig(
+      fastq,
+      bigwig2_to_wig2(bw_a),
+      bigwig2_to_wig2_b(bw_b),
+    )
+    wig2_to_bedgraph2(fasta, dpeak_wigvswig.out.wig)
+
+  emit:
+    bg = wig2_to_bedgraph2.out.bg
+    wig = dpeak_wigvswig.out.wig
+    bed = dpeak_wigvswig.out.bed
+}
+
+
+process dpeak_wigvswig {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id_a"
+  if (params.dpeak_out != "") {
+    publishDir "results/${params.dpeak_out}", mode: 'copy', overwrite: true
+  }
+
+  input:
+    val fastq
+    tuple val(file_id_a), path(wig_ip_a), path(wig_wce_a)
+    tuple val(file_id_b), path(wig_ip_b), path(wig_wce_b)
+
+  output:
+    tuple val(file_id_a), path("${file_prefix}/${wig_ip_a.simpleName}.bgsub.wig"), path("${file_prefix}/${wig_wce_a.simpleName}.wig"), emit: wig
+    tuple val(file_id_a), path("${file_prefix}/*.positions.bed"), path("${file_prefix}/*.summit.bed"), emit: bed
+
+  script:
+  switch(file_id_a) {
+    case {it instanceof List}:
+      file_prefix = file_id_a[0]
+      break
+    case {it instanceof Map}:
+      file_prefix = file_id_a.values()[0]
+      break
+    default:
+      file_prefix = file_id_a
+      break
+  }
+
+  m = 0
+  if (fastq[1].size() == 2){
+    m = 1
+  }
+"""
+danpos.py dpeak -m ${m} \
+  ${params.dpeak} \
+  -b ${wig_ip_a}:${wig_wce_a},${wig_ip_b}:${wig_wce_b} \
+  -o ${file_prefix} \
+  ${wig_ip_a}:${wig_ip_b}
+mv ${file_prefix}/pooled/* ${file_prefix}/
+rm -R ${file_prefix}/pooled
+awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2-1, \$3, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${wig_ip_a.simpleName}.bgsub.peaks.xls > ${file_prefix}/${wig_ip_a.simpleName}.bgsub.positions.bed
+awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$4-1, \$4, "Interval_"NR-1, \$6, "+" }' ${file_prefix}/${wig_ip_a.simpleName}.bgsub.peaks.xls > ${file_prefix}/${wig_ip_a.simpleName}.bgsub.positions.summit.bed
+"""
+}
\ No newline at end of file
diff --git a/src/nf_modules/deeptools/main.nf b/src/nf_modules/deeptools/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..97e4027de2f91930c5fd227c29b0b563b9f3c027
--- /dev/null
+++ b/src/nf_modules/deeptools/main.nf
@@ -0,0 +1,106 @@
+version = "3.5.1"
+container_url = "lbmc/deeptools:${version}"
+
+params.index_bam = ""
+params.index_bam_out = ""
+process index_bam {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.index_bam_out != "") {
+    publishDir "results/${params.index_bam_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("${bam}"), path("*.bam*"), emit: bam_idx
+
+  script:
+"""
+sambamba index -t ${task.cpus} ${bam}
+"""
+}
+
+params.bam_to_bigwig = ""
+params.bam_to_bigwig_out = ""
+process bam_to_bigwig {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.bam_to_bigwig_out != "") {
+    publishDir "results/${params.bam_to_bigwig_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam), path(idx)
+
+  output:
+    tuple val(file_id), path("*.bw"), emit: bw
+
+  script:
+"""
+bamCoverage -p ${task.cpus} --ignoreDuplicates -b ${bam} \
+  -o ${bam.simpleName}.bw
+"""
+}
+
+params.compute_matrix = ""
+params.compute_matrix_out = ""
+process compute_matrix {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "${bed_file_id}"
+  if (params.compute_matrix_out != "") {
+    publishDir "results/${params.compute_matrix_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bw)
+    tuple val(bed_file_id), path(bed)
+
+  output:
+    tuple val(bed_file_id), path("*.mat.gz"), emit: matrix
+
+  script:
+"""
+computeMatrix scale-regions -S ${bw} \
+  -p ${task.cpus} \
+  -R ${bed} \
+  --beforeRegionStartLength 100 \
+  --afterRegionStartLength 100 \
+  -o ${bed.simpleName}.mat.gz
+"""
+}
+
+params.title = ""
+params.plot_profile = ""
+params.plot_profile_out = ""
+process plot_profile {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.plot_profile_out != "") {
+    publishDir "results/${params.plot_profile_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(matrix)
+
+  output:
+    tuple val(file_id), path("*.pdf"), emit: pdf
+
+  script:
+/*
+see more option at
+https://deeptools.readthedocs.io/en/develop/content/tools/plotProfile.html
+*/
+"""
+plotProfile -m ${matrix} \
+  --plotFileFormat=pdf \
+  -out ${matrix.simpleName}.pdf \
+  --plotType=fill \
+  --perGroup \
+  --plotTitle "${params.title}"
+"""
+}
diff --git a/src/nf_modules/emase-zero/main.nf b/src/nf_modules/emase-zero/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..87c5020d38950cbfe8618aa2fc9ed839492e6aac
--- /dev/null
+++ b/src/nf_modules/emase-zero/main.nf
@@ -0,0 +1,54 @@
+version = "0.3.1"
+container_url = "lbmc/emase-zero:${version}"
+
+include { g2tr } from "./../kb/main.nf"
+include { bam2ec } from "./../alntools/main.nf"
+include { fasta_to_transcripts_lengths } from "./../bioawk/main.nf"
+
+
+params.count = "-m 2"
+params.count_out = ""
+workflow count {
+  take:
+    bam_idx
+    fasta
+    gtf
+
+  main:
+    g2tr(gtf)
+    fasta_to_transcripts_lengths(fasta)
+    bam2ec(bam_idx, fasta_to_transcripts_lengths.out.tsv.collect())
+    emase(bam2ec.out.bin, fasta.collect(), bam2ec.out.tsv, g2tr.out.g2t.collect())
+
+  emit:
+    count = emase.out.count
+}
+
+process emase {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.count_out != "") {
+    publishDir "results/${params.count_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bin)
+    tuple val(fasta_id), path(fasta)
+    tuple val(transcript_length_id), path(transcript_length)
+    tuple val(gene_to_transcript_id), path(gene_to_transcript)
+
+  output:
+    tuple val(file_id), path("${bin.simpleName}.quantified*"), emit: count
+    path "*_report.txt", emit: report
+
+  script:
+"""
+grep ">" ${fasta} | sed 's/>//' > tr_list.txt
+emase-zero ${params.count} \
+  -o ${bin.simpleName}.quantified \
+  -l ${transcript_length} \
+  -g ${gene_to_transcript} \
+  ${bin} &> ${file_id}_emase-zero_report.txt
+"""
+}
\ No newline at end of file
diff --git a/src/nf_modules/emase/main.nf b/src/nf_modules/emase/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..4388aad7656b1d648ac53398b3f99de1791b387a
--- /dev/null
+++ b/src/nf_modules/emase/main.nf
@@ -0,0 +1,24 @@
+version = "0.10.16"
+container_url = "lbmc/emase:${version}"
+
+params.diploid_genome = "-x"
+params.diploid_genome_out = ""
+process diploid_genome {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${genome_a}-${genome_b}"
+  if (params.diploid_genome_out != "") {
+    publishDir "results/${params.diploid_genome_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(genome_a), path(fasta_a), val(genome_b), path(fasta_b)
+
+  output:
+    tuple val("${genome_a}_${genome_b}"), path("*.fa"), emit: fasta
+
+  script:
+"""
+prepare-emase -G ${fasta_a},${fasta_b} -s ${genome_a},${genome_b} ${params.diploid_genome}
+"""
+}
\ No newline at end of file
diff --git a/src/nf_modules/fastp/main.nf b/src/nf_modules/fastp/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..92ee81fc37f6369e5da2b68dff18f9256f2fca48
--- /dev/null
+++ b/src/nf_modules/fastp/main.nf
@@ -0,0 +1,154 @@
+version = "0.20.1"
+container_url = "lbmc/fastp:${version}"
+
+params.fastp_protocol = ""
+
+params.fastp = ""
+params.fastp_out = ""
+workflow fastp {
+  take:
+    fastq
+
+  main:
+    switch(params.fastp_protocol) {
+      case "accel_1splus":
+        fastp_accel_1splus(fastq)
+        fastp_accel_1splus.out.fastq.set{res_fastq}
+        fastp_accel_1splus.out.report.set{res_report}
+        break;
+      default:
+        fastp_default(fastq)
+        fastp_default.out.fastq.set{res_fastq}
+        fastp_default.out.report.set{res_report}
+        break;
+    }
+  emit:
+    fastq = res_fastq
+    report = res_report
+}
+
+process fastp_default {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_prefix"
+  if (params.fastp_out != "") {
+    publishDir "results/${params.fastp_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(reads)
+
+  output:
+    tuple val(file_id), path("*_trim.fastq.gz"), emit: fastq
+    tuple val(file_id),
path("${file_prefix}.html"), emit: html + tuple val(file_id), path("${file_prefix}_fastp.json"), emit: report + + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } + + if (reads.size() == 2) + """ + fastp --thread ${task.cpus} \ + --disable_quality_filtering \ + --disable_length_filtering \ + --disable_trim_poly_g \ + --detect_adapter_for_pe \ + --stdout \ + --in1 ${reads[0]} \ + --in2 ${reads[1]} 2> /dev/null | \ + fastp --thread ${task.cpus} \ + --stdin \ + --interleaved_in \ + --trim_front1=10 \ + --trim_front2=10 \ + --disable_adapter_trimming \ + --qualified_quality_phred 20 \ + --disable_length_filtering \ + --detect_adapter_for_pe \ + ${params.fastp} \ + --out1 ${file_prefix}_R1_trim.fastq.gz \ + --out2 ${file_prefix}_R2_trim.fastq.gz \ + --html ${file_prefix}.html \ + --json ${file_prefix}_fastp.json \ + --report_title ${file_prefix} + """ + else + """ + fastp --thread ${task.cpus} \ + --disable_quality_filtering \ + --disable_length_filtering \ + --disable_trim_poly_g \ + --detect_adapter_for_pe \ + --stdout \ + --in1 ${reads[0]} 2> /dev/null | \ + fastp --thread ${task.cpus} \ + --disable_adapter_trimming \ + --stdin \ + --trim_front1=10 \ + --qualified_quality_phred 20 \ + --disable_length_filtering \ + --detect_adapter_for_pe \ + ${params.fastp} \ + --out1 ${file_prefix}_trim.fastq.gz \ + --html ${file_prefix}.html \ + --json ${file_prefix}_fastp.json \ + --report_title ${file_prefix} + """ +} + diff --git a/src/nf_modules/fastqc/main.nf b/src/nf_modules/fastqc/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..da0c7bc7c952a7c4161751eef34fcfcf1882d2bd --- /dev/null +++ b/src/nf_modules/fastqc/main.nf @@ -0,0 +1,31 @@ +version = "0.11.5" +container_url = "lbmc/fastqc:${version}" + +params.fastqc_fastq = "" +params.fastqc_fastq_out = "" +process fastqc_fastq { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.fastqc_fastq_out != "") { + publishDir "results/${params.fastqc_fastq_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(reads) + + output: + tuple val(file_id), path("*.{zip,html}"), emit: report + + script: + if (reads.size() == 2) + """ + fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \ + ${params.fastqc_fastq} \ + ${reads[0]} ${reads[1]} + """ + else + """ + fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${params.fastqc_fastq} ${reads[0]} + """ +} \ No newline at end of file diff --git a/src/nf_modules/flexi_splitter/main.nf b/src/nf_modules/flexi_splitter/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..753ef1be2fd8586645c09ddde2d29342b7e5c103 --- /dev/null +++ b/src/nf_modules/flexi_splitter/main.nf @@ -0,0 +1,82 @@ +version = "1.0.2" +container_url = "lbmc/flexi_splitter:${version}" + +params.split = "" +params.split_out = "" + + +workflow split { + take: + reads + config + main: + split_fastq(reads, config) + group_fastq(split_fastq.out.fastq_folder) + group_fastq.out.fastq + .map{ it -> it[1] } + .flatten() + .collate(2) + .map{ it -> [it[0].simpleName - ~/_{0,1}R[12]/, it]} + .set{ splited_fastq } + + emit: + fastq = splited_fastq +} + +process split_fastq { + // You can get an example of config file here: + // src/nf_modules/flexi_splitter/marseq_flexi_splitter.yaml + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.split_out != "") { + publishDir "results/${params.split_out}", mode: 'copy' + } + + input: + tuple val(file_id), 
path(reads) + tuple val(config_id), path(config) + + output: + tuple val(file_id), path("split"), emit: fastq_folder + + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } + + if (reads.size() == 2) + """ + flexi_splitter ${params.split} -n 2 -f ${reads[0]},${reads[1]} -o split -c ${config} + """ + else + """ + flexi_splitter ${params.split} -n 1 -f ${reads[0]} -o split -c ${config} + """ +} + +process group_fastq { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.split_out != "") { + publishDir "results/${params.split_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(reads_folder) + + output: + tuple val(file_id), path("results/*"), emit: fastq + + script: +""" +mkdir -p results/ +find split/ -type "f" | \ + grep -v "unassigned" | \ + sed -E "s|(split/(.*)/(.*))|\\1 \\2_\\3|g" | + awk '{system("mv "\$1" results/"\$2)}' +""" +} \ No newline at end of file diff --git a/src/nf_modules/flexi_splitter/marseq_flexi_splitter.yaml b/src/nf_modules/flexi_splitter/marseq_flexi_splitter.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72f46d2dc1833e1fab00b734aefadd55a70b44c1 --- /dev/null +++ b/src/nf_modules/flexi_splitter/marseq_flexi_splitter.yaml @@ -0,0 +1,41 @@ +PLATE: + coords: + reads: 0 + start: 1 + stop: 4 + header: False + samples: + - name : Plate1 + seq: GACT + - name : Plate2 + seq: CATG + - name : Plate3 + seq: CCAA + - name : Plate4 + seq: CTGT + - name : Plate5 + seq: GTAG + - name : Plate6 + seq: TGAT + - name : Plate7 + seq: ATCA + - name : Plate8 + seq: TAGA + +conditions: + - Plate1 : + Plate1 + - Plate2 : + Plate2 + - Plate3 : + Plate3 + - Plate4 : + Plate4 + - Plate5 : + Plate5 + - Plate6 : + Plate6 + - Plate7 : + Plate7 + - Plate8 : + Plate8 diff --git a/src/nf_modules/flexi_splitter/toy_file_paired.yaml b/src/nf_modules/flexi_splitter/toy_file_paired.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dec6e8c4df34121693f591fd68f49dc8f787e7d4 --- /dev/null +++ b/src/nf_modules/flexi_splitter/toy_file_paired.yaml @@ -0,0 +1,50 @@ +PCR: + coords: + reads: 3 + start: 1 + stop: 6 + header: False + samples: + - name : PCR1 + seq: NCAGTG + - name : PCR2 + seq : CGATGT + - name : PCR3 + seq: TTAGGC + - name : PCR4 + seq : TGACCA + - name: PCR5 + seq: NGAACG + - name: PCR6 + seq: NCAACA +RT: + coords: + reads: 1 + start: 6 + stop: 13 + header: False + samples: + - name : RT1 + seq: TAGTGCC + - name : RT2 + seq: GCTACCC + - name: RT3 + seq: ATCGACC + - name: RT4 + seq: CGACTCC +UMI: + coords: + reads: 1 + start: 1 + stop: 6 + header: False +conditions: + wt: + - RT1 + - PCR1 + ko: + - RT2 + - PCR2 + sample_paired: + - RT2 + - PCR6 diff --git a/src/nf_modules/g2gtools/main.nf b/src/nf_modules/g2gtools/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..15af58850fac947d903eec5b3459a4eb90b8b09d --- /dev/null +++ b/src/nf_modules/g2gtools/main.nf @@ -0,0 +1,196 @@ +version = "0.2.8" +container_url = "lbmc/g2gtools:${version}" + +params.vci_build = "" +params.vci_build_out = "" +process vci_build { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.vci_build_out != "") { + publishDir "results/${params.vci_build_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta) + output: + tuple val(file_id), path("*.vci.gz"), path("*.vci.gz.tbi"), emit: vci + tuple val(file_id), path("*_report.txt"), emit: report + script: + if (file_id 
instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } + + input_vcf = "" + for (vcf_file in vcf) { + input_vcf += " -i ${vcf_file}" + } +""" +g2gtools vcf2vci \ + ${params.vci_build} \ + -p ${task.cpus} \ + -f ${fasta} \ + ${input_vcf} \ + -s ${file_prefix} \ + -o ${file_prefix}.vci 2> ${file_prefix}_g2gtools_vcf2vci_report.txt +""" +} + +params.incorporate_snp = "" +params.incorporate_snp_out = "" +process incorporate_snp { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.incorporate_snp_out != "") { + publishDir "results/${params.incorporate_snp_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vci), path(tbi) + tuple val(ref_id), path(fasta) + output: + tuple val(file_id), path("${file_prefix}_snp.fa"), path("${vci}"), path("${tbi}"), emit: fasta + tuple val(file_id), path("*_report.txt"), emit: report + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } +""" +g2gtools patch \ + ${params.incorporate_snp} \ + -p ${task.cpus} \ + -i ${fasta} \ + -c ${vci} \ + -o ${file_prefix}_snp.fa 2> ${file_prefix}_g2gtools_patch_report.txt +""" +} + +params.incorporate_indel = "" +params.incorporate_indel_out = "" +process incorporate_indel { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.incorporate_indel_out != "") { + publishDir "results/${params.incorporate_indel_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(fasta), path(vci), path(tbi) + output: + tuple val(file_id), path("${file_prefix}_snp_indel.fa"), path("${vci}"), path("${tbi}"), emit: fasta + tuple val(file_id), path("*_report.txt"), emit: report + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } +""" +g2gtools transform \ + ${params.incorporate_indel} \ + -p ${task.cpus} \ + -i ${fasta} \ + -c ${vci} \ + -o ${file_prefix}_snp_indel.fa 2> ${file_prefix}_g2gtools_transform_report.txt +""" +} + +params.convert_gtf = "" +params.convert_gtf_out = "" +process convert_gtf { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.convert_gtf_out != "") { + publishDir "results/${params.convert_gtf_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vci), path(tbi) + tuple val(annot_id), path(gtf) + output: + tuple val(file_id), path("${file_prefix}.gtf"), emit: gtf + tuple val(file_id), path("*_report.txt"), emit: report + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } +""" +g2gtools convert \ + ${params.convert_gtf} \ + -i ${gtf} \ + -c ${vci} \ + -o ${file_prefix}.gtf 2> ${file_prefix}_g2gtools_convert_report.txt +""" +} + +params.convert_bed = "" +params.convert_bed_out = "" +process convert_bed { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.convert_bed_out != "") { + publishDir "results/${params.convert_bed_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vci), path(tbi) + tuple val(annot_id), path(bed) + output: + tuple val(file_id), path("${file_id}.bed"), emit: bed + tuple val(file_id), path("*_report.txt"), emit: report + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } +""" +g2gtools convert \ + ${params.convert_bed} \ + -i ${bed} \ + -c ${vci} \ + -o ${file_id}.bed 2> ${file_id}_g2gtools_convert_report.txt +""" +} + +params.convert_bam = "" +params.convert_bam_out = ""
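+ +// A hedged usage sketch, not a tested pipeline: one way the g2gtools processes +// above could be chained to build a strain-specific genome and annotation. The +// channel names (vcf_files, genome_fasta, annotation_gtf) are hypothetical. +/* +include { vci_build; incorporate_snp; incorporate_indel; convert_gtf } from "./src/nf_modules/g2gtools/main.nf" + +workflow strain_genome { + vci_build(vcf_files, genome_fasta) // VCF(s) -> VCI index + incorporate_snp(vci_build.out.vci, genome_fasta) // patch SNPs into the fasta + incorporate_indel(incorporate_snp.out.fasta) // then transform indels + convert_gtf(vci_build.out.vci, annotation_gtf) // lift the GTF onto the new coordinates +} +*/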
+process convert_bam { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "${bam_id} ${file_id}" + if (params.convert_bam_out != "") { + publishDir "results/${params.convert_bam_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vci), path(tbi) + tuple val(bam_id), path(bam) + output: + tuple val(file_id), path("${file_id}_${bam.baseName}.bam"), emit: bam + tuple val(file_id), path("*_report.txt"), emit: report + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } +""" +g2gtools convert \ + ${params.convert_bam} \ + -i ${bam} \ + -c ${vci} \ + -o ${file_id}_${bam.baseName}.bam 2> ${file_id}_g2gtools_convert_report.txt +""" +} \ No newline at end of file diff --git a/src/nf_modules/gatk3/main.nf b/src/nf_modules/gatk3/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..35a1ab7a479b41da6480535364c5caeca1c12e34 --- /dev/null +++ b/src/nf_modules/gatk3/main.nf @@ -0,0 +1,385 @@ +version = "3.8.0" +container_url = "lbmc/gatk:${version}" + +params.variant_calling = "" +params.variant_calling_out = "" +process variant_calling { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.variant_calling_out != "") { + publishDir "results/${params.variant_calling_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(bam), path(bai) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*.vcf"), emit: vcf + + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } +""" +gatk3 -T HaplotypeCaller \ + -nct ${task.cpus} \ + ${params.variant_calling} \ + -R ${fasta} \ + -I ${bam} \ + -o ${file_prefix}.vcf +""" +} + +params.filter_snp = "" +params.filter_snp_out = "" +process filter_snp { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.filter_snp_out != "") { + publishDir "results/${params.filter_snp_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_snp.vcf"), emit: vcf + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } +""" +gatk3 -T SelectVariants \ + -nct ${task.cpus} \ + ${params.filter_snp} \ + -R ${fasta} \ + -V ${vcf} \ + -selectType SNP \ + -o ${file_prefix}_snp.vcf +""" +} + +params.filter_indels = "" +params.filter_indels_out = "" +process filter_indels { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.filter_indels_out != "") { + publishDir "results/${params.filter_indels_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_indel.vcf"), emit: vcf + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } +""" +gatk3 -T SelectVariants \ + -nct ${task.cpus} \ + ${params.filter_indels} \ + -R ${fasta} \ + -V ${vcf} \ + -selectType INDEL \ + -o ${file_prefix}_indel.vcf +""" +} + +params.high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)" +params.high_confidence_snp = "--filterExpression \"${params.high_confidence_snp_filter}\" --filterName \"basic_snp_filter\"" +params.high_confidence_snp_out = "" +process high_confidence_snp { + container = "${container_url}" + label
"big_mem_multi_cpus" + tag "$file_id" + if (params.high_confidence_snp_out != "") { + publishDir "results/${params.high_confidence_snp_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_snp.vcf"), emit: vcf + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } +""" +gatk3 -T VariantFiltration \ + -nct ${task.cpus} \ + -R ${fasta} \ + -V ${vcf} \ + ${params.high_confidence_snp} \ + -o ${file_prefix}_filtered_snp.vcf +""" +} + +params.high_confidence_indel_filter = "QD < 3.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0" +params.high_confidence_indels = "--filterExpression \"${params.high_confidence_indel_filter}\" --filterName \"basic_indel_filter\"" +params.high_confidence_indels_out = "" +process high_confidence_indels { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.high_confidence_indels_out != "") { + publishDir "results/${params.high_confidence_indels_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_indel.vcf"), emit: vcf + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } +""" +gatk3 -T VariantFiltration \ + -nct ${task.cpus} \ + -R ${fasta} \ + -V ${vcf} \ + ${params.high_confidence_indels} \ + -o ${file_prefix}_filtered_indel.vcf +""" +} + +params.recalibrate_snp_table = "" +params.recalibrate_snp_table_out = "" +process recalibrate_snp_table { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.recalibrate_snp_table_out != "") { + publishDir "results/${params.recalibrate_snp_table_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("recal_data_table"), emit: recal_table + script: +""" +gatk3 -T BaseRecalibrator \ + -nct ${task.cpus} \ + ${recalibrate_snp_table} \ + -R ${fasta} \ + -I ${bam} \ + -knownSites ${snp_file} \ + -knownSites ${indel_file} \ + -o recal_data_table +""" +} + +params.recalibrate_snp = "" +params.recalibrate_snp_out = "" +process recalibrate_snp { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.recalibrate_snp_out != "") { + publishDir "results/${params.recalibrate_snp_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx) + tuple val(table_id), path(recal_data_table) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*.bam"), emit: bam + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } +""" +gatk3 -T PrintReads \ + --use_jdk_deflater \ + --use_jdk_inflater \ + ${recalibrate_snp} \ + -nct ${task.cpus} \ + -R ${fasta} \ + -I ${bam} \ + -BQSR recal_data_table \ + -o ${file_prefix}_recal.bam +""" +} + +params.haplotype_caller = "" +params.haplotype_caller_out = "" +process haplotype_caller { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.haplotype_caller_out != "") { + publishDir "results/${params.haplotype_caller_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(bam) + tuple val(ref_id), path(fasta), path(fai), path(dict) + 
output: + tuple val(file_id), path("*.gvcf"), emit: gvcf + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } +""" +gatk3 -T HaplotypeCaller \ + -nct ${task.cpus} \ + ${params.haplotype_caller} \ + -R ${fasta} \ + -I ${bam} \ + -ERC GVCF \ + -variant_index_type LINEAR -variant_index_parameter 128000 \ + -o ${file_prefix}.gvcf +""" +} + +params.gvcf_genotyping = "" +params.gvcf_genotyping_out = "" +process gvcf_genotyping { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.gvcf_genotyping_out != "") { + publishDir "results/${params.gvcf_genotyping_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(gvcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*.vcf"), emit: vcf + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } +""" +gatk3 -T GenotypeGVCFs \ + -nct ${task.cpus} \ + ${params.gvcf_genotyping} \ + -R ${fasta} \ + -V ${gvcf} \ + -o ${file_prefix}_joint.vcf +""" +} + +params.select_variants_snp = "" +params.select_variants_snp_out = "" +process select_variants_snp { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.select_variants_snp_out != "") { + publishDir "results/${params.select_variants_snp_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_joint_snp.vcf"), emit: vcf + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } +""" +gatk3 -T SelectVariants \ + -nct ${task.cpus} \ + ${params.select_variants_snp} \ + -R ${fasta} \ + -V ${vcf} \ + -selectType SNP \ + -o ${file_prefix}_joint_snp.vcf +""" +} + +params.select_variants_indels = "" +params.select_variants_indels_out = "" +process select_variants_indels { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.select_variants_indels_out != "") { + publishDir "results/${params.select_variants_indels_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_joint_indel.vcf"), emit: vcf + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } +""" +gatk3 -T SelectVariants \ + -nct ${task.cpus} \ + ${params.select_variants_indels} \ + -R ${fasta} \ + -V ${vcf} \ + -selectType INDEL \ + -o ${file_prefix}_joint_indel.vcf +""" +} + +params.personalized_genome = "" +params.personalized_genome_out = "" +process personalized_genome { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.personalized_genome_out != "") { + publishDir "results/${params.personalized_genome_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_genome.fasta"), emit: fasta + + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } +""" +gatk3 -T FastaAlternateReferenceMaker \ + ${params.personalized_genome} \ + -R ${fasta} \ + -V ${vcf} \ + -o ${file_prefix}_genome.fasta +""" +} + diff --git a/src/nf_modules/gatk4/main.nf b/src/nf_modules/gatk4/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..885b3211f0586cdb7fa52a307b090bc3328b739f --- 
/dev/null +++ b/src/nf_modules/gatk4/main.nf @@ -0,0 +1,659 @@ +version = "4.2.0.0" +container_url = "broadinstitute/gatk:${version}" + +def get_file_prefix(file_id) { + if (file_id instanceof List){ + file_prefix = file_id[0] + } else if (file_id instanceof Map) { + library = file_id[0] + file_prefix = file_id[0] + if (file_id.containsKey('library')) { + library = file_id.library + file_prefix = file_id.id + } + } else { + file_prefix = file_id + } + return file_prefix +} + +include { + index_fasta as samtools_index_fasta; + index_bam; +} from './../samtools/main.nf' +include { + index_fasta as picard_index_fasta; + index_bam as picard_index_bam; + mark_duplicate; +} from './../picard/main.nf' + +params.variant_calling_out = "" +workflow germline_cohort_data_variant_calling { + take: + bam + fasta + main: + // data preparation + mark_duplicate(bam) + index_bam(mark_duplicate.out.bam) + picard_index_bam(mark_duplicate.out.bam) + index_bam.out.bam_idx + .join(picard_index_bam.out.index) + .set{ bam_idx } + picard_index_fasta(fasta) + samtools_index_fasta(fasta) + fasta + .join(picard_index_fasta.out.index) + .join(samtools_index_fasta.out.index) + .set{ fasta_idx } + + // variant calling + call_variants_per_sample( + bam_idx, + fasta_idx.collect() + ) + call_variants_all_sample( + call_variants_per_sample.out.gvcf, + fasta_idx + ) + emit: + vcf = call_variants_all_sample.out.vcf +} + +/*******************************************************************/ +workflow base_quality_recalibrator{ + take: + bam_idx + fasta_idx + vcf + + main: + index_vcf(vcf) + compute_base_recalibration( + bam_idx, + fasta_idx, + index_vcf.out.vcf_idx + ) + apply_base_recalibration( + bam_idx, + fasta_idx, + compute_base_recalibration.out.table + ) + emit: + bam = apply_base_recalibration.out.bam +} + +process index_vcf { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + input: + tuple val(file_id), path(vcf) + output: + tuple val(file_id), path("${vcf}"), path("*"), emit: vcf_idx + + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) +""" +gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \ + -I ${vcf} +""" +} + +process compute_base_recalibration { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + input: + tuple val(file_id), path(bam), path(bam_idx), path(bam_idx_bis) + tuple val(ref_id), path(fasta), path(fai), path(dict) + tuple val(vcf_id), path(vcf), path(vcf_idx) + output: + tuple val(file_id), path("${bam.simpleName}.table"), emit: table + + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) + def vcf_cmd = "" + if (vcf instanceof List){ + for (vcf_file in vcf){ + vcf_cmd += "--known-sites ${vcf_file} " + } + } else { + vcf_cmd = "--known-sites ${vcf} " + } +""" + gatk --java-options "-Xmx${xmx_memory}G" BaseRecalibrator \ + -I ${bam} \ + -R ${fasta} \ + ${vcf_cmd} \ + -O ${bam.simpleName}.table +""" +} + +process apply_base_recalibration { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + input: + tuple val(file_id), path(bam), path(bam_idx), path(bam_idx_bis) + tuple val(ref_id), path(fasta), path(fai), path(dict) + tuple val(table_id), path(table) + output: + tuple val(file_id), path("${bam.simpleName}_recalibrate.bam"), emit: bam + + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) +""" + gatk --java-options "-Xmx${xmx_memory}G" ApplyBQSR \ + -R ${fasta} \ + -I ${bam} \ + 
--bqsr-recal-file ${table} \ + -O ${bam.simpleName}_recalibrate.bam +""" +} + +/*******************************************************************/ +params.variant_calling_gvcf_out = "" +process call_variants_per_sample { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.variant_calling_gvcf_out != "") { + publishDir "results/${params.variant_calling_gvcf_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(bam), path(bam_idx), path(bam_idx_bis) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("${bam.simpleName}.gvcf.gz"), emit: gvcf + + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) +""" + gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \ + -R ${fasta} \ + -I ${bam} \ + -O ${bam.simpleName}.gvcf.gz \ + -ERC GVCF +""" +} + +/*******************************************************************/ + +workflow call_variants_all_sample { + take: + gvcf + fasta_idx + + main: + index_gvcf(gvcf) + validate_gvcf( + index_gvcf.out.gvcf_idx, + fasta_idx.collect() + ) + consolidate_gvcf( + validate_gvcf.out.gvcf + .groupTuple(), + fasta_idx.collect() + ) + genomic_db_call( + consolidate_gvcf.out.gvcf_idx, + fasta_idx.collect() + ) + emit: + vcf = genomic_db_call.out.vcf +} + +process index_gvcf { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + input: + tuple val(file_id), path(gvcf) + output: + tuple val(file_id), path("${gvcf}"), path("${gvcf}.tbi"), emit: gvcf_idx + tuple val(file_id), path("${gvcf.simpleName}_IndexFeatureFile_report.txt"), emit: report + + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) +""" +gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \ + -I ${gvcf} 2> ${gvcf.simpleName}_IndexFeatureFile_report.txt +""" +} + +process validate_gvcf { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + input: + tuple val(file_id), path(gvcf), path(gvcf_idx) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("${gvcf}"), path("${gvcf_idx}"), emit: gvcf + + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) +""" +gatk --java-options "-Xmx${xmx_memory}G" ValidateVariants \ + -V ${gvcf} \ + -R ${fasta} -gvcf +""" +} + +process consolidate_gvcf { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + input: + tuple val(file_id), path(gvcf), path(gvcf_idx) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("${file_prefix}.gvcf"), path("${file_prefix}.gvcf.idx"), emit: gvcf_idx + tuple val(file_id), path("${file_prefix}_CombineGVCFs_report.txt"), emit: report + + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) + def gvcf_cmd = "" + if (gvcf instanceof List){ + for (gvcf_file in gvcf){ + gvcf_cmd += "-V ${gvcf_file} " + } + } else { + gvcf_cmd = "-V ${gvcf} " + } +""" +mkdir tmp +gatk --java-options "-Xmx${xmx_memory}G" CombineGVCFs \ + ${gvcf_cmd} \ + -R ${fasta} \ + -O ${file_prefix}.gvcf 2> ${file_prefix}_CombineGVCFs_report.txt +gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \ + -I ${file_prefix}.gvcf 2> ${file_prefix}_IndexFeatureFile_report.txt +""" +} + +process genomic_db_call { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.variant_calling_out != "") { + publishDir 
"results/${params.variant_calling_out}", mode: 'copy' + } + input: + tuple val(file_id), path(gvcf), path(gvcf_idx) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("${gvcf.simpleName}.vcf.gz"), emit: vcf + + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) + def gvcf_cmd = "" + if (gvcf instanceof List){ + for (gvcf_file in gvcf){ + gvcf_cmd += "--V ${gvcf_file} " + } + } else { + gvcf_cmd = "--V ${gvcf} " + } +""" +mkdir tmp +gatk --java-options "-Xmx${xmx_memory}G" GenotypeGVCFs \ + -R ${fasta} \ + -V ${gvcf} \ + -O ${gvcf.simpleName}.vcf.gz \ + --tmp-dir ./tmp +""" +} + +/*******************************************************************/ +params.variant_calling = "" +process variant_calling { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.variant_calling_out != "") { + publishDir "results/${params.variant_calling_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(bam), path(bai) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*.vcf"), emit: vcf + + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) +""" +gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \ + ${params.variant_calling} \ + -R ${fasta} \ + -I ${bam} \ + -O ${bam.simpleName}.vcf +""" +} + +params.filter_snp = "" +params.filter_snp_out = "" +process filter_snp { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.filter_snp_out != "") { + publishDir "results/${params.filter_snp_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_snp.vcf"), emit: vcf + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) +""" +gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ + ${params.filter_snp} \ + -R ${fasta} \ + -V ${vcf} \ + -select-type SNP \ + -O ${vcf.simpleName}_snp.vcf +""" +} + +params.filter_indels = "" +params.filter_indels_out = "" +process filter_indels { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.filter_indels_out != "") { + publishDir "results/${params.filter_indels_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_indel.vcf"), emit: vcf + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) +""" +gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ + ${params.filter_indels} \ + -R ${fasta} \ + -V ${vcf} \ + -select-type INDEL \ + -O ${vcf.simpleName}_indel.vcf +""" +} + +params.high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)" +params.high_confidence_snp = "--filter-expression \"${params.high_confidence_snp_filter}\" --filter-name \"basic_snp_filter\"" +params.high_confidence_snp_out = "" +process high_confidence_snp { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.high_confidence_snp_out != "") { + publishDir "results/${params.high_confidence_snp_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_snp.vcf"), emit: vcf + script: + xmx_memory = 
"${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) +""" +gatk --java-options "-Xmx${xmx_memory}G" VariantFiltration \ + -R ${fasta} \ + -V ${vcf} \ + ${params.high_confidence_snp} \ + -O ${vcf.simpleName}_filtered_snp.vcf +""" +} + +params.high_confidence_indel_filter = "QD < 3.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0" +params.high_confidence_indels = "--filter-expression \"${params.high_confidence_indel_filter}\" --filter-name \"basic_indel_filter\"" +params.high_confidence_indels_out = "" +process high_confidence_indels { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.high_confidence_indels_out != "") { + publishDir "results/${params.high_confidence_indels_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_indel.vcf"), emit: vcf + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) +""" +gatk --java-options "-Xmx${xmx_memory}G" VariantFiltration \ + -R ${fasta} \ + -V ${vcf} \ + ${params.high_confidence_indels} \ + -O ${vcf.simpleName}_filtered_indel.vcf +""" +} + +params.recalibrate_snp_table = "" +params.recalibrate_snp_table_out = "" +process recalibrate_snp_table { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.recalibrate_snp_table_out != "") { + publishDir "results/${params.recalibrate_snp_table_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx), path(bam_idx_bis) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("recal_data_table"), emit: recal_table + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) +""" +gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \ + -I ${snp_file} +gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \ + -I ${indel_file} +gatk --java-options "-Xmx${xmx_memory}G" BaseRecalibrator \ + ${params.recalibrate_snp_table} \ + -R ${fasta} \ + -I ${bam} \ + -known-sites ${snp_file} \ + -known-sites ${indel_file} \ + -O recal_data_table +""" +} + +params.recalibrate_snp = "" +params.recalibrate_snp_out = "" +process recalibrate_snp { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.recalibrate_snp_out != "") { + publishDir "results/${params.recalibrate_snp_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx), path(recal_table) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*.bam"), emit: bam + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) +""" +gatk --java-options "-Xmx${xmx_memory}G" ApplyBQSR \ + ${params.recalibrate_snp} \ + -R ${fasta} \ + -I ${bam} \ + --bqsr-recal-file recal_data_table \ + -O ${bam.simpleName}_recal.bam +""" +} + +params.haplotype_caller = "" +params.haplotype_caller_out = "" +process haplotype_caller { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.haplotype_caller_out != "") { + publishDir "results/${params.haplotype_caller_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(bam) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*.gvcf"), emit: gvcf + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = 
get_file_prefix(file_id) +""" +gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \ + ${params.haplotype_caller} \ + -R ${fasta} \ + -I ${bam} \ + -ERC GVCF \ + -O ${bam.simpleName}.gvcf +""" +} + +params.gvcf_genotyping = "" +params.gvcf_genotyping_out = "" +process gvcf_genotyping { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.gvcf_genotyping_out != "") { + publishDir "results/${params.gvcf_genotyping_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(gvcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*.vcf.gz"), emit: vcf + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) +""" +gatk --java-options "-Xmx${xmx_memory}G" GenotypeGVCFs \ + ${params.gvcf_genotyping} \ + -R ${fasta} \ + -V ${gvcf} \ + -O ${gvcf.simpleName}_joint.vcf.gz +""" +} + +params.select_variants_snp = "" +params.select_variants_snp_out = "" +process select_variants_snp { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.select_variants_snp_out != "") { + publishDir "results/${params.select_variants_snp_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_joint_snp.vcf"), emit: vcf + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) +""" +gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ + ${params.select_variants_snp} \ + -R ${fasta} \ + -V ${vcf} \ + -select-type SNP \ + -O ${vcf.simpleName}_joint_snp.vcf +""" +} + +params.select_variants_indels = "" +params.select_variants_indels_out = "" +process select_variants_indels { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.select_variants_indels_out != "") { + publishDir "results/${params.select_variants_indels_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_joint_indel.vcf"), emit: vcf + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) +""" +gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ + ${params.select_variants_indels} \ + -R ${fasta} \ + -V ${vcf} \ + -select-type INDEL \ + -O ${file_prefix}_joint_indel.vcf +""" +} + +params.personalized_genome = "" +params.personalized_genome_out = "" +process personalized_genome { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.personalized_genome_out != "") { + publishDir "results/${params.personalized_genome_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(vcf) + tuple val(ref_id), path(fasta), path(fai), path(dict) + output: + tuple val(file_id), path("*_genome.fasta"), emit: fasta + + script: + xmx_memory = "${task.memory}" - ~/\s*GB/ + file_prefix = get_file_prefix(file_id) +""" +gatk --java-options "-Xmx${xmx_memory}G" FastaAlternateReferenceMaker \ + ${params.personalized_genome} \ + -R ${fasta} \ + -V ${vcf} \ + -O ${vcf.simpleName}_genome.fasta +""" +} + + + diff --git a/src/nf_modules/gffread/main.nf b/src/nf_modules/gffread/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..3f7a4db4bf7118b61b90c6d23bcf147c62436a99 --- /dev/null +++ b/src/nf_modules/gffread/main.nf @@ -0,0 +1,31 @@ +version = "0.12.2" +container_url = "lbmc/gffread:${version}" +
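+// The gffread process below extracts transcript sequences from a genome fasta +// using a GTF annotation, then removes duplicated sequences with awk. A minimal +// usage sketch, assuming hypothetical gtf_files and fasta_files channels: +/* +include { gffread } from "./src/nf_modules/gffread/main.nf" addParams(gffread_out: "fasta/") + +workflow { + gffread(gtf_files, fasta_files) // emits a deduplicated transcript fasta +} +*/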
+params.gffread_out = "" +process gffread { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_prefix" + if (params.gffread_out != "") { + publishDir "results/${params.gffread_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(gtf) + tuple val(fasta_id), path(fasta) + + output: + tuple val(fasta_id), path("${file_prefix}.fasta"), emit: fasta + + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } + """ + gffread ${gtf} -g ${fasta} -M -x dup_${file_prefix}.fasta + awk 'BEGIN {i = 1;} { if (\$1 ~ /^>/) { tmp = h[i]; h[i] = \$1; } else if (!a[\$1]) { s[i] = \$1; a[\$1] = "1"; i++; } else { h[i] = tmp; } } END { for (j = 1; j < i; j++) { print h[j]; print s[j]; } }' < dup_${file_prefix}.fasta | grep -v -e "^\$" > ${file_prefix}.fasta + """ +} diff --git a/src/nf_modules/hisat2/main.nf b/src/nf_modules/hisat2/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..96c532396204260493a6847e47bdf9fa749c1bc4 --- /dev/null +++ b/src/nf_modules/hisat2/main.nf @@ -0,0 +1,100 @@ +version = "2.2.1" +container_url = "lbmc/histat2:${version}" + +params.index_fasta = "" +params.index_fasta_out = "" +process index_fasta { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.index_fasta_out != "") { + publishDir "results/${params.index_fasta_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(fasta) + + output: + tuple val(file_id), path("*.ht2*"), emit: index + tuple val(file_id), path("*_report.txt"), emit: report + + script: +""" +hisat2-build -p ${task.cpus} \ + ${fasta} \ + ${fasta.simpleName} &> \ + ${fasta.simpleName}_hisat2_index_report.txt + +if grep -q "Error" ${fasta.simpleName}_bowtie2_index_report.txt; then + exit 1 +fi +""" +} + +params.mapping_fastq = "--very-sensitive" +params.mapping_fastq_out = "" +process mapping_fastq { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.mapping_fastq_out != "") { + publishDir "results/${params.mapping_fastq_out}", mode: 'copy' + } + + input: + tuple val(index_id), path(index) + tuple val(file_id), path(reads) + + output: + tuple val(file_id), path("*.bam"), emit: bam + path "*_report.txt", emit: report + + script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.ht2.*/) { + index_id = ( index_file =~ /(.*)\.1\.ht2.*/)[0][1] + } + } + switch(file_id) { + case {it instanceof List}: + file_prefix = file_id[0] + break + case {it instanceof Map}: + file_prefix = file_id.values()[0] + break + default: + file_prefix = file_id + break + } + + if (reads.size() == 2) + """ + histat2 ${params.mapping_fastq} \ + -p ${task.cpus} \ + -x ${index_id} \ + -1 ${reads[0]} \ + -2 ${reads[1]} 2> \ + ${file_prefix}_ht2_mapping_report_tmp.txt \ + | samtools view -@ ${task.cpus} -bS - \ + | samtools sort -@ ${task.cpus} -o ${file_prefix}.bam - + + if grep -q "Error" ${file_prefix}_ht2_mapping_report_tmp.txt; then + exit 1 + fi + """ + else + """ + histat2 ${params.mapping_fastq} \ + -p ${task.cpus} \ + -x ${index_id} \ + -U ${reads} 2> \ + ${file_prefix}_ht2_mapping_report_tmp.txt \ + | samtools view -@ ${task.cpus} -bS - \ + | samtools sort -@ ${task.cpus} -o ${file_prefix}.bam - + + if grep -q "Error" ${file_prefix}_ht2_mapping_report_tmp.txt; then + exit 1 + fi + """ +} diff --git a/src/nf_modules/kallisto/main.nf b/src/nf_modules/kallisto/main.nf new file mode 100644 index 
0000000000000000000000000000000000000000..8d3fe1d3a9bbdd1435863f93ab0b74c1d7bcd994 --- /dev/null +++ b/src/nf_modules/kallisto/main.nf @@ -0,0 +1,67 @@ +version = "0.44.0" +container_url = "lbmc/kallisto:${version}" + +params.index_fasta = "-k 31 --make-unique" +params.index_fasta_out = "" +process index_fasta { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.index_fasta_out != "") { + publishDir "results/${params.index_fasta_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(fasta) + + output: + tuple val(file_id), path("*.index*"), emit: index + tuple val(file_id), path("*_report.txt"), emit: report + + script: +""" +kallisto index ${params.index_fasta} -i ${fasta.baseName}.index ${fasta} \ +2> ${fasta.baseName}_kallisto_index_report.txt +""" +} + +params.mapping_fastq = "--bias --bootstrap-samples 100" +params.mapping_fastq_out = "" +process mapping_fastq { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_prefix" + if (params.mapping_fastq_out != "") { + publishDir "results/${params.mapping_fastq_out}", mode: 'copy' + } + + input: + tuple val(index_id), path(index) + tuple val(file_id), path(reads) + + output: + tuple val(file_id), path("${file_prefix}"), emit: counts + tuple val(file_id), path("*_report.txt"), emit: report + + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } + + if (reads.size() == 2) + """ + mkdir ${file_prefix} + kallisto quant -i ${index} -t ${task.cpus} \ + ${params.mapping_fastq} -o ${file_prefix} \ + ${reads[0]} ${reads[1]} &> ${file_prefix}_kallisto_mapping_report.txt + """ + else + """ + mkdir ${file_prefix} + kallisto quant -i ${index} -t ${task.cpus} --single \ + ${params.mapping_fastq} -o ${file_prefix} \ + ${reads[0]} &> ${file_prefix}_kallisto_mapping_report.txt + """ +} diff --git a/src/nf_modules/kb/main.nf b/src/nf_modules/kb/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..6edfa13080ac635bf7917b4013eeccad9265d910 --- /dev/null +++ b/src/nf_modules/kb/main.nf @@ -0,0 +1,463 @@ +version = "0.26.0" +container_url = "lbmc/kb:${version}" + +params.index_fasta = "" +params.index_fasta_out = "" + +workflow index_fasta { + take: + fasta + gtf + + main: + tr2g(gtf) + index_default(fasta, gtf, tr2g.out.t2g) + + emit: + index = index_default.out.index + t2g = index_default.out.t2g + report = index_default.out.report +} + +process tr2g { + // create transcript to gene table from gtf if no transcript to gene file is provided + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.index_fasta_out != "") { + publishDir "results/${params.index_fasta_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(gtf) + + output: + tuple val(file_id), path("t2g.txt"), emit: t2g + + script: + """ + t2g.py --gtf ${gtf} + sort -k1 -u t2g_dup.txt > t2g.txt + """ +} + +process g2tr { + // create gene to transcript table from gtf if no transcript to gene file is provided + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.index_fasta_out != "") { + publishDir "results/${params.index_fasta_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(gtf) + + output: + tuple val(file_id), path("g2t.txt"), emit: g2t + + script: + """ + t2g.py --gtf ${gtf} + sort -k1 -u t2g_dup.txt > t2g.txt + awk 'BEGIN{OFS="\\t"}{print \$2, \$1}' t2g.txt > g2t.txt + """ +} + +process index_default { + container = "${container_url}" + label "big_mem_mono_cpus" + 
tag "$file_id" + if (params.index_fasta_out != "") { + publishDir "results/${params.index_fasta_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(fasta) + tuple val(gtf_id), path(gtf) + tuple val(t2g_id), path(transcript_to_gene) + + output: + tuple val(file_id), path("*.idx"), emit: index + tuple val(t2g_id), path("${transcript_to_gene}"), emit: t2g + tuple val(file_id), path("*_report.txt"), emit: report + + script: +""" +kb ref \ + -i ${fasta.simpleName}.idx \ + -g ${transcript_to_gene} \ + ${params.index_fasta} \ + -f1 cdna.fa ${fasta} ${gtf} > ${fasta.simpleName}_kb_index_report.txt +""" +} + + +include { split } from "./../flexi_splitter/main.nf" + +params.kb_protocol = "10x_v3" +params.count = "" +params.count_out = "" +workflow count { + take: + index + fastq + transcript_to_gene + whitelist + config + + main: + whitelist + .ifEmpty(["NO WHITELIST", 0]) + .set{ whitelist_optional } + switch(params.kb_protocol) { + case "marsseq": + split(fastq, config.collect()) + kb_marseq(index.collect(), split.out.fastq, transcript_to_gene.collect(), whitelist_optional.collect()) + kb_marseq.out.counts.set{res_counts} + kb_marseq.out.report.set{res_report} + break; + default: + kb_default(index.collect(), fastq, transcript_to_gene.collect(), whitelist_optional.collect()) + kb_default.out.counts.set{res_counts} + kb_default.out.report.set{res_report} + break; + } + + emit: + counts = res_counts + report = res_report +} + +process kb_default { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_prefix" + if (params.count_out != "") { + publishDir "results/${params.count_out}", mode: 'copy' + } + + input: + tuple val(index_id), path(index) + tuple val(file_id), path(reads) + tuple val(t2g_id), path(transcript_to_gene) + tuple val(whitelist_id), path(whitelist) + + output: + tuple val(file_id), path("${file_prefix}"), emit: counts + tuple val(file_id), path("*_report.txt"), emit: report + + script: + def kb_memory = "${task.memory}" - ~/GB/ + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } + def whitelist_param = "" + if (whitelist_id != "NO WHITELIST"){ + whitelist_param = "-w ${whitelist}" + } + + if (reads.size() == 2) + """ + mkdir ${file_prefix} + kb count -t ${task.cpus} \ + -m ${kb_memory} \ + -i ${index} \ + -g ${transcript_to_gene} \ + -o ${file_prefix} \ + ${whitelist_param} \ + -x 10XV3 \ + --h5ad \ + ${params.count} \ + ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt + + fix_t2g.py --t2g ${transcript_to_gene} + cp fix_t2g.txt ${file_prefix}/ + cp ${transcript_to_gene} ${file_prefix}/ + """ +} + +process kb_marseq { + // With the MARS-Seq protocol, we have: + // on the read 1: 4 nt of bc plate + // on the read 2: 6 nt of bc cell, and 8 nt of UMI + // this process expect that the bc plate is removed from the read 1 + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_prefix" + if (params.count_out != "") { + publishDir "results/${params.count_out}", mode: 'copy' + } + + input: + tuple val(index_id), path(index) + tuple val(file_id), path(reads) + tuple val(t2g_id), path(transcript_to_gene) + tuple val(whitelist_id), path(whitelist) + + output: + tuple val(file_id), path("${file_prefix}"), emit: counts + tuple val(file_id), path("*_report.txt"), emit: report + + script: + def kb_memory = "${task.memory}" - ~/GB/ + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } + def whitelist_param = "" + if (whitelist_id != "NO WHITELIST"){ 
+ whitelist_param = "-w ${whitelist}" + } + + if (reads.size() == 2) + """ + mkdir ${file_prefix} + kb count -t ${task.cpus} \ + -m ${kb_memory} \ + -i ${index} \ + -g ${transcript_to_gene} \ + -o ${file_prefix} \ + ${whitelist_param} \ + ${params.count} \ + --h5ad \ + -x 1,0,6:1,6,14:0,0,0 \ + ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt + fix_t2g.py --t2g ${transcript_to_gene} + cp fix_t2g.txt ${file_prefix}/ + cp ${transcript_to_gene} ${file_prefix}/ + """ + else + """ + mkdir ${file_prefix} + kb count -t ${task.cpus} \ + -m ${kb_memory} \ + -i ${index} \ + -g ${transcript_to_gene} \ + -o ${file_prefix} \ + ${whitelist_param} \ + ${params.count} \ + -x 1,0,6:1,6,14:0,0,0 \ + --h5ad \ + ${reads} > ${file_prefix}_kb_mapping_report.txt + fix_t2g.py --t2g ${transcript_to_gene} + cp fix_t2g.txt ${file_prefix}/ + cp ${transcript_to_gene} ${file_prefix}/ + """ +} + +// ************************** velocity workflow ************************** + +workflow index_fasta_velocity { + take: + fasta + gtf + + main: + tr2g(gtf) + index_fasta_velocity_default(fasta, gtf, tr2g.out.t2g) + + emit: + index = index_fasta_velocity_default.out.index + t2g = index_fasta_velocity_default.out.t2g + report = index_fasta_velocity_default.out.report +} + +process index_fasta_velocity_default { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.index_fasta_out != "") { + publishDir "results/${params.index_fasta_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(fasta) + tuple val(gtf_id), path(gtf) + tuple val(t2g_id), path(transcript_to_gene) + + output: + tuple val(file_id), path("*.idx"), emit: index + tuple val(t2g_id), path("${transcript_to_gene}"), path("cdna_t2c.txt"), path("intron_t2c.txt"), emit: t2g + tuple val(file_id), path("*_report.txt"), emit: report + + script: +""" +kb ref \ + -i ${fasta.simpleName}.idx \ + -g ${transcript_to_gene} \ + ${params.index_fasta} \ + -f1 cdna.fa -f2 intron.fa -c1 cdna_t2c.txt -c2 intron_t2c.txt --workflow lamanno \ + ${fasta} ${gtf} > ${fasta.simpleName}_kb_index_report.txt +""" +} + +params.count_velocity = "" +params.count_velocity_out = "" +workflow count_velocity { + take: + index + fastq + transcript_to_gene + whitelist + config + + main: + whitelist + .ifEmpty(["NO WHITELIST", 0]) + .set{ whitelist_optional } + switch(params.kb_protocol) { + case "marsseq": + split(fastq, config.collect()) + velocity_marseq(index.collect(), split.out.fastq, transcript_to_gene.collect(), whitelist_optional.collect()) + velocity_marseq.out.counts.set{res_counts} + velocity_marseq.out.report.set{res_report} + break; + default: + velocity_default(index.collect(), fastq, transcript_to_gene.collect(), whitelist_optional.collect()) + velocity_default.out.counts.set{res_counts} + velocity_default.out.report.set{res_report} + break; + } + + emit: + counts = res_counts + report = res_report +} + +process velocity_default { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_prefix" + if (params.count_velocity_out != "") { + publishDir "results/${params.count_velocity_out}", mode: 'copy' + } + + input: + tuple val(index_id), path(index) + tuple val(file_id), path(reads) + tuple val(t2g_id), path(transcript_to_gene), path(cdna_t2g), path(intron_t2g) + tuple val(whitelist_id), path(whitelist) + + output: + tuple val(file_id), path("${file_prefix}"), emit: counts + tuple val(file_id), path("*_report.txt"), emit: report + + script: + def kb_memory = "${task.memory}" - ~/GB/ + if (file_id instanceof List){ + 
file_prefix = file_id[0] + } else { + file_prefix = file_id + } + def whitelist_param = "" + if (whitelist_id != "NO WHITELIST"){ + whitelist_param = "-w ${whitelist}" + } + + if (reads.size() == 2) + """ + mkdir ${file_prefix} + kb count -t ${task.cpus} \ + -m ${kb_memory} \ + -i ${index} \ + -g ${transcript_to_gene} \ + -o ${file_prefix} \ + -c1 ${cdna_t2g} \ + -c2 ${intron_t2g} \ + --workflow lamanno \ + ${whitelist_param} \ + -x 10XV3 \ + --h5ad \ + ${params.count} \ + ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt + fix_t2g.py --t2g ${transcript_to_gene} + cp fix_t2g.txt ${file_prefix}/ + cp ${transcript_to_gene} ${file_prefix}/ + cp ${cdna_t2g} ${file_prefix}/ + cp ${intron_t2g} ${file_prefix}/ + """ +} + +process velocity_marseq { + // With the MARS-Seq protocol, we have: + // on the read 1: 4 nt of bc plate + // on the read 2: 6 nt of bc cell, and 8 nt of UMI + // this process expect that the bc plate is removed from the read 1 + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_prefix" + if (params.count_velocity_out != "") { + publishDir "results/${params.count_velocity_out}", mode: 'copy' + } + + input: + tuple val(index_id), path(index) + tuple val(file_id), path(reads) + tuple val(t2g_id), path(transcript_to_gene), path(cdna_t2g), path(intron_t2g) + tuple val(whitelist_id), path(whitelist) + + output: + tuple val(file_id), path("${file_prefix}"), emit: counts + tuple val(file_id), path("*_report.txt"), emit: report + + script: + def kb_memory = "${task.memory}" - ~/GB/ + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } + def whitelist_param = "" + if (whitelist_id != "NO WHITELIST"){ + whitelist_param = "-w ${whitelist}" + } + + if (reads.size() == 2) + """ + mkdir ${file_prefix} + kb count -t ${task.cpus} \ + -m ${kb_memory} \ + -i ${index} \ + -g ${transcript_to_gene} \ + -o ${file_prefix} \ + -c1 ${cdna_t2g} \ + -c2 ${intron_t2g} \ + --workflow lamanno \ + --h5ad \ + ${whitelist_param} \ + ${params.count} \ + -x 1,0,6:1,6,14:0,0,0 \ + ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt + fix_t2g.py --t2g ${transcript_to_gene} + cp fix_t2g.txt ${file_prefix}/ + cp ${transcript_to_gene} ${file_prefix}/ + cp ${cdna_t2g} ${file_prefix}/ + cp ${intron_t2g} ${file_prefix}/ + """ + else + """ + mkdir ${file_prefix} + kb count -t ${task.cpus} \ + -m ${kb_memory} \ + -i ${index} \ + -g ${transcript_to_gene} \ + -o ${file_prefix} \ + -c1 ${cdna_t2g} \ + -c2 ${intron_t2g} \ + --workflow lamanno \ + ${whitelist_param} \ + ${params.count} \ + -x 1,0,6:1,6,14:0,0,0 \ + ${reads} > ${file_prefix}_kb_mapping_report.txt + fix_t2g.py --t2g ${transcript_to_gene} + cp fix_t2g.txt ${file_prefix}/ + cp ${transcript_to_gene} ${file_prefix}/ + cp ${cdna_t2g} ${file_prefix}/ + cp ${intron_t2g} ${file_prefix}/ + """ +} + diff --git a/src/nf_modules/macs2/main.nf b/src/nf_modules/macs2/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..51bed4bebf4060e71f5532fb9ac83fdd89b24231 --- /dev/null +++ b/src/nf_modules/macs2/main.nf @@ -0,0 +1,85 @@ +version = "2.1.2" +container_url = "lbmc/macs2:${version}" + +params.macs_gsize=3e9 +params.macs_mfold="5 50" +params.peak_calling = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}" +params.peak_calling_out = "" +process peak_calling { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "${file_id}" + if (params.peak_calling_out != "") { + publishDir "results/${params.peak_calling_out}", mode: 'copy' + } + + input: + 
tuple val(file_id), path(bam_ip), path(bam_control) + + output: + tuple val(file_id), path("*.narrowPeak"), emit: peak + tuple val(file_id), path("*.bed"), emit: summits + tuple val(file_id), path("*_peaks.xls"), path("*_report.txt"), emit: report + + script: +/* remove --nomodel option for real dataset */ +""" +macs2 callpeak \ + ${params.peak_calling} \ + --treatment ${bam_ip} \ + --call-summits \ + --control ${bam_control} \ + --keep-dup all \ + --qvalue 0.99 \ + --name ${bam_ip.simpleName} 2> \ + ${bam_ip.simpleName}_macs2_report.txt + +if grep -q "ERROR" ${bam_ip.simpleName}_macs2_report.txt; then + echo "MACS2 error" + exit 1 +fi +""" +} + +params.peak_calling_bg = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}" +params.peak_calling_bg_out = "" +process peak_calling_bg { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "${file_id}" + if (params.peak_calling_bg_out != "") { + publishDir "results/${params.peak_calling_bg_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(bg_ip), path(bg_control) + + output: + tuple val(file_id), path("*.narrowPeak"), emit: peak + tuple val(file_id), path("*.bed"), emit: summits + tuple val(file_id), path("*_report.txt"), emit: report + + script: +/* remove --nomodel option for real dataset */ +""" +awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_ip} > \ + ${bg_ip.simpleName}.bed +awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_control} > \ + ${bg_control.simpleName}.bed +macs2 callpeak \ + ${params.peak_calling_bg} \ + --treatment ${bg_ip.simpleName}.bed \ + --qvalue 0.99 \ + --call-summits \ + --control ${bg_control.simpleName}.bed \ + --keep-dup all \ + --name ${bg_ip.simpleName} 2> \ + ${bg_ip.simpleName}_macs2_report.txt + +if grep -q "ERROR" ${bg_ip.simpleName}_macs2_report.txt; then + echo "MACS2 error" + exit 1 +fi +""" +} + diff --git a/src/nf_modules/macs3/main.nf b/src/nf_modules/macs3/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..b8c2dbcebac257a7ff63f85d3737b48a97f648c2 --- /dev/null +++ b/src/nf_modules/macs3/main.nf @@ -0,0 +1,84 @@ +version = "3.0.0a6" +container_url = "lbmc/macs3:${version}" + +params.macs_gsize=3e9 +params.macs_mfold="5 50" +params.peak_calling = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}" +params.peak_calling_out = "" +process peak_calling { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "${file_id}" + if (params.peak_calling_out != "") { + publishDir "results/${params.peak_calling_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(bam_ip), path(bam_control) + + output: + path "*", emit: peak + path "*_report.txt", emit: report + + script: +/* remove --nomodel option for real dataset */ +""" +macs3 callpeak \ + --treatment ${bam_ip} \ + --call-summits \ + --control ${bam_control} \ + --keep-dup all \ + ${params.peak_calling} \ + --name ${bam_ip.simpleName} 2> \ + ${bam_ip.simpleName}_macs3_report.txt + +if grep -q "ERROR" ${bam_ip.simpleName}_macs3_report.txt; then + echo "MACS3 error" + exit 1 +fi +""" +} + +params.peak_calling_bg = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}" +params.peak_calling_bg_out = "" +process peak_calling_bg { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "${file_id}" + if (params.peak_calling_bg_out != "") { + publishDir "results/${params.peak_calling_bg_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(bg_ip), path(bg_control) + + output: + path "*", emit: peak + path "*_report.txt", emit: 
report + + script: +/* remove --nomodel option for real dataset */ +""" +awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_ip} > \ + ${bg_ip.simpleName}.bed +awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_control} > \ + ${bg_control.simpleName}.bed +macs3 callpeak \ + ${params.peak_calling_bg} \ + --treatment ${bg_ip.simpleName}.bed \ + --call-summits \ + --control ${bg_control.simpleName}.bed \ + --keep-dup all \ + --name ${bg_ip.simpleName} 2> \ + ${bg_ip.simpleName}_macs3_report.txt + +if grep -q "ERROR" ${bg_ip.simpleName}_macs3_report.txt; then + echo "MACS3 error" + exit 1 +fi +""" +} + diff --git a/src/nf_modules/minimap2/main.nf b/src/nf_modules/minimap2/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..9dbe5a575f840d9b911f778b7adf039747a6024f --- /dev/null +++ b/src/nf_modules/minimap2/main.nf @@ -0,0 +1,62 @@ +version = "2.17" +container_url = "lbmc/minimap2:${version}" + +params.index_fasta = "" +params.index_fasta_out = "" +process index_fasta { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.index_fasta_out != "") { + publishDir "results/${params.index_fasta_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(fasta) + + output: + tuple val(file_id), path("${fasta}"), path("*.mmi*"), emit: index + + script: + memory = "${task.memory}" - ~/\s*GB/ +""" +minimap2 ${params.index_fasta} -t ${task.cpus} -I ${memory}G -d ${fasta.baseName}.mmi ${fasta} +""" +} + +params.mapping_fastq = "-ax sr" +params.mapping_fastq_out = "" +process mapping_fastq { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.mapping_fastq_out != "") { + publishDir "results/${params.mapping_fastq_out}", mode: 'copy' + } + + input: + tuple val(fasta_id), path(fasta), path(index) + tuple val(file_id), path(reads) + + output: + tuple val(file_id), path("*.bam"), emit: bam + + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } + memory = "${task.memory}" - ~/\s*GB/ + memory = memory.toInteger().intdiv(task.cpus + 1) + if (reads.size() == 2) + """ + minimap2 ${params.mapping_fastq} -t ${task.cpus} -K ${memory}G ${fasta} ${reads[0]} ${reads[1]} | + samtools view -Sb - > ${file_prefix}.bam + """ + else + """ + minimap2 ${params.mapping_fastq} -t ${task.cpus} -K ${memory}G ${fasta} ${reads} | + samtools view -Sb - > ${file_prefix}.bam + """ +} diff --git a/src/nf_modules/multiqc/main.nf b/src/nf_modules/multiqc/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..755ae0212662f241acd1202593ba383541d74551 --- /dev/null +++ b/src/nf_modules/multiqc/main.nf @@ -0,0 +1,70 @@ +// multiqc generates a nice html report combining the reports of many different bioinformatics tools.
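+// The report channels emitted by the modules above (e.g. fastp, hisat2, +// kallisto, picard) can be mixed into a single channel and passed to this +// workflow, as in the example below.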
+// +// EXAMPLE: + +/* +include { multiqc } + from './nf_modules/multiqc/main' + addParams( + multiqc_out: "QC/" + ) + +multiqc( + report_a + .mix( + report_b, + report_c, + report_d + ) +) +*/ + +version = "1.11" +container_url = "lbmc/multiqc:${version}" + +params.multiqc = "" +params.multiqc_out = "QC/" +workflow multiqc { + take: + report + main: + report + .map{it -> + if (it instanceof List){ + if(it.size() > 1) { + it[1] + } else { + it[0] + } + } else { + it + } + } + .unique() + .flatten() + .set { report_cleaned } + multiqc_default(report_cleaned.collect()) + + emit: + report = multiqc_default.out.report +} + +process multiqc_default { + container = "${container_url}" + label "big_mem_mono_cpus" + if (params.multiqc_out != "") { + publishDir "results/${params.multiqc_out}", mode: 'copy' + } + + input: + path report + + output: + path "*multiqc_*", emit: report + + script: +""" +multiqc ${params.multiqc} -f . +""" +} diff --git a/src/nf_modules/picard/main.nf b/src/nf_modules/picard/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..449d5fd014fa2d786a42196e41960ddbfcb806dd --- /dev/null +++ b/src/nf_modules/picard/main.nf @@ -0,0 +1,77 @@ +version = "2.18.11" +container_url = "lbmc/picard:${version}" + +params.mark_duplicate = "VALIDATION_STRINGENCY=LENIENT REMOVE_DUPLICATES=true" +params.mark_duplicate_out = "" +process mark_duplicate { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.mark_duplicate_out != "") { + publishDir "results/${params.mark_duplicate_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(bam) + output: + tuple val(file_id) , path("*.bam"), emit: bam + path "*_report.dupinfo.txt", emit: report + + + script: +""" +PicardCommandLine MarkDuplicates \ + ${params.mark_duplicate} \ + INPUT=${bam} \ + OUTPUT=${bam.baseName}_dedup.bam \ + METRICS_FILE=${bam.baseName}_picard_dedup_report.dupinfo.txt &> \ + picard_${bam.baseName}.log +""" +} + +params.index_fasta = "" +params.index_fasta_out = "" +process index_fasta { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.index_fasta_out != "") { + publishDir "results/${params.index_fasta_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(fasta) + output: + tuple val(file_id), path("*.dict"), emit: index + + script: +""" +PicardCommandLine CreateSequenceDictionary \ + ${params.index_fasta} \ + REFERENCE=${fasta} \ + OUTPUT=${fasta.baseName}.dict +""" +} + +params.index_bam = "" +params.index_bam_out = "" +process index_bam { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.index_bam_out != "") { + publishDir "results/${params.index_bam_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(bam) + output: + tuple val(file_id), path("*"), emit: index + + script: +""" +PicardCommandLine BuildBamIndex \ + ${params.index_bam} \ + INPUT=${bam} +""" +} diff --git a/src/nf_modules/rasusa/main.nf b/src/nf_modules/rasusa/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..4a671d0c34d8bff59dbe776e4d7896148f711b71 --- /dev/null +++ b/src/nf_modules/rasusa/main.nf @@ -0,0 +1,81 @@ +version = "0.6.0" +container_url = "lbmc/rasusa:${version}" + +include { index_fasta } from "./../samtools/main.nf" + +params.sample_fastq = "" +params.sample_fastq_coverage = "" +params.sample_fastq_size = "" +params.sample_fastq_out = "" +workflow sample_fastq { + take: + fastq + fasta + + main: + if (params.sample_fastq_coverage == "" && 
params.sample_fastq_size == ""){
+    fastq
+      .set{ final_fastq }
+  } else {
+    index_fasta(fasta)
+    sub_sample_fastq(fastq, index_fasta.out.index)
+    sub_sample_fastq.out.fastq
+      .set{ final_fastq }
+  }
+
+  emit:
+  fastq = final_fastq
+
+}
+
+process sub_sample_fastq {
+  container = "${container_url}"
+  label "small_mem_mono_cpus"
+  tag "$file_id"
+  if (params.sample_fastq_out != "") {
+    publishDir "results/${params.sample_fastq_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fastq)
+    tuple val(index_id), path(idx)
+
+  output:
+    tuple val(file_id), path("sub_*.fastq.gz"), emit: fastq
+
+  script:
+
+  switch(file_id) {
+    case {it instanceof List}:
+      file_prefix = file_id[0]
+      break
+    case {it instanceof Map}:
+      file_prefix = file_id.values()[0]
+      break
+    default:
+      file_prefix = file_id
+      break
+  }
+
+  sample_option = "-c " + params.sample_fastq_coverage
+  if (params.sample_fastq_size != ""){
+    sample_option = "-b " + params.sample_fastq_size
+  }
+
+  if (fastq.size() == 2)
+"""
+rasusa \
+  -i ${fastq[0]} ${fastq[1]} \
+  -g ${idx} \
+  ${sample_option} \
+  -o sub_${fastq[0].simpleName}.fastq.gz sub_${fastq[1].simpleName}.fastq.gz
+"""
+  else
+"""
+rasusa \
+  -i ${fastq} \
+  -g ${idx} \
+  ${sample_option} \
+  -o sub_${fastq.simpleName}.fastq.gz
+"""
+}
\ No newline at end of file
diff --git a/src/nf_modules/rasusa/test.nf b/src/nf_modules/rasusa/test.nf
new file mode 100644
index 0000000000000000000000000000000000000000..261e374bbbcd934c1992f844448884f915bc29ab
--- /dev/null
+++ b/src/nf_modules/rasusa/test.nf
@@ -0,0 +1,27 @@
+nextflow.enable.dsl=2
+
+/*
+./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq"
+./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" --coverage 1.0
+./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq" --size "1Mb"
+*/
+
+params.fastq = "data/fastq/*R{1,2}*"
+params.fasta = "data/fasta/*.fasta"
+params.coverage = ""
+params.size = ""
+
+include { sample_fastq } from "./main.nf" addParams(sample_fastq_coverage: params.coverage, sample_fastq_size: params.size, sample_fastq_out: "sample/")
+
+channel
+  .fromFilePairs( params.fastq, size: -1)
+  .set { fastq_files }
+
+channel
+  .fromPath( params.fasta )
+  .map { it -> [it.simpleName, it]}
+  .set { fasta_files }
+
+workflow {
+  sample_fastq(fastq_files, fasta_files.collect())
+}
\ No newline at end of file
diff --git a/src/nf_modules/rasusa/test.sh b/src/nf_modules/rasusa/test.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d66e26f2a334fe43a43b3228b270046d0dbed66c
--- /dev/null
+++ b/src/nf_modules/rasusa/test.sh
@@ -0,0 +1,4 @@
+#! 
/bin/sh +./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq" +./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" --coverage 1.0 +./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq" --size "1Mb" \ No newline at end of file diff --git a/src/nf_modules/rfiltration/function.R b/src/nf_modules/rfiltration/function.R new file mode 100644 index 0000000000000000000000000000000000000000..f48164d0fd30231afe73fdf418d19c717bdcce9c --- /dev/null +++ b/src/nf_modules/rfiltration/function.R @@ -0,0 +1,637 @@ +custom_colors <- list() + +colors_dutch <- c( + '#FFC312','#C4E538','#12CBC4','#FDA7DF','#ED4C67', + '#F79F1F','#A3CB38','#1289A7','#D980FA','#B53471', + '#EE5A24','#009432','#0652DD','#9980FA','#833471', + '#EA2027','#006266','#1B1464','#5758BB','#6F1E51' +) + +colors_spanish <- c( + '#40407a','#706fd3','#f7f1e3','#34ace0','#33d9b2', + '#2c2c54','#474787','#aaa69d','#227093','#218c74', + '#ff5252','#ff793f','#d1ccc0','#ffb142','#ffda79', + '#b33939','#cd6133','#84817a','#cc8e35','#ccae62' +) + +custom_colors$discrete <- c(colors_dutch, colors_spanish) + + + + + + + + + + + + + +reset_par <- function(){ + op <- structure(list(xlog = FALSE, ylog = FALSE, adj = 0.5, ann = TRUE, + ask = FALSE, bg = "transparent", bty = "o", cex = 1, + cex.axis = 1, cex.lab = 1, cex.main = 1.2, cex.sub = 1, + col = "black", col.axis = "black", col.lab = "black", + col.main = "black", col.sub = "black", crt = 0, err = 0L, + family = "", fg = "black", fig = c(0, 1, 0, 1), + fin = c(6.99999895833333, 6.99999895833333), font = 1L, + font.axis = 1L, font.lab = 1L, font.main = 2L, + font.sub = 1L, lab = c(5L, 5L, 7L), las = 0L, + lend = "round", lheight = 1, ljoin = "round", lmitre = 10, + lty = "solid", lwd = 1, mai = c(1.02, 0.82, 0.82, 0.42), + mar = c(5.1, 4.1, 4.1, 2.1), mex = 1, mfcol = c(1L, 1L), + mfg = c(1L, 1L, 1L,1L), mfrow = c(1L, 1L), + mgp = c(3, 1, 0), mkh = 0.001, new = FALSE, + oma = c(0, 0, 0, 0), omd = c(0, 1, 0, 1), + omi = c(0, 0, 0,0), pch = 1L, + pin = c(5.75999895833333, 5.15999895833333), + plt = c(0.117142874574832, 0.939999991071427, + 0.145714307397962, 0.882857125425167), + ps = 12L, pty = "m", smo = 1, srt = 0, tck = NA_real_, + tcl = -0.5, usr = c(0.568, 1.432, 0.568, 1.432), + xaxp = c(0.6, 1.4, 4), xaxs = "r", xaxt = "s", + xpd = FALSE, yaxp = c(0.6, 1.4, 4), yaxs = "r", + yaxt = "s", ylbias = 0.2), + .Names = c("xlog", "ylog", "adj", "ann", "ask", "bg", + "bty", "cex", "cex.axis", "cex.lab", "cex.main", "cex.sub", + "col", "col.axis", "col.lab", "col.main", "col.sub", "crt", + "err", "family", "fg", "fig", "fin", "font", "font.axis", + "font.lab", "font.main", "font.sub", "lab", "las", "lend", + "lheight", "ljoin", "lmitre", "lty", "lwd", "mai", "mar", + "mex", "mfcol", "mfg", "mfrow", "mgp", "mkh", "new", "oma", + "omd", "omi", "pch", "pin", "plt", "ps", "pty", "smo", + "srt", "tck", "tcl", "usr", "xaxp", "xaxs", "xaxt", "xpd", + "yaxp", "yaxs", "yaxt", "ylbias")) + par(op) +} + + +read_count_output <- function(dir, name) { + m <- readMM(paste0(dir, "/","counts_unfiltered","/", name, ".mtx")) + m <- Matrix::t(m) + m <- as(m, "dgCMatrix") + # The matrix read has cells in rows + ge <- ".genes.txt" + genes <- 
readLines(file(paste0(dir, "/", name, ge))) + barcodes <- readLines(file(paste0(dir, "/", name, ".barcodes.txt"))) + colnames(m) <- barcodes + rownames(m) <- genes + return(m) +} + + + +#' Knee plot for filtering empty droplets +#' +#' Visualizes the inflection point to filter empty droplets. This function plots +#' different datasets with a different color. Facets can be added after calling +#' this function with `facet_*` functions. Will be added to the next release +#' version of BUSpaRse. +#' +#' @param bc_rank A `DataFrame` output from `DropletUtil::barcodeRanks`. +#' @return A ggplot2 object. +knee_plot <- function(bc_rank) { + knee_plt <- tibble(rank = bc_rank[["rank"]], + total = bc_rank[["total"]]) %>% + distinct() %>% + dplyr::filter(total > 0) + annot <- tibble(inflection = metadata(bc_rank)[["inflection"]], + rank_cutoff = max(bc_rank$rank[bc_rank$total > metadata(bc_rank)[["inflection"]]])) + p <- ggplot(knee_plt, aes(total, rank)) + + geom_line() + + geom_hline(aes(yintercept = rank_cutoff), data = annot, linetype = 2) + + geom_vline(aes(xintercept = inflection), data = annot, linetype = 2) + + scale_x_log10() + + scale_y_log10() + + labs(y = "Barcode Rank", x = "Total UMIs") + return(p) +} +#annotation_logticks()+ + +read_kb_output <- function(dir,name){ + + sample <- read_count_output(dir, name = name) + + show(dim(sample)) + + + return(sample) + +} #end function + + + +###Fuction to visualize kneeplot and remove empty droplet + + +RemoveEmptyDrop <- function(sample){ + + tot_counts <- Matrix::colSums(sample) + summary(tot_counts) + ###barcodeRanks provide of DropletUtils library + bc_rank <- barcodeRanks(sample, lower = 10) + + ##Visalisation empty droplet + jpeg("Kneeplot.jpeg") + show(knee_plot(bc_rank)) + dev.off() + + + ##Filter empty droplet + sample <- sample[, tot_counts > metadata(bc_rank)$inflection] + + + return(sample) + +} #end function + +###Fuction to load t2g.txt and to match ensemble identifier with gene symbol +MatchGeneSymbol <- function(dirt2g,sample){ + + ##Need tidyverse pacckage + + tr<-read_tsv(dirt2g, col_names = c("transcript", "gene", "gene_symbol")) + tr2g <- tr %>%select(-transcript) %>% distinct() + + + rownames(sample) <- tr2g$gene_symbol[match(rownames(sample), tr2g$gene)] + + + return(sample) + +} #end function + + + + +##Fuction to load t2g.txt and to match ensemble identifier with gene symbol +Create_Seurat_Object <- function(sample){ + + ##Need tidyverse pacckage + + + seu_obj <- CreateSeuratObject(sample,project="SPACENET", min.cells = 3, min.features = 300,verbose=FALSE) + show(dim(seu_obj)) + + + return(seu_obj) + +} #end function + +Calc_QC_metrics <- function(seu_obj){ + + seu_obj[["percent.mt"]] <- PercentageFeatureSet(seu_obj, pattern = "^MT-") + seu_obj[["percent.ribo"]] <- PercentageFeatureSet(seu_obj, pattern = "^RP[SL]") + + jpeg("QC_metrics.jpeg") + VlnPlot(seu_obj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 4) + dev.off() + + return(seu_obj) + +} #end functi +##Fuction to subset seurat object and remove low quality librairies base on Mitochondrial percent and number of gene min in a cell + +RemoveLowQuality <- function(seu_obj,nFeatureRNA,percent_mt){ + + + + seu_obj <- subset(seu_obj, subset = nFeature_RNA > nFeatureRNA & percent.mt < percent_mt) + + show(dim(seu_obj)) + + + return(seu_obj) + +} #end function + + + + +PreRemoveDoublet <- function(seu_obj){ + + + #DOUBLLETFINDER NEEDS TO RUN this 5 step before to run + seu_obj <- NormalizeData(seu_obj,verbose=FALSE) + seu_obj <- 
FindVariableFeatures(seu_obj, selection.method = "vst", nfeatures = 2000,verbose=FALSE) + seu_obj <- ScaleData(seu_obj,verbose=FALSE) + seu_obj <- RunPCA(seu_obj,verbose=FALSE) + + seu_obj <- FindNeighbors(seu_obj, dims = 1:10,verbose=FALSE) + + seu_obj <- FindClusters(seu_obj, resolution = 0.7,verbose=FALSE) + seu_obj <- RunUMAP(seu_obj, dims = 1:10,verbose=FALSE) + return(seu_obj) +} + + + + +Run_doublet_finder <- function(seu_obj,nexp){ + + + + print("Run Doublet finder opti") + sweep.res.list_kidney <- paramSweep_v3(seu_obj, PCs = 1:10, sct = FALSE) + sweep.stats_kidney <- summarizeSweep(sweep.res.list_kidney, GT = FALSE,) + bcmvn_kidney <- find.pK(sweep.stats_kidney) + dev.off() + reset_par() + + ##Plot optimisation to choose the right pK + #Save + jpeg("PK_opti.jpeg") + show(ggplot(bcmvn_kidney,aes(x=pK,y=BCmetric,group=1))+ geom_point() +geom_line()) + dev.off() + + + pK_opt_tmp<-filter(bcmvn_kidney,BCmetric == max(BCmetric)) + pK_opt<-as.numeric(as.character(t(pK_opt_tmp$pK))) + + + print(paste0("Optimum pK :",pK_opt)) + + N_exp <- round(ncol(seu_obj) * nexp) # expect 4% doublets + + #Run doublet finder + #seu_obj <- doubletFinder_v3(seu_obj, pN = 0.25, pK = pK_opt, nExp = N_exp, PCs = 1:10) + seu_obj <- doubletFinder_v3(seu_obj, pN = 0.25, pK = 0.09, nExp = N_exp, PCs = 1:10) + + DF.name = colnames(seu_obj@meta.data)[grepl("DF.classification", colnames(seu_obj@meta.data))] + + + jpeg("Doublet_detection.jpeg") + DimPlot(seu_obj, group.by = DF.name,cols = c("blue","red")) + NoAxes()+ggtitle("Doublet detection") + dev.off() + + print("Remove Doublets") + show(paste0( sum( seu_obj@meta.data[, DF.name] == "Doublet"), " doublets detected/removed")) + seu_obj = seu_obj[, seu_obj@meta.data[, DF.name] == "Singlet"] + + #show(dim(seu_obj)) + + + + + return(seu_obj) + +} #end function + +### Function to remove empty feature +RemoveEmptyFeature <- function(seu_obj){ + + + + seu_obj <- seu_obj[Matrix::rowSums(seu_obj) > 0,] + + return(seu_obj) + +} #end function + + + +save_RDS <- function(seu_obj,dir){ + + dir <- normalizePath(dir, mustWork = TRUE) + + name<-"Seurat_object" + path<-paste0(dir, "/", name,".Rds") + saveRDS(seu_obj,file=path) + + + +} #end function + + +### Function to extract data for sanity normaliztion +Extract_data <- function(seu_obj,dir){ + + + df_tmp<-as.data.frame(GetAssayData(object = seu_obj, slot = "counts")) + df_tmp<- cbind(GeneID = rownames(df_tmp), df_tmp) + rownames(df_tmp) <- NULL + + dir <- normalizePath(dir, mustWork = TRUE) + + name<-"counts_filtered_doublet" + path<-paste0(dir, "/", name, ".txt") + write.table(df_tmp,path,sep = '\t', row.names = F, col.names=T, quote = F) + + return(seu_obj) + +} #end function + + + + +read_RDS <- function(path_seu_obj){ + + path_seu_obj <- normalizePath(path_seu_obj, mustWork = TRUE) + + + seu_obj <-readRDS(path_seu_obj) + + + show(dim(seu_obj)) + + + return(seu_obj) + +} #end function + + + +SetSanityMatrix <- function(seu_obj,path){ + + dir <- normalizePath(path, mustWork = TRUE) + name<-"Gon_m1_good_doublet_log_transcription_quotients" + #name<-"counts_filtered_doublet" + path<-paste0(dir, "/", name, ".txt") + + df_norm_tmp<-Matrix(as.matrix(read.table(path,row.names = 1, header = T) ),sparse = TRUE) + + + seu_obj <- SetAssayData(object = seu_obj, slot = "data", new.data = df_norm_tmp) + + + + return(seu_obj) + +} #end function + + + +VariabeFeature <- function(seu_obj){ + + seu_obj <- FindVariableFeatures(seu_obj, selection.method = "vst", nfeatures = 2000,verbose=FALSE) + + # Identify the 10 most highly variable genes 
+ top10 <- head(VariableFeatures(seu_obj), 10) + + # plot variable features with and without labels + plot1 <- VariableFeaturePlot(seu_obj) + plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) + jpeg("Variable_feature.jpg", width = 350, height = 350) + show(plot2) + dev.off() + + return(seu_obj) + +} #end function + + + + +evplot = function(ev) { + # Broken stick model (MacArthur 1957) + n = length(ev) + bsm = data.frame(j=seq(1:n), p=0) + bsm$p[1] = 1/n + for (i in 2:n) bsm$p[i] = bsm$p[i-1] + (1/(n + 1 - i)) + bsm$p = 100*bsm$p/n + # Plot eigenvalues and % of variation for each axis + op = par(mfrow=c(2,1),omi=c(0.1,0.3,0.1,0.1), mar=c(1, 1, 1, 1)) + barplot(ev, main="Eigenvalues", col="bisque", las=2) + abline(h=mean(ev), col="red") + legend("topright", "Average eigenvalue", lwd=1, col=2, bty="n",cex = 1.5) + barplot(t(cbind(100*ev/sum(ev), bsm$p[n:1])), beside=TRUE, + main="% variation", col=c("bisque",2), las=2) + legend("topright", c("% eigenvalue", "Broken stick model"), + pch=15, col=c("bisque",2), bty="n",cex = 1.5) + par(op) +} + +ChooseComponent = function(seu_obj) { + + + df_ev<-(seu_obj@reductions[["pca"]]@stdev)^2 + + jpeg("Choose_component.jpg", width = 350, height = 350) + evplot(df_ev) + + +} +RunCenterScalePCA <- function(seu_obj){ + + + + all.genes <- rownames(seu_obj) + seu_obj<- ScaleData(seu_obj, features = all.genes,verbose=FALSE) + + seu_obj <- RunPCA(seu_obj, features = VariableFeatures(object = seu_obj),verbose=FALSE) + + return(seu_obj) + +} #end function + +VizFeatureCP <- function(seu_obj,cp){ + jpeg("VizDimPca.jpg", width = 350, height = 350) + show(VizDimLoadings(seu_obj, dims = 1:cp, reduction = "pca")) + seu_obj <- FindNeighbors(seu_obj, dims = 1:cp) + return(seu_obj) + +} #end function + +# calculates the convex hull of (J,pen) +# kv : coordinates of the convex hull + +conv.hull <- function(J,pen){ + + K = length(J) + k = 1 + kv = c() + pv = c() + + while (k<K){ + pk = (J[(k+1):K]-J[k]) / (pen[k]-pen[(k+1):K]) + dm = which.max(pk) + kv = c(kv,k) + k = k + dm + } + + kv = c(kv,K) + return(kv=kv) + +} #end function + + + + +mean_silhouette<-function(object){ + + mean_silhouette_score=c() + j = 0 + K=c() + object <- FindClusters(object, resolution = 0.01,verbose = FALSE) + + object<- RunUMAP(object, dims = 1:4,verbose = FALSE) + distance_matrix <- dist(Embeddings(object[['umap']])[, 1:2]) + for (rr in seq(0.01,1,by=0.01)){ + table_silhouette=c() + silhouette=c() + clusters=c() + object@meta.data[["seurat_clusters"]]=c() + j = j+1 + show(j) + + object <- FindClusters(object, resolution = rr,verbose = FALSE) + K[j] = length(unique(Idents(object))) + + #object<- RunUMAP(object, dims = 1:4,verbose = FALSE) + + #distance_matrix <- dist(Embeddings(object[['umap']])[, 1:2]) + + clusters <- object@meta.data[["seurat_clusters"]] + + table_silhouette <- silhouette(as.numeric(clusters), dist = distance_matrix) + + silhouette<-table_silhouette + + object@meta.data[["seurat_clusters"]] <- silhouette[,3] + mean_silhouette_score[j] <- mean(object@meta.data[["seurat_clusters"]]) + + } + + x = seq(0.01,1,by=0.01) + ch = conv.hull(J = -mean_silhouette_score, pen = seq(0.01,1,by=0.01)) + ch2 = conv.hull(J = -K, pen = seq(0.01,1,by=0.01)) + par(mar=c(5,5,5,5)+0.1, las=1) + + jpeg("silhouette_res_cluster.jpg", width = 600, height = 350) + plot.new() + plot.window(xlim=range(x), ylim=range(K)) + lines.default(x, K, col="red") + + axis(1,at = x) + axis(2,at=seq(min(K),max(K),by=1)) + box() + + plot.window(xlim=range(x), ylim=range(mean_silhouette_score)) + 
lines(x,mean_silhouette_score, col="blue")
+  lines(x[ch],mean_silhouette_score[ch],type="l",col=2)
+  lines(x[ch],mean_silhouette_score[ch],type="b",col=2)
+
+  axis(4)
+  mtext("Mean Silhouette score", side=4, line=3, cex.lab=1,las=3)
+
+  # add a main title and bottom and left axis labels
+  title( xlab="Resolution",ylab="Clusters")
+
+
+
+  show(x[ch])
+  show(mean_silhouette_score[ch])
+
+}
+
+
+GraphClusteringLouvain <- function(seu_obj,res,cp){
+  resolution<-as.numeric(res)
+  principal_component<-as.numeric(cp)
+  seu_obj <- FindNeighbors(seu_obj, dims = 1:principal_component)
+  seu_obj <- FindClusters(seu_obj, resolution = resolution)
+  return(seu_obj)
+
+} #end function
+
+
+
+SilPlot <- function(seu_obj,res,cp){
+  ##Initialization
+  resolution<-as.numeric(res)
+  principal_component<-as.numeric(cp)
+  mean_silhouette_score=c()
+  K=c()
+  table_silhouette=c()
+  silhouette=c()
+  clusters=c()
+
+  ##Compute silhouette score with res = clustering resolution and cp = number of principal components
+  seu_obj@meta.data[["seurat_clusters"]]=c()
+  seu_obj <- FindClusters(seu_obj, resolution = resolution,verbose = FALSE)
+  seu_obj<- RunUMAP(seu_obj, dims = 1:principal_component,verbose = FALSE)
+  distance_matrix <- dist(Embeddings(seu_obj[['umap']])[, 1:2])
+  clusters <- seu_obj@meta.data[["seurat_clusters"]]
+  table_silhouette <- silhouette(as.numeric(clusters), dist = distance_matrix)
+  silhouette<-table_silhouette
+
+  seu_obj@meta.data[["silhouette_score"]] <- silhouette[,3]
+
+  mean_silhouette_score <- mean(seu_obj@meta.data[["silhouette_score"]],na.rm=TRUE)
+
+  #Compute Umap with silhouette score for all individual cells
+  jpeg("silhouette_score_umap.jpg", width = 600, height = 350)
+  show(FeaturePlot(seu_obj, features = c("silhouette_score"),label = TRUE,label.size = 6 , pt.size = 0.5,repel = TRUE,cols = c("green", "red"))+ ggtitle(' Silhouette index'))
+
+  #Create silhouette plot
+  p <- seu_obj@meta.data %>%mutate(barcode = rownames(.)) %>%arrange(seurat_clusters,-silhouette_score) %>%mutate(barcode = factor(barcode, levels = barcode)) %>%
+    ggplot() +geom_col(aes(barcode, silhouette_score, fill = seurat_clusters), show.legend = FALSE) +geom_hline(yintercept = mean_silhouette_score, color = 'red', linetype = 'dashed') +
+    scale_x_discrete(name = 'Cells') +scale_y_continuous(name = 'Silhouette score') + scale_fill_manual(values = custom_colors$discrete) +theme_bw() +
+    theme(axis.title.x = element_blank(),axis.text.x = element_blank(), axis.ticks.x = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank() )
+
+  dev.off()
+  jpeg("silhouette_plot.jpg", width = 600, height = 350)
+
+  show(p)
+
+  ##Compute % cells with sil score negative
+  N_negative=sum(seu_obj@meta.data[["silhouette_score"]]<0)
+  N_tot=sum(seu_obj@meta.data[["silhouette_score"]]!=0)
+  percent_negative_sil=(N_negative/N_tot)*100
+  paste0( percent_negative_sil, " % Cells with negative score")
+
+} #end function
+#saveRDS(seu_m1,file="/Users/mlepetit/Desktop/PLASCAN/Gon_m1_object.rds")
+
+
+
+
+UMAPViz <- function(seu_obj,cp){
+  principal_component<-as.numeric(cp)
+
+  seu_obj<- RunUMAP(seu_obj, dims = 1:principal_component,verbose=FALSE)
+
+  # note that you can set `label = TRUE` or use the LabelClusters function to help label
+  # individual clusters
+  jpeg("UMAP.jpg", width = 600, height = 350)
+  show(DimPlot(seu_obj, reduction = "umap",label = TRUE,label.size = 6 ,cols = custom_colors$discrete)+ theme(legend.text=element_text(size=15)))
+
+  return(seu_obj)
+
+} #end function
+
+
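+# The helpers above are meant to be chained in roughly this order (a sketch
+# mirroring ranalysis_part1.R; object names and cutoffs here are illustrative):
+#   m   <- read_kb_output("kb_out_dir", "spliced")   # load kb-python counts
+#   m   <- RemoveEmptyDrop(m)                        # knee-plot filtering
+#   m   <- MatchGeneSymbol("t2g.txt", m)             # Ensembl id -> gene symbol
+#   seu <- Create_Seurat_Object(m)
+#   seu <- Calc_QC_metrics(seu)
+#   seu <- RemoveLowQuality(seu, 500, 25)            # min features, max %MT
+#   seu <- Run_doublet_finder(PreRemoveDoublet(seu), 0.04)
+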
+extrac_cpdb <- function(seu_obj){
+
+  seu_obj@meta.data$seurat_annotations<-Idents(seu_obj)
+  write.table(as.matrix(seu_obj@assays$RNA@data), 'cellphonedb_count.txt', sep='\t', quote=F)
+  meta_data <- cbind(rownames(seu_obj@meta.data), seu_obj@meta.data[,'seurat_annotations', drop=F])
+  meta_data <- as.matrix(meta_data)
+  meta_data[is.na(meta_data)] = "Unknown" # the cell type column must not contain NA values
+
+  write.table(meta_data, 'cellphonedb_meta.txt', sep='\t', quote=F, row.names=F)
+
+} #end function
+
+
+
+MarkerGene <- function(seu_obj){
+
+  VlnPlot(seu_obj, features = c("CHGA","CHGB","CD68","CD3D","CD3E","CD3G","CD19","MS4A1","TEK","KDR","PDGFRB","RGS5","S100B","POMC","PRL","SOX2"))
+
+} #end function
+
diff --git a/src/nf_modules/rfiltration/main.nf b/src/nf_modules/rfiltration/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..215e3238c2e79479358f588bc822cbc0685311a2
--- /dev/null
+++ b/src/nf_modules/rfiltration/main.nf
@@ -0,0 +1,38 @@
+container_url="mlepetit/ranalysis:latest"
+
+params.rfiltr_out=""
+params.mit=""
+params.nexp=""
+params.nfeature=""
+process rfiltr {
+  container="${container_url}"
+  label "big_mem_multi_cpus"
+  if (params.rfiltr_out != "") {
+    publishDir "results/${params.rfiltr_out}", mode: 'copy'
+  } else {
+    publishDir "results/Seurat_object_filtered/", mode: 'copy'
+  }
+
+  input:
+    tuple val(id_mtx), path(spliced_mtx)
+    tuple val(id_t2g), path(t2g_file)
+
+  output:
+    tuple val(id_mtx), path("Seurat_filtered_obj.Rds"), emit: seu_obj
+    tuple val(id_mtx), path("counts_filtered_doublet.txt"), emit: raw_filtered_mtx
+
+  script:
+"""
+Rscript ../../../src/Rscript/scQualityControlR.R ${spliced_mtx} ${t2g_file} ${params.mit} ${params.nfeature} ${params.nexp}
+"""
+}
diff --git a/src/nf_modules/rfiltration/ranalysis_part1.R b/src/nf_modules/rfiltration/ranalysis_part1.R
new file mode 100644
index 0000000000000000000000000000000000000000..ff9602640c21e89426eeae6a5b96cf04cb2cc0dc
--- /dev/null
+++ b/src/nf_modules/rfiltration/ranalysis_part1.R
@@ -0,0 +1,80 @@
+library(DropletUtils,verbose=FALSE, quietly=FALSE)
+library(Matrix,verbose=FALSE, quietly=FALSE)
+library(tidyverse,verbose=FALSE, quietly=FALSE)
+library(DoubletFinder,verbose=FALSE, quietly=FALSE)
+library(ggplot2,verbose=FALSE, quietly=FALSE)
+library(SingleR,verbose=FALSE, quietly=FALSE)
+library(PCDimension,verbose=FALSE, quietly=FALSE)
+library(viridis,verbose=FALSE, quietly=FALSE)
+
+library(ggpointdensity,verbose=FALSE, quietly=FALSE)
+library(scico,verbose=FALSE, quietly=FALSE)
+library(scales,verbose=FALSE, quietly=FALSE)
+library(Seurat,verbose=FALSE, quietly=FALSE)
+library(cluster,verbose=FALSE, quietly=FALSE)
+library(dplyr,verbose=FALSE, quietly=FALSE)
+
+source("/scratch/Bio/mlepetit/nextflow/src/function.R")
+
+
+args = commandArgs(trailingOnly=TRUE)
+
+# the rfiltr process passes five arguments: <mtx dir> <t2g> <mit> <nfeature> <nexp>
+dir=args[1]
+t2g_file=args[2]
+percent_mt=as.numeric(args[3])
+n_feature=as.numeric(args[4])
+n_doublet_exp=as.numeric(args[5])
+
+
+
+
+name<-"spliced"
+print("Read KB Output")
+tumor=read_kb_output(dir,name)
+Sys.sleep(2)
+print("Remove empty droplet")
+tumor=RemoveEmptyDrop(tumor)
+Sys.sleep(2)
+print("Match Ensembl ID / Gene symbol")
+tumor=MatchGeneSymbol(t2g_file,tumor)
+
+
+print("Create Seurat Object")
+seu_object<-Create_Seurat_Object(tumor)
+print("Compute QC metrics")
+seu_object<-Calc_QC_metrics(seu_object)
+Sys.sleep(10)
+
+print("Remove low quality library")
+seu_object<-RemoveLowQuality(seu_object,n_feature,percent_mt)
+
+
+
+
+
+Sys.sleep(2)
+print("Run optimization for pK value")
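+# PreRemoveDoublet() runs the Seurat normalisation/scaling/PCA/clustering
+# steps that DoubletFinder requires, then Run_doublet_finder() sweeps pK
+# values (paramSweep_v3/find.pK) before removing the expected doublet
+# fraction n_doublet_exp (e.g. 0.04 for ~4% doublets); see function.R.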
+seu_object<-PreRemoveDoublet(seu_object) +Sys.sleep(2) + +print("Run DoubletFinder") + +seu_object<-Run_doublet_finder(seu_object,n_doublet_exp) +Sys.sleep(10) + + +print("Remove EmptyFeature") +seu_object<-RemoveEmptyFeature(seu_object) + +print("Save Object") + + +dir_tmp<-"./" + +save_RDS(seu_object,dir_tmp) +print("Extract Count Data") +Extract_data(seu_object,dir_tmp) + diff --git a/src/nf_modules/rfiltration/ranalysis_part2.R b/src/nf_modules/rfiltration/ranalysis_part2.R new file mode 100644 index 0000000000000000000000000000000000000000..2b1bbf870b7ee0befed4ece585f310d960ff6668 --- /dev/null +++ b/src/nf_modules/rfiltration/ranalysis_part2.R @@ -0,0 +1,49 @@ + +source("/Users/mlepetit/Desktop/myDocker/R_base/function.R") + + +args = commandArgs(trailingOnly=TRUE) + +#norm_matrix=args[1] +dir=args[1] +#dir_filtered_seu_obj<-"/Users/mlepetit/Desktop/myDocker/R_base" +cp=args[2] +res=args[3] + +print("Load seurat object") +seu_obj<-read_RDS(dir) + +print("Set normalize matrix to seurat object") + +seu_obj<-SetSanityMatrix(seu_obj,dir) + +print("Select Highy Variable Feature") +seu_obj<-VariabeFeature(seu_obj) + +print("Center scale and run pca") + +seu_obj<-RunCenterScalePCA(seu_obj) +print("Choose component") + +ChooseComponent(seu_obj) +Sys.sleep(10) +print("Viz Feature") + +Sys.sleep(2) +VizFeatureCP(seu_obj,cp) + +print("Compute Sil and number of cluster as function of resolution") +mean_silhouette(seu_obj) +print("KNN Graph Clustering and Louvain optimisation") +seu_object<-GraphClusteringLouvain(seu_obj,res,cp) +print("Create Silouette plot") + +SilPlot(seu_obj,res,cp) +print("Compute UMAP") + +seu_obj<-UMAPViz(seu_obj,cp) + + + + + diff --git a/src/nf_modules/rvisualization/function.R b/src/nf_modules/rvisualization/function.R new file mode 100644 index 0000000000000000000000000000000000000000..f48164d0fd30231afe73fdf418d19c717bdcce9c --- /dev/null +++ b/src/nf_modules/rvisualization/function.R @@ -0,0 +1,637 @@ +custom_colors <- list() + +colors_dutch <- c( + '#FFC312','#C4E538','#12CBC4','#FDA7DF','#ED4C67', + '#F79F1F','#A3CB38','#1289A7','#D980FA','#B53471', + '#EE5A24','#009432','#0652DD','#9980FA','#833471', + '#EA2027','#006266','#1B1464','#5758BB','#6F1E51' +) + +colors_spanish <- c( + '#40407a','#706fd3','#f7f1e3','#34ace0','#33d9b2', + '#2c2c54','#474787','#aaa69d','#227093','#218c74', + '#ff5252','#ff793f','#d1ccc0','#ffb142','#ffda79', + '#b33939','#cd6133','#84817a','#cc8e35','#ccae62' +) + +custom_colors$discrete <- c(colors_dutch, colors_spanish) + + + + + + + + + + + + + +reset_par <- function(){ + op <- structure(list(xlog = FALSE, ylog = FALSE, adj = 0.5, ann = TRUE, + ask = FALSE, bg = "transparent", bty = "o", cex = 1, + cex.axis = 1, cex.lab = 1, cex.main = 1.2, cex.sub = 1, + col = "black", col.axis = "black", col.lab = "black", + col.main = "black", col.sub = "black", crt = 0, err = 0L, + family = "", fg = "black", fig = c(0, 1, 0, 1), + fin = c(6.99999895833333, 6.99999895833333), font = 1L, + font.axis = 1L, font.lab = 1L, font.main = 2L, + font.sub = 1L, lab = c(5L, 5L, 7L), las = 0L, + lend = "round", lheight = 1, ljoin = "round", lmitre = 10, + lty = "solid", lwd = 1, mai = c(1.02, 0.82, 0.82, 0.42), + mar = c(5.1, 4.1, 4.1, 2.1), mex = 1, mfcol = c(1L, 1L), + mfg = c(1L, 1L, 1L,1L), mfrow = c(1L, 1L), + mgp = c(3, 1, 0), mkh = 0.001, new = FALSE, + oma = c(0, 0, 0, 0), omd = c(0, 1, 0, 1), + omi = c(0, 0, 0,0), pch = 1L, + pin = c(5.75999895833333, 5.15999895833333), + plt = c(0.117142874574832, 0.939999991071427, + 0.145714307397962, 
0.882857125425167), + ps = 12L, pty = "m", smo = 1, srt = 0, tck = NA_real_, + tcl = -0.5, usr = c(0.568, 1.432, 0.568, 1.432), + xaxp = c(0.6, 1.4, 4), xaxs = "r", xaxt = "s", + xpd = FALSE, yaxp = c(0.6, 1.4, 4), yaxs = "r", + yaxt = "s", ylbias = 0.2), + .Names = c("xlog", "ylog", "adj", "ann", "ask", "bg", + "bty", "cex", "cex.axis", "cex.lab", "cex.main", "cex.sub", + "col", "col.axis", "col.lab", "col.main", "col.sub", "crt", + "err", "family", "fg", "fig", "fin", "font", "font.axis", + "font.lab", "font.main", "font.sub", "lab", "las", "lend", + "lheight", "ljoin", "lmitre", "lty", "lwd", "mai", "mar", + "mex", "mfcol", "mfg", "mfrow", "mgp", "mkh", "new", "oma", + "omd", "omi", "pch", "pin", "plt", "ps", "pty", "smo", + "srt", "tck", "tcl", "usr", "xaxp", "xaxs", "xaxt", "xpd", + "yaxp", "yaxs", "yaxt", "ylbias")) + par(op) +} + + +read_count_output <- function(dir, name) { + m <- readMM(paste0(dir, "/","counts_unfiltered","/", name, ".mtx")) + m <- Matrix::t(m) + m <- as(m, "dgCMatrix") + # The matrix read has cells in rows + ge <- ".genes.txt" + genes <- readLines(file(paste0(dir, "/", name, ge))) + barcodes <- readLines(file(paste0(dir, "/", name, ".barcodes.txt"))) + colnames(m) <- barcodes + rownames(m) <- genes + return(m) +} + + + +#' Knee plot for filtering empty droplets +#' +#' Visualizes the inflection point to filter empty droplets. This function plots +#' different datasets with a different color. Facets can be added after calling +#' this function with `facet_*` functions. Will be added to the next release +#' version of BUSpaRse. +#' +#' @param bc_rank A `DataFrame` output from `DropletUtil::barcodeRanks`. +#' @return A ggplot2 object. +knee_plot <- function(bc_rank) { + knee_plt <- tibble(rank = bc_rank[["rank"]], + total = bc_rank[["total"]]) %>% + distinct() %>% + dplyr::filter(total > 0) + annot <- tibble(inflection = metadata(bc_rank)[["inflection"]], + rank_cutoff = max(bc_rank$rank[bc_rank$total > metadata(bc_rank)[["inflection"]]])) + p <- ggplot(knee_plt, aes(total, rank)) + + geom_line() + + geom_hline(aes(yintercept = rank_cutoff), data = annot, linetype = 2) + + geom_vline(aes(xintercept = inflection), data = annot, linetype = 2) + + scale_x_log10() + + scale_y_log10() + + labs(y = "Barcode Rank", x = "Total UMIs") + return(p) +} +#annotation_logticks()+ + +read_kb_output <- function(dir,name){ + + sample <- read_count_output(dir, name = name) + + show(dim(sample)) + + + return(sample) + +} #end function + + + +###Fuction to visualize kneeplot and remove empty droplet + + +RemoveEmptyDrop <- function(sample){ + + tot_counts <- Matrix::colSums(sample) + summary(tot_counts) + ###barcodeRanks provide of DropletUtils library + bc_rank <- barcodeRanks(sample, lower = 10) + + ##Visalisation empty droplet + jpeg("Kneeplot.jpeg") + show(knee_plot(bc_rank)) + dev.off() + + + ##Filter empty droplet + sample <- sample[, tot_counts > metadata(bc_rank)$inflection] + + + return(sample) + +} #end function + +###Fuction to load t2g.txt and to match ensemble identifier with gene symbol +MatchGeneSymbol <- function(dirt2g,sample){ + + ##Need tidyverse pacckage + + tr<-read_tsv(dirt2g, col_names = c("transcript", "gene", "gene_symbol")) + tr2g <- tr %>%select(-transcript) %>% distinct() + + + rownames(sample) <- tr2g$gene_symbol[match(rownames(sample), tr2g$gene)] + + + return(sample) + +} #end function + + + + +##Fuction to load t2g.txt and to match ensemble identifier with gene symbol +Create_Seurat_Object <- function(sample){ + + ##Need tidyverse pacckage + + + seu_obj 
<- CreateSeuratObject(sample,project="SPACENET", min.cells = 3, min.features = 300,verbose=FALSE) + show(dim(seu_obj)) + + + return(seu_obj) + +} #end function + +Calc_QC_metrics <- function(seu_obj){ + + seu_obj[["percent.mt"]] <- PercentageFeatureSet(seu_obj, pattern = "^MT-") + seu_obj[["percent.ribo"]] <- PercentageFeatureSet(seu_obj, pattern = "^RP[SL]") + + jpeg("QC_metrics.jpeg") + VlnPlot(seu_obj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 4) + dev.off() + + return(seu_obj) + +} #end functi +##Fuction to subset seurat object and remove low quality librairies base on Mitochondrial percent and number of gene min in a cell + +RemoveLowQuality <- function(seu_obj,nFeatureRNA,percent_mt){ + + + + seu_obj <- subset(seu_obj, subset = nFeature_RNA > nFeatureRNA & percent.mt < percent_mt) + + show(dim(seu_obj)) + + + return(seu_obj) + +} #end function + + + + +PreRemoveDoublet <- function(seu_obj){ + + + #DOUBLLETFINDER NEEDS TO RUN this 5 step before to run + seu_obj <- NormalizeData(seu_obj,verbose=FALSE) + seu_obj <- FindVariableFeatures(seu_obj, selection.method = "vst", nfeatures = 2000,verbose=FALSE) + seu_obj <- ScaleData(seu_obj,verbose=FALSE) + seu_obj <- RunPCA(seu_obj,verbose=FALSE) + + seu_obj <- FindNeighbors(seu_obj, dims = 1:10,verbose=FALSE) + + seu_obj <- FindClusters(seu_obj, resolution = 0.7,verbose=FALSE) + seu_obj <- RunUMAP(seu_obj, dims = 1:10,verbose=FALSE) + return(seu_obj) +} + + + + +Run_doublet_finder <- function(seu_obj,nexp){ + + + + print("Run Doublet finder opti") + sweep.res.list_kidney <- paramSweep_v3(seu_obj, PCs = 1:10, sct = FALSE) + sweep.stats_kidney <- summarizeSweep(sweep.res.list_kidney, GT = FALSE,) + bcmvn_kidney <- find.pK(sweep.stats_kidney) + dev.off() + reset_par() + + ##Plot optimisation to choose the right pK + #Save + jpeg("PK_opti.jpeg") + show(ggplot(bcmvn_kidney,aes(x=pK,y=BCmetric,group=1))+ geom_point() +geom_line()) + dev.off() + + + pK_opt_tmp<-filter(bcmvn_kidney,BCmetric == max(BCmetric)) + pK_opt<-as.numeric(as.character(t(pK_opt_tmp$pK))) + + + print(paste0("Optimum pK :",pK_opt)) + + N_exp <- round(ncol(seu_obj) * nexp) # expect 4% doublets + + #Run doublet finder + #seu_obj <- doubletFinder_v3(seu_obj, pN = 0.25, pK = pK_opt, nExp = N_exp, PCs = 1:10) + seu_obj <- doubletFinder_v3(seu_obj, pN = 0.25, pK = 0.09, nExp = N_exp, PCs = 1:10) + + DF.name = colnames(seu_obj@meta.data)[grepl("DF.classification", colnames(seu_obj@meta.data))] + + + jpeg("Doublet_detection.jpeg") + DimPlot(seu_obj, group.by = DF.name,cols = c("blue","red")) + NoAxes()+ggtitle("Doublet detection") + dev.off() + + print("Remove Doublets") + show(paste0( sum( seu_obj@meta.data[, DF.name] == "Doublet"), " doublets detected/removed")) + seu_obj = seu_obj[, seu_obj@meta.data[, DF.name] == "Singlet"] + + #show(dim(seu_obj)) + + + + + return(seu_obj) + +} #end function + +### Function to remove empty feature +RemoveEmptyFeature <- function(seu_obj){ + + + + seu_obj <- seu_obj[Matrix::rowSums(seu_obj) > 0,] + + return(seu_obj) + +} #end function + + + +save_RDS <- function(seu_obj,dir){ + + dir <- normalizePath(dir, mustWork = TRUE) + + name<-"Seurat_object" + path<-paste0(dir, "/", name,".Rds") + saveRDS(seu_obj,file=path) + + + +} #end function + + +### Function to extract data for sanity normaliztion +Extract_data <- function(seu_obj,dir){ + + + df_tmp<-as.data.frame(GetAssayData(object = seu_obj, slot = "counts")) + df_tmp<- cbind(GeneID = rownames(df_tmp), df_tmp) + rownames(df_tmp) <- NULL + + dir <- normalizePath(dir, mustWork = 
TRUE) + + name<-"counts_filtered_doublet" + path<-paste0(dir, "/", name, ".txt") + write.table(df_tmp,path,sep = '\t', row.names = F, col.names=T, quote = F) + + return(seu_obj) + +} #end function + + + + +read_RDS <- function(path_seu_obj){ + + path_seu_obj <- normalizePath(path_seu_obj, mustWork = TRUE) + + + seu_obj <-readRDS(path_seu_obj) + + + show(dim(seu_obj)) + + + return(seu_obj) + +} #end function + + + +SetSanityMatrix <- function(seu_obj,path){ + + dir <- normalizePath(path, mustWork = TRUE) + name<-"Gon_m1_good_doublet_log_transcription_quotients" + #name<-"counts_filtered_doublet" + path<-paste0(dir, "/", name, ".txt") + + df_norm_tmp<-Matrix(as.matrix(read.table(path,row.names = 1, header = T) ),sparse = TRUE) + + + seu_obj <- SetAssayData(object = seu_obj, slot = "data", new.data = df_norm_tmp) + + + + return(seu_obj) + +} #end function + + + +VariabeFeature <- function(seu_obj){ + + seu_obj <- FindVariableFeatures(seu_obj, selection.method = "vst", nfeatures = 2000,verbose=FALSE) + + # Identify the 10 most highly variable genes + top10 <- head(VariableFeatures(seu_obj), 10) + + # plot variable features with and without labels + plot1 <- VariableFeaturePlot(seu_obj) + plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) + jpeg("Variable_feature.jpg", width = 350, height = 350) + show(plot2) + dev.off() + + return(seu_obj) + +} #end function + + + + +evplot = function(ev) { + # Broken stick model (MacArthur 1957) + n = length(ev) + bsm = data.frame(j=seq(1:n), p=0) + bsm$p[1] = 1/n + for (i in 2:n) bsm$p[i] = bsm$p[i-1] + (1/(n + 1 - i)) + bsm$p = 100*bsm$p/n + # Plot eigenvalues and % of variation for each axis + op = par(mfrow=c(2,1),omi=c(0.1,0.3,0.1,0.1), mar=c(1, 1, 1, 1)) + barplot(ev, main="Eigenvalues", col="bisque", las=2) + abline(h=mean(ev), col="red") + legend("topright", "Average eigenvalue", lwd=1, col=2, bty="n",cex = 1.5) + barplot(t(cbind(100*ev/sum(ev), bsm$p[n:1])), beside=TRUE, + main="% variation", col=c("bisque",2), las=2) + legend("topright", c("% eigenvalue", "Broken stick model"), + pch=15, col=c("bisque",2), bty="n",cex = 1.5) + par(op) +} + +ChooseComponent = function(seu_obj) { + + + df_ev<-(seu_obj@reductions[["pca"]]@stdev)^2 + + jpeg("Choose_component.jpg", width = 350, height = 350) + evplot(df_ev) + + +} +RunCenterScalePCA <- function(seu_obj){ + + + + all.genes <- rownames(seu_obj) + seu_obj<- ScaleData(seu_obj, features = all.genes,verbose=FALSE) + + seu_obj <- RunPCA(seu_obj, features = VariableFeatures(object = seu_obj),verbose=FALSE) + + return(seu_obj) + +} #end function + +VizFeatureCP <- function(seu_obj,cp){ + jpeg("VizDimPca.jpg", width = 350, height = 350) + show(VizDimLoadings(seu_obj, dims = 1:cp, reduction = "pca")) + seu_obj <- FindNeighbors(seu_obj, dims = 1:cp) + return(seu_obj) + +} #end function + +# calculates the convex hull of (J,pen) +# kv : coordinates of the convex hull + +conv.hull <- function(J,pen){ + + K = length(J) + k = 1 + kv = c() + pv = c() + + while (k<K){ + pk = (J[(k+1):K]-J[k]) / (pen[k]-pen[(k+1):K]) + dm = which.max(pk) + kv = c(kv,k) + k = k + dm + } + + kv = c(kv,K) + return(kv=kv) + +} #end function + + + + +mean_silhouette<-function(object){ + + mean_silhouette_score=c() + j = 0 + K=c() + object <- FindClusters(object, resolution = 0.01,verbose = FALSE) + + object<- RunUMAP(object, dims = 1:4,verbose = FALSE) + distance_matrix <- dist(Embeddings(object[['umap']])[, 1:2]) + for (rr in seq(0.01,1,by=0.01)){ + table_silhouette=c() + silhouette=c() + clusters=c() + 
object@meta.data[["seurat_clusters"]]=c()
+    j = j+1
+    show(j)
+
+    object <- FindClusters(object, resolution = rr,verbose = FALSE)
+    K[j] = length(unique(Idents(object)))
+
+    #object<- RunUMAP(object, dims = 1:4,verbose = FALSE)
+
+    #distance_matrix <- dist(Embeddings(object[['umap']])[, 1:2])
+
+    clusters <- object@meta.data[["seurat_clusters"]]
+
+    table_silhouette <- silhouette(as.numeric(clusters), dist = distance_matrix)
+
+    silhouette<-table_silhouette
+
+    object@meta.data[["seurat_clusters"]] <- silhouette[,3]
+    mean_silhouette_score[j] <- mean(object@meta.data[["seurat_clusters"]])
+
+  }
+
+  x = seq(0.01,1,by=0.01)
+  ch = conv.hull(J = -mean_silhouette_score, pen = seq(0.01,1,by=0.01))
+  ch2 = conv.hull(J = -K, pen = seq(0.01,1,by=0.01))
+  par(mar=c(5,5,5,5)+0.1, las=1)
+
+  jpeg("silhouette_res_cluster.jpg", width = 600, height = 350)
+  plot.new()
+  plot.window(xlim=range(x), ylim=range(K))
+  lines.default(x, K, col="red")
+
+  axis(1,at = x)
+  axis(2,at=seq(min(K),max(K),by=1))
+  box()
+
+  plot.window(xlim=range(x), ylim=range(mean_silhouette_score))
+  lines(x,mean_silhouette_score, col="blue")
+  lines(x[ch],mean_silhouette_score[ch],type="l",col=2)
+  lines(x[ch],mean_silhouette_score[ch],type="b",col=2)
+
+  axis(4)
+  mtext("Mean Silhouette score", side=4, line=3, cex.lab=1,las=3)
+
+  # add a main title and bottom and left axis labels
+  title( xlab="Resolution",ylab="Clusters")
+
+
+
+  show(x[ch])
+  show(mean_silhouette_score[ch])
+
+}
+
+
+GraphClusteringLouvain <- function(seu_obj,res,cp){
+  resolution<-as.numeric(res)
+  principal_component<-as.numeric(cp)
+  seu_obj <- FindNeighbors(seu_obj, dims = 1:principal_component)
+  seu_obj <- FindClusters(seu_obj, resolution = resolution)
+  return(seu_obj)
+
+} #end function
+
+
+
+SilPlot <- function(seu_obj,res,cp){
+  ##Initialization
+  resolution<-as.numeric(res)
+  principal_component<-as.numeric(cp)
+  mean_silhouette_score=c()
+  K=c()
+  table_silhouette=c()
+  silhouette=c()
+  clusters=c()
+
+  ##Compute silhouette score with res = clustering resolution and cp = number of principal components
+  seu_obj@meta.data[["seurat_clusters"]]=c()
+  seu_obj <- FindClusters(seu_obj, resolution = resolution,verbose = FALSE)
+  seu_obj<- RunUMAP(seu_obj, dims = 1:principal_component,verbose = FALSE)
+  distance_matrix <- dist(Embeddings(seu_obj[['umap']])[, 1:2])
+  clusters <- seu_obj@meta.data[["seurat_clusters"]]
+  table_silhouette <- silhouette(as.numeric(clusters), dist = distance_matrix)
+  silhouette<-table_silhouette
+
+  seu_obj@meta.data[["silhouette_score"]] <- silhouette[,3]
+
+  mean_silhouette_score <- mean(seu_obj@meta.data[["silhouette_score"]],na.rm=TRUE)
+
+  #Compute Umap with silhouette score for all individual cells
+  jpeg("silhouette_score_umap.jpg", width = 600, height = 350)
+  show(FeaturePlot(seu_obj, features = c("silhouette_score"),label = TRUE,label.size = 6 , pt.size = 0.5,repel = TRUE,cols = c("green", "red"))+ ggtitle(' Silhouette index'))
+
+  #Create silhouette plot
+  p <- seu_obj@meta.data %>%mutate(barcode = rownames(.)) %>%arrange(seurat_clusters,-silhouette_score) %>%mutate(barcode = factor(barcode, levels = barcode)) %>%
+    ggplot() +geom_col(aes(barcode, silhouette_score, fill = seurat_clusters), show.legend = FALSE) +geom_hline(yintercept = mean_silhouette_score, color = 'red', linetype = 'dashed') +
+    scale_x_discrete(name = 'Cells') +scale_y_continuous(name = 'Silhouette score') + scale_fill_manual(values = custom_colors$discrete) +theme_bw() +
+    theme(axis.title.x = element_blank(),axis.text.x = element_blank(),
axis.ticks.x = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank() ) + + dev.off() + jpeg("silhouette_plot.jpg", width = 600, height = 350) + + show(p) + + ##Compute % cells with sil score negative + N_negative=sum(seu_obj@meta.data[["silhouette_score"]]<0) + N_tot=sum(seu_obj@meta.data[["silhouette_score"]]!=0) + percent_negative_sil=(N_negative/N_tot)*100 + paste0( percent_negative_sil, " % Cells with negative score") + + +} #end function +#saveRDS(seu_m1,file="/Users/mlepetit/Desktop/PLASCAN/Gon_m1_object.rds") + + + + + + + +UMAPViz <- function(seu_obj,cp){ + principal_component<-as.numeric(cp) + + seu_obj<- RunUMAP(seu_obj, dims = 1:principal_component,verbose=FALSE) + + + # note that you can set `label = TRUE` or use the LabelClusters function to help label + # individual clusters + jpeg("UMAP.jpg", width = 600, height = 350) + show(DimPlot(seu_obj, reduction = "umap",label = TRUE,label.size = 6 ,cols = custom_colors$discrete)+ theme(legend.text=element_text(size=15))) + + + + return(seu_obj) + +} #end function + + + + +extrac_cpdb <- function(seu_obj){ + + seu_obj@meta.data$seurat_annotations<-Idents(seu_obj) + write.table(as.matrix(seu_obj@assays$RNA@data), 'cellphonedb_count.txt', sep='\t', quote=F) + meta_data <- cbind(rownames(seu_obj@meta.data), seu_obj@meta.data[,'seurat_annotations', drop=F]) + meta_data <- as.matrix(meta_data) + meta_data[is.na(meta_data)] = "Unkown" # There can't be... In the cell type NA + + write.table(meta_data, 'cellphonedb_meta.txt', sep='\t', quote=F, row.names=F) + + + + +} #end function + + + +MarkerGene <- function(seu_obj){ + + VlnPlot(seu_obj, features = c("CHGA","CHGB","CD68","CD3D","CD3E","CD3G","CD19","MS4A1","TEK","KDR","PDGFRB","RGS5","S100B","POMC","PRL","SOX2")) + + +} #end function + diff --git a/src/nf_modules/rvisualization/main.nf b/src/nf_modules/rvisualization/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..e22a15d7e1ca1507f32238e717d67a887b76ede9 --- /dev/null +++ b/src/nf_modules/rvisualization/main.nf @@ -0,0 +1,36 @@ +container_url="mlepetit/ranalysis:latest" + +params.rviz_out="" + +params.cp=4 +params.reso=0.01 + +process rviz + { + + container="${container_url}" + label "big_mem_multi_cpus" + if (params.rviz_out != "") { + publishDir "results/${params.rviz_out}", mode: 'copy' + } else { + publishDir "results/Seurat_object_filtered/", mode: 'copy' + + } + input: + + tuple val(id_norm),path(norm_mtx) + tuple val(id_norm),path(seu_obj) + + output: + + path("Seurat_fin_obj.Rds"), emit: seu_filt_nor_obj + + + script: + + """ + Rscript ../../../Rscript/src/scVizualisationR.R ${norm_mtx} ${seu_obj} ${params.cp} ${params.reso} + + """ + } + diff --git a/src/nf_modules/rvisualization/main.nf.save b/src/nf_modules/rvisualization/main.nf.save new file mode 100644 index 0000000000000000000000000000000000000000..0e8b1ac4845d0e28a95734146db597b25e8f79ad --- /dev/null +++ b/src/nf_modules/rvisualization/main.nf.save @@ -0,0 +1,37 @@ +version ="latest" +container_url="mlepetit/kb:${version}" +process ranalysis1 { +// Create Seurat object and run fitration + +container="${container_url}" +label = "big_mem_mono_cpus" + + +params.spliced_mtx_id="" +params.t2g_id="" +params.mit_cutoff=25 +params.n_dou_exp=0.023 + + +input: + +tuple val(spliced_mtx_id),path(spliced_mtx_id) + +tuple val(t2g_id), path(t2g_id) + +val(mit_cutoff) + +val(n_dou_exp) + +output: +tuple val(filtered_seu_obj),path("*.Rds"), emit: seu_obj +tuple val(filtered_raw_matrix),path("*.txt), emit: raw_mtx + +script: + 
+""" Rscript --vanilla ./ranalysis_part1.R --args ${spliced_mtx_id} ${t2g_id} ${mit_cutoff} ${n_dou_exp} + +""" + +" +} diff --git a/src/nf_modules/rvisualization/ranalysis_part1.R b/src/nf_modules/rvisualization/ranalysis_part1.R new file mode 100644 index 0000000000000000000000000000000000000000..ff9602640c21e89426eeae6a5b96cf04cb2cc0dc --- /dev/null +++ b/src/nf_modules/rvisualization/ranalysis_part1.R @@ -0,0 +1,80 @@ +library(DropletUtils,verbose=FALSE, quietly=FALSE) +library(Matrix,verbose=FALSE, quietly=FALSE) +library(tidyverse,verbose=FALSE, quietly=FALSE) +library(DoubletFinder,verbose=FALSE, quietly=FALSE) +library(ggplot2,verbose=FALSE, quietly=FALSE) +library(SingleR,verbose=FALSE, quietly=FALSE) +library(PCDimension,verbose=FALSE, quietly=FALSE) +library(viridis,verbose=FALSE, quietly=FALSE) + +library(ggpointdensity,verbose=FALSE, quietly=FALSE) +library(scico,verbose=FALSE, quietly=FALSE) +library(scales,verbose=FALSE, quietly=FALSE) +library(Seurat,verbose=FALSE, quietly=FALSE) +library(cluster,verbose=FALSE, quietly=FALSE) +library(dplyr,verbose=FALSE, quietly=FALSE) + +source("/scratch/Bio/mlepetit/nextflow/src/function.R") + + +args = commandArgs(trailingOnly=TRUE) + +dir=args[1] +t2g_file=args[2] +percent_mt=as.numeric(args[3]) +n_doublet_exp=as.numeric(args[4]) + + + + + + + +name<-"spliced" +print("Read KB Output") +tumor=read_kb_output(dir,name) +Sys.sleep(2) +print("Remove empty droplet") +tumor=RemoveEmptyDrop(tumor) +Sys.sleep(2) +print("Match Ensembl ID / Gene symbol") +tumor=MatchGeneSymbol(t2g_file,tumor) + + +print("Create Seurat Object") +seu_object<-Create_Seurat_Object(tumor) +print("Compute QC metrics") +seu_object<-Calc_QC_metrics(seu_object) +Sys.sleep(10) + +print("Remove low quality library") +seu_object<-RemoveLowQuality(seu_object,500,percent_mt) + + + + + + +Sys.sleep(2) +print("Run optimization for pK value") +seu_object<-PreRemoveDoublet(seu_object) +Sys.sleep(2) + +print("Run DoubletFinder") + +seu_object<-Run_doublet_finder(seu_object,n_doublet_exp) +Sys.sleep(10) + + +print("Remove EmptyFeature") +seu_object<-RemoveEmptyFeature(seu_object) + +print("Save Object") + + +dir_tmp<-"./" + +save_RDS(seu_object,dir_tmp) +print("Extract Count Data") +Extract_data(seu_object,dir_tmp) + diff --git a/src/nf_modules/rvisualization/ranalysis_part2.R b/src/nf_modules/rvisualization/ranalysis_part2.R new file mode 100644 index 0000000000000000000000000000000000000000..2b1bbf870b7ee0befed4ece585f310d960ff6668 --- /dev/null +++ b/src/nf_modules/rvisualization/ranalysis_part2.R @@ -0,0 +1,49 @@ + +source("/Users/mlepetit/Desktop/myDocker/R_base/function.R") + + +args = commandArgs(trailingOnly=TRUE) + +#norm_matrix=args[1] +dir=args[1] +#dir_filtered_seu_obj<-"/Users/mlepetit/Desktop/myDocker/R_base" +cp=args[2] +res=args[3] + +print("Load seurat object") +seu_obj<-read_RDS(dir) + +print("Set normalize matrix to seurat object") + +seu_obj<-SetSanityMatrix(seu_obj,dir) + +print("Select Highy Variable Feature") +seu_obj<-VariabeFeature(seu_obj) + +print("Center scale and run pca") + +seu_obj<-RunCenterScalePCA(seu_obj) +print("Choose component") + +ChooseComponent(seu_obj) +Sys.sleep(10) +print("Viz Feature") + +Sys.sleep(2) +VizFeatureCP(seu_obj,cp) + +print("Compute Sil and number of cluster as function of resolution") +mean_silhouette(seu_obj) +print("KNN Graph Clustering and Louvain optimisation") +seu_object<-GraphClusteringLouvain(seu_obj,res,cp) +print("Create Silouette plot") + +SilPlot(seu_obj,res,cp) +print("Compute UMAP") + 
+seu_obj<-UMAPViz(seu_obj,cp) + + + + + diff --git a/src/nf_modules/sambamba/main.nf b/src/nf_modules/sambamba/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..ea6c6e972bf3e0360df35c2b6eeb9ad227d42450 --- /dev/null +++ b/src/nf_modules/sambamba/main.nf @@ -0,0 +1,59 @@ +version = "0.6.7" +container_url = "lbmc/sambamba:${version}" + +params.index_bam = "" +process index_bam { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam) + + output: + tuple val(file_id), path("*.bam*"), emit: bam + + script: +""" +sambamba index ${params.index_bam} -t ${task.cpus} ${bam} +""" +} + +params.sort_bam = "" +process sort_bam { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam) + + output: + tuple val(file_id), path("*.bam*"), emit: bam + + script: +""" +sambamba sort -t ${task.cpus} ${params.sort_bam} -o ${bam.baseName}_sorted.bam ${bam} +""" +} + +params.split_bam = "" +process split_bam { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(bam) + + output: + tuple val(file_id), path("*_forward.bam*"), emit: bam_forward + tuple val(file_id), path("*_reverse.bam*"), emit: bam_reverse + script: +""" +sambamba view -t ${task.cpus} ${params.split_bam} -h -F "strand == '+'" ${bam} > \ + ${bam.baseName}_forward.bam +sambamba view -t ${task.cpus} ${params.split_bam} -h -F "strand == '-'" ${bam} > \ + ${bam.baseName}_reverse.bam +""" +} diff --git a/src/nf_modules/samtools/main.nf b/src/nf_modules/samtools/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..ed88dc56dfeaa239845c7f1f8467bfc1d81624eb --- /dev/null +++ b/src/nf_modules/samtools/main.nf @@ -0,0 +1,315 @@ +version = "1.11" +container_url = "lbmc/samtools:${version}" + +params.index_fasta = "" +params.index_fasta_out = "" +process index_fasta { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.index_fasta_out != "") { + publishDir "results/${params.index_fasta_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(fasta) + output: + tuple val(file_id), path("*.fai"), emit: index + + script: +""" +if gzip -t ${fasta}; then + zcat ${fasta} > ${fasta.simpleName}.fasta + samtools faidx ${params.index_fasta} ${fasta.simpleName}.fasta +else + samtools faidx ${params.index_fasta} ${fasta} +fi + +""" +} + +params.filter_bam_quality_threshold = 30 +params.filter_bam_quality = "-q ${params.filter_bam_quality_threshold}" +params.filter_bam_quality_out = "" +process filter_bam_quality { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.filter_bam_quality_out != "") { + publishDir "results/${params.filter_bam_quality_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(bam) + + output: + tuple val(file_id), path("*_filtered.bam"), emit: bam + script: +""" +samtools view -@ ${task.cpus} -hb ${bam} ${params.filter_bam_quality} > \ + ${bam.simpleName}_filtered.bam +""" +} + +params.filter_bam = "" +params.filter_bam_out = "" +process filter_bam { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.filter_bam_out != "") { + publishDir "results/${params.filter_bam_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(bam) + tuple val(bed_id), path(bed) + + output: + tuple val(file_id), path("*_filtered.bam"), emit: bam + script: +""" +samtools view -@ ${task.cpus} -hb 
${bam} -L ${bed} ${params.filter_bam} > \
+  ${bam.simpleName}_filtered.bam
+"""
+}
+
+params.rm_from_bam = ""
+params.rm_from_bam_out = ""
+process rm_from_bam {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.rm_from_bam_out != "") {
+    publishDir "results/${params.rm_from_bam_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+    tuple val(bed_id), path(bed)
+
+  output:
+    tuple val(file_id), path("*_filtered.bam"), emit: bam
+  script:
+"""
+samtools view -@ ${task.cpus} ${params.rm_from_bam} -hb -L ${bed} -U ${bam.simpleName}_filtered.bam ${bam} > /dev/null
+"""
+}
+
+params.filter_bam_mapped = "-F 4"
+params.filter_bam_mapped_out = ""
+process filter_bam_mapped {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.filter_bam_mapped_out != "") {
+    publishDir "results/${params.filter_bam_mapped_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("*_mapped.bam"), emit: bam
+  script:
+"""
+samtools view -@ ${task.cpus} ${params.filter_bam_mapped} -hb ${bam} > \
+  ${bam.simpleName}_mapped.bam
+"""
+}
+
+params.filter_bam_unmapped = "-f 4"
+params.filter_bam_unmapped_out = ""
+process filter_bam_unmapped {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.filter_bam_unmapped_out != "") {
+    publishDir "results/${params.filter_bam_unmapped_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("*_unmapped.bam"), emit: bam
+  script:
+"""
+samtools view -@ ${task.cpus} ${params.filter_bam_unmapped} -hb ${bam} > ${bam.simpleName}_unmapped.bam
+"""
+}
+
+params.index_bam = ""
+params.index_bam_out = ""
+process index_bam {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.index_bam_out != "") {
+    publishDir "results/${params.index_bam_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("${bam}"), path("*.bam.bai"), emit: bam_idx
+
+  script:
+"""
+samtools index ${params.index_bam} ${bam}
+"""
+}
+
+params.sort_bam = ""
+params.sort_bam_out = ""
+process sort_bam {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.sort_bam_out != "") {
+    publishDir "results/${params.sort_bam_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("*.bam*"), emit: bam
+
+  script:
+"""
+samtools sort -@ ${task.cpus} ${params.sort_bam} -O BAM -o ${bam.simpleName}_sorted.bam ${bam}
+"""
+}
+
+params.split_bam = ""
+params.split_bam_out = ""
+process split_bam {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.split_bam_out != "") {
+    publishDir "results/${params.split_bam_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("*_forward.bam*"), emit: bam_forward
+    tuple val(file_id), path("*_reverse.bam*"), emit: bam_reverse
+  script:
+"""
+samtools view -@ ${Math.round(task.cpus/2)} ${params.split_bam} \
+  -hb -F 0x10 ${bam} > ${bam.simpleName}_forward.bam &
+samtools view -@ ${Math.round(task.cpus/2)} ${params.split_bam} \
+  -hb -f 0x10 ${bam} > ${bam.simpleName}_reverse.bam
+wait
+"""
+}
+
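+// EXAMPLE (a minimal sketch, not part of the tested modules): these processes
+// compose over channels of [ file_id, file ] tuples, e.g. sorting and then
+// indexing a channel of bam files:
+/*
+include { sort_bam; index_bam } from './nf_modules/samtools/main' addParams(sort_bam_out: "sorted/")
+
+workflow {
+  bams = channel.fromPath("data/*.bam").map { it -> [it.simpleName, it] }
+  sort_bam(bams)
+  index_bam(sort_bam.out.bam)
+}
+*/
+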
"results/${params.merge_bam_out}", mode: 'copy' + } + + input: + tuple val(first_file_id), path(first_bam) + tuple val(second_file_id), path(second_bam) + + output: + tuple val(file_id), path("*.bam*"), emit: bam + script: +""" +samtools merge -@ ${task.cpus} ${params.merge_bam} ${first_bam} ${second_bam} \ + ${first_bam.simpleName}_${second_file.simpleName}.bam +""" +} + +params.merge_multi_bam = "" +params.merge_multi_bam_out = "" +process merge_multi_bam { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.merge_multi_bam_out != "") { + publishDir "results/${params.merge_multi_bam_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(bams) + + output: + tuple val(file_id), path("*_merged.bam*"), emit: bam + script: +""" +samtools merge -@ ${task.cpus} \ + ${params.merge_multi_bam} \ + ${bams[0].simpleName}_merged.bam \ + ${bams} +""" +} + +params.stats_bam = "" +params.stats_bam_out = "" +process stats_bam { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.stats_bam_out != "") { + publishDir "results/${params.stats_bam_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(bam) + + output: + tuple val(file_id), path("*.tsv"), emit: tsv + path "*.flagstat.txt", emit: report + script: +""" +samtools flagstat -@ ${task.cpus} ${params.stats_bam} -O tsv ${bam} > ${bam.simpleName}.flagstat.txt +cp ${bam.simpleName}.flagstat.txt ${bam.simpleName}.tsv +""" +} + +params.flagstat_2_multiqc = "" +params.flagstat_2_multiqc_out = "" +process flagstat_2_multiqc { + tag "$file_id" + if (params.flagstat_2_multiqc_out != "") { + publishDir "results/${params.flagstat_2_multiqc_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(tsv) + + output: + tuple val(file_id), path("*.txt"), emit: report +""" +mv ${tsv} ${tsv.simpleName}.flagstat.txt +""" +} + +params.idxstat_2_multiqc = "" +params.idxstat_2_multiqc_out = "" +process idxstat_2_multiqc { + tag "$file_id" + if (params.idxstat_2_multiqc_out != "") { + publishDir "results/${params.idxstat_2_multiqc_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(tsv) + + output: + tuple val(file_id), path("*.txt"), emit: report +""" +mv ${tsv} ${tsv.simpleName}.idxstats.txt +""" +} \ No newline at end of file diff --git a/src/nf_modules/sanity/main.nf b/src/nf_modules/sanity/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..7d9d6236c94230a845abf688699f268e5dff24ef --- /dev/null +++ b/src/nf_modules/sanity/main.nf @@ -0,0 +1,35 @@ +container_url="mlepetit/sanity:latest" + +params.sanity_out="" +params.sanity="" + +process normalization_sanity + { + + container="${container_url}" + label "big_mem_multi_cpus" + if (params.sanity_out != "") { + publishDir "results/${params.sanity_out}", mode: 'copy' + + } +else { + publishDir "results/normalize_matrix/", mode: 'copy' + + } + + input: + + tuple val(id_mtx), path(raw_filtered_mtx) + + + output: + + tuple val(id_mtx),path("log_transcription_quotients.txt"), emit: normalize_filtered_mtx + tuple val(id_mtx), path("ltq_error_bars.txt") ,emit: ltq_error + + script: + + """ + Sanity -f ${raw_filtered_mtx} -n ${task.cpus} ${params.sanity} + """ + } diff --git a/src/nf_modules/sratoolkit/list-srr.txt b/src/nf_modules/sratoolkit/list-srr.txt new file mode 100644 index 0000000000000000000000000000000000000000..a58fc103ffe37a56f511aee117b26383b1e3f516 --- /dev/null +++ b/src/nf_modules/sratoolkit/list-srr.txt @@ -0,0 +1,6 @@ +ERR572281 +ERR572146 +ERR572201 +ERR638114 +ERR638115 
diff --git a/src/nf_modules/sratoolkit/main.nf b/src/nf_modules/sratoolkit/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..158d4058764560e9771133952db53b70391182c9
--- /dev/null
+++ b/src/nf_modules/sratoolkit/main.nf
@@ -0,0 +1,32 @@
+version = "2.8.2"
+container_url = "lbmc/sratoolkit:${version}"
+
+params.fastq_dump = ""
+params.fastq_dump_out = ""
+process fastq_dump {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$sra"
+  if (params.fastq_dump_out != "") {
+    publishDir "results/${params.fastq_dump_out}", mode: 'copy'
+  }
+
+  input:
+    val sra
+
+  output:
+    tuple val(sra), path("*.fastq.gz"), emit: fastq
+
+  script:
+"""
+fastq-dump ${params.fastq_dump} --split-files --gzip ${sra}
+if [ -f ${sra}_1.fastq.gz ]
+then
+  mv ${sra}_1.fastq.gz ${sra}_R1.fastq.gz
+fi
+if [ -f ${sra}_2.fastq.gz ]
+then
+  mv ${sra}_2.fastq.gz ${sra}_R2.fastq.gz
+fi
+"""
+}
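For reference, a minimal sketch pairing the fastq_dump process with the list-srr.txt fixture above; the channel wiring is an illustrative assumption:

nextflow.enable.dsl=2

include { fastq_dump } from './src/nf_modules/sratoolkit/main'

// one SRA accession per line, e.g. the list-srr.txt file above
channel
  .fromPath( "src/nf_modules/sratoolkit/list-srr.txt" )
  .splitText()
  .map { it -> it.trim() }
  .set { sra_ids }

workflow {
  fastq_dump(sra_ids)  // gzipped, R1/R2-renamed FASTQ files on fastq_dump.out.fastq
}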
diff --git a/src/nf_modules/ucsc/main.nf b/src/nf_modules/ucsc/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..e661e8a559075a163612c21d9d3c8e5777da102e
--- /dev/null
+++ b/src/nf_modules/ucsc/main.nf
@@ -0,0 +1,242 @@
+version = "407"
+container_url = "lbmc/ucsc:${version}"
+
+include {
+  index_fasta
+} from './../samtools/main'
+
+params.bedgraph_to_bigwig = ""
+params.bedgraph_to_bigwig_out = ""
+process bedgraph_to_bigwig {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.bedgraph_to_bigwig_out != "") {
+    publishDir "results/${params.bedgraph_to_bigwig_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bg)
+    tuple val(bed_id), path(bed)
+
+  output:
+    tuple val(file_id), path("*.bw"), emit: bw
+
+  script:
+"""
+export LC_COLLATE=C
+# transform bed file of start-stop chromosome size to stop chromosome size
+awk -v OFS="\\t" '{print \$1, \$3}' ${bed} > chromsize.txt
+
+sort -T ./ -k1,1 -k2,2n ${bg} > ${bg.simpleName}_sorted.bg
+bedGraphToBigWig ${params.bedgraph_to_bigwig} \
+  ${bg.simpleName}_sorted.bg \
+  chromsize.txt \
+  ${bg.simpleName}_norm.bw
+"""
+}
+
+params.wig_to_bedgraph = ""
+params.wig_to_bedgraph_out = ""
+workflow wig_to_bedgraph {
+  take:
+    fasta
+    wig
+  main:
+    wig_to_bigwig(
+      fasta,
+      wig
+    )
+    bigwig_to_bedgraph(
+      wig_to_bigwig.out.bw
+    )
+  emit:
+    bg = bigwig_to_bedgraph.out.bg
+}
+
+workflow wig2_to_bedgraph2 {
+  take:
+    fasta
+    wig
+  main:
+    wig2_to_bigwig2(
+      fasta,
+      wig
+    )
+    bigwig2_to_bedgraph2(
+      wig2_to_bigwig2.out.bw
+    )
+  emit:
+    bg = bigwig2_to_bedgraph2.out.bg
+}
+
+params.bigwig_to_bedgraph = ""
+params.bigwig_to_bedgraph_out = ""
+process bigwig_to_bedgraph {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.bigwig_to_bedgraph_out != "") {
+    publishDir "results/${params.bigwig_to_bedgraph_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bw)
+
+  output:
+    tuple val(file_id), path("*.bg"), emit: bg
+
+  script:
+"""
+bigWigToBedGraph ${bw} ${bw.simpleName}.bg
+"""
+}
+
+params.bigwig2_to_bedgraph2 = ""
+params.bigwig2_to_bedgraph2_out = ""
+process bigwig2_to_bedgraph2 {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.bigwig2_to_bedgraph2_out != "") {
+    publishDir "results/${params.bigwig2_to_bedgraph2_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bw_a), path(bw_b)
+
+  output:
+    tuple val(file_id), path("${bw_a.simpleName}.bg"), path("${bw_b.simpleName}.bg"), emit: bg
+
+  script:
+"""
+bigWigToBedGraph ${bw_a} ${bw_a.simpleName}.bg
+bigWigToBedGraph ${bw_b} ${bw_b.simpleName}.bg
+"""
+}
+
+params.bigwig_to_wig = ""
+params.bigwig_to_wig_out = ""
+process bigwig_to_wig {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.bigwig_to_wig_out != "") {
+    publishDir "results/${params.bigwig_to_wig_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bw)
+
+  output:
+    tuple val(file_id), path("*.wig"), emit: wig
+
+  script:
+"""
+bigWigToBedGraph ${bw} ${bw.simpleName}.bg
+bedgraph_to_wig.pl --bedgraph ${bw.simpleName}.bg --wig ${bw.simpleName}.wig --step 10
+"""
+}
+
+params.bigwig2_to_wig2 = ""
+params.bigwig2_to_wig2_out = ""
+process bigwig2_to_wig2 {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.bigwig2_to_wig2_out != "") {
+    publishDir "results/${params.bigwig2_to_wig2_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bw_a), path(bw_b)
+
+  output:
+    tuple val(file_id), path("${bw_a.simpleName}.wig"), path("${bw_b.simpleName}.wig"), emit: wig
+
+  script:
+"""
+bigWigToBedGraph ${bw_a} ${bw_a.simpleName}.bg
+bedgraph_to_wig.pl --bedgraph ${bw_a.simpleName}.bg --wig ${bw_a.simpleName}.wig --step 10
+bigWigToBedGraph ${bw_b} ${bw_b.simpleName}.bg
+bedgraph_to_wig.pl --bedgraph ${bw_b.simpleName}.bg --wig ${bw_b.simpleName}.wig --step 10
+"""
+}
+
+params.wig_to_bigwig = ""
+params.wig_to_bigwig_out = ""
+
+workflow wig_to_bigwig {
+  take:
+    fasta
+    wig
+  main:
+    index_fasta(fasta)
+    wig_to_bigwig_sub(
+      wig,
+      index_fasta.out.index
+    )
+  emit:
+    bw = wig_to_bigwig_sub.out.bw
+}
+
+process wig_to_bigwig_sub {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.wig_to_bigwig_out != "") {
+    publishDir "results/${params.wig_to_bigwig_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(w)
+    tuple val(idx_id), path(fasta_idx)
+
+  output:
+    tuple val(file_id), path("${w.simpleName}.bw"), emit: bw
+
+  script:
+"""
+cut -f 1,2 ${fasta_idx} > ${fasta_idx.simpleName}.sizes
+wigToBigWig -clip ${w} ${fasta_idx.simpleName}.sizes ${w.simpleName}.bw
+"""
+}
+
+params.wig2_to_bigwig2 = ""
+params.wig2_to_bigwig2_out = ""
+
+workflow wig2_to_bigwig2 {
+  take:
+    fasta
+    wigs
+  main:
+    index_fasta(fasta)
+    wig2_to_bigwig2_sub(
+      wigs,
+      index_fasta.out.index
+    )
+  emit:
+    bw = wig2_to_bigwig2_sub.out.bw
+}
+
+process wig2_to_bigwig2_sub {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.wig2_to_bigwig2_out != "") {
+    publishDir "results/${params.wig2_to_bigwig2_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(w_a), path(w_b)
+    tuple val(idx_id), path(fasta_idx)
+
+  output:
+    tuple val(file_id), path("${w_a.simpleName}.bw"), path("${w_b.simpleName}.bw"), emit: bw
+
+  script:
+"""
+cut -f 1,2 ${fasta_idx} > ${fasta_idx.simpleName}.sizes
+wigToBigWig -clip ${w_a} ${fasta_idx.simpleName}.sizes ${w_a.simpleName}.bw
+wigToBigWig -clip ${w_b} ${fasta_idx.simpleName}.sizes ${w_b.simpleName}.bw
+"""
+}
\ No newline at end of file
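For reference, a minimal sketch of the wig_to_bigwig entry point above, which indexes the genome via the included samtools module and converts wiggle tracks to bigWig; the --fasta and --wig globs are illustrative assumptions:

nextflow.enable.dsl=2

include { wig_to_bigwig } from './src/nf_modules/ucsc/main'

// hypothetical inputs: --fasta "data/genome.fasta" --wig "tracks/*.wig"
fasta_files = channel.fromPath( params.fasta ).map { it -> [it.simpleName, it] }
wig_files   = channel.fromPath( params.wig ).map { it -> [it.simpleName, it] }

workflow {
  wig_to_bigwig(fasta_files, wig_files)  // bigWig files on wig_to_bigwig.out.bw
}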
diff --git a/src/nf_modules/urqt/main.nf b/src/nf_modules/urqt/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..b91afb74ccad107e7accd76ead32ea9945dda8b1
--- /dev/null
+++ b/src/nf_modules/urqt/main.nf
@@ -0,0 +1,39 @@
+version = "d62c1f8"
+container_url = "lbmc/urqt:${version}"
+
+trim_quality = "20"
+
+params.trimming = "--t 20"
+process trimming {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "${file_id}"
+
+  input:
+    tuple val(file_id), path(reads)
+
+  output:
+    tuple val(file_prefix), path("*_trim*.fastq.gz"), emit: fastq
+    path "*_report.txt", emit: report
+
+  script:
+  if (file_id instanceof List){
+    file_prefix = file_id[0]
+  } else {
+    file_prefix = file_id
+  }
+  if (reads.size() == 2)
+"""
+UrQt ${params.trimming} --m ${task.cpus} --gz \
+  --in ${reads[0]} --inpair ${reads[1]} \
+  --out ${file_prefix}_trim_R1.fastq.gz --outpair ${file_prefix}_trim_R2.fastq.gz \
+  > ${file_prefix}_trimming_report.txt
+"""
+  else
+"""
+UrQt ${params.trimming} --m ${task.cpus} --gz \
+  --in ${reads[0]} \
+  --out ${file_prefix}_trim.fastq.gz \
+  > ${file_prefix}_trimming_report.txt
+"""
+}
\ No newline at end of file
diff --git a/src/test_cpdb.nf b/src/test_cpdb.nf
new file mode 100644
index 0000000000000000000000000000000000000000..4b43411708d86a7413480ccd51b1a57ef81a1208
--- /dev/null
+++ b/src/test_cpdb.nf
@@ -0,0 +1,35 @@
+nextflow.enable.dsl=2
+
+params.counts = ""
+params.meta = ""
+params.pval = 0.01
+params.thres = 0.1
+params.iterations = 1000
+params.gene_id = "gene_name"
+
+include { cellphone_statistical_analysis } from "./nf_modules/cellphonedb/main.nf" addParams(iterations: params.iterations, gene_id: params.gene_id, thres: params.thres, pval: params.pval)
+
+log.info "Counts files (--counts): ${params.counts}"
+log.info "Annotation file (--meta): ${params.meta}"
+
+channel
+  .fromPath( params.meta )
+  .ifEmpty { error "Cannot find any annotation files matching: ${params.meta}" }
+  .map { it -> [it.simpleName, it] }
+  .set { meta_files }
+
+channel
+  .fromPath( params.counts )
+  .ifEmpty { error "Cannot find any counts file matching: ${params.counts}" }
+  .map { it -> [it.simpleName, it] }
+  .set { counts_files }
+
+workflow {
+  cellphone_statistical_analysis(meta_files, counts_files)
+}
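For reference, this test pipeline could be launched with the nextflow launcher at the repository root along the lines of the following; the input paths are placeholders, not files shipped with this diff:

./nextflow run src/test_cpdb.nf \
  --counts "data/counts_matrix.txt" \
  --meta "data/cell_annotation.txt"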