diff --git a/.zenodo.json b/.zenodo.json deleted file mode 100644 index 9aa1f86e4367ceac3dabbbd2cb5e2ab449dc4cb1..0000000000000000000000000000000000000000 --- a/.zenodo.json +++ /dev/null @@ -1,24 +0,0 @@ - -{ - "creators": [ - { - "name": "Ozadam, Hakan", - "affiliation": "UT, Austin, TX, USA" - }, - { - "name": "Cenik, Can", - "affiliation": "UT, Austin, TX, USA" - } - ], - "keywords": [ - "bioinformatics", - "genomics", - "ribosome", - "ribo-seq", - "Python" - ], - "description": "<p>RiboFlow is a NextFlow based pipeline for processing ribosome profiling data.</p>", - "access_right": "open", - "license": "MIT", - "upload_type": "software" -} diff --git a/README.md b/README.md deleted file mode 100644 index 72f5e326691cc93823d2f993e8230259117f378b..0000000000000000000000000000000000000000 --- a/README.md +++ /dev/null @@ -1,222 +0,0 @@ -[](https://doi.org/10.5281/zenodo.3376949) - - -# RiboFlow - -RiboFlow is a [Nextflow](https://www.nextflow.io/) based pipeline -for processing ribosome profiling data. - -## Installation - -### Requirements - -* [Nextflow](https://www.nextflow.io/) -* [Docker](https://docs.docker.com/install/) (Optional) -* [Conda](https://conda.io/en/latest/miniconda.html) (Optional) - -First, follow the instructions in [Nextflow website](https://www.nextflow.io/) and install Nextflow. - -The easiest way of using RiboFLow is using Docker. -If using Docker is not an option, you can install the dependencies using Conda -and run RiboFlow without Docker. - -### Docker Option - -Install [Docker](https://docs.docker.com/install/). -Here is a [tutorial for Ubuntu.](https://www.digitalocean.com/community/tutorials/how-to-install-and-use-docker-on-ubuntu-18-04) - -All remaining dependencies come in the Docker image [ceniklab/riboflow](https://hub.docker.com/r/ceniklab/riboflow). -This image is automatically pulled by RiboFlow when run with Docker (see test runs below). - -### Conda Option - -This option has been tested on Linux systems only. - -Install [Conda](https://conda.io/en/latest/miniconda.html). - -All other dependencies can be installed using the environment file, -environment.yaml, in this repository. -``` -git clone https://github.com/ribosomeprofiling/riboflow.git -conda env create -f riboflow/environment.yaml -``` - -The above command will create a conda environment called _ribo_ -and install dependencies in it. -To start using RiboFlow, you need to activate the _ribo_ environment. - -`conda activate ribo` - -## Test Run - -For fresh installations, before running RiboFlow on actual data, -it is recommended to do a test run. - -Clone this repository in a new folder and change your working directory to the RiboFlow folder. -``` -mkdir rf_test_run && cd rf_test_run -git clone https://github.com/ribosomeprofiling/riboflow.git -cd riboflow -``` - -Obtain a copy of the sample data in the working directory. -``` -git clone https://github.com/ribosomeprofiling/rf_sample_data.git -``` - -### Run Using Docker - -Provide the argument `-profile docker_local` to Nextflow to indicate Docker use. - -`nextflow RiboFlow.groovy -params-file project.yaml -profile docker_local` - -### Run Using Conda Environment - -Make sure that you have created the conda environment, called _ribo_, -using the instructions above. Then activate the conda environment. - -`conda activate ribo` - -If the above command fails to activate the ribo environment, try -`source activate ribo` - -Now RiboFlow is ready to run. - -`nextflow RiboFlow.groovy -params-file project.yaml` - -## Output - -Pipeline run may take several minutes. -When finished, the resulting files are in the `./output` folder. - -Mapping statistics are compiled in a csv file called `stats.csv` - -``` -ls output/stats/stats.csv -``` - -Ribosome occupancy data is in a single -[ribo file](https://ribopy.readthedocs.io/en/latest/ribo_file_format.html) called `all.ribo`. - -`ls output/ribo/all.ribo` - -You can use -[RiboR](https://github.com/ribosomeprofiling/ribor) or -[RiboPy](https://github.com/ribosomeprofiling/ribopy) to work with ribo files. - - -## Actual Run - -For running RiboFlow on actual data, files must be organized and a parameters file must be prepared. -You can examine the sample run above to see an example. - -1. Organize your data. The following files are required for RiboFlow -* **Ribosome profiling sequencing data:** in gzipped fastq files -* **Transcriptome Reference:** Bowtie2 index files -* **Filter Reference:** Bowtie2 index files (typically for rRNA sequences) -* **Annotation:** A bed file defining CDS, UTR5 and UTR3 regions. -* **Transcript Lengths:** A two column tsv file containing transcript lengths - -2. Prepare a custom `project.yaml` file. -You can use the sample file `project.yaml`, provided in this repository, -as template. - -3. In `project.yaml`, provide RiboFlow parameters such as `clip_arguments`, alignment arguments etc. -You can simply modify the arguments in the sample file `project.yaml` in this repository. - -4. You can adjust the hardware and computing environment settings in Nextflow configuration file(s). -For Docker option, see `configs/docker_local.config`. If you are not using Docker, -see `configs/local.config`. - -5. RNA-Seq data is optional for RiboFlow. So, if you do NOT have RNA-Seq data, in the project file, set - -`do_rnaseq: false` - -If you have RNA-Seq data to be paired with ribosome profiling data, see the __Advanced Features__ below. - - -6. Metadata is optional for RiboFlow.. If you do NOT have metadata, in the project file, set - -`do_metadata: false` - -If you have metadata, see __Advanced Features__ below. - -7. Run RiboFlow using the new parameters file `project.yaml`. - -Using Docker: - -`nextflow RiboFlow.groovy -params-file project.yaml -profile docker_local` - -Without Docker: - -`nextflow RiboFlow.groovy -params-file project.yaml` - -## Advanced Features - -### RNA-Seq Data - -If you have RNA-Seq data that you want to pair with ribosome profiling experiments, -provide the paths of the RNA-Seq (gzipped) fastq files in the configuration file in -_input -> metadata_. See the file `project.yaml` in this repository for an example. -Note that the names in defining RNA-Seq files must match the names in definig ribosome profiling data. -Also turn set the do_rnaseq flag to true, in the project file: - -`do_rnaseq: true` - -Transcript abundance data will be stored in the output ribo file. - -### Metadata - -If you have metadata files for the ribosome profiling experiments, -provide the paths of the metadata files (in yaml format) in the configuration file in -_input -> metadata_. See the file `project.yaml` in this repository for an example. -Note that the names in defining metadata files must match the names in definig ribosome profiling data. -Also turn set the metadata flag to true, in the project file: - -`do_metadata: true` - -Metadata will be stored in the output ribo file. - -# nextflow pipeline - -This repository is a template and a library repository to help you build nextflow pipeline. -You can fork this repository to build your own pipeline. -To get the last commits from this repository into your fork use the following commands: - -```sh -git remote add upstream gitlab_lbmc:pipelines/nextflow.git -git pull upstream master -``` -**If you created your `.config` file before version `0.4.0` you need to run the script `src/.update_config.sh` to use the latest docker, singularity and conda configuration (don't forget to check your config files afterward for typos).** - -## Getting Started - -These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system. - -[you can follow them here.](doc/getting_started.md) - -## Available tools - -[The list of available tools.](doc/available_tools.md) - -## Projects using nextflow - -[A list of projects using nextflow at the LBMC.](doc/nf_projects.md) - -## Contributing - -Please read [CONTRIBUTING.md](CONTRIBUTING.md) for details on our code of conduct, and the process for submitting pull requests to us. - -## Versioning - -We use [SemVer](http://semver.org/) for versioning. For the versions available, see the [tags on this repository](https://gitlab.biologie.ens-lyon.fr/pipelines/nextflow/tags). - -## Authors - -* **Laurent Modolo** - *Initial work* - -See also the list of [contributors](https://gitlab.biologie.ens-lyon.fr/pipelines/nextflow/graphs/master) who participated in this project. - -## License - -This project is licensed under the CeCiLL License- see the [LICENSE](LICENSE) file for details diff --git a/configs/docker_local.config b/configs/docker_local.config deleted file mode 100644 index 0c597b4417485b34b5df78d9f990a9a52d220139..0000000000000000000000000000000000000000 --- a/configs/docker_local.config +++ /dev/null @@ -1,63 +0,0 @@ - -// Default configuration for running the pipeline on a local machine - - -process { - // if the process name is not listed separately below - // the following settings are used - executor='local' - cpus = 1 - maxRetries = 1 - errorStrategy = 'retry' - - cpus = 1 - - // Override the following defaults - // by specifying the process name - - withName: quality_filter{ - cpus = 4 - } - - withName: clip{ - cpus = 4 - } - - withName: filter{ - cpus = 4 - } - - withName: transcriptome_alignment{ - cpus = 4 - } - - withName: quality_filter{ - cpus = 4 - } - - withName: genome_alignment{ - cpus = 4 - } - - withName: create_ribo{ - cpus = 4 - } - - withName: post_genome_alignment{ - cpus = 4 - } - -} - - -// Total number of CPUs reserved for nextflow -executor { - cpus = 4 -} - - -docker { - enabled = true - runOptions = '-u $(id -u):$(id -g)' - temp = 'auto' -} diff --git a/configs/local.config b/configs/local.config deleted file mode 100644 index e16dbeafb91ddf81a5ec1f8bbfc78d3c723dcd6b..0000000000000000000000000000000000000000 --- a/configs/local.config +++ /dev/null @@ -1,62 +0,0 @@ - -// Default configuration for running the pipeline on a local machine - - -process { - // if the process name is not listed separately below - // the following settings are used - executor='local' - cpus = 1 - maxRetries = 1 - errorStrategy = 'retry' - - cpus = 1 - - // Override the following defaults - // by specifying the process name - - withName: quality_filter{ - cpus = 4 - } - - withName: clip{ - cpus = 4 - } - - withName: filter{ - cpus = 4 - } - - withName: transcriptome_alignment{ - cpus = 4 - } - - withName: quality_filter{ - cpus = 4 - } - - withName: genome_alignment{ - cpus = 4 - } - - withName: create_ribo{ - cpus = 4 - } - - withName: post_genome_alignment{ - cpus = 4 - } - -} - - -// Total number of CPUs reserved for nextflow -executor { - cpus = 4 -} - - -docker { - enabled = false - runOptions = '-u $(id -u):$(id -g)' -} diff --git a/configs/stampede_local.config b/configs/stampede_local.config deleted file mode 100644 index 3aa1e7174a6e017ff12b98aafad43c5c90aced1a..0000000000000000000000000000000000000000 --- a/configs/stampede_local.config +++ /dev/null @@ -1,67 +0,0 @@ - -// Default configuration for running the pipeline on a node of TACC Stampede2 - - -process { - // if the process name is not listed separately below - // the following settings are used - executor='local' - cpus = 1 - maxRetries = 1 - errorStrategy = 'retry' - - cpus = 1 - - - // Override the following defaults - // by specifying the process name - - withName: md5sum { - cpus = 1 - } - - withName: quality_filter{ - cpus = 4 - } - - withName: clip{ - cpus = 4 - } - - withName: filter{ - cpus = 8 - } - - withName: transcriptome_alignment{ - cpus = 8 - } - - withName: quality_filter{ - cpus = 8 - } - - withName: genome_alignment{ - cpus = 8 - } - - withName: create_ribo{ - cpus = 8 - } - - withName: post_genome_alignment{ - cpus = 8 - } - -} - - -// Total number of CPUs reserved for nextflow -executor { - cpus = 48 -} - - -docker { - enabled = false - runOptions = '-u $(id -u):$(id -g)' -} diff --git a/docker/Dockerfile b/docker/Dockerfile deleted file mode 100644 index 711d272d1d645a5ac16df48bd3a69884829af039..0000000000000000000000000000000000000000 --- a/docker/Dockerfile +++ /dev/null @@ -1,31 +0,0 @@ -FROM ubuntu:18.04 - -RUN apt-get update --fix-missing && \ - apt-get install -q -y wget curl bzip2 libbz2-dev git build-essential zlib1g-dev locales vim fontconfig ttf-dejavu - - -# Set the locale -RUN locale-gen en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US:en -ENV LC_ALL en_US.UTF-8 - -# Install conda -RUN curl -LO http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ - bash Miniconda3-latest-Linux-x86_64.sh -p /miniconda3 -b && \ - rm Miniconda3-latest-Linux-x86_64.sh -ENV PATH=/miniconda3/bin:${PATH} - -# Install conda dependencies -ADD environment.yaml / -ADD VERSION / -RUN pwd -RUN conda config --set always_yes yes --set changeps1 no && \ - conda config --add channels conda-forge && \ - conda config --add channels defaults && \ - conda config --add channels bioconda && \ - conda config --get && \ - conda update -q conda && \ - conda info -a && \ - conda env update -q -n root --file environment.yaml && \ - conda clean --tarballs --index-cache --lock diff --git a/docker/build.sh b/docker/build.sh deleted file mode 100644 index 180e2e5d8a09e46822bde1b36d663f944a65a87c..0000000000000000000000000000000000000000 --- a/docker/build.sh +++ /dev/null @@ -1,19 +0,0 @@ -set -ex - -cp ../VERSION ./VERSION -cp ../environment.yaml ./environment.yaml - -version=$(cat ./VERSION | sed -nre 's/^[^0-9]*(([0-9]+\.)*[0-9]+).*/\1/p') - -function cleanup { - rm ./VERSION - rm ./environment.yaml -} - -trap cleanup EXIT - - -docker build -t ceniklab/riboflow:latest . -docker run -it ceniklab/riboflow:latest apt list | sed 's/\x1b\[[0-9;]*m//g' > ./apt.list -docker run -it ceniklab/riboflow:latest conda list > ./conda.list -docker images diff --git a/docker/deploy.sh b/docker/deploy.sh deleted file mode 100644 index 7c416be659cc1e51afaf2925e32000f8b499de85..0000000000000000000000000000000000000000 --- a/docker/deploy.sh +++ /dev/null @@ -1,9 +0,0 @@ - -docker login -u ceniklab - -version=$(cat ../VERSION | sed -nre 's/^[^0-9]*(([0-9]+\.)*[0-9]+).*/\1/p') -echo "version: $version" - -# push the image -docker push ceniklab/riboflow:latest -docker push ceniklab/riboflow:$version diff --git a/docker/tag.sh b/docker/tag.sh deleted file mode 100644 index c3be5db66683c25ac5dbd7b69676c673e8d57134..0000000000000000000000000000000000000000 --- a/docker/tag.sh +++ /dev/null @@ -1,8 +0,0 @@ - -set -ex - -version=$(cat ../VERSION | sed -nre 's/^[^0-9]*(([0-9]+\.)*[0-9]+).*/\1/p') -echo "version: $version" - -# tag it -docker tag ceniklab/riboflow:latest ceniklab/riboflow:${version}