diff --git a/.editorconfig b/.editorconfig index b78de6e655d209990fd9c1e70ae4882a9f0b4dff..a9229959f923ec33a4329ef15bd8b21ce192feca 100644 --- a/.editorconfig +++ b/.editorconfig @@ -22,3 +22,6 @@ indent_size = unset [/assets/email*] indent_size = unset + +[/assets/blacklists/GRCh37-blacklist.bed] +trim_trailing_whitespace = unset diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 8da27af0eb9a6017271d057b39940493b3ac4bc4..f3dbd188d0fd31c93bd61a759a2901b35d6e6d0e 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -17,7 +17,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/chip - [ ] If you've fixed a bug or added code that should be tested, add tests! - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/chipseq/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/chipseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. 
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index c6f0c9c60cb0c4313cd8643477ac54b37c0ef3fe..5a1c3622dbded54b5194a6a2b96b50d1cdc63082 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -12,12 +12,12 @@ jobs: name: Run AWS full tests if: github.repository == 'nf-core/chipseq' runs-on: ubuntu-latest + strategy: + matrix: + aligner: ["bwa", "bowtie2", "chromap", "star"] steps: - name: Launch workflow via tower uses: nf-core/tower-action@v3 - # TODO nf-core: You can customise AWS full pipeline tests as required - # Add full size test data (but still relatively small datasets for few samples) - # on the `test_full.config` test runs with only one set of parameters with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} @@ -26,5 +26,6 @@ jobs: parameters: | { - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/chipseq/results-${{ github.sha }}" + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/chipseq/results-${{ github.sha }}", + "aligner": "${{ matrix.aligner }}" } profiles: test_full,aws_tower diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml old mode 100644 new mode 100755 index cf99db7f76ae62f9b4ddcdd2c3de6b30390a885b..520b9d2bd3e01fce8394d8e342aaec74458a9a70 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,8 +32,50 @@ jobs: version: "${{ matrix.NXF_VER }}" - name: Run pipeline with test data - # TODO nf-core: You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using strategy.matrix run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + + parameters: + name: Test workflow parameters + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/chipseq') }} + runs-on: ubuntu-latest + strategy: + matrix: + parameters: + - "--skip_trimming" + - "--skip_consensus_peaks" + steps: + - name: Check out pipeline code + uses: 
actions/checkout@v2 + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Run pipeline with various parameters + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.parameters }} --outdir ./results + + aligners: + name: Test available aligners + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/chipseq') }} + runs-on: ubuntu-latest + strategy: + matrix: + aligner: + - "bowtie2" + - "chromap" + - "star" + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Run pipeline with the different aligners available + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --aligner ${{ matrix.aligner }} --outdir ./results diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml old mode 100644 new mode 100755 diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc81c144cd8f7bf7e49106934a313cb7667a..f1d2ac4eda43d6ae7ee39d53260ffb5b5a989762 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,8 @@ repository_type: pipeline +lint: + files_unchanged: + - .github/workflows/branch.yml + - .github/workflows/linting_comment.yml + - .github/workflows/linting.yml + - .github/PULL_REQUEST_TEMPLATE.md + - bin/check_samplesheet.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ddd1153818ca6c06f3289320cf3694b0775f6e3d..289c062c2c61d9b105b247d929a34ccf5cf50c84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,14 +3,245 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.0.0 - [date] +## [[2.0.0](https://github.com/nf-core/chipseq/releases/tag/2.0.0)] - 2022-09-30 -Initial release of nf-core/chipseq, created with the [nf-core](https://nf-co.re/) template. 
+### Enhancements & fixes + +- Pipeline has been re-implemented in [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) +- All software containers are now exclusively obtained from [Biocontainers](https://biocontainers.pro/#/registry) +- Updated pipeline template to [nf-core/tools 2.5.1](https://github.com/nf-core/tools/releases/tag/2.5.1) +- [[#128](https://github.com/nf-core/chipseq/issues/128)] - Filter files with no peaks to avoid errors in downstream processes +- [[#220](https://github.com/nf-core/chipseq/issues/220)] - Fix `phantompeakqualtools` protection stack overflow error +- [[#233](https://github.com/nf-core/chipseq/issues/233)] - Add `chromap` to the available aligners +- Bump minimum Nextflow version from `21.04.0` -> `21.10.3` +- Added `python3` shebang to appropriate scripts in `bin/` directory +- [[#160](https://github.com/nf-core/chipseq/issues/160)] - Add `bowtie2` and `star` as available aligners, via the `--aligner` parameter +- Add `--save_unaligned` parameter (only available for `bowtie2` and `star`) +- Update `igenomes.config` to fetch whole `BWAIndex/version0.6.0/` folder +- [[#228](https://github.com/nf-core/chipseq/issues/228)] - Update blacklist bed files. +- [nf-core/tools#1415](https://github.com/nf-core/tools/issues/1415) - Make `--outdir` a mandatory parameter +- [[#282](https://github.com/nf-core/chipseq/issues/282)] - Fix `genome.fa` publication for IGV. +- [[#280](https://github.com/nf-core/chipseq/issues/280)] - Update `macs_gsize` in `igenomes.config`, create a new `--read_length` parameter and implement the logic to calculate `--macs_gsize` when the parameter is missing +- Eliminate `if` conditions from the `deseq2_qc` and `macs2_consensus` local modules and use `ext.when` instead +- Remove `deseq2` differential binding analysis of consensus peaks. +- [[#291](https://github.com/nf-core/chipseq/issues/291)] - Filter paired-end files produced by `chromap` since the resulting `BAM` files cannot be processed downstream. 
+- Remove <ANTIBODY> from the macs2 consensus publish directory since it can not be referred as input from the IGV process (meta.id not resolved at execution time) +- Add bytesize link to readme. + +### Parameters + +| Old parameter | New parameter | +| ---------------------- | ----------------------- | +| `--conda` | `--enable_conda` | +| `--skip_diff_analysis` | `--skip_deseq2_qc` | +| | `--skip_qc` | +| | `--aligner` | +| | `--save_unaligned` | +| | `--read_length` | +| | `--multiqc_title` | +| | `--gff` | +| | `--bowtie2_index` | +| | `--chromap_index` | +| | `--star_index` | +| | `--validate_params` | +| | `--show_hidden_params` | +| | `--config_profile_name` | +| `--clusterOptions` | | +| `--single_end` | | +| `--name` | | +| `--hostnames` | | + +> **NB:** Parameter has been **updated** if both old and new parameter information is present. +> **NB:** Parameter has been **added** if just the new parameter information is present. +> **NB:** Parameter has been **removed** if parameter information isn't present. + +### Software dependencies + +Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. 
+ +| Dependency | Old version | New version | +| ----------------------- | ----------- | ----------- | +| `samtools` | 1.10 | 1.15.1 | +| `picard` | 2.23.1 | 2.27.4 | +| `bamtools` | 2.5.1 | 2.5.2 | +| `pysam` | 0.15.3 | 0.19.0 | +| `bedtools` | 2.29.2 | 2.30.0 | +| `ucsc-bedgraphtobigwig` | 357 | 377 | +| `deeptools` | 3.4.3 | 3.5.1 | +| `pigz` | 2.3.4 | 2.6 | +| `preseq` | 2.0.3 | 3.1.2 | +| `multiqc` | 1.9 | 1.13a | +| `r-base` | 3.6.1 | 4.0.3 | +| `r-ggplot2` | 3.3.2 | 3.3.3 | +| `bioconductor-deseq2` | 1.26.0 | 1.28.0 | +| `trim-galore` | 0.6.5 | 0.6.7 | +| `r-optparse` | - | 1.7.1 | +| `chromap` | - | 0.2.1 | +| `bowtie2` | - | 2.4.4 | +| `star` | - | 2.6.1d | +| `r-tidyr` | - | - | +| `r-lattice` | - | - | +| `r-xfun` | - | - | +| `bioconductor-vsn` | - | - | + +> **NB:** Dependency has been **updated** if both old and new version information is present. +> **NB:** Dependency has been **added** if just the new version information is present. +> **NB:** Dependency has been **removed** if version information isn't present. + +## [[1.2.2](https://github.com/nf-core/chipseq/releases/tag/1.2.2)] - 2021-04-22 + +- [#206](https://github.com/nf-core/chipseq/issues/206) - Minor patch release to fix Conda environment + +### `Dependencies` + +- Update r-base `3.6.2` -> `3.6.3` +- Update r-xfun `0.15` -> `0.20` + +## [[1.2.1](https://github.com/nf-core/chipseq/releases/tag/1.2.1)] - 2020-07-29 + +- [#171](https://github.com/nf-core/chipseq/issues/171) - Minor patch release to update pipeline schema + +## [[1.2.0](https://github.com/nf-core/chipseq/releases/tag/1.2.0)] - 2020-07-02 ### `Added` +- [#138](https://github.com/nf-core/chipseq/issues/138) - Add social preview image +- [#153](https://github.com/nf-core/chipseq/issues/153) - Add plotHeatmap +- [#159](https://github.com/nf-core/chipseq/issues/159) - expose bwa mem -T parameter +- [nf-core/atacseq#63](https://github.com/nf-core/atacseq/issues/63) - Added multicore support for Trim Galore! 
+- [nf-core/atacseq#75](https://github.com/nf-core/atacseq/issues/75) - Include gene annotation versions in multiqc report +- [nf-core/atacseq#76](https://github.com/nf-core/atacseq/issues/76) - featureCounts coupled to DESeq2 +- [nf-core/atacseq#79](https://github.com/nf-core/atacseq/issues/79) - Parallelize DESeq2 +- [nf-core/atacseq#97](https://github.com/nf-core/atacseq/issues/97) - PBC1, PBC2 from pipeline? +- [nf-core/atacseq#107](https://github.com/nf-core/atacseq/issues/107) - Add options to change MACS2 parameters +- Regenerated screenshots and added collapsible sections for output files in `docs/output.md` +- Update template to tools `1.9` +- Replace `set` with `tuple` and `file()` with `path()` in all processes +- Capitalise process names +- Parameters: + - `--bwa_min_score` to set minimum alignment score for BWA MEM + - `--macs_fdr` to provide FDR threshold for MACS2 peak calling + - `--macs_pvalue` to provide p-value threshold for MACS2 peak calling + - `--skip_peak_qc` to skip MACS2 peak QC plot generation + - `--skip_peak_annotation` to skip annotation of MACS2 and consensus peaks with HOMER + - `--skip_consensus_peaks` to skip consensus peak generation + - `--deseq2_vst` to use variance stabilizing transformation (VST) instead of regularized log transformation (rlog) with DESeq2 + - `--publish_dir_mode` to customise method of publishing results to output directory [nf-core/tools#585](https://github.com/nf-core/tools/issues/585) + +### `Removed` + +- `--tss_bed` parameter + ### `Fixed` +- [#118](https://github.com/nf-core/chipseq/issues/118) - Running on with SGE +- [#132](https://github.com/nf-core/chipseq/issues/132) - BigWig Error: sort: cannot create temporary file in '': Read-only file system +- [#154](https://github.com/nf-core/chipseq/issues/154) - computeMatrix.val.mat.gz files not zipped +- [nf-core/atacseq#71](https://github.com/nf-core/atacseq/issues/71) - consensus_peaks.mLb.clN.boolean.intersect.plot.pdf not generated +- 
[nf-core/atacseq#73](https://github.com/nf-core/atacseq/issues/73) - macs_annotatePeaks.mLb.clN.summary.txt file is not created +- [nf-core/atacseq#86](https://github.com/nf-core/atacseq/issues/86) - bug in the plot_homer_annotatepeaks.r script +- [nf-core/atacseq#102](https://github.com/nf-core/atacseq/issues/102) - Incorrect Group ID assigned by featurecounts_deseq2.r +- [nf-core/atacseq#109](https://github.com/nf-core/atacseq/issues/109) - Specify custom gtf but gene bed is not generated from that gtf? +- Make executables in `bin/` compatible with Python 3 + +### `Dependencies` + +- Add bioconductor-biocparallel `1.20.0` +- Add markdown `3.2.2` +- Add pigz `2.3.4` +- Add pygments `2.6.1` +- Add pymdown-extensions `7.1` +- Add python `3.7.6` +- Add r-reshape2 `1.4.4` +- Add r-tidyr `1.1.0` +- Update bedtools `2.27.1` -> `2.29.2` +- Update bioconductor-deseq2 `1.20.0` -> `1.26.0` +- Update bioconductor-vsn `3.46.0` -> `3.54.0` +- Update deeptools `3.2.1` -> `3.4.3` +- Update fastqc `0.11.8` -> `0.11.9` +- Update gawk `4.2.1` -> `5.1.0` +- Update homer `4.9.1` -> `4.11` +- Update macs2 `2.1.2` -> `2.2.7.1` +- Update multiqc `1.7` -> `1.8` +- Update phantompeakqualtools `1.2` -> `1.2.2` +- Update picard `2.19.0` -> `2.23.1` +- Update pysam `0.15.2` -> `0.15.3` +- Update r-base `3.4.1` -> `3.6.2` +- Update r-ggplot2 `3.1.0` -> `3.3.2` +- Update r-lattice `0.20_35` -> `0.20_41` +- Update r-optparse `1.6.0` -> `1.6.6` +- Update r-pheatmap `1.0.10` -> `1.0.12` +- Update r-scales `1.0.0` -> `1.1.1` +- Update r-upsetr `1.3.3` -> `1.4.0` +- Update r-xfun `0.3` -> `0.15` +- Update samtools `1.9` -> `1.10` +- Update subread `1.6.4` -> `2.0.1` +- Update trim-galore `0.5.0` -> `0.6.5` +- Update ucsc-bedgraphtobigwig `377` -> `357` + +## [[1.1.0](https://github.com/nf-core/chipseq/releases/tag/1.1.0)] - 2019-11-05 + +### `Added` + +- [nf-core/atacseq#46](https://github.com/nf-core/atacseq/issues/46) - Missing gene_bed path in igenomes config +- Update template to tools `1.7` +- 
Add `--trim_nextseq` parameter +- Add `CITATIONS.md` file +- Capitalised process names + +### `Fixed` + +- Change all parameters from `camelCase` to `snake_case` (see [Deprecated](#Deprecated)) +- [nf-core/atacseq#44](https://github.com/nf-core/atacseq/issues/44) - Output directory missing: macs2/consensus/deseq2 +- [nf-core/atacseq#45](https://github.com/nf-core/atacseq/issues/45) - Wrong x-axis scale for the HOMER: Peak annotation Counts tab plot? +- [nf-core/atacseq#46](https://github.com/nf-core/atacseq/issues/46) - Stage blacklist file in channel properly +- [nf-core/atacseq#50](https://github.com/nf-core/atacseq/issues/50) - HOMER number of peaks does not correspond to found MACS2 peaks +- Fixed bug in UpSetR peak intersection plot +- Increase default resource requirements in `base.config` +- Increase process-specific requirements based on user-reported failures + ### `Dependencies` +- Update Nextflow `0.32.0` -> `19.10.0` + ### `Deprecated` + +| Deprecated | Replacement | +| ---------------------------- | ------------------------- | +| `--design` | `--input` | +| `--singleEnd` | `--single_end` | +| `--saveGenomeIndex` | `--save_reference` | +| `--skipTrimming` | `--skip_trimming` | +| `--saveTrimmed` | `--save_trimmed` | +| `--keepDups` | `--keep_dups` | +| `--keepMultiMap` | `--keep_multi_map` | +| `--saveAlignedIntermediates` | `--save_align_intermeds` | +| `--narrowPeak` | `--narrow_peak` | +| `--saveMACSPileup` | `--save_macs_pileup` | +| `--skipDiffAnalysis` | `--skip_diff_analysis` | +| `--skipFastQC` | `--skip_fastqc` | +| `--skipPicardMetrics` | `--skip_picard_metrics` | +| `--skipPreseq` | `--skip_preseq` | +| `--skipPlotProfile` | `--skip_plot_profile` | +| `--skipPlotFingerprint` | `--skip_plot_fingerprint` | +| `--skipSpp` | `--skip_spp` | +| `--skipIGV` | `--skip_igv` | +| `--skipMultiQC` | `--skip_multiqc` | + +## [[1.0.0](https://github.com/nf-core/chipseq/releases/tag/1.0.0)] - 2019-06-06 + +Initial release of nf-core/chipseq pipeline. 
+ +### `Added` + +- Raw read QC (FastQC) +- Adapter trimming (Trim Galore!) +- Map and filter reads (BWA, picard, SAMtools, BEDTools, BAMTools, Pysam) +- Create library-size normalised bigWig tracks (BEDTools, bedGraphToBigWig) +- Alignment QC metrics (Preseq, picard) +- ChIP-seq QC metrics (deepTools, phantompeakqualtools) +- Call and annotate broad/narrow peaks (MACS2, HOMER) +- Create consensus set of peaks per antibody (BEDTools) +- Quantification and differential binding analysis (featureCounts, DESeq2) +- Collate appropriate files for genome browser visualisation (IGV) +- Collate and present various QC metrics (MultiQC, R) diff --git a/CITATIONS.md b/CITATIONS.md index b4b3c8965fe048681c94f954b79142f757edeac3..63188ca80a895e39275f3d79685695271538f524 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,11 +10,111 @@ ## Pipeline tools +- [BWA](https://www.ncbi.nlm.nih.gov/pubmed/19451168/) + + > Li H, Durbin R. Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics. 2009 Jul 15;25(14):1754-60. doi: 10.1093/bioinformatics/btp324. Epub 2009 May 18. PubMed PMID: 19451168; PubMed Central PMCID: PMC2705234. + +- [BEDTools](https://www.ncbi.nlm.nih.gov/pubmed/20110278/) + + > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. + +- [BamTools](https://www.ncbi.nlm.nih.gov/pubmed/21493652/) + + > Barnett DW, Garrison EK, Quinlan AR, Strömberg MP, Marth GT. BamTools: a C++ API and toolkit for analyzing and managing BAM files. Bioinformatics. 2011 Jun 15;27(12):1691-2. doi: 10.1093/bioinformatics/btr174. Epub 2011 Apr 14. PubMed PMID: 21493652; PubMed Central PMCID: PMC3106182. + +- [Bowtie2](https://dx.doi.org/10.1038/nmeth.1923) + + > Langmead, B. and Salzberg, S. L. 2012 Fast gapped-read alignment with Bowtie 2. Nature methods, 9(4), p. 357–359. 
doi: 10.1038/nmeth.1923. + +- [Chromap](https://doi.org/10.1038/s41467-021-26865-w) + + > Zhang H, Song L, Wang X, Cheng H, Wang C, Meyer CA, Liu T, Tang M, Aluru S, Yue F, Liu XS and Li H. Fast alignment and preprocessing of chromatin profiles with Chromap. Nature communications. 2021, 12(1), 1-6. doi: 10.1038/s41467-021-26865-w + +- [deepTools](https://www.ncbi.nlm.nih.gov/pubmed/27079975/) + + > Ramírez F, Ryan DP, Grüning B, Bhardwaj V, Kilpert F, Richter AS, Heyne S, Dündar F, Manke T. deepTools2: a next generation web server for deep-sequencing data analysis. Nucleic Acids Res. 2016 Jul 8;44(W1):W160-5. doi: 10.1093/nar/gkw257. Epub 2016 Apr 13. PubMed PMID: 27079975; PubMed Central PMCID: PMC4987876. + - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) +- [featureCounts](https://www.ncbi.nlm.nih.gov/pubmed/24227677/) + + > Liao Y, Smyth GK, Shi W. featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. 2014 Apr 1;30(7):923-30. doi: 10.1093/bioinformatics/btt656. Epub 2013 Nov 13. PubMed PMID: 24227677. + +- [HOMER](https://www.ncbi.nlm.nih.gov/pubmed/20513432/) + + > Heinz S, Benner C, Spann N, Bertolino E, Lin YC, Laslo P, Cheng JX, Murre C, Singh H, Glass CK. Simple combinations of lineage-determining transcription factors prime cis-regulatory elements required for macrophage and B cell identities. Mol Cell. 2010 May 28;38(4):576-89. doi: 10.1016/j.molcel.2010.05.004. PubMed PMID: 20513432; PubMed Central PMCID: PMC2898526. + +- [MACS2](https://www.ncbi.nlm.nih.gov/pubmed/18798982/) + + > Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS). Genome Biol. 2008;9(9):R137. doi: 10.1186/gb-2008-9-9-r137. Epub 2008 Sep 17. PubMed PMID: 18798982; PubMed Central PMCID: PMC2592715. + - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + > Ewels P, Magnusson M, Lundin S, Käller M. 
MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. +- [phantompeakqualtools](https://www.ncbi.nlm.nih.gov/pubmed/22955991/) + + > Landt SG, Marinov GK, Kundaje A, Kheradpour P, Pauli F, Batzoglou S, Bernstein BE, Bickel P, Brown JB, Cayting P, Chen Y, DeSalvo G, Epstein C, Fisher-Aylor KI, Euskirchen G, Gerstein M, Gertz J, Hartemink AJ, Hoffman MM, Iyer VR, Jung YL, Karmakar S, Kellis M, Kharchenko PV, Li Q, Liu T, Liu XS, Ma L, Milosavljevic A, Myers RM, Park PJ, Pazin MJ, Perry MD, Raha D, Reddy TE, Rozowsky J, Shoresh N, Sidow A, Slattery M, Stamatoyannopoulos JA, Tolstorukov MY, White KP, Xi S, Farnham PJ, Lieb JD, Wold BJ, Snyder M. ChIP-seq guidelines and practices of the ENCODE and modENCODE consortia. Genome Res. 2012 Sep;22(9):1813-31. doi: 10.1101/gr.136184.111. PubMed PMID: 22955991; PubMed Central PMCID: PMC3431496. + +- [picard-tools](http://broadinstitute.github.io/picard) + +- [preseq](https://www.ncbi.nlm.nih.gov/pubmed/23435259/) + + > Daley T, Smith AD. Predicting the molecular complexity of sequencing libraries. Nat Methods. 2013 Apr;10(4):325-7. doi: 10.1038/nmeth.2375. Epub 2013 Feb 24. PubMed PMID: 23435259; PubMed Central PMCID: PMC3612374. + +- [pysam](https://github.com/pysam-developers/pysam) + +- [SAMtools](https://www.ncbi.nlm.nih.gov/pubmed/19505943/) + + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + +- [STAR](https://pubmed.ncbi.nlm.nih.gov/23104886/) + + > Dobin A, Davis CA, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M, Gingeras TR. 
STAR: ultrafast universal RNA-seq aligner. Bioinformatics. 2013 Jan 1;29(1):15-21. doi: 10.1093/bioinformatics/bts635. Epub 2012 Oct 25. PubMed PMID: 23104886; PubMed Central PMCID: PMC3530905. + +- [Trim Galore!](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/) + +- [UCSC tools](https://www.ncbi.nlm.nih.gov/pubmed/20639541/) + > Kent WJ, Zweig AS, Barber G, Hinrichs AS, Karolchik D. BigWig and BigBed: enabling browsing of large distributed datasets. Bioinformatics. 2010 Sep 1;26(17):2204-7. doi: 10.1093/bioinformatics/btq351. Epub 2010 Jul 17. PubMed PMID: 20639541; PubMed Central PMCID: PMC2922891. + +## R packages + +- [R](https://www.R-project.org/) + + > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. + +- [DESeq2](https://www.ncbi.nlm.nih.gov/pubmed/25516281/) + + > Love MI, Huber W, Anders S. Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biol. 2014;15(12):550. PubMed PMID: 25516281; PubMed Central PMCID: PMC4302049. + +- [UpSetR](https://CRAN.R-project.org/package=UpSetR) + + > Nils Gehlenborg (2017). UpSetR: A More Scalable Alternative to Venn and Euler Diagrams for Visualizing Intersecting Sets. + +- [ggplot2](https://cran.r-project.org/web/packages/ggplot2/index.html) + + > H. Wickham. ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York, 2016. + +- [reshape2](http://www.jstatsoft.org/v21/i12/) + + > Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of Statistical Software, 21(12), 1-20. + +- [scales](https://CRAN.R-project.org/package=scales) + + > Hadley Wickham (2018). scales: Scale Functions for Visualization. + +- [pheatmap](https://CRAN.R-project.org/package=pheatmap) + + > Raivo Kolde (2018). pheatmap: Pretty Heatmaps. + +- [RColorBrewer](https://CRAN.R-project.org/package=RColorBrewer) + + > Erich Neuwirth (2014). RColorBrewer: ColorBrewer Palettes. 
+ +- [optparse](https://CRAN.R-project.org/package=optparse) + + > Trevor L Davis (2018). optparse: Command Line Option Parser. + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md old mode 100644 new mode 100755 diff --git a/README.md b/README.md index 095d129ce9f9eef6ef28b281964b227937d32107..519a93ebd72fdc7bfd429b90787f7ebf42a8d8a2 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ #   -[](https://nf-co.re/chipseq/results)[](https://doi.org/10.5281/zenodo.XXXXXXX) +[](https://nf-co.re/chipseq/results)[](https://doi.org/10.5281/zenodo.3240506) [](https://www.nextflow.io/) [](https://docs.conda.io/en/latest/) @@ -12,22 +12,53 @@ ## Introduction -<!-- TODO nf-core: Write a 1-2 sentence summary of what data the pipeline is for and what it does --> +**nf-core/chipseq** is a bioinformatics analysis pipeline used for Chromatin ImmunoPrecipitation sequencing (ChIP-seq) data. -**nf-core/chipseq** is a bioinformatics best-practice analysis pipeline for ChIP-seq peak-calling and differential analysis pipeline.. +On release, automated continuous integration tests run the pipeline on a [full-sized dataset](https://github.com/nf-core/test-datasets/tree/chipseq#full-test-dataset-origin) on the AWS cloud infrastructure. The dataset consists of FoxA1 (transcription factor) and EZH2 (histone mark) IP experiments from _Franco et al. 2015_ ([GEO: GSE59530](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE59530), [PMID: 25752574](https://pubmed.ncbi.nlm.nih.gov/25752574/)) and _Popovic et al. 2014_ ([GEO: GSE57632](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE57632), [PMID: 25188243](https://pubmed.ncbi.nlm.nih.gov/25188243/)), respectively. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. 
The results obtained from running the full-sized tests can be viewed on the [nf-core website](https://nf-co.re/chipseq/results). The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! -<!-- TODO nf-core: Add full-sized test dataset and amend the paragraph below if applicable --> +## Online videos -On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources.The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/chipseq/results). +A short talk about the history, current status and functionality on offer in this pipeline was given by Jose Espinosa-Carrasco ([@joseespinosa](https://github.com/joseespinosa)) on [26th July 2022](https://nf-co.re/events/2022/bytesize-chipseq) as part of the nf-core/bytesize series. -## Pipeline summary +You can find numerous talks on the [nf-core events page](https://nf-co.re/events) from various topics including writing pipelines/modules in Nextflow DSL2, using nf-core tooling, running nf-core pipelines as well as more generic content like contributing to Github. 
Please check them out! -<!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline --> +## Pipeline summary -1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +2. Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/)) +3. Choice of multiple aligners + 1. ([`BWA`](https://sourceforge.net/projects/bio-bwa/files/)) + 2. ([`Chromap`](https://github.com/haowenz/chromap)). **Paired-end reads are currently only supported up to the mapping steps, see [here](https://github.com/nf-core/chipseq/issues/291)** + 3. ([`Bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) + 4. ([`STAR`](https://github.com/alexdobin/STAR)) +4. Mark duplicates ([`picard`](https://broadinstitute.github.io/picard/)) +5. Merge alignments from multiple libraries of the same sample ([`picard`](https://broadinstitute.github.io/picard/)) + 1. Re-mark duplicates ([`picard`](https://broadinstitute.github.io/picard/)) + 2. 
Filtering to remove: + - reads mapping to blacklisted regions ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/), [`BEDTools`](https://github.com/arq5x/bedtools2/)) + - reads that are marked as duplicates ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + - reads that are not marked as primary alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + - reads that are unmapped ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + - reads that map to multiple locations ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + - reads containing > 4 mismatches ([`BAMTools`](https://github.com/pezmaster31/bamtools)) + - reads that have an insert size > 2kb ([`BAMTools`](https://github.com/pezmaster31/bamtools); _paired-end only_) + - reads that map to different chromosomes ([`Pysam`](http://pysam.readthedocs.io/en/latest/installation.html); _paired-end only_) + - reads that aren't in FR orientation ([`Pysam`](http://pysam.readthedocs.io/en/latest/installation.html); _paired-end only_) + - reads where only one read of the pair fails the above criteria ([`Pysam`](http://pysam.readthedocs.io/en/latest/installation.html); _paired-end only_) + 3. Alignment-level QC and estimation of library complexity ([`picard`](https://broadinstitute.github.io/picard/), [`Preseq`](http://smithlabresearch.org/software/preseq/)) + 4. Create normalised bigWig files scaled to 1 million mapped reads ([`BEDTools`](https://github.com/arq5x/bedtools2/), [`bedGraphToBigWig`](http://hgdownload.soe.ucsc.edu/admin/exe/)) + 5. Generate gene-body meta-profile from bigWig files ([`deepTools`](https://deeptools.readthedocs.io/en/develop/content/tools/plotProfile.html)) + 6. Calculate genome-wide IP enrichment relative to control ([`deepTools`](https://deeptools.readthedocs.io/en/develop/content/tools/plotFingerprint.html)) + 7. 
Calculate strand cross-correlation peak and ChIP-seq quality measures including NSC and RSC ([`phantompeakqualtools`](https://github.com/kundajelab/phantompeakqualtools)) + 8. Call broad/narrow peaks ([`MACS2`](https://github.com/macs3-project/MACS)) + 9. Annotate peaks relative to gene features ([`HOMER`](http://homer.ucsd.edu/homer/download.html)) + 10. Create consensus peakset across all samples and create tabular file to aid in the filtering of the data ([`BEDTools`](https://github.com/arq5x/bedtools2/)) + 11. Count reads in consensus peaks ([`featureCounts`](http://bioinf.wehi.edu.au/featureCounts/)) + 12. PCA and clustering ([`R`](https://www.r-project.org/), [`DESeq2`](https://bioconductor.org/packages/release/bioc/html/DESeq2.html)) +6. Create IGV session file containing bigWig tracks, peaks and differential sites for data visualisation ([`IGV`](https://software.broadinstitute.org/software/igv/)). +7. Present QC for raw read, alignment, peak-calling and differential binding results ([`MultiQC`](http://multiqc.info/), [`R`](https://www.r-project.org/)) ## Quick Start @@ -50,23 +81,21 @@ On release, automated continuous integration tests run the pipeline on a full-si 4. Start running your own analysis! - <!-- TODO nf-core: Update the example "typical command" below used to run the pipeline --> - ```bash nextflow run nf-core/chipseq --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> ``` +See [usage docs](https://nf-co.re/chipseq/usage) for all of the available options when running the pipeline. + ## Documentation -The nf-core/chipseq pipeline comes with documentation about the pipeline [usage](https://nf-co.re/chipseq/usage), [parameters](https://nf-co.re/chipseq/parameters) and [output](https://nf-co.re/chipseq/output). +The nf-core/chipseq pipeline comes with documentation about the pipeline: [usage](https://nf-co.re/chipseq/usage) and [output](https://nf-co.re/chipseq/output). 
## Credits -nf-core/chipseq was originally written by Philip Ewels, Jose Espinosa-Carrasco, Harshil Patel. - -We thank the following people for their extensive assistance in the development of this pipeline: +These scripts were originally written by Chuan Wang ([@chuan-wang](https://github.com/chuan-wang)) and Phil Ewels ([@ewels](https://github.com/ewels)) for use at the [National Genomics Infrastructure](https://portal.scilifelab.se/genomics/) at [SciLifeLab](http://www.scilifelab.se/) in Stockholm, Sweden. The pipeline was re-implemented by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [Seqera Labs, Spain](https://seqera.io/) and converted to Nextflow DSL2 by Jose Espinosa-Carrasco ([@JoseEspinosa](https://github.com/JoseEspinosa)) from [The Comparative Bioinformatics Group](https://www.crg.eu/en/cedric_notredame) at [The Centre for Genomic Regulation, Spain](https://www.crg.eu/). -<!-- TODO nf-core: If applicable, make list of people who have also contributed --> +Many thanks to others who have helped out and contributed along the way too, including (but not limited to): [@apeltzer](https://github.com/apeltzer), [@bc2zb](https://github.com/bc2zb), [@crickbabs](https://github.com/crickbabs), [@drejom](https://github.com/drejom), [@houghtos](https://github.com/houghtos), [@KevinMenden](https://github.com/KevinMenden), [@mashehu](https://github.com/mashehu), [@pditommaso](https://github.com/pditommaso), [@Rotholandus](https://github.com/Rotholandus), [@sofiahaglund](https://github.com/sofiahaglund), [@tiagochst](https://github.com/tiagochst) and [@winni2k](https://github.com/winni2k). ## Contributions and Support @@ -76,10 +105,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations -<!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. 
--> -<!-- If you use nf-core/chipseq for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) --> - -<!-- TODO nf-core: Add bibliography of tools and data used in your pipeline --> +If you use nf-core/chipseq for your analysis, please cite it using the following doi: [10.5281/zenodo.3240506](https://doi.org/10.5281/zenodo.3240506) An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. diff --git a/assets/bamtools_filter_pe.json b/assets/bamtools_filter_pe.json new file mode 100755 index 0000000000000000000000000000000000000000..618b7e9c3700b19a7c5422bcd3b33ec68dacf34c --- /dev/null +++ b/assets/bamtools_filter_pe.json @@ -0,0 +1,11 @@ +{ + "filters": [ + { "id": "insert_min", "insertSize": ">=-2000" }, + + { "id": "insert_max", "insertSize": "<=2000" }, + + { "id": "mismatch", "tag": "NM:<=4" } + ], + + "rule": " insert_min & insert_max & mismatch " +} diff --git a/assets/bamtools_filter_se.json b/assets/bamtools_filter_se.json new file mode 100755 index 0000000000000000000000000000000000000000..8928ab1cd46c5294a14d4fceeb721faa828867a6 --- /dev/null +++ b/assets/bamtools_filter_se.json @@ -0,0 +1,5 @@ +{ + "filters": [{ "id": "mismatch", "tag": "NM:<=4" }], + + "rule": " mismatch " +} diff --git a/assets/blacklists/v1.0/GRCh37-blacklist.v1.bed b/assets/blacklists/v1.0/GRCh37-blacklist.v1.bed new file mode 100644 index 0000000000000000000000000000000000000000..b87063023b6b537ceafcf32b87654ad8d134f03a --- /dev/null +++ b/assets/blacklists/v1.0/GRCh37-blacklist.v1.bed @@ -0,0 +1,411 @@ +1 564449 570371 High_Mappability_island 1000 . +1 724136 727043 Satellite_repeat 1000 . +1 825006 825115 BSR/Beta 1000 . +1 2583334 2634374 Low_mappability_island 1000 . +1 4363064 4363242 (CATTC)n 1000 . +1 5725866 5736651 Low_mappability_island 1000 . +1 16839923 16841396 Low_mappability_island 1000 . +1 38077347 38077423 Low_mappability_island 1000 . 
+1 91852785 91853147 LSU-rRNA_Hsa 1000 . +1 104163724 104163860 Low_mappability_island 1000 . +1 108112972 108113707 LSU-rRNA_Hsa 1000 . +1 121351474 121487059 centromeric_repeat 1000 . +1 142535434 142543081 Satellite_repeat 1000 . +1 142723256 142723968 Low_mappability_island 1000 . +1 142792613 142793303 Low_mappability_island 1000 . +1 142835822 142837333 Low_mappability_island 1000 . +1 143274490 143284340 centromeric_repeat 1000 . +1 145277108 145277572 LSU-rRNA_Hsa 1000 . +1 149033183 149035829 Satellite_repeat 1000 . +1 156186169 156186712 High_Mappability_island 1000 . +1 224199390 224204260 Satellite_repeat 1000 . +1 233318467 233318516 (CATTC)n 1000 . +1 236260366 236260821 Low_mappability_island 1000 . +1 237766308 237766764 LSU-rRNA_Hsa 1000 . +1 238105345 238105511 Low_mappability_island 1000 . +1 238108025 238108378 Low_mappability_island 1000 . +1 238108645 238109697 Low_mappability_island 1000 . +10 18841533 18862467 (CATTC)n 1000 . +10 20035661 20037171 Low_mappability_island 1000 . +10 36722282 36723650 Low_mappability_island 1000 . +10 38772277 38819357 Satellite_repeat 1000 . +10 38868892 38889025 Satellite_repeat 1000 . +10 39076515 39155771 Satellite_repeat 1000 . +10 42354835 42548642 centromeric_repeat 1000 . +10 42596676 42602082 Satellite_repeat 1000 . +10 42596700 42602110 Satellite_repeat 1000 . +10 42661264 42667623 Satellite_repeat 1000 . +10 42790522 42818398 Satellite_repeat 1000 . +10 135498649 135502716 Satellite_repeat 1000 . +11 6831669 6831838 ALR/Alpha 1000 . +11 10529403 10531969 Low_mappability_island 1000 . +11 48671444 48902406 centromeric_repeat 1000 . +11 48931242 48964015 centromeric_repeat 1000 . +11 50318471 50784078 centromeric_repeat 1000 . +11 51090700 51374066 centromeric_repeat 1000 . +11 51567242 51594226 centromeric_repeat 1000 . +11 54694046 55027975 centromeric_repeat 1000 . +11 73221660 73221946 Low_mappability_island 1000 . +11 85194913 85195322 LSU-rRNA_Hsa 1000 . 
+11 87524468 87525005 Low_mappability_island 1000 . +11 103275584 103281729 Low_mappability_island 1000 . +11 122874287 122874443 Low_mappability_island 1000 . +12 20704285 20704583 SSU-rRNA_Hsa 1000 . +12 34372315 34372825 LSU-rRNA_Hsa 1000 . +12 34432130 34857010 centromeric_repeat 1000 . +12 37989447 38441828 centromeric_repeat 1000 . +12 38531376 38531930 LSU-rRNA_Hsa 1000 . +12 41757383 41757545 Low_mappability_island 1000 . +12 127650407 127651075 LSU-rRNA_Hsa 1000 . +12 132061320 132062046 Low_mappability_island 1000 . +13 56545728 56545925 Low_mappability_island 1000 . +13 110076444 110076782 Low_mappability_island 1000 . +14 18999935 19056900 centromeric_repeat 1000 . +14 32953263 32954381 Low_mappability_island 1000 . +14 84637832 84639038 Low_mappability_island 1000 . +14 90341302 90341516 SSU-rRNA_Hsa 1000 . +15 19999941 20044132 centromeric_repeat 1000 . +16 32493036 32570826 ALR/Alpha 1000 . +16 32590063 32598801 ALR/Alpha 1000 . +16 33237130 33241330 Low_mappability_island 1000 . +16 33864355 34023306 centromeric_repeat 1000 . +16 34180542 34197081 Satellite_repeat 1000 . +16 34530115 34542632 BSR/Beta 1000 . +16 35193580 35285885 centromeric_repeat 1000 . +16 46385718 46456668 Satellite_repeat 1000 . +16 46497639 46500515 Satellite_repeat 1000 . +16 47538629 47539297 LSU-rRNA_Hsa 1000 . +17 19355538 19356096 LSU-rRNA_Hsa 1000 . +17 19502495 19506773 Low_mappability_island 1000 . +17 21905167 21906712 centromeric_repeat 1000 . +17 22018524 22032049 Low_mappability_island 1000 . +17 22221073 22263006 centromeric_repeat 1000 . +17 25263010 25268059 Satellite_repeat 1000 . +17 25415551 25417559 telomeric_repeat 1000 . +17 31149365 31149981 High_Mappability_island 1000 . +17 33478114 33478372 LSU-rRNA_Hsa 1000 . +17 41381502 41382591 High_Mappability_island 1000 . +17 41463538 41464075 High_Mappability_island 1000 . +17 41464478 41465015 snRNA 1000 . +17 41465562 41467288 High_Mappability_island 1000 . +17 51183038 51183763 Low_mappability_island 1000 . 
+17 55868618 55868752 LSU-rRNA_Hsa 1000 . +17 75158031 75158430 LSU-rRNA_Hsa 1000 . +18 96416 97552 Satellite_repeat 1000 . +18 105658 112233 Satellite_repeat 1000 . +18 2842252 2842356 Low_mappability_island 1000 . +18 15393801 15393992 centromeric_repeat 1000 . +18 18510894 18520356 centromeric_repeat 1000 . +18 44126235 44126593 (CATTC)n 1000 . +18 45379603 45379864 Low_mappability_island 1000 . +18 50319086 50319301 Low_mappability_island 1000 . +18 77772846 77773065 LSU-rRNA_Hsa 1000 . +19 246006 247844 TAR1 1000 . +19 22877614 22877696 SSU-rRNA_Hsa 1000 . +19 23235030 23235504 BSR/Beta 1000 . +19 24182398 24186210 LSU-rRNA_Hsa 1000 . +19 24385474 24633168 centromeric_repeat 1000 . +19 27730611 28262682 centromeric_repeat 1000 . +19 36066445 36066810 LSU-rRNA_Hsa 1000 . +19 36756398 36800948 centromeric_repeat 1000 . +19 37759473 37797722 centromeric_repeat 1000 . +19 44914313 44916340 ACRO1 1000 . +19 44960681 44962681 ACRO1 1000 . +2 739925 740994 Low_mappability_island 1000 . +2 49456729 49457067 Low_mappability_island 1000 . +2 88124390 88124903 Low_mappability_island 1000 . +2 89830421 89880514 Satellite_repeat 1000 . +2 90371401 90394776 Satellite_repeat 1000 . +2 90443001 90545431 Low_mappability_island 1000 . +2 91595080 91616015 Satellite_repeat 1000 . +2 92267428 92326280 centromeric_repeat 1000 . +2 115695017 115695281 LSU-rRNA_Hsa 1000 . +2 117781085 117781300 Low_mappability_island 1000 . +2 132966248 132989300 centromeric_repeat 1000 . +2 132994855 133007983 ALR/Alpha 1000 . +2 133011824 133013298 SSU-rRNA_Hsa 1000 . +2 133036250 133040042 LSU-rRNA_Hsa 1000 . +2 133044095 133045945 ACRO1 1000 . +2 143848503 143848792 Low_mappability_island 1000 . +2 148022736 148022878 Low_mappability_island 1000 . +2 149639207 149639515 Low_mappability_island 1000 . +2 156120500 156120610 Low_mappability_island 1000 . +2 162135000 162139241 Low_mappability_island 1000 . +2 230045426 230045796 LSU-rRNA_Hsa 1000 . +20 26257032 26320267 centromeric_repeat 1000 . 
+20 29517710 29521147 centromeric_repeat 1000 . +20 29803876 29833334 centromeric_repeat 1000 . +20 55932703 55936114 chrM 1000 . +20 62916702 62918053 telomeric_repeat 1000 . +21 9647205 9648529 Low_mappability_island 1000 . +21 9694896 9704962 centromeric_repeat 1000 . +21 9825451 9827612 High_Mappability_island 1000 . +21 9827612 9845233 Low_mappability_island 1000 . +21 9881895 9882569 TAR1 1000 . +21 10084922 10088004 Satellite_repeat 1000 . +21 10492876 10493049 Low_mappability_island 1000 . +21 10599428 10599915 TAR1 1000 . +21 10697886 10860890 centromeric_repeat 1000 . +21 11186054 11188131 Satellite_repeat 1000 . +21 14338127 14369791 centromeric_repeat 1000 . +21 18800575 18800997 (GAGTG)n 1000 . +21 27228003 27228242 SSU-rRNA_Hsa 1000 . +21 46796081 46796336 Low_mappability_island 1000 . +22 16847814 16862659 Satellite_repeat 1000 . +22 18876789 18884510 Satellite_repeat 1000 . +3 25508897 25509131 Low_mappability_island 1000 . +3 73159606 73161131 snRNA 1000 . +3 75696297 75699304 BSR/Beta 1000 . +3 75717841 75720426 Satellite_repeat 1000 . +3 80995858 81014459 ALR/Alpha 1000 . +3 90311686 90507410 centromeric_repeat 1000 . +3 93504815 93519133 centromeric_repeat 1000 . +3 96335934 96337436 Low_mappability_island 1000 . +3 160665423 160665642 Low_mappability_island 1000 . +3 196625514 196625860 Satellite_repeat 1000 . +3 197825427 197834080 Low_mappability_island 1000 . +4 9987 12694 telomeric_repeat 1000 . +4 12276463 12292424 ALR/Alpha 1000 . +4 12641862 12642305 Low_mappability_island 1000 . +4 21583630 21583719 (GAATG)n 1000 . +4 27732004 27732240 Low_mappability_island 1000 . +4 47774268 47774416 Low_mappability_island 1000 . +4 49085372 49342114 centromeric_repeat 1000 . +4 49488472 49662085 centromeric_repeat 1000 . +4 52659961 52688986 centromeric_repeat 1000 . +4 56194229 56194584 Low_mappability_island 1000 . +4 65473858 65473941 Low_mappability_island 1000 . +4 68264186 68266830 centromeric_repeat 1000 . 
+4 70296565 70296841 LSU-rRNA_Hsa 1000 . +4 76807083 76807320 LSU-rRNA_Hsa 1000 . +4 78929660 78929920 Low_mappability_island 1000 . +4 156374749 156377226 chrM 1000 . +4 156384860 156387314 Low_mappability_island 1000 . +4 163342479 163342744 Low_mappability_island 1000 . +4 190190746 190203442 Low_mappability_island 1000 . +4 190801869 190802909 Low_mappability_island 1000 . +4 190943802 190943962 Satellite_repeat 1000 . +4 190987268 190990949 Satellite_repeat 1000 . +4 191026302 191044344 telomeric_repeat 1000 . +5 17517177 17600940 Low_mappability_island 1000 . +5 21477365 21497415 Low_mappability_island 1000 . +5 34177882 34197574 Low_mappability_island 1000 . +5 45908253 46411114 centromeric_repeat 1000 . +5 49405493 49554574 centromeric_repeat 1000 . +5 71146650 71146996 LSU-rRNA_Hsa 1000 . +5 79945807 79948223 Low_mappability_island 1000 . +5 93903068 93906726 Low_mappability_island 1000 . +5 97746525 97746679 Low_mappability_island 1000 . +5 99381556 99390873 Low_mappability_island 1000 . +5 105889063 105889263 chrM 1000 . +5 123095972 123097432 chrM 1000 . +5 134258949 134264271 Low_mappability_island 1000 . +5 174541634 174542177 SSU-rRNA_Hsa 1000 . +6 58735349 58739031 centromeric_repeat 1000 . +6 58745955 58780547 centromeric_repeat 1000 . +6 61880095 61944008 centromeric_repeat 1000 . +6 62189892 62206612 ALR/Alpha 1000 . +6 62207809 62230644 ALR/Alpha 1000 . +6 62283966 62284581 Low_mappability_island 1000 . +6 133593944 133594201 LSU-rRNA_Hsa 1000 . +6 137059142 137059326 SSU-rRNA_Hsa 1000 . +6 150665074 150665281 SSU-rRNA_Hsa 1000 . +6 157731310 157735525 Low_mappability_island 1000 . +7 43878355 43878530 TAR1 1000 . +7 45291517 45291740 Low_mappability_island 1000 . +7 56437808 56442977 Low_mappability_island 1000 . +7 57253980 57254183 Low_mappability_island 1000 . +7 57255310 57255444 Low_mappability_island 1000 . +7 57261829 57261998 Low_mappability_island 1000 . +7 57544726 57556913 Satellite_repeat 1000 . 
+7 57811488 57836990 centromeric_repeat 1000 . +7 57939184 58055539 centromeric_repeat 1000 . +7 61054285 62454680 centromeric_repeat 1000 . +7 64059157 64066183 BSR/Beta 1000 . +7 64951348 64956223 centromeric_repeat 1000 . +7 68201468 68201673 Low_mappability_island 1000 . +7 68527370 68527788 LSU-rRNA_Hsa 1000 . +7 80962907 80963147 SSU-rRNA_Hsa 1000 . +7 100550640 100551321 Low_mappability_island 1000 . +7 142372972 142375638 Low_mappability_island 1000 . +7 145694403 145694561 Low_mappability_island 1000 . +8 155512 157639 TAR1 1000 . +8 21455971 21456306 LSU-rRNA_Hsa 1000 . +8 32868966 32873279 Low_mappability_island 1000 . +8 43092737 43097573 Satellite_repeat 1000 . +8 43399486 43843604 centromeric_repeat 1000 . +8 46838215 47457541 centromeric_repeat 1000 . +8 47739043 47742797 Low_mappability_island 1000 . +8 47750844 47776101 BSR/Beta 1000 . +8 56754955 56755418 LSU-rRNA_Hsa 1000 . +8 69218401 69218922 LSU-rRNA_Hsa 1000 . +8 70602248 70602620 LSU-rRNA_Hsa 1000 . +8 77114154 77114389 Low_mappability_island 1000 . +8 100508010 100508287 Low_mappability_island 1000 . +9 10435 11574 TAR1 1000 . +9 4799734 4800000 SSU-rRNA_Hsa 1000 . +9 33656606 33659249 Low_mappability_island 1000 . +9 42819021 42832395 centromeric_repeat 1000 . +9 44070617 44070871 Low_mappability_island 1000 . +9 44873123 44902307 centromeric_repeat 1000 . +9 45355954 45357644 telomeric_repeat 1000 . +9 45435109 45443517 centromeric_repeat 1000 . +9 66494170 66494805 TAR1 1000 . +9 66767710 66864329 centromeric_repeat 1000 . +9 66970914 67005594 centromeric_repeat 1000 . +9 67315122 67321036 centromeric_repeat 1000 . +9 67789868 67792893 centromeric_repeat 1000 . +9 68410775 68435115 Low_mappability_island 1000 . +9 69677073 69687998 centromeric_repeat 1000 . +9 69689770 69711497 centromeric_repeat 1000 . +9 69947961 70011196 centromeric_repeat 1000 . +9 70076144 70076855 centromeric_repeat 1000 . +9 70318723 70327683 centromeric_repeat 1000 . +9 72653073 72653572 Satellite_repeat 1000 . 
+9 78790077 78790255 (GAATG)n 1000 . +9 79186574 79187026 LSU-rRNA_Hsa 1000 . +9 141019938 141021783 TAR1 1000 . +MT 1 16569 chrM 1000 . +X 55206111 55206740 Low_mappability_island 1000 . +X 55207753 55208152 Low_mappability_island 1000 . +X 55208300 55208643 Low_mappability_island 1000 . +X 55208980 55209208 Low_mappability_island 1000 . +X 55209655 55210006 Low_mappability_island 1000 . +X 58330488 58330843 centromeric_repeat 1000 . +X 58373806 58373962 centromeric_repeat 1000 . +X 58377680 58377864 centromeric_repeat 1000 . +X 58415350 58416387 centromeric_repeat 1000 . +X 58432411 58432680 centromeric_repeat 1000 . +X 58485887 58486241 centromeric_repeat 1000 . +X 58488898 58494528 centromeric_repeat 1000 . +X 58499466 58504235 centromeric_repeat 1000 . +X 58506076 58528214 centromeric_repeat 1000 . +X 58528184 58536883 centromeric_repeat 1000 . +X 58544061 58582415 centromeric_repeat 1000 . +X 61681834 61919683 centromeric_repeat 1000 . +X 62003205 62041580 centromeric_repeat 1000 . +X 83658929 83659019 Low_mappability_island 1000 . +X 108297348 108297886 LSU-rRNA_Hsa 1000 . +X 114959057 115006437 Low_mappability_island 1000 . +X 125605623 125607351 Low_mappability_island 1000 . +X 125714985 125715338 Low_mappability_island 1000 . +X 125864844 125864980 Low_mappability_island 1000 . +X 125865719 125865874 Low_mappability_island 1000 . +Y 313470 313613 ALR/Alpha 1000 . +Y 3004989 3005175 LSU-rRNA_Hsa 1000 . +Y 4212807 4212910 Low_mappability_island 1000 . +Y 7671817 7694928 BSR/Beta 1000 . +Y 7726064 7730229 BSR/Beta 1000 . +Y 7730734 7731598 BSR/Beta 1000 . +Y 7735811 7752887 BSR/Beta 1000 . +Y 7785067 7806311 BSR/Beta 1000 . +Y 7806856 7814704 BSR/Beta 1000 . +Y 7815230 7820478 BSR/Beta 1000 . +Y 7829937 7832032 BSR/Beta 1000 . +Y 7832744 7848695 BSR/Beta 1000 . +Y 7870343 7873582 BSR/Beta 1000 . +Y 7874115 7874584 BSR/Beta 1000 . +Y 7875409 7885257 BSR/Beta 1000 . +Y 7886545 7894591 BSR/Beta 1000 . +Y 7898927 7916812 BSR/Beta 1000 . 
+Y 7918790 7921352 BSR/Beta 1000 . +Y 7926344 7936705 BSR/Beta 1000 . +Y 7941130 7947438 BSR/Beta 1000 . +Y 7948790 7964448 BSR/Beta 1000 . +Y 8179010 8181143 BSR/Beta 1000 . +Y 8181757 8213330 BSR/Beta 1000 . +Y 8214629 8215637 BSR/Beta 1000 . +Y 8220421 8230061 BSR/Beta 1000 . +Y 8230686 8231546 BSR/Beta 1000 . +Y 8240772 8265916 BSR/Beta 1000 . +Y 8291535 8292942 BSR/Beta 1000 . +Y 8294002 8295175 BSR/Beta 1000 . +Y 8296944 8321375 BSR/Beta 1000 . +Y 8325813 8325929 BSR/Beta 1000 . +Y 8326678 8333466 BSR/Beta 1000 . +Y 8334027 8342387 BSR/Beta 1000 . +Y 8356544 8369346 BSR/Beta 1000 . +Y 8909560 8909925 TAR1 1000 . +Y 8979478 8979585 Low_mappability_island 1000 . +Y 9072781 9072993 TAR1 1000 . +Y 9908430 9925608 centromeric_repeat 1000 . +Y 9981952 9982126 BSR/Beta 1000 . +Y 10034864 10036712 SSU-rRNA_Hsa 1000 . +Y 10040627 10045657 ALR/Alpha 1000 . +Y 10047773 10052533 ALR/Alpha 1000 . +Y 10053695 10057722 ALR/Alpha 1000 . +Y 10059394 10073694 ALR/Alpha 1000 . +Y 10075082 10075781 ALR/Alpha 1000 . +Y 10080736 10104539 ALR/Alpha 1000 . +Y 13104530 13144368 centromeric_repeat 1000 . +Y 13193966 13196535 Low_mappability_island 1000 . +Y 13252193 13259484 centromeric_repeat 1000 . +Y 13290177 13290667 chrM 1000 . +Y 13445957 13490591 Satellite_repeat 1000 . +Y 13642186 13749784 Satellite_repeat 1000 . +Y 13798522 13870984 Satellite_repeat 1000 . +Y 19691913 19692524 LSU-rRNA_Hsa 1000 . +Y 19764063 19776198 ALR/Alpha 1000 . +Y 19780600 19781704 ALR/Alpha 1000 . +Y 19783669 19796396 ALR/Alpha 1000 . +Y 19800068 19801419 ALR/Alpha 1000 . +Y 19808085 19817100 ALR/Alpha 1000 . +Y 19944298 19944581 TAR1 1000 . +Y 20235195 20235478 TAR1 1000 . +Y 20362679 20371694 ALR/Alpha 1000 . +Y 20378360 20379711 ALR/Alpha 1000 . +Y 20383383 20396110 ALR/Alpha 1000 . +Y 20398075 20399179 ALR/Alpha 1000 . +Y 20403581 20415713 ALR/Alpha 1000 . +Y 20487248 20487859 LSU-rRNA_Hsa 1000 . +Y 23124788 23125577 BSR/Beta 1000 . +Y 23149027 23151205 BSR/Beta 1000 . 
+Y 23157969 23158245 BSR/Beta 1000 . +Y 23159001 23167737 BSR/Beta 1000 . +Y 23178886 23181770 BSR/Beta 1000 . +Y 23220740 23223625 BSR/Beta 1000 . +Y 23234125 23235822 BSR/Beta 1000 . +Y 23236898 23248080 BSR/Beta 1000 . +Y 23248729 23248851 BSR/Beta 1000 . +Y 23899295 23899388 TAR1 1000 . +Y 23956449 23956628 TAR1 1000 . +Y 24247659 24247700 TAR1 1000 . +Y 24630999 24631040 TAR1 1000 . +Y 24953159 24975657 BSR/Beta 1000 . +Y 24980997 24991235 BSR/Beta 1000 . +Y 25022753 25039185 BSR/Beta 1000 . +Y 25040153 25042421 BSR/Beta 1000 . +Y 25048332 25059258 BSR/Beta 1000 . +Y 25060235 25064798 BSR/Beta 1000 . +Y 25099139 25121882 BSR/Beta 1000 . +Y 25122419 25160800 BSR/Beta 1000 . +Y 25182404 25192372 BSR/Beta 1000 . +Y 25217722 25219409 BSR/Beta 1000 . +Y 25493588 25495275 BSR/Beta 1000 . +Y 26148315 26148450 TAR1 1000 . +Y 26586905 26609405 BSR/Beta 1000 . +Y 26614745 26624983 BSR/Beta 1000 . +Y 26656502 26672934 BSR/Beta 1000 . +Y 26673902 26676170 BSR/Beta 1000 . +Y 26682081 26693007 BSR/Beta 1000 . +Y 26693984 26698547 BSR/Beta 1000 . +Y 26732883 26755623 BSR/Beta 1000 . +Y 26756160 26794538 BSR/Beta 1000 . +Y 26816148 26826116 BSR/Beta 1000 . +Y 26851466 26853153 BSR/Beta 1000 . +Y 27109247 27110934 BSR/Beta 1000 . +Y 27136281 27146249 BSR/Beta 1000 . +Y 27167859 27206241 BSR/Beta 1000 . +Y 27206778 27229502 BSR/Beta 1000 . +Y 27263848 27268411 BSR/Beta 1000 . +Y 27269388 27280315 BSR/Beta 1000 . +Y 27286226 27288494 BSR/Beta 1000 . +Y 27289462 27305895 BSR/Beta 1000 . +Y 27337415 27347656 BSR/Beta 1000 . +Y 27352996 27375497 BSR/Beta 1000 . +Y 27813984 27814119 TAR1 1000 . +Y 28555026 28555353 TAR1 1000 . +Y 28784129 28819695 Satellite_repeat 1000 . +Y 58819367 58917648 (CATTC)n 1000 . +Y 58971913 58997782 (CATTC)n 1000 . +Y 59361267 59362785 TAR1 1000 . 
diff --git a/assets/blacklists/v1.0/hg19-blacklist.v1.bed b/assets/blacklists/v1.0/hg19-blacklist.v1.bed new file mode 100644 index 0000000000000000000000000000000000000000..a37f0483c3b26fadb79ac38cfd537b80e99fc32b --- /dev/null +++ b/assets/blacklists/v1.0/hg19-blacklist.v1.bed @@ -0,0 +1,411 @@ +chr1 564449 570371 High_Mappability_island 1000 . +chr1 724136 727043 Satellite_repeat 1000 . +chr1 825006 825115 BSR/Beta 1000 . +chr1 2583334 2634374 Low_mappability_island 1000 . +chr1 4363064 4363242 (CATTC)n 1000 . +chr1 5725866 5736651 Low_mappability_island 1000 . +chr1 16839923 16841396 Low_mappability_island 1000 . +chr1 38077347 38077423 Low_mappability_island 1000 . +chr1 91852785 91853147 LSU-rRNA_Hsa 1000 . +chr1 104163724 104163860 Low_mappability_island 1000 . +chr1 108112972 108113707 LSU-rRNA_Hsa 1000 . +chr1 121351474 121487059 centromeric_repeat 1000 . +chr1 142535434 142543081 Satellite_repeat 1000 . +chr1 142723256 142723968 Low_mappability_island 1000 . +chr1 142792613 142793303 Low_mappability_island 1000 . +chr1 142835822 142837333 Low_mappability_island 1000 . +chr1 143274490 143284340 centromeric_repeat 1000 . +chr1 145277108 145277572 LSU-rRNA_Hsa 1000 . +chr1 149033183 149035829 Satellite_repeat 1000 . +chr1 156186169 156186712 High_Mappability_island 1000 . +chr1 224199390 224204260 Satellite_repeat 1000 . +chr1 233318467 233318516 (CATTC)n 1000 . +chr1 236260366 236260821 Low_mappability_island 1000 . +chr1 237766308 237766764 LSU-rRNA_Hsa 1000 . +chr1 238105345 238105511 Low_mappability_island 1000 . +chr1 238108025 238108378 Low_mappability_island 1000 . +chr1 238108645 238109697 Low_mappability_island 1000 . +chr10 18841533 18862467 (CATTC)n 1000 . +chr10 20035661 20037171 Low_mappability_island 1000 . +chr10 36722282 36723650 Low_mappability_island 1000 . +chr10 38772277 38819357 Satellite_repeat 1000 . +chr10 38868892 38889025 Satellite_repeat 1000 . +chr10 39076515 39155771 Satellite_repeat 1000 . 
+chr10 42354835 42548642 centromeric_repeat 1000 . +chr10 42596676 42602082 Satellite_repeat 1000 . +chr10 42596700 42602110 Satellite_repeat 1000 . +chr10 42661264 42667623 Satellite_repeat 1000 . +chr10 42790522 42818398 Satellite_repeat 1000 . +chr10 135498649 135502716 Satellite_repeat 1000 . +chr11 6831669 6831838 ALR/Alpha 1000 . +chr11 10529403 10531969 Low_mappability_island 1000 . +chr11 48671444 48902406 centromeric_repeat 1000 . +chr11 48931242 48964015 centromeric_repeat 1000 . +chr11 50318471 50784078 centromeric_repeat 1000 . +chr11 51090700 51374066 centromeric_repeat 1000 . +chr11 51567242 51594226 centromeric_repeat 1000 . +chr11 54694046 55027975 centromeric_repeat 1000 . +chr11 73221660 73221946 Low_mappability_island 1000 . +chr11 85194913 85195322 LSU-rRNA_Hsa 1000 . +chr11 87524468 87525005 Low_mappability_island 1000 . +chr11 103275584 103281729 Low_mappability_island 1000 . +chr11 122874287 122874443 Low_mappability_island 1000 . +chr12 20704285 20704583 SSU-rRNA_Hsa 1000 . +chr12 34372315 34372825 LSU-rRNA_Hsa 1000 . +chr12 34432130 34857010 centromeric_repeat 1000 . +chr12 37989447 38441828 centromeric_repeat 1000 . +chr12 38531376 38531930 LSU-rRNA_Hsa 1000 . +chr12 41757383 41757545 Low_mappability_island 1000 . +chr12 127650407 127651075 LSU-rRNA_Hsa 1000 . +chr12 132061320 132062046 Low_mappability_island 1000 . +chr13 56545728 56545925 Low_mappability_island 1000 . +chr13 110076444 110076782 Low_mappability_island 1000 . +chr14 18999935 19056900 centromeric_repeat 1000 . +chr14 32953263 32954381 Low_mappability_island 1000 . +chr14 84637832 84639038 Low_mappability_island 1000 . +chr14 90341302 90341516 SSU-rRNA_Hsa 1000 . +chr15 19999941 20044132 centromeric_repeat 1000 . +chr16 32493036 32570826 ALR/Alpha 1000 . +chr16 32590063 32598801 ALR/Alpha 1000 . +chr16 33237130 33241330 Low_mappability_island 1000 . +chr16 33864355 34023306 centromeric_repeat 1000 . +chr16 34180542 34197081 Satellite_repeat 1000 . 
+chr16 34530115 34542632 BSR/Beta 1000 . +chr16 35193580 35285885 centromeric_repeat 1000 . +chr16 46385718 46456668 Satellite_repeat 1000 . +chr16 46497639 46500515 Satellite_repeat 1000 . +chr16 47538629 47539297 LSU-rRNA_Hsa 1000 . +chr17 19355538 19356096 LSU-rRNA_Hsa 1000 . +chr17 19502495 19506773 Low_mappability_island 1000 . +chr17 21905167 21906712 centromeric_repeat 1000 . +chr17 22018524 22032049 Low_mappability_island 1000 . +chr17 22221073 22263006 centromeric_repeat 1000 . +chr17 25263010 25268059 Satellite_repeat 1000 . +chr17 25415551 25417559 telomeric_repeat 1000 . +chr17 31149365 31149981 High_Mappability_island 1000 . +chr17 33478114 33478372 LSU-rRNA_Hsa 1000 . +chr17 41381502 41382591 High_Mappability_island 1000 . +chr17 41463538 41464075 High_Mappability_island 1000 . +chr17 41464478 41465015 snRNA 1000 . +chr17 41465562 41467288 High_Mappability_island 1000 . +chr17 51183038 51183763 Low_mappability_island 1000 . +chr17 55868618 55868752 LSU-rRNA_Hsa 1000 . +chr17 75158031 75158430 LSU-rRNA_Hsa 1000 . +chr18 96416 97552 Satellite_repeat 1000 . +chr18 105658 112233 Satellite_repeat 1000 . +chr18 2842252 2842356 Low_mappability_island 1000 . +chr18 15393801 15393992 centromeric_repeat 1000 . +chr18 18510894 18520356 centromeric_repeat 1000 . +chr18 44126235 44126593 (CATTC)n 1000 . +chr18 45379603 45379864 Low_mappability_island 1000 . +chr18 50319086 50319301 Low_mappability_island 1000 . +chr18 77772846 77773065 LSU-rRNA_Hsa 1000 . +chr19 246006 247844 TAR1 1000 . +chr19 22877614 22877696 SSU-rRNA_Hsa 1000 . +chr19 23235030 23235504 BSR/Beta 1000 . +chr19 24182398 24186210 LSU-rRNA_Hsa 1000 . +chr19 24385474 24633168 centromeric_repeat 1000 . +chr19 27730611 28262682 centromeric_repeat 1000 . +chr19 36066445 36066810 LSU-rRNA_Hsa 1000 . +chr19 36756398 36800948 centromeric_repeat 1000 . +chr19 37759473 37797722 centromeric_repeat 1000 . +chr19 44914313 44916340 ACRO1 1000 . +chr19 44960681 44962681 ACRO1 1000 . 
+chr2 739925 740994 Low_mappability_island 1000 . +chr2 49456729 49457067 Low_mappability_island 1000 . +chr2 88124390 88124903 Low_mappability_island 1000 . +chr2 89830421 89880514 Satellite_repeat 1000 . +chr2 90371401 90394776 Satellite_repeat 1000 . +chr2 90443001 90545431 Low_mappability_island 1000 . +chr2 91595080 91616015 Satellite_repeat 1000 . +chr2 92267428 92326280 centromeric_repeat 1000 . +chr2 115695017 115695281 LSU-rRNA_Hsa 1000 . +chr2 117781085 117781300 Low_mappability_island 1000 . +chr2 132966248 132989300 centromeric_repeat 1000 . +chr2 132994855 133007983 ALR/Alpha 1000 . +chr2 133011824 133013298 SSU-rRNA_Hsa 1000 . +chr2 133036250 133040042 LSU-rRNA_Hsa 1000 . +chr2 133044095 133045945 ACRO1 1000 . +chr2 143848503 143848792 Low_mappability_island 1000 . +chr2 148022736 148022878 Low_mappability_island 1000 . +chr2 149639207 149639515 Low_mappability_island 1000 . +chr2 156120500 156120610 Low_mappability_island 1000 . +chr2 162135000 162139241 Low_mappability_island 1000 . +chr2 230045426 230045796 LSU-rRNA_Hsa 1000 . +chr20 26257032 26320267 centromeric_repeat 1000 . +chr20 29517710 29521147 centromeric_repeat 1000 . +chr20 29803876 29833334 centromeric_repeat 1000 . +chr20 55932703 55936114 chrM 1000 . +chr20 62916702 62918053 telomeric_repeat 1000 . +chr21 9647205 9648529 Low_mappability_island 1000 . +chr21 9694896 9704962 centromeric_repeat 1000 . +chr21 9825451 9827612 High_Mappability_island 1000 . +chr21 9827612 9845233 Low_mappability_island 1000 . +chr21 9881895 9882569 TAR1 1000 . +chr21 10084922 10088004 Satellite_repeat 1000 . +chr21 10492876 10493049 Low_mappability_island 1000 . +chr21 10599428 10599915 TAR1 1000 . +chr21 10697886 10860890 centromeric_repeat 1000 . +chr21 11186054 11188131 Satellite_repeat 1000 . +chr21 14338127 14369791 centromeric_repeat 1000 . +chr21 18800575 18800997 (GAGTG)n 1000 . +chr21 27228003 27228242 SSU-rRNA_Hsa 1000 . +chr21 46796081 46796336 Low_mappability_island 1000 . 
+chr22 16847814 16862659 Satellite_repeat 1000 . +chr22 18876789 18884510 Satellite_repeat 1000 . +chr3 25508897 25509131 Low_mappability_island 1000 . +chr3 73159606 73161131 snRNA 1000 . +chr3 75696297 75699304 BSR/Beta 1000 . +chr3 75717841 75720426 Satellite_repeat 1000 . +chr3 80995858 81014459 ALR/Alpha 1000 . +chr3 90311686 90507410 centromeric_repeat 1000 . +chr3 93504815 93519133 centromeric_repeat 1000 . +chr3 96335934 96337436 Low_mappability_island 1000 . +chr3 160665423 160665642 Low_mappability_island 1000 . +chr3 196625514 196625860 Satellite_repeat 1000 . +chr3 197825427 197834080 Low_mappability_island 1000 . +chr4 9987 12694 telomeric_repeat 1000 . +chr4 12276463 12292424 ALR/Alpha 1000 . +chr4 12641862 12642305 Low_mappability_island 1000 . +chr4 21583630 21583719 (GAATG)n 1000 . +chr4 27732004 27732240 Low_mappability_island 1000 . +chr4 47774268 47774416 Low_mappability_island 1000 . +chr4 49085372 49342114 centromeric_repeat 1000 . +chr4 49488472 49662085 centromeric_repeat 1000 . +chr4 52659961 52688986 centromeric_repeat 1000 . +chr4 56194229 56194584 Low_mappability_island 1000 . +chr4 65473858 65473941 Low_mappability_island 1000 . +chr4 68264186 68266830 centromeric_repeat 1000 . +chr4 70296565 70296841 LSU-rRNA_Hsa 1000 . +chr4 76807083 76807320 LSU-rRNA_Hsa 1000 . +chr4 78929660 78929920 Low_mappability_island 1000 . +chr4 156374749 156377226 chrM 1000 . +chr4 156384860 156387314 Low_mappability_island 1000 . +chr4 163342479 163342744 Low_mappability_island 1000 . +chr4 190190746 190203442 Low_mappability_island 1000 . +chr4 190801869 190802909 Low_mappability_island 1000 . +chr4 190943802 190943962 Satellite_repeat 1000 . +chr4 190987268 190990949 Satellite_repeat 1000 . +chr4 191026302 191044344 telomeric_repeat 1000 . +chr5 17517177 17600940 Low_mappability_island 1000 . +chr5 21477365 21497415 Low_mappability_island 1000 . +chr5 34177882 34197574 Low_mappability_island 1000 . +chr5 45908253 46411114 centromeric_repeat 1000 . 
+chr5 49405493 49554574 centromeric_repeat 1000 . +chr5 71146650 71146996 LSU-rRNA_Hsa 1000 . +chr5 79945807 79948223 Low_mappability_island 1000 . +chr5 93903068 93906726 Low_mappability_island 1000 . +chr5 97746525 97746679 Low_mappability_island 1000 . +chr5 99381556 99390873 Low_mappability_island 1000 . +chr5 105889063 105889263 chrM 1000 . +chr5 123095972 123097432 chrM 1000 . +chr5 134258949 134264271 Low_mappability_island 1000 . +chr5 174541634 174542177 SSU-rRNA_Hsa 1000 . +chr6 58735349 58739031 centromeric_repeat 1000 . +chr6 58745955 58780547 centromeric_repeat 1000 . +chr6 61880095 61944008 centromeric_repeat 1000 . +chr6 62189892 62206612 ALR/Alpha 1000 . +chr6 62207809 62230644 ALR/Alpha 1000 . +chr6 62283966 62284581 Low_mappability_island 1000 . +chr6 133593944 133594201 LSU-rRNA_Hsa 1000 . +chr6 137059142 137059326 SSU-rRNA_Hsa 1000 . +chr6 150665074 150665281 SSU-rRNA_Hsa 1000 . +chr6 157731310 157735525 Low_mappability_island 1000 . +chr7 43878355 43878530 TAR1 1000 . +chr7 45291517 45291740 Low_mappability_island 1000 . +chr7 56437808 56442977 Low_mappability_island 1000 . +chr7 57253980 57254183 Low_mappability_island 1000 . +chr7 57255310 57255444 Low_mappability_island 1000 . +chr7 57261829 57261998 Low_mappability_island 1000 . +chr7 57544726 57556913 Satellite_repeat 1000 . +chr7 57811488 57836990 centromeric_repeat 1000 . +chr7 57939184 58055539 centromeric_repeat 1000 . +chr7 61054285 62454680 centromeric_repeat 1000 . +chr7 64059157 64066183 BSR/Beta 1000 . +chr7 64951348 64956223 centromeric_repeat 1000 . +chr7 68201468 68201673 Low_mappability_island 1000 . +chr7 68527370 68527788 LSU-rRNA_Hsa 1000 . +chr7 80962907 80963147 SSU-rRNA_Hsa 1000 . +chr7 100550640 100551321 Low_mappability_island 1000 . +chr7 142372972 142375638 Low_mappability_island 1000 . +chr7 145694403 145694561 Low_mappability_island 1000 . +chr8 155512 157639 TAR1 1000 . +chr8 21455971 21456306 LSU-rRNA_Hsa 1000 . 
+chr8 32868966 32873279 Low_mappability_island 1000 . +chr8 43092737 43097573 Satellite_repeat 1000 . +chr8 43399486 43843604 centromeric_repeat 1000 . +chr8 46838215 47457541 centromeric_repeat 1000 . +chr8 47739043 47742797 Low_mappability_island 1000 . +chr8 47750844 47776101 BSR/Beta 1000 . +chr8 56754955 56755418 LSU-rRNA_Hsa 1000 . +chr8 69218401 69218922 LSU-rRNA_Hsa 1000 . +chr8 70602248 70602620 LSU-rRNA_Hsa 1000 . +chr8 77114154 77114389 Low_mappability_island 1000 . +chr8 100508010 100508287 Low_mappability_island 1000 . +chr9 10435 11574 TAR1 1000 . +chr9 4799734 4800000 SSU-rRNA_Hsa 1000 . +chr9 33656606 33659249 Low_mappability_island 1000 . +chr9 42819021 42832395 centromeric_repeat 1000 . +chr9 44070617 44070871 Low_mappability_island 1000 . +chr9 44873123 44902307 centromeric_repeat 1000 . +chr9 45355954 45357644 telomeric_repeat 1000 . +chr9 45435109 45443517 centromeric_repeat 1000 . +chr9 66494170 66494805 TAR1 1000 . +chr9 66767710 66864329 centromeric_repeat 1000 . +chr9 66970914 67005594 centromeric_repeat 1000 . +chr9 67315122 67321036 centromeric_repeat 1000 . +chr9 67789868 67792893 centromeric_repeat 1000 . +chr9 68410775 68435115 Low_mappability_island 1000 . +chr9 69677073 69687998 centromeric_repeat 1000 . +chr9 69689770 69711497 centromeric_repeat 1000 . +chr9 69947961 70011196 centromeric_repeat 1000 . +chr9 70076144 70076855 centromeric_repeat 1000 . +chr9 70318723 70327683 centromeric_repeat 1000 . +chr9 72653073 72653572 Satellite_repeat 1000 . +chr9 78790077 78790255 (GAATG)n 1000 . +chr9 79186574 79187026 LSU-rRNA_Hsa 1000 . +chr9 141019938 141021783 TAR1 1000 . +chrM 1 16571 chrM 1000 . +chrX 55206111 55206740 Low_mappability_island 1000 . +chrX 55207753 55208152 Low_mappability_island 1000 . +chrX 55208300 55208643 Low_mappability_island 1000 . +chrX 55208980 55209208 Low_mappability_island 1000 . +chrX 55209655 55210006 Low_mappability_island 1000 . +chrX 58330488 58330843 centromeric_repeat 1000 . 
+chrX 58373806 58373962 centromeric_repeat 1000 . +chrX 58377680 58377864 centromeric_repeat 1000 . +chrX 58415350 58416387 centromeric_repeat 1000 . +chrX 58432411 58432680 centromeric_repeat 1000 . +chrX 58485887 58486241 centromeric_repeat 1000 . +chrX 58488898 58494528 centromeric_repeat 1000 . +chrX 58499466 58504235 centromeric_repeat 1000 . +chrX 58506076 58528214 centromeric_repeat 1000 . +chrX 58528184 58536883 centromeric_repeat 1000 . +chrX 58544061 58582415 centromeric_repeat 1000 . +chrX 61681834 61919683 centromeric_repeat 1000 . +chrX 62003205 62041580 centromeric_repeat 1000 . +chrX 83658929 83659019 Low_mappability_island 1000 . +chrX 108297348 108297886 LSU-rRNA_Hsa 1000 . +chrX 114959057 115006437 Low_mappability_island 1000 . +chrX 125605623 125607351 Low_mappability_island 1000 . +chrX 125714985 125715338 Low_mappability_island 1000 . +chrX 125864844 125864980 Low_mappability_island 1000 . +chrX 125865719 125865874 Low_mappability_island 1000 . +chrY 313470 313613 ALR/Alpha 1000 . +chrY 3004989 3005175 LSU-rRNA_Hsa 1000 . +chrY 4212807 4212910 Low_mappability_island 1000 . +chrY 7671817 7694928 BSR/Beta 1000 . +chrY 7726064 7730229 BSR/Beta 1000 . +chrY 7730734 7731598 BSR/Beta 1000 . +chrY 7735811 7752887 BSR/Beta 1000 . +chrY 7785067 7806311 BSR/Beta 1000 . +chrY 7806856 7814704 BSR/Beta 1000 . +chrY 7815230 7820478 BSR/Beta 1000 . +chrY 7829937 7832032 BSR/Beta 1000 . +chrY 7832744 7848695 BSR/Beta 1000 . +chrY 7870343 7873582 BSR/Beta 1000 . +chrY 7874115 7874584 BSR/Beta 1000 . +chrY 7875409 7885257 BSR/Beta 1000 . +chrY 7886545 7894591 BSR/Beta 1000 . +chrY 7898927 7916812 BSR/Beta 1000 . +chrY 7918790 7921352 BSR/Beta 1000 . +chrY 7926344 7936705 BSR/Beta 1000 . +chrY 7941130 7947438 BSR/Beta 1000 . +chrY 7948790 7964448 BSR/Beta 1000 . +chrY 8179010 8181143 BSR/Beta 1000 . +chrY 8181757 8213330 BSR/Beta 1000 . +chrY 8214629 8215637 BSR/Beta 1000 . +chrY 8220421 8230061 BSR/Beta 1000 . +chrY 8230686 8231546 BSR/Beta 1000 . 
+chrY 8240772 8265916 BSR/Beta 1000 . +chrY 8291535 8292942 BSR/Beta 1000 . +chrY 8294002 8295175 BSR/Beta 1000 . +chrY 8296944 8321375 BSR/Beta 1000 . +chrY 8325813 8325929 BSR/Beta 1000 . +chrY 8326678 8333466 BSR/Beta 1000 . +chrY 8334027 8342387 BSR/Beta 1000 . +chrY 8356544 8369346 BSR/Beta 1000 . +chrY 8909560 8909925 TAR1 1000 . +chrY 8979478 8979585 Low_mappability_island 1000 . +chrY 9072781 9072993 TAR1 1000 . +chrY 9908430 9925608 centromeric_repeat 1000 . +chrY 9981952 9982126 BSR/Beta 1000 . +chrY 10034864 10036712 SSU-rRNA_Hsa 1000 . +chrY 10040627 10045657 ALR/Alpha 1000 . +chrY 10047773 10052533 ALR/Alpha 1000 . +chrY 10053695 10057722 ALR/Alpha 1000 . +chrY 10059394 10073694 ALR/Alpha 1000 . +chrY 10075082 10075781 ALR/Alpha 1000 . +chrY 10080736 10104539 ALR/Alpha 1000 . +chrY 13104530 13144368 centromeric_repeat 1000 . +chrY 13193966 13196535 Low_mappability_island 1000 . +chrY 13252193 13259484 centromeric_repeat 1000 . +chrY 13290177 13290667 chrM 1000 . +chrY 13445957 13490591 Satellite_repeat 1000 . +chrY 13642186 13749784 Satellite_repeat 1000 . +chrY 13798522 13870984 Satellite_repeat 1000 . +chrY 19691913 19692524 LSU-rRNA_Hsa 1000 . +chrY 19764063 19776198 ALR/Alpha 1000 . +chrY 19780600 19781704 ALR/Alpha 1000 . +chrY 19783669 19796396 ALR/Alpha 1000 . +chrY 19800068 19801419 ALR/Alpha 1000 . +chrY 19808085 19817100 ALR/Alpha 1000 . +chrY 19944298 19944581 TAR1 1000 . +chrY 20235195 20235478 TAR1 1000 . +chrY 20362679 20371694 ALR/Alpha 1000 . +chrY 20378360 20379711 ALR/Alpha 1000 . +chrY 20383383 20396110 ALR/Alpha 1000 . +chrY 20398075 20399179 ALR/Alpha 1000 . +chrY 20403581 20415713 ALR/Alpha 1000 . +chrY 20487248 20487859 LSU-rRNA_Hsa 1000 . +chrY 23124788 23125577 BSR/Beta 1000 . +chrY 23149027 23151205 BSR/Beta 1000 . +chrY 23157969 23158245 BSR/Beta 1000 . +chrY 23159001 23167737 BSR/Beta 1000 . +chrY 23178886 23181770 BSR/Beta 1000 . +chrY 23220740 23223625 BSR/Beta 1000 . +chrY 23234125 23235822 BSR/Beta 1000 . 
+chrY 23236898 23248080 BSR/Beta 1000 . +chrY 23248729 23248851 BSR/Beta 1000 . +chrY 23899295 23899388 TAR1 1000 . +chrY 23956449 23956628 TAR1 1000 . +chrY 24247659 24247700 TAR1 1000 . +chrY 24630999 24631040 TAR1 1000 . +chrY 24953159 24975657 BSR/Beta 1000 . +chrY 24980997 24991235 BSR/Beta 1000 . +chrY 25022753 25039185 BSR/Beta 1000 . +chrY 25040153 25042421 BSR/Beta 1000 . +chrY 25048332 25059258 BSR/Beta 1000 . +chrY 25060235 25064798 BSR/Beta 1000 . +chrY 25099139 25121882 BSR/Beta 1000 . +chrY 25122419 25160800 BSR/Beta 1000 . +chrY 25182404 25192372 BSR/Beta 1000 . +chrY 25217722 25219409 BSR/Beta 1000 . +chrY 25493588 25495275 BSR/Beta 1000 . +chrY 26148315 26148450 TAR1 1000 . +chrY 26586905 26609405 BSR/Beta 1000 . +chrY 26614745 26624983 BSR/Beta 1000 . +chrY 26656502 26672934 BSR/Beta 1000 . +chrY 26673902 26676170 BSR/Beta 1000 . +chrY 26682081 26693007 BSR/Beta 1000 . +chrY 26693984 26698547 BSR/Beta 1000 . +chrY 26732883 26755623 BSR/Beta 1000 . +chrY 26756160 26794538 BSR/Beta 1000 . +chrY 26816148 26826116 BSR/Beta 1000 . +chrY 26851466 26853153 BSR/Beta 1000 . +chrY 27109247 27110934 BSR/Beta 1000 . +chrY 27136281 27146249 BSR/Beta 1000 . +chrY 27167859 27206241 BSR/Beta 1000 . +chrY 27206778 27229502 BSR/Beta 1000 . +chrY 27263848 27268411 BSR/Beta 1000 . +chrY 27269388 27280315 BSR/Beta 1000 . +chrY 27286226 27288494 BSR/Beta 1000 . +chrY 27289462 27305895 BSR/Beta 1000 . +chrY 27337415 27347656 BSR/Beta 1000 . +chrY 27352996 27375497 BSR/Beta 1000 . +chrY 27813984 27814119 TAR1 1000 . +chrY 28555026 28555353 TAR1 1000 . +chrY 28784129 28819695 Satellite_repeat 1000 . +chrY 58819367 58917648 (CATTC)n 1000 . +chrY 58971913 58997782 (CATTC)n 1000 . +chrY 59361267 59362785 TAR1 1000 . 
diff --git a/assets/blacklists/v2.0/GRCm38-blacklist.v2.bed b/assets/blacklists/v2.0/GRCm38-blacklist.v2.bed new file mode 100644 index 0000000000000000000000000000000000000000..41007eb726360c64073f05ab008f4d584b424db8 --- /dev/null +++ b/assets/blacklists/v2.0/GRCm38-blacklist.v2.bed @@ -0,0 +1,3435 @@ +10 0 3135400 High Signal Region +10 3218900 3276600 Low Mappability +10 3576900 3627700 Low Mappability +10 4191100 4197600 Low Mappability +10 4613500 4615400 High Signal Region +10 4761300 4763900 High Signal Region +10 5080800 5096600 Low Mappability +10 5580100 5586600 Low Mappability +10 6281200 6286700 High Signal Region +10 6740200 6742100 High Signal Region +10 7396300 7429800 High Signal Region +10 7633600 7636600 Low Mappability +10 7889700 7897500 High Signal Region +10 8144900 8153000 High Signal Region +10 8264000 8269200 High Signal Region +10 8382400 8404400 High Signal Region +10 8599200 8606400 Low Mappability +10 10012200 10033400 High Signal Region +10 10566900 10593500 High Signal Region +10 11218400 11224800 Low Mappability +10 11351800 11406300 Low Mappability +10 11491200 11493100 High Signal Region +10 11612300 11642500 High Signal Region +10 11692500 11701300 Low Mappability +10 12266500 12273000 High Signal Region +10 12385800 12396000 High Signal Region +10 13401200 13403100 High Signal Region +10 14559900 14577100 High Signal Region +10 14646300 14664500 Low Mappability +10 14923800 14928300 High Signal Region +10 15047600 15083100 High Signal Region +10 15528600 15534200 High Signal Region +10 15567000 15641800 High Signal Region +10 16967500 16971600 High Signal Region +10 17499600 17501700 High Signal Region +10 18555500 18558100 High Signal Region +10 19427600 19429100 High Signal Region +10 19538800 19546100 Low Mappability +10 19772200 19801600 High Signal Region +10 20458900 20460800 High Signal Region +10 21208600 21216600 Low Mappability +10 21278500 21313500 High Signal Region +10 21642200 21649600 Low Mappability +10 21727800 
21736400 Low Mappability +10 22031300 22063500 High Signal Region +10 22127200 22164500 High Signal Region +10 22186700 22290500 High Signal Region +10 22369100 22472300 High Signal Region +10 22683100 22690600 Low Mappability +10 22935900 22941800 High Signal Region +10 24687500 24691700 Low Mappability +10 25091400 25106900 Low Mappability +10 25622900 25629400 Low Mappability +10 25968400 25973400 Low Mappability +10 26641500 26662800 Low Mappability +10 27403200 27407600 High Signal Region +10 27904000 27909500 High Signal Region +10 28908500 28940600 High Signal Region +10 29243900 29249600 High Signal Region +10 29924300 29930700 Low Mappability +10 29954000 29971900 High Signal Region +10 30553000 30577100 High Signal Region +10 31054900 31095900 Low Mappability +10 31406500 31411100 High Signal Region +10 31750000 31757100 Low Mappability +10 31878400 31885800 High Signal Region +10 31980100 32000400 Low Mappability +10 32039700 32045000 High Signal Region +10 32176100 32182400 High Signal Region +10 32499200 32529900 High Signal Region +10 32816400 32857200 High Signal Region +10 33315300 33319800 High Signal Region +10 33492300 33508900 High Signal Region +10 33886600 33901100 Low Mappability +10 34739400 34749100 Low Mappability +10 35669300 35725500 High Signal Region +10 36130200 36135500 High Signal Region +10 36160700 36166700 High Signal Region +10 36594500 36597500 Low Mappability +10 36942200 36948800 Low Mappability +10 37186500 37189300 High Signal Region +10 37799700 37821400 High Signal Region +10 37964600 37970100 High Signal Region +10 38590100 38606100 High Signal Region +10 38637900 38644200 High Signal Region +10 38729400 38782700 High Signal Region +10 38933500 38956500 High Signal Region +10 39126700 39129400 High Signal Region +10 39760700 39764700 High Signal Region +10 41185700 41195800 High Signal Region +10 41840500 41859100 Low Mappability +10 43769400 43773800 High Signal Region +10 44206300 44254100 High Signal Region +10 
45515000 45588000 Low Mappability +10 45624800 45628400 High Signal Region +10 46136500 46139300 High Signal Region +10 46468300 46472100 High Signal Region +10 46500500 46538800 High Signal Region +10 46789300 46812500 High Signal Region +10 46966700 47009000 High Signal Region +10 47048600 47074700 Low Mappability +10 47663600 47683500 High Signal Region +10 47743600 47758500 High Signal Region +10 47875400 47881600 High Signal Region +10 48032400 48058800 High Signal Region +10 48677400 48682800 High Signal Region +10 49823500 49842200 High Signal Region +10 50029200 50035300 High Signal Region +10 50109900 50115500 High Signal Region +10 50178500 50184800 High Signal Region +10 50253700 50296500 High Signal Region +10 50333400 50335300 High Signal Region +10 50524000 50553900 High Signal Region +10 51126200 51132900 High Signal Region +10 51436800 51448000 High Signal Region +10 51470300 51474900 High Signal Region +10 51882900 51888000 Low Mappability +10 52052600 52059000 Low Mappability +10 52089600 52148500 High Signal Region +10 52522600 52599800 High Signal Region +10 53073900 53081100 High Signal Region +10 53569600 53576000 Low Mappability +10 54216200 54222900 High Signal Region +10 54588800 54619900 Low Mappability +10 55080400 55090500 High Signal Region +10 55654500 55659600 High Signal Region +10 55715600 55751000 High Signal Region +10 55841700 55847900 High Signal Region +10 56250200 56293900 High Signal Region +10 56701000 56728000 High Signal Region +10 56894100 56897300 High Signal Region +10 57099200 57153200 High Signal Region +10 57239100 57245400 High Signal Region +10 57326900 57333900 High Signal Region +10 57434000 57456500 High Signal Region +10 57678600 57684900 High Signal Region +10 57862800 58240900 High Signal Region +10 58566200 58570900 High Signal Region +10 59381400 59396800 Low Mappability +10 59850500 59922300 Low Mappability +10 60444900 60446800 High Signal Region +10 60546600 60553100 Low Mappability +10 61373100 61375000 
High Signal Region +10 63103900 63111200 Low Mappability +10 63508800 63519000 High Signal Region +10 63833800 63835000 High Signal Region +10 64418600 64420000 High Signal Region +10 65166300 65172600 High Signal Region +10 65450400 65477700 High Signal Region +10 65638900 65670200 High Signal Region +10 65938900 65956300 Low Mappability +10 66422900 66431000 High Signal Region +10 66662400 66678300 High Signal Region +10 69030100 69065800 High Signal Region +10 70657500 70668500 High Signal Region +10 70785400 70798600 Low Mappability +10 71012700 71019200 Low Mappability +10 71111600 71114200 Low Mappability +10 71510600 71637800 High Signal Region +10 71691300 71698600 Low Mappability +10 72292400 72314300 High Signal Region +10 72359200 72360700 High Signal Region +10 72493500 72499200 High Signal Region +10 72590700 72591900 High Signal Region +10 72690900 72709500 High Signal Region +10 73378200 73380100 High Signal Region +10 73576400 73601900 High Signal Region +10 74433300 74439500 High Signal Region +10 74655700 74672200 High Signal Region +10 74715300 74746600 High Signal Region +10 74857500 74888000 High Signal Region +10 76835100 76852400 High Signal Region +10 77950600 77979500 Low Mappability +10 78008300 78028800 Low Mappability +10 78637000 78696000 High Signal Region +10 78731500 78735800 High Signal Region +10 78803500 78823100 Low Mappability +10 79207800 79259400 High Signal Region +10 79314000 79354000 Low Mappability +10 80102300 80116000 High Signal Region +10 80928600 80996300 Low Mappability +10 81167600 81199400 High Signal Region +10 81600900 81997900 High Signal Region +10 82517500 82538800 High Signal Region +10 82571100 82575200 High Signal Region +10 82939800 82956300 High Signal Region +10 83386600 83392400 Low Mappability +10 83670800 83678100 Low Mappability +10 83768200 83792700 Low Mappability +10 84155900 84180800 Low Mappability +10 84436900 84473700 Low Mappability +10 84744500 84750100 Low Mappability +10 85413200 85419700 
Low Mappability +10 85696600 85732800 High Signal Region +10 85840200 85872500 High Signal Region +10 86561700 86565700 High Signal Region +10 88628700 88658500 Low Mappability +10 88963900 88968200 Low Mappability +10 89398700 89400100 High Signal Region +10 89949700 89964500 High Signal Region +10 90249000 90255300 High Signal Region +10 90324500 90329800 Low Mappability +10 90471200 90474200 Low Mappability +10 91252200 91256900 High Signal Region +10 91928900 91944500 High Signal Region +10 92909200 92915800 High Signal Region +10 94362500 94369300 Low Mappability +10 94591500 94610000 High Signal Region +10 94871200 94873100 High Signal Region +10 96068700 96078800 High Signal Region +10 96157200 96162600 Low Mappability +10 96192400 96199800 Low Mappability +10 97320500 97329700 High Signal Region +10 97525500 97534200 Low Mappability +10 97755000 97761200 High Signal Region +10 97896600 97920300 High Signal Region +10 98337800 98343700 High Signal Region +10 98433100 98444100 High Signal Region +10 100310500 100395900 High Signal Region +10 102667700 102669600 High Signal Region +10 102859800 102861500 High Signal Region +10 103500200 103519100 High Signal Region +10 103547000 103548600 High Signal Region +10 103569600 103575200 High Signal Region +10 103600400 103684400 High Signal Region +10 103936700 103942500 High Signal Region +10 104380700 104382300 High Signal Region +10 104493600 104499800 High Signal Region +10 104539700 104562500 Low Mappability +10 104748100 104771500 High Signal Region +10 104819400 104862500 Low Mappability +10 104966900 105001700 Low Mappability +10 105177000 105181900 Low Mappability +10 105672500 105678000 Low Mappability +10 106166900 106235700 High Signal Region +10 106382800 106403000 High Signal Region +10 106427100 106453600 High Signal Region +10 106529600 106535200 Low Mappability +10 107125500 107136900 Low Mappability +10 107551800 107560700 High Signal Region +10 107845300 107863900 High Signal Region +10 107978900 
108006700 Low Mappability +10 109212600 109216800 High Signal Region +10 109315100 109322400 Low Mappability +10 109941600 109948000 High Signal Region +10 110104900 110111300 Low Mappability +10 110504500 110516000 High Signal Region +10 110667700 110700900 Low Mappability +10 111217500 111219000 High Signal Region +10 112013700 112021700 High Signal Region +10 112053500 112058400 Low Mappability +10 112540600 112542100 High Signal Region +10 112587000 112611100 High Signal Region +10 112682400 112722100 Low Mappability +10 113722600 113729800 Low Mappability +10 114167300 114174900 High Signal Region +10 114736400 114738300 High Signal Region +10 114860600 114866900 High Signal Region +10 115641300 115643100 High Signal Region +10 116606200 116613400 Low Mappability +10 116762000 116764200 High Signal Region +10 116878000 116879900 High Signal Region +10 117476200 117491000 High Signal Region +10 118014300 118033200 High Signal Region +10 118054000 118076600 High Signal Region +10 118199900 118279700 Low Mappability +10 118910200 118917100 High Signal Region +10 118937400 118953000 Low Mappability +10 119698800 119701600 Low Mappability +10 120974800 120977500 High Signal Region +10 121136000 121143400 Low Mappability +10 121164700 121169300 Low Mappability +10 121566100 121580200 High Signal Region +10 121707800 121713500 High Signal Region +10 121762300 121769400 High Signal Region +10 122141100 122166000 High Signal Region +10 122346900 122371300 Low Mappability +10 122632400 122638000 High Signal Region +10 122832900 122839300 High Signal Region +10 123792900 123797100 High Signal Region +10 124412900 124433300 High Signal Region +10 124576300 124583500 Low Mappability +10 124605700 124611000 Low Mappability +10 124680500 124686200 Low Mappability +10 124760500 124788800 High Signal Region +10 125819500 125825700 High Signal Region +10 125869000 125871400 High Signal Region +10 126262200 126291600 Low Mappability +10 127779500 127797900 High Signal Region +10 
129189500 129217200 High Signal Region +10 129388700 129419600 Low Mappability +10 129443000 129454800 High Signal Region +10 129734500 129736400 High Signal Region +10 129925300 129940600 Low Mappability +10 130039500 130052900 High Signal Region +10 130396900 130408000 High Signal Region +10 130542000 130694900 High Signal Region +11 0 3201000 High Signal Region +11 5167600 5182600 High Signal Region +11 5361500 5365400 Low Mappability +11 5552700 5558200 Low Mappability +11 6141300 6148700 Low Mappability +11 7489400 7492300 High Signal Region +11 7752300 7774500 Low Mappability +11 8058600 8083100 Low Mappability +11 8354900 8370700 High Signal Region +11 8907200 8936100 Low Mappability +11 9707900 9715100 Low Mappability +11 9807600 9814200 Low Mappability +11 10252000 10266800 High Signal Region +11 10760200 10770800 Low Mappability +11 11287200 11295100 High Signal Region +11 12129400 12163100 High Signal Region +11 12507200 12512700 Low Mappability +11 12561900 12569100 Low Mappability +11 12750500 12802700 High Signal Region +11 12856200 12863700 High Signal Region +11 12953900 12960700 Low Mappability +11 14896500 14922100 High Signal Region +11 15227600 15235000 Low Mappability +11 16022400 16029000 High Signal Region +11 16326500 16331700 High Signal Region +11 16418200 16419600 High Signal Region +11 16567100 16573100 High Signal Region +11 17401400 17407800 High Signal Region +11 18330900 18342700 High Signal Region +11 18773800 18780100 High Signal Region +11 19566100 19570600 Low Mappability +11 19788600 19809400 Low Mappability +11 20310000 20312000 High Signal Region +11 20377900 20380400 High Signal Region +11 22322000 22340700 Low Mappability +11 22395200 22432900 Low Mappability +11 22534700 22537000 Low Mappability +11 23218500 23258100 Low Mappability +11 23522600 23552900 High Signal Region +11 24527400 24529500 Low Mappability +11 25196800 25217300 High Signal Region +11 25796400 25802200 Low Mappability +11 26898500 26900500 High Signal 
Region +11 27525200 27541400 High Signal Region +11 28097200 28104500 Low Mappability +11 29064100 29129900 Low Mappability +11 29259900 29291300 High Signal Region +11 29586000 29592400 Low Mappability +11 30511100 30535400 High Signal Region +11 31343800 31345700 Low Mappability +11 33062300 33068800 Low Mappability +11 34541000 34683100 High Signal Region +11 37482400 37484900 High Signal Region +11 40230800 40248400 High Signal Region +11 40625500 40640300 Low Mappability +11 40796600 40860600 High Signal Region +11 40887700 40915600 High Signal Region +11 41631700 41633600 High Signal Region +11 43237300 43239300 Low Mappability +11 43286400 43329800 High Signal Region +11 43454800 43462300 Low Mappability +11 43659700 43682100 Low Mappability +11 45584200 45655700 Low Mappability +11 46412300 46415000 Low Mappability +11 46492800 46514400 Low Mappability +11 47847500 47860600 High Signal Region +11 48451800 48536100 High Signal Region +11 48929800 49060400 Low Mappability +11 50445100 50469600 High Signal Region +11 51437600 51456700 High Signal Region +11 51664900 51690400 Low Mappability +11 54135500 54141600 High Signal Region +11 54576500 54583300 Low Mappability +11 55240500 55248100 Low Mappability +11 56588500 56594500 High Signal Region +11 57301700 57303600 High Signal Region +11 60558900 60699000 Low Mappability +11 61407400 61427800 Low Mappability +11 61593700 61596500 Low Mappability +11 62879300 62901500 High Signal Region +11 63467600 63475000 Low Mappability +11 64568100 64574200 High Signal Region +11 64681700 64683600 Low Mappability +11 64791900 64827100 Low Mappability +11 65451700 65458800 Low Mappability +11 66629900 66634100 High Signal Region +11 66947700 66958600 Low Mappability +11 67866400 67872800 Low Mappability +11 70155800 70162400 Low Mappability +11 71505700 71512100 Low Mappability +11 71875200 71881700 Low Mappability +11 73436900 73439100 Low Mappability +11 74128800 74136200 Low Mappability +11 74199900 74226800 Low 
Mappability +11 74301700 74319600 High Signal Region +11 74540000 74548400 Low Mappability +11 74884300 74899000 Low Mappability +11 76828100 76868600 Low Mappability +11 77255000 77257100 Low Mappability +11 79845100 79847300 Low Mappability +11 79872400 79877100 Low Mappability +11 79917300 79920800 Low Mappability +11 81545400 81552800 Low Mappability +11 82123300 82144400 High Signal Region +11 82333900 82338400 Low Mappability +11 83050300 83093600 High Signal Region +11 83126000 83172300 Low Mappability +11 85046500 85067800 High Signal Region +11 85285400 85292700 High Signal Region +11 88910900 88917600 Low Mappability +11 88965900 88971900 High Signal Region +11 89080800 89101300 High Signal Region +11 90504000 90510500 High Signal Region +11 90829400 90835000 Low Mappability +11 90901700 90908400 Low Mappability +11 90958500 91026800 Low Mappability +11 91047200 91049300 Low Mappability +11 92099000 92108200 High Signal Region +11 93409300 93428900 High Signal Region +11 94622900 94629900 Low Mappability +11 96065000 96093900 High Signal Region +11 98586900 98673900 Low Mappability +11 99712600 99717300 High Signal Region +11 100662800 100669700 Low Mappability +11 101731800 101741400 High Signal Region +11 102992300 103049900 Low Mappability +11 104239000 104242600 Low Mappability +11 106028100 106037400 High Signal Region +11 106254800 106297600 High Signal Region +11 106943500 106950100 Low Mappability +11 107188200 107200400 High Signal Region +11 107281300 107283200 High Signal Region +11 108377600 108404500 Low Mappability +11 108649800 108655400 Low Mappability +11 109010700 109024400 High Signal Region +11 109998500 110024600 Low Mappability +11 110421300 110423200 High Signal Region +11 111182400 111189800 Low Mappability +11 111215500 111234900 Low Mappability +11 111353300 111360000 Low Mappability +11 111855400 111857100 High Signal Region +11 112010600 112016400 High Signal Region +11 114456300 114462800 Low Mappability +11 115014300 
115046900 Low Mappability +11 115611200 115665700 High Signal Region +11 115754800 115766900 Low Mappability +11 116389300 116395200 Low Mappability +11 116742700 116792800 Low Mappability +11 117499800 117505100 Low Mappability +11 119299800 119340300 Low Mappability +11 120305300 120357300 Low Mappability +11 120515100 120644700 High Signal Region +11 121069800 121075100 High Signal Region +11 121203000 121207500 Low Mappability +11 121396100 121422700 Low Mappability +11 121611900 121614000 Low Mappability +11 121981400 122082500 High Signal Region +12 0 3070900 High Signal Region +12 3102800 3111000 High Signal Region +12 4110500 4112400 High Signal Region +12 4218500 4235300 High Signal Region +12 4751600 4790100 High Signal Region +12 5050300 5065400 High Signal Region +12 6514000 6525100 High Signal Region +12 6606500 6612600 High Signal Region +12 7447300 7449900 High Signal Region +12 7801900 7808600 High Signal Region +12 7925300 7939600 High Signal Region +12 8572000 8640600 High Signal Region +12 10693000 10704200 High Signal Region +12 10961300 11004600 High Signal Region +12 11187600 11194100 High Signal Region +12 11642900 11658000 High Signal Region +12 12092500 12097600 High Signal Region +12 14844600 14848200 High Signal Region +12 15026600 15032400 High Signal Region +12 15252700 15259600 High Signal Region +12 15866100 15871800 High Signal Region +12 16746900 16748800 High Signal Region +12 17116400 17129400 High Signal Region +12 17243500 17248500 High Signal Region +12 18340700 18354800 High Signal Region +12 18856500 18909700 High Signal Region +12 19312600 19413500 High Signal Region +12 19442600 19590100 High Signal Region +12 19627700 19633600 High Signal Region +12 19777500 19781600 High Signal Region +12 19879300 19901200 High Signal Region +12 19931800 19948600 High Signal Region +12 20031900 20205100 High Signal Region +12 20225600 20298300 High Signal Region +12 21914300 21916000 Low Mappability +12 21972100 21987900 High Signal 
Region +12 22021600 22680500 Low Mappability +12 22896100 22902300 High Signal Region +12 23140700 23225200 High Signal Region +12 23283500 24030600 High Signal Region +12 24295300 24365100 Low Mappability +12 24692300 24727100 High Signal Region +12 25591800 25595300 Low Mappability +12 25840400 25842100 High Signal Region +12 27556800 27592000 High Signal Region +12 28491400 28494000 High Signal Region +12 28954800 28964000 High Signal Region +12 29379500 29400800 High Signal Region +12 30965100 31016300 High Signal Region +12 32020400 32032500 Low Mappability +12 32217700 32219200 High Signal Region +12 33388100 33410100 Low Mappability +12 33748900 33771800 High Signal Region +12 33869500 33880600 High Signal Region +12 34056800 34074100 High Signal Region +12 34128700 34139700 High Signal Region +12 34623000 34629000 Low Mappability +12 35783900 35814400 High Signal Region +12 36099400 36107200 High Signal Region +12 36679100 36700200 Low Mappability +12 36952200 36957900 High Signal Region +12 38746900 38749300 High Signal Region +12 41363500 41385500 High Signal Region +12 41502600 41516100 High Signal Region +12 41860000 41870200 High Signal Region +12 42124500 42126300 High Signal Region +12 42437900 42443400 High Signal Region +12 42666800 42690800 High Signal Region +12 43335600 43349300 High Signal Region +12 43659100 43675300 High Signal Region +12 43953900 43986900 High Signal Region +12 44064500 44070600 High Signal Region +12 44765600 44795900 Low Mappability +12 45768700 45773700 High Signal Region +12 45949200 45962200 High Signal Region +12 46707000 46709200 High Signal Region +12 47027300 47039300 High Signal Region +12 47280500 47286800 High Signal Region +12 47328600 47331300 High Signal Region +12 47646800 47648300 High Signal Region +12 47833000 47834900 High Signal Region +12 47995600 47997600 High Signal Region +12 48842900 48849500 High Signal Region +12 49124800 49155700 High Signal Region +12 49245200 49272100 High Signal Region +12 
49606200 49612000 High Signal Region +12 50784600 50789900 High Signal Region +12 51486000 51492000 High Signal Region +12 52157900 52176400 High Signal Region +12 52200400 52223200 High Signal Region +12 52579600 52581200 High Signal Region +12 52730000 52735400 Low Mappability +12 52906200 52952300 High Signal Region +12 54358500 54369200 High Signal Region +12 54705400 54743600 High Signal Region +12 55079600 55267300 Low Mappability +12 56104100 56110600 Low Mappability +12 56423700 56425000 High Signal Region +12 56747800 56752200 High Signal Region +12 56911000 56914000 High Signal Region +12 58294800 58339800 High Signal Region +12 58659000 58692900 High Signal Region +12 58858800 58867600 High Signal Region +12 59034800 59039300 Low Mappability +12 59112800 59124700 High Signal Region +12 59270000 59276700 High Signal Region +12 59297800 59323200 High Signal Region +12 59601000 59605800 High Signal Region +12 60069500 60084400 High Signal Region +12 60501200 60506200 High Signal Region +12 61044200 61045300 High Signal Region +12 61289100 61293700 High Signal Region +12 61892600 61896100 High Signal Region +12 61964500 61971300 High Signal Region +12 62035300 62090200 High Signal Region +12 62959800 62999500 High Signal Region +12 63041800 63048200 High Signal Region +12 63289500 63322400 High Signal Region +12 63728400 63745100 High Signal Region +12 63838200 63840100 High Signal Region +12 65260100 65292400 High Signal Region +12 65784500 65808300 High Signal Region +12 66103800 66127200 High Signal Region +12 67058200 67060800 High Signal Region +12 67433500 67459300 High Signal Region +12 67519200 67571500 High Signal Region +12 67828900 67836600 High Signal Region +12 68696500 68711800 High Signal Region +12 68745100 68750600 Low Mappability +12 69059900 69061300 High Signal Region +12 69653100 69657800 High Signal Region +12 70641800 70668400 Low Mappability +12 71077100 71093600 Low Mappability +12 71589600 71596000 High Signal Region +12 72203000 
72209300 High Signal Region +12 72634700 72641300 High Signal Region +12 74620800 74642100 High Signal Region +12 74775800 74778200 High Signal Region +12 74803000 74805400 High Signal Region +12 74857200 74862700 High Signal Region +12 75241800 75248400 High Signal Region +12 77160700 77166000 High Signal Region +12 77383500 77411300 High Signal Region +12 77547200 77553900 High Signal Region +12 78260000 78373200 High Signal Region +12 78462400 78468500 High Signal Region +12 80417200 80449700 High Signal Region +12 80894500 80916600 High Signal Region +12 81550400 81555100 High Signal Region +12 81985400 82064000 Low Mappability +12 83093000 83094900 High Signal Region +12 85401000 85408600 High Signal Region +12 87585600 87771500 Low Mappability +12 87802800 88006400 High Signal Region +12 88119800 88169700 Low Mappability +12 88229600 88312400 High Signal Region +12 88493200 88516700 Low Mappability +12 91221400 91256000 High Signal Region +12 91439200 91475500 High Signal Region +12 92393800 92395800 Low Mappability +12 92839700 92892700 High Signal Region +12 93233800 93265600 High Signal Region +12 93564200 93590500 High Signal Region +12 93915400 93951600 High Signal Region +12 94268500 94273900 High Signal Region +12 94550200 94556100 High Signal Region +12 94694300 94713700 High Signal Region +12 95976100 96021400 High Signal Region +12 97038100 97062700 High Signal Region +12 97616600 97622400 High Signal Region +12 98173700 98176600 High Signal Region +12 99644200 99649400 High Signal Region +12 100490600 100492300 High Signal Region +12 100766900 100825300 High Signal Region +12 101427900 101453500 High Signal Region +12 101839700 101849500 High Signal Region +12 102892000 102893900 High Signal Region +12 103458100 103472900 High Signal Region +12 103776900 103813700 High Signal Region +12 105300300 105307000 High Signal Region +12 105435200 105437100 High Signal Region +12 105523800 105525700 High Signal Region +12 105628200 105631400 High Signal 
Region +12 108078800 108084400 High Signal Region +12 109901900 109909200 Low Mappability +12 110011800 110013700 High Signal Region +12 111388200 111417100 High Signal Region +12 112542200 112548700 High Signal Region +12 112775700 112830900 Low Mappability +12 113423500 113461500 High Signal Region +12 114584600 114597100 High Signal Region +12 114941500 114943900 High Signal Region +12 115725800 115748700 High Signal Region +12 116796500 116853000 High Signal Region +12 118341100 118358400 High Signal Region +12 118794900 118797400 High Signal Region +12 119013600 119018100 High Signal Region +12 119554500 119598100 High Signal Region +12 119659100 119670900 High Signal Region +12 120023800 120129000 High Signal Region +13 0 3038200 High Signal Region +13 3350900 3378900 High Signal Region +13 3404500 3438200 High Signal Region +13 3901100 3903100 Low Mappability +13 4762900 4770300 High Signal Region +13 5171400 5178400 High Signal Region +13 7601300 7604100 High Signal Region +13 7806100 7810900 High Signal Region +13 7893500 7899700 High Signal Region +13 9828900 9855900 High Signal Region +13 10174800 10181100 Low Mappability +13 12684400 13073000 High Signal Region +13 13752100 13774000 High Signal Region +13 13859900 13907900 High Signal Region +13 13981000 13983000 High Signal Region +13 14690600 14777500 Low Mappability +13 18932700 18963600 Low Mappability +13 21753300 21847200 Low Mappability +13 23620800 23647900 Low Mappability +13 25006900 25051500 High Signal Region +13 26440600 26448200 High Signal Region +13 27164600 27169100 High Signal Region +13 27875800 27888500 High Signal Region +13 29880700 29886800 Low Mappability +13 32889400 32895200 High Signal Region +13 33280200 33319400 High Signal Region +13 33350500 33491800 High Signal Region +13 35687400 35695700 High Signal Region +13 36794200 36797400 High Signal Region +13 37036700 37043900 High Signal Region +13 38633900 38659300 Low Mappability +13 42435800 42437700 High Signal Region +13 
44868600 44870900 High Signal Region +13 46316600 46324000 High Signal Region +13 50633400 50741800 High Signal Region +13 53269000 53270900 High Signal Region +13 60675600 60682600 High Signal Region +13 62291600 62346800 Low Mappability +13 62409800 62426300 High Signal Region +13 63142500 63184600 High Signal Region +13 64878100 64885300 High Signal Region +13 65352900 66254300 Low Mappability +13 71381400 71387500 High Signal Region +13 74521500 74565200 High Signal Region +13 74684000 74712200 High Signal Region +13 76472300 76501300 High Signal Region +13 77304000 77305900 High Signal Region +13 77430600 77440000 High Signal Region +13 79563400 79570800 High Signal Region +13 80276300 80279400 High Signal Region +13 80489100 80491400 High Signal Region +13 83419000 83444300 High Signal Region +13 85125800 85145900 High Signal Region +13 86149500 86190600 High Signal Region +13 86502700 86511700 High Signal Region +13 88324900 88345400 High Signal Region +13 92599100 92625400 Low Mappability +13 93279200 93294800 High Signal Region +13 93650100 93651500 High Signal Region +13 93940300 93955300 High Signal Region +13 94016300 94020800 High Signal Region +13 97189600 97206100 High Signal Region +13 98418200 98420500 Low Mappability +13 99774000 99792100 High Signal Region +13 102381900 102387900 High Signal Region +13 105123500 105128600 Low Mappability +13 107839000 107860300 Low Mappability +13 110602100 110615800 High Signal Region +13 110729600 110745400 High Signal Region +13 111187700 111189500 High Signal Region +13 111499700 111515900 Low Mappability +13 112577200 112595200 High Signal Region +13 113171200 113173100 High Signal Region +13 113272600 113310700 High Signal Region +13 115498200 115504200 High Signal Region +13 115741300 115743200 Low Mappability +13 116191900 116193900 High Signal Region +13 119188100 119230700 High Signal Region +13 119486800 119618500 High Signal Region +13 119660800 119674100 High Signal Region +13 119899200 120147600 Low 
Mappability +13 120320500 120421600 High Signal Region +14 0 4323000 High Signal Region +14 4372100 4741400 High Signal Region +14 4762800 5839200 High Signal Region +14 5959700 6479300 High Signal Region +14 6500100 6791800 High Signal Region +14 6993800 7734200 High Signal Region +14 7869900 7872200 High Signal Region +14 8005200 8018900 High Signal Region +14 8285700 8287800 High Signal Region +14 8652200 8658800 Low Mappability +14 10086500 10118400 High Signal Region +14 10178800 10198700 Low Mappability +14 11046200 11050200 High Signal Region +14 12536700 12538700 High Signal Region +14 14333600 14340200 High Signal Region +14 15460700 15467200 High Signal Region +14 16907800 16914000 High Signal Region +14 16937900 16941100 High Signal Region +14 18487900 18494100 High Signal Region +14 19251900 19255700 High Signal Region +14 19277200 19279100 High Signal Region +14 19414800 19633500 High Signal Region +14 21360400 21366100 High Signal Region +14 21878600 21884500 High Signal Region +14 22542900 22570000 High Signal Region +14 22902100 22934800 High Signal Region +14 25875200 26292200 High Signal Region +14 26946900 26948800 High Signal Region +14 29001300 29003200 Low Mappability +14 29343900 29345700 Low Mappability +14 30748800 30754700 High Signal Region +14 31919300 31923900 High Signal Region +14 32115300 32120500 Low Mappability +14 33667700 33670000 Low Mappability +14 33981000 33987500 Low Mappability +14 35275300 35281500 High Signal Region +14 35709400 35722200 High Signal Region +14 36429100 36440100 High Signal Region +14 37229100 37260800 Low Mappability +14 37619400 37635200 Low Mappability +14 38086800 38116800 High Signal Region +14 38280800 38283100 High Signal Region +14 38455100 38462200 Low Mappability +14 39580800 39607200 High Signal Region +14 39731900 39737200 High Signal Region +14 39905500 39911100 High Signal Region +14 41053200 41061900 Low Mappability +14 41326900 43109000 High Signal Region +14 43132400 43668900 High Signal 
Region +14 43803900 43850200 High Signal Region +14 44149300 44152100 High Signal Region +14 44273800 44343500 High Signal Region +14 44514200 44516000 Low Mappability +14 45726200 45753500 High Signal Region +14 45811900 45813800 High Signal Region +14 46269900 46274300 High Signal Region +14 47609500 47630400 High Signal Region +14 50538900 50606000 High Signal Region +14 50626200 50638500 High Signal Region +14 51472000 51515400 High Signal Region +14 51730700 51768100 High Signal Region +14 51814200 51837200 High Signal Region +14 52821200 53035800 Low Mappability +14 53146700 53340000 High Signal Region +14 53475200 53479600 High Signal Region +14 53515600 53530500 Low Mappability +14 56447800 56455700 High Signal Region +14 56693100 56695000 High Signal Region +14 58052600 58059800 Low Mappability +14 58462700 58464600 Low Mappability +14 58657800 58659700 High Signal Region +14 58831400 58833300 High Signal Region +14 59250300 59270000 High Signal Region +14 59488900 59490800 High Signal Region +14 59980800 59995700 High Signal Region +14 60328300 60357300 High Signal Region +14 60960000 60961900 Low Mappability +14 61580500 61586700 High Signal Region +14 61855000 61856300 High Signal Region +14 62107300 62126200 High Signal Region +14 64290100 64292500 High Signal Region +14 64463300 64478500 Low Mappability +14 65128900 65135300 Low Mappability +14 66427000 66428400 High Signal Region +14 68232600 68278200 High Signal Region +14 69161000 69163400 High Signal Region +14 70974500 70975600 High Signal Region +14 71121300 71126700 High Signal Region +14 71449700 71453700 High Signal Region +14 71783600 71804000 High Signal Region +14 72900100 72921400 High Signal Region +14 73644600 73679900 High Signal Region +14 73847900 73861200 High Signal Region +14 74039300 74066900 High Signal Region +14 74124400 74138500 High Signal Region +14 74435600 74447800 High Signal Region +14 75425300 75440500 High Signal Region +14 78162300 78168200 High Signal Region +14 
78401700 78403200 High Signal Region +14 79145300 79196400 High Signal Region +14 80148100 80150800 High Signal Region +14 80422800 80439400 High Signal Region +14 80622600 80627700 High Signal Region +14 81333200 81337500 High Signal Region +14 81495300 81519300 High Signal Region +14 82077600 82084900 High Signal Region +14 82846900 82867200 High Signal Region +14 82958700 82964100 High Signal Region +14 83292900 83306500 High Signal Region +14 83507000 83512600 High Signal Region +14 84354700 84409800 High Signal Region +14 84855100 84881600 Low Mappability +14 85177800 85203300 Low Mappability +14 85521200 85535200 Low Mappability +14 86198000 86200000 High Signal Region +14 86590500 86614400 High Signal Region +14 87354600 87373000 High Signal Region +14 87671400 87677500 High Signal Region +14 87790500 87852200 High Signal Region +14 88450200 88453600 High Signal Region +14 88478400 88480300 High Signal Region +14 90018300 90019500 High Signal Region +14 90294700 90301800 High Signal Region +14 90910200 90912200 High Signal Region +14 91415900 91418400 High Signal Region +14 91510800 91514900 High Signal Region +14 91672700 91694800 High Signal Region +14 91951700 91976400 High Signal Region +14 92032500 92040900 High Signal Region +14 92383600 92389900 High Signal Region +14 92411600 92432900 High Signal Region +14 92792600 92798500 High Signal Region +14 92921100 92953200 High Signal Region +14 93017600 93020400 High Signal Region +14 93355600 93360200 High Signal Region +14 94319700 94327000 High Signal Region +14 95561600 95567600 High Signal Region +14 96048000 96054300 High Signal Region +14 96093600 96116100 High Signal Region +14 97323800 97326500 High Signal Region +14 98226800 98237000 High Signal Region +14 98731900 98757200 High Signal Region +14 99207100 99208200 High Signal Region +14 99649700 99655500 High Signal Region +14 101076400 101098900 Low Mappability +14 101404800 101414800 High Signal Region +14 102548900 102565300 High Signal Region 
+14 102755800 102762600 High Signal Region +14 103300300 103302400 High Signal Region +14 103858600 103872900 High Signal Region +14 103999500 104025500 High Signal Region +14 104104800 104128100 Low Mappability +14 104704500 104716800 High Signal Region +14 105758200 105764900 Low Mappability +14 105911400 105978300 High Signal Region +14 106002700 106005700 Low Mappability +14 106301000 106352700 High Signal Region +14 106444800 106483100 Low Mappability +14 106722600 106728700 High Signal Region +14 106895300 106897000 Low Mappability +14 108115100 108174900 Low Mappability +14 108283900 108303500 High Signal Region +14 109675300 109681200 High Signal Region +14 109911500 109917800 High Signal Region +14 110057000 110108200 Low Mappability +14 110356200 110373800 High Signal Region +14 110492000 110495700 Low Mappability +14 110906100 110908200 High Signal Region +14 110992800 110994500 High Signal Region +14 111903200 111909800 High Signal Region +14 112074600 112092300 High Signal Region +14 112210500 112215800 High Signal Region +14 112285400 112291900 High Signal Region +14 112332800 112340000 Low Mappability +14 112517900 112519900 High Signal Region +14 112627800 112663100 Low Mappability +14 114505900 114512900 High Signal Region +14 114822000 114823900 Low Mappability +14 115109700 115117400 High Signal Region +14 115272500 115280200 High Signal Region +14 115379200 115385600 High Signal Region +14 115911100 115912900 High Signal Region +14 115958100 115965000 High Signal Region +14 116402700 116407700 High Signal Region +14 116817000 116822900 High Signal Region +14 117285800 117292800 High Signal Region +14 118144700 118168500 Low Mappability +14 119286000 119287900 High Signal Region +14 120180000 120202600 High Signal Region +14 120742600 120749700 High Signal Region +14 120777500 120802300 High Signal Region +14 121007000 121010900 Low Mappability +14 122502500 122534800 High Signal Region +14 123349400 123351300 Low Mappability +14 123412000 
123452600 High Signal Region +14 123674600 123695600 High Signal Region +14 124334000 124340200 High Signal Region +14 124415600 124436400 High Signal Region +14 124491600 124497700 High Signal Region +14 124739500 124902200 High Signal Region +15 0 3125600 High Signal Region +15 3150900 3170400 High Signal Region +15 3313900 3336200 High Signal Region +15 3360500 3363700 High Signal Region +15 3538600 3551000 High Signal Region +15 3712200 3732700 High Signal Region +15 3793500 3823000 High Signal Region +15 4155900 4160900 High Signal Region +15 4278500 4284100 High Signal Region +15 4852000 4894600 Low Mappability +15 4980200 4987600 Low Mappability +15 5369000 5385500 High Signal Region +15 5681700 5690400 High Signal Region +15 5910000 5911700 High Signal Region +15 5993500 5995400 High Signal Region +15 6074100 6087100 Low Mappability +15 6192800 6200000 Low Mappability +15 6316000 6317900 High Signal Region +15 6510500 6539100 High Signal Region +15 6674800 6701400 High Signal Region +15 6801200 6808300 High Signal Region +15 7539900 7548600 Low Mappability +15 7800800 7803000 Low Mappability +15 7849400 7855600 High Signal Region +15 7904400 7929500 Low Mappability +15 8517500 8520400 High Signal Region +15 8548000 8576100 Low Mappability +15 8800200 8808700 High Signal Region +15 8985200 9054800 High Signal Region +15 9219000 9224900 Low Mappability +15 9293200 9333300 High Signal Region +15 9379300 9409100 High Signal Region +15 9437100 9443600 High Signal Region +15 9536500 9554100 High Signal Region +15 9992700 10045700 High Signal Region +15 10579600 10591500 Low Mappability +15 10753400 10810200 High Signal Region +15 10835200 10854700 Low Mappability +15 11921000 11933300 High Signal Region +15 12055800 12063200 Low Mappability +15 12526800 12531900 Low Mappability +15 12872000 12873900 High Signal Region +15 12932300 12934200 Low Mappability +15 13919500 13948300 High Signal Region +15 14414600 14439100 Low Mappability +15 14722200 14732900 High 
Signal Region +15 14873900 14902400 High Signal Region +15 15043600 15059700 High Signal Region +15 15525500 15551900 High Signal Region +15 16168200 16186400 High Signal Region +15 16303700 16309500 High Signal Region +15 16716400 16717500 High Signal Region +15 16901300 16907100 High Signal Region +15 16939800 16955100 Low Mappability +15 17139000 17169100 High Signal Region +15 17562100 17581400 High Signal Region +15 18314600 18325000 High Signal Region +15 19038400 19063800 Low Mappability +15 19402600 19405500 High Signal Region +15 19448100 19453900 High Signal Region +15 19557200 19578000 High Signal Region +15 19626800 19631800 High Signal Region +15 19678400 19685800 High Signal Region +15 20063000 20067500 High Signal Region +15 20155100 20170700 Low Mappability +15 20474900 20510100 High Signal Region +15 20531400 20537100 High Signal Region +15 20821500 20826700 High Signal Region +15 20972700 20978300 Low Mappability +15 21114000 21115900 High Signal Region +15 21262100 21268500 Low Mappability +15 21423200 21487200 High Signal Region +15 21655500 21657500 High Signal Region +15 21815500 21820800 High Signal Region +15 21853700 21892400 High Signal Region +15 22268700 22293500 High Signal Region +15 22751400 22756700 Low Mappability +15 22799300 22809700 Low Mappability +15 23240200 23255600 Low Mappability +15 23465300 23467800 High Signal Region +15 23886000 23887900 Low Mappability +15 23926900 23939700 High Signal Region +15 24309300 24325700 Low Mappability +15 24761100 24766700 High Signal Region +15 24801600 24837300 High Signal Region +15 24880900 24898600 Low Mappability +15 25051400 25065200 Low Mappability +15 26112700 26118900 High Signal Region +15 26905000 26919300 Low Mappability +15 27286100 27326800 High Signal Region +15 27384100 27390300 Low Mappability +15 27638200 27640500 High Signal Region +15 28564400 28578800 High Signal Region +15 29285200 29291500 Low Mappability +15 29347600 29395600 High Signal Region +15 29463900 29470200 
High Signal Region +15 29969800 30001400 High Signal Region +15 30117700 30126200 High Signal Region +15 30441400 30448200 Low Mappability +15 30747900 30755000 High Signal Region +15 30996700 31016300 High Signal Region +15 31066700 31083700 High Signal Region +15 32783900 32806700 High Signal Region +15 32832800 32880300 High Signal Region +15 33138700 33140800 Low Mappability +15 33308700 33310800 Low Mappability +15 33444200 33454100 High Signal Region +15 33710200 33745700 High Signal Region +15 33781400 33849400 High Signal Region +15 33869800 33884700 High Signal Region +15 34494500 34502100 Low Mappability +15 34763100 34769400 High Signal Region +15 34987600 34992800 High Signal Region +15 35013200 35015400 High Signal Region +15 35366800 35406000 High Signal Region +15 36715200 36737400 High Signal Region +15 36966700 36997400 Low Mappability +15 37072900 37150800 Low Mappability +15 38462300 38484300 Low Mappability +15 39172900 39178300 Low Mappability +15 39335600 39348800 Low Mappability +15 39496100 39499100 High Signal Region +15 39695600 39718600 Low Mappability +15 40049600 40056000 High Signal Region +15 40086800 40101400 High Signal Region +15 41531400 41533200 High Signal Region +15 41890400 41896900 Low Mappability +15 42354900 42361100 High Signal Region +15 42925300 42942800 High Signal Region +15 43287300 43346300 High Signal Region +15 44469100 44476400 High Signal Region +15 44649000 44659600 Low Mappability +15 44723200 44728200 Low Mappability +15 44769700 44796100 High Signal Region +15 45005100 45009300 High Signal Region +15 45194600 45197100 High Signal Region +15 45577500 45590900 High Signal Region +15 45635600 45650500 High Signal Region +15 45774400 45779700 High Signal Region +15 45890700 45932500 High Signal Region +15 46255700 46257800 Low Mappability +15 46355600 46368400 High Signal Region +15 46502200 46506800 Low Mappability +15 46562500 46566200 Low Mappability +15 47232800 47256000 High Signal Region +15 47356500 
47363700 Low Mappability +15 47539000 47555300 High Signal Region +15 48666900 48671000 High Signal Region +15 49283300 49299700 High Signal Region +15 49322600 49327300 Low Mappability +15 50426100 50442800 High Signal Region +15 50557700 50642600 High Signal Region +15 51113200 51117800 High Signal Region +15 51531900 51533900 Low Mappability +15 52125800 52131200 High Signal Region +15 52329800 52353100 High Signal Region +15 53039200 53044200 Low Mappability +15 53831000 53834900 High Signal Region +15 53870700 53872700 High Signal Region +15 53918300 53929500 High Signal Region +15 54180700 54211500 Low Mappability +15 56032900 56038200 High Signal Region +15 56175800 56183100 Low Mappability +15 56363800 56367900 High Signal Region +15 56400500 56402200 High Signal Region +15 56941600 56993500 High Signal Region +15 57279500 57285000 High Signal Region +15 57412200 57433600 High Signal Region +15 57889500 57913700 Low Mappability +15 58437200 58441100 High Signal Region +15 59421400 59435400 Low Mappability +15 59850100 59875200 Low Mappability +15 60153100 60203900 High Signal Region +15 60592000 60594300 Low Mappability +15 60931800 60986500 High Signal Region +15 61148600 61150700 High Signal Region +15 61903100 61915500 High Signal Region +15 62367600 62370100 High Signal Region +15 62553200 62555200 High Signal Region +15 62686500 62693700 High Signal Region +15 63329400 63346600 Low Mappability +15 63626000 63627900 High Signal Region +15 63791700 63796000 High Signal Region +15 63837600 63922800 High Signal Region +15 64591700 64598200 Low Mappability +15 64673500 64681900 High Signal Region +15 65115600 65123500 Low Mappability +15 65598500 65604500 High Signal Region +15 65666600 65673800 High Signal Region +15 65714400 65753500 High Signal Region +15 66045100 66065700 High Signal Region +15 66208300 66210200 High Signal Region +15 68136300 68137800 Low Mappability +15 68980000 68986500 High Signal Region +15 69122300 69164500 High Signal Region +15 
69264900 69268800 High Signal Region +15 69390300 69409400 High Signal Region +15 69642000 69646000 High Signal Region +15 70083000 70088800 High Signal Region +15 70609300 70611100 High Signal Region +15 70896600 70914000 High Signal Region +15 71104600 71112200 High Signal Region +15 71206600 71237500 Low Mappability +15 73060200 73087900 Low Mappability +15 73373200 73378200 Low Mappability +15 73873000 73880400 Low Mappability +15 74360700 74368000 Low Mappability +15 74814300 74826700 Low Mappability +15 74992000 75104600 High Signal Region +15 75205600 75212800 Low Mappability +15 75298000 75299500 High Signal Region +15 75437000 75440500 High Signal Region +15 75523600 75529700 High Signal Region +15 76102000 76106500 High Signal Region +15 76559900 76577900 Low Mappability +15 76964600 76971400 Low Mappability +15 77336200 77439100 High Signal Region +15 77718300 77735600 Low Mappability +15 77895000 77934800 Low Mappability +15 79685000 79775700 Low Mappability +15 79869700 79892600 Low Mappability +15 79974400 79978400 Low Mappability +15 80232400 80267100 High Signal Region +15 81145400 81152000 Low Mappability +15 81492300 81523600 High Signal Region +15 82338000 82368000 Low Mappability +15 82590700 82608900 Low Mappability +15 82675500 82677200 High Signal Region +15 83172100 83202200 Low Mappability +15 84746600 84753000 Low Mappability +15 85176800 85196600 Low Mappability +15 85541200 85543100 High Signal Region +15 86193800 86196100 High Signal Region +15 86312100 86326400 Low Mappability +15 87293900 87301200 Low Mappability +15 87967000 87969000 High Signal Region +15 88779400 88783900 Low Mappability +15 88974800 88976800 High Signal Region +15 89597900 89621300 High Signal Region +15 89808500 89809700 High Signal Region +15 89943000 89982000 Low Mappability +15 90636400 90643600 Low Mappability +15 91115900 91134800 Low Mappability +15 91419400 91422200 High Signal Region +15 91720600 91723200 Low Mappability +15 91905900 91911200 High Signal 
Region +15 92470100 92475100 Low Mappability +15 92613700 92618300 Low Mappability +15 92722600 92730100 Low Mappability +15 92796100 92820000 Low Mappability +15 93044100 93062000 High Signal Region +15 93467800 93469500 Low Mappability +15 93867100 93873600 High Signal Region +15 94088400 94124100 High Signal Region +15 94150500 94156800 High Signal Region +15 94373000 94379600 High Signal Region +15 95087600 95092100 High Signal Region +15 95306000 95312300 High Signal Region +15 95729500 95756400 High Signal Region +15 96551700 96559500 Low Mappability +15 96977900 96983600 Low Mappability +15 97082100 97084300 High Signal Region +15 97472900 97487400 Low Mappability +15 99168800 99171900 High Signal Region +15 99552100 99553900 Low Mappability +15 100331500 100339800 Low Mappability +15 100360000 100379700 Low Mappability +15 100541700 100617400 Low Mappability +15 101655700 101662100 High Signal Region +15 102596800 102603200 High Signal Region +15 103271900 103277100 High Signal Region +15 103406700 103418500 High Signal Region +15 103606700 103611400 High Signal Region +15 103814500 104043600 High Signal Region +16 0 3427800 High Signal Region +16 3450300 3519700 Low Mappability +16 4300400 4366800 Low Mappability +16 4585000 4591300 High Signal Region +16 5708200 5710200 High Signal Region +16 7460800 7463600 High Signal Region +16 7937100 7958400 Low Mappability +16 8256700 8286200 High Signal Region +16 9577100 9579600 Low Mappability +16 10631200 10633200 Low Mappability +16 10974100 11013900 High Signal Region +16 11134600 11145200 High Signal Region +16 11248000 11249900 Low Mappability +16 11679900 11687500 Low Mappability +16 12327300 12345900 Low Mappability +16 12417900 12423400 High Signal Region +16 12829200 12831000 High Signal Region +16 12976200 12981700 Low Mappability +16 13087700 13107000 Low Mappability +16 13903200 13925900 Low Mappability +16 14316200 14341200 Low Mappability +16 15502700 15510100 Low Mappability +16 15741400 15757700 
Low Mappability +16 17199900 17236000 High Signal Region +16 17751400 17761300 High Signal Region +16 17910400 17955500 High Signal Region +16 18532200 18534200 High Signal Region +16 18957500 18979200 High Signal Region +16 19334200 19375100 High Signal Region +16 19581200 19602400 Low Mappability +16 19711900 19748700 High Signal Region +16 19928600 19946300 Low Mappability +16 22923300 22929100 High Signal Region +16 26419300 26421200 High Signal Region +16 26808500 26814800 High Signal Region +16 27071900 27087600 High Signal Region +16 27212200 27218300 High Signal Region +16 28170600 28197500 High Signal Region +16 30828600 30830500 High Signal Region +16 31223800 31234300 Low Mappability +16 31339100 31358900 High Signal Region +16 31818700 31825200 Low Mappability +16 32147700 32153500 Low Mappability +16 32489700 32520100 Low Mappability +16 32579100 32598800 Low Mappability +16 33847200 33852600 Low Mappability +16 34581100 34591200 Low Mappability +16 34742000 34744000 High Signal Region +16 35980600 35983300 High Signal Region +16 36764900 36770500 Low Mappability +16 38714200 38721600 Low Mappability +16 39563700 39568200 High Signal Region +16 41270700 41273100 High Signal Region +16 42657300 42661200 High Signal Region +16 42773100 42779900 High Signal Region +16 42931600 42950000 High Signal Region +16 43764000 43771600 Low Mappability +16 44040400 44063900 Low Mappability +16 44709800 44726400 Low Mappability +16 44920200 44950700 Low Mappability +16 45292600 45293900 High Signal Region +16 45352100 45354000 High Signal Region +16 46364600 46369100 High Signal Region +16 47099100 47147300 High Signal Region +16 47552300 47564100 Low Mappability +16 48579900 48581300 Low Mappability +16 49024900 49031400 Low Mappability +16 49148400 49150300 Low Mappability +16 49447700 49489300 High Signal Region +16 50084900 50101400 Low Mappability +16 50909100 50926800 Low Mappability +16 51087100 51094300 Low Mappability +16 51945800 51980200 High Signal Region 
+16 53412000 53428900 High Signal Region +16 53571500 53595400 Low Mappability +16 54298300 54307600 Low Mappability +16 54861600 54869000 High Signal Region +16 54959000 54965200 High Signal Region +16 55647800 55681600 Low Mappability +16 56038100 56065100 Low Mappability +16 56988400 57008400 High Signal Region +16 57085500 57095800 High Signal Region +16 57390200 57392600 High Signal Region +16 57792800 57811700 Low Mappability +16 58310800 58343000 High Signal Region +16 58632300 58670400 Low Mappability +16 59121800 59129100 Low Mappability +16 59310100 59378100 High Signal Region +16 60921200 60970900 High Signal Region +16 61312500 61325200 Low Mappability +16 62564300 62599200 High Signal Region +16 62875900 62880400 Low Mappability +16 63114300 63151200 High Signal Region +16 63301300 63313600 High Signal Region +16 64384600 64425600 High Signal Region +16 65176900 65181400 Low Mappability +16 66229300 66247600 Low Mappability +16 67328200 67334700 High Signal Region +16 68272300 68274300 High Signal Region +16 70542300 70558300 Low Mappability +16 70633900 70639700 Low Mappability +16 70892400 70898400 High Signal Region +16 70976900 70982900 High Signal Region +16 71687000 71691500 Low Mappability +16 72019300 72023900 Low Mappability +16 72056200 72062100 High Signal Region +16 72724800 72730900 Low Mappability +16 73656700 73688600 High Signal Region +16 74771800 74781500 Low Mappability +16 76057000 76065000 Low Mappability +16 76487100 76519600 High Signal Region +16 76988700 76991600 High Signal Region +16 77116900 77121900 Low Mappability +16 78977100 79013600 High Signal Region +16 79368600 79376000 Low Mappability +16 79782000 79786700 High Signal Region +16 79943000 79948600 Low Mappability +16 80269400 80309700 Low Mappability +16 81071700 81079200 Low Mappability +16 81779900 81782000 High Signal Region +16 81859300 81865600 High Signal Region +16 82079700 82099600 High Signal Region +16 82237800 82243200 Low Mappability +16 82828200 82845600 
High Signal Region +16 83077300 83081800 High Signal Region +16 83360600 83368000 Low Mappability +16 84260500 84283300 High Signal Region +16 84380600 84407600 High Signal Region +16 84440100 84446000 High Signal Region +16 85671600 85673000 High Signal Region +16 85713500 85720100 High Signal Region +16 86333000 86354300 High Signal Region +16 86539500 86570300 High Signal Region +16 86819800 86822100 High Signal Region +16 87055400 87060300 High Signal Region +16 87287400 87302500 Low Mappability +16 87372300 87391700 Low Mappability +16 88022900 88029900 High Signal Region +16 88790600 88797900 Low Mappability +16 88957900 88967800 High Signal Region +16 89145200 89196100 Low Mappability +16 89431800 89448400 Low Mappability +16 89636000 89642900 High Signal Region +16 89877500 89879700 High Signal Region +16 90056200 90072300 Low Mappability +16 90341200 90350100 Low Mappability +16 91533700 91551800 High Signal Region +16 92254500 92259400 Low Mappability +16 93581500 93622800 High Signal Region +16 93685800 93711200 High Signal Region +16 93785700 93790200 High Signal Region +16 93991400 93997900 High Signal Region +16 94258100 94282000 Low Mappability +16 95782000 95788900 High Signal Region +16 95991000 96010400 Low Mappability +16 97996400 98207700 High Signal Region +17 0 3039300 High Signal Region +17 3075400 3085400 High Signal Region +17 3378900 3380800 High Signal Region +17 5863900 5885100 High Signal Region +17 6219100 6717500 High Signal Region +17 6877300 7037900 High Signal Region +17 7302300 7430200 High Signal Region +17 7615300 7617200 High Signal Region +17 7950200 8052300 High Signal Region +17 11097900 11105100 High Signal Region +17 13018500 13469100 High Signal Region +17 13492200 13555800 High Signal Region +17 13584800 13656200 High Signal Region +17 14961200 15054300 Low Mappability +17 20859400 20865200 High Signal Region +17 23426600 23537000 High Signal Region +17 23730600 23732500 High Signal Region +17 24095300 24097300 High 
Signal Region +17 29101000 29109600 High Signal Region +17 31569500 31571400 High Signal Region +17 35367400 35480300 Low Mappability +17 36230300 36232500 High Signal Region +17 38498200 38500800 High Signal Region +17 39842000 39849700 High Signal Region +17 40422500 40427000 High Signal Region +17 50569500 50571400 High Signal Region +17 53034300 53056100 High Signal Region +17 53151500 53153500 High Signal Region +17 53807400 53820300 High Signal Region +17 54112300 54134200 High Signal Region +17 57368400 57399900 High Signal Region +17 62736600 62738500 High Signal Region +17 66798500 66800400 High Signal Region +17 67740400 67742500 High Signal Region +17 70962200 70964800 High Signal Region +17 82975900 82991600 High Signal Region +17 84458800 84464500 Low Mappability +17 85264100 85266000 High Signal Region +17 93017000 93047400 High Signal Region +17 93623500 93646700 High Signal Region +17 94886200 94987200 High Signal Region +18 0 3063700 High Signal Region +18 3085500 3142600 High Signal Region +18 3568100 3570100 Low Mappability +18 3619800 3652100 Low Mappability +18 3779700 3785600 High Signal Region +18 3815100 3819300 High Signal Region +18 3873200 3889000 High Signal Region +18 4194700 4199900 High Signal Region +18 4456700 4504600 High Signal Region +18 4658000 4664400 Low Mappability +18 4695200 4701800 Low Mappability +18 5499400 5502000 Low Mappability +18 5895900 5900400 Low Mappability +18 6043700 6046600 Low Mappability +18 6343100 6376400 Low Mappability +18 6663800 6669200 High Signal Region +18 6796200 6803600 Low Mappability +18 6853600 6868500 Low Mappability +18 7032800 7035500 High Signal Region +18 7527500 7534800 High Signal Region +18 7782300 7798400 High Signal Region +18 7998000 8018800 Low Mappability +18 8164900 8183000 High Signal Region +18 8243000 8271800 High Signal Region +18 8292000 8294000 Low Mappability +18 8721900 8747000 High Signal Region +18 9095200 9127300 High Signal Region +18 9248500 9269200 Low Mappability 
+18 9420000 9426100 High Signal Region +18 9890700 9915900 High Signal Region +18 11168900 11192100 High Signal Region +18 11247700 11293200 High Signal Region +18 11626000 11648000 Low Mappability +18 12945100 12956300 High Signal Region +18 13030000 13041900 High Signal Region +18 13161400 13180500 High Signal Region +18 13241200 13251100 Low Mappability +18 13296400 13300000 High Signal Region +18 13513200 13517200 High Signal Region +18 14732900 14739600 Low Mappability +18 15225500 15232800 High Signal Region +18 15366900 15382100 High Signal Region +18 15695100 15737600 High Signal Region +18 16283100 16288900 High Signal Region +18 16988600 17013600 Low Mappability +18 17116100 17119600 High Signal Region +18 17346100 17352400 High Signal Region +18 17425100 17480600 High Signal Region +18 17513300 17517900 High Signal Region +18 17541300 17559000 High Signal Region +18 17593300 17598500 High Signal Region +18 17938300 17951600 Low Mappability +18 18816600 18823800 High Signal Region +18 18916300 18917900 High Signal Region +18 18976900 18992400 High Signal Region +18 19240600 19289100 High Signal Region +18 19345800 19352600 Low Mappability +18 19430400 19448100 High Signal Region +18 19679600 19681600 Low Mappability +18 19812100 19836500 High Signal Region +18 20352500 20369800 High Signal Region +18 20896200 20910000 Low Mappability +18 21261800 21268900 Low Mappability +18 21528200 21541600 High Signal Region +18 21943200 21945200 Low Mappability +18 22297400 22304000 High Signal Region +18 23186200 23215300 High Signal Region +18 25045100 25047300 High Signal Region +18 25253000 25259500 High Signal Region +18 25905600 25928600 High Signal Region +18 26003000 26008100 Low Mappability +18 26829800 26837100 Low Mappability +18 26998200 27005600 Low Mappability +18 27062000 27068200 High Signal Region +18 28151300 28167300 High Signal Region +18 28441700 28446600 Low Mappability +18 28482900 28484900 High Signal Region +18 28814100 28816900 High Signal 
Region +18 28960100 28966000 Low Mappability +18 29014700 29022000 High Signal Region +18 29557800 29559800 High Signal Region +18 29713000 29719200 High Signal Region +18 31281100 31294300 High Signal Region +18 32758400 32793400 High Signal Region +18 33212800 33221500 Low Mappability +18 33275100 33331000 High Signal Region +18 33697400 33722600 Low Mappability +18 34083600 34087300 Low Mappability +18 34397100 34409800 Low Mappability +18 35318500 35320400 Low Mappability +18 36454200 36494600 Low Mappability +18 36981500 36988700 Low Mappability +18 37031800 37045800 High Signal Region +18 37364600 37398900 Low Mappability +18 37545500 37645000 High Signal Region +18 39598600 39604900 High Signal Region +18 40306300 40309300 High Signal Region +18 40708500 40713600 Low Mappability +18 41381600 41387500 High Signal Region +18 41465300 41471500 High Signal Region +18 41820100 41826100 High Signal Region +18 41960600 41966100 High Signal Region +18 42556800 42559800 High Signal Region +18 42913000 42914900 High Signal Region +18 43335500 43337900 High Signal Region +18 43889500 43900400 High Signal Region +18 44033600 44050200 High Signal Region +18 44228000 44263100 High Signal Region +18 44291600 44295600 High Signal Region +18 44361600 44380500 High Signal Region +18 44873100 44875100 Low Mappability +18 44981000 45032700 High Signal Region +18 45131400 45133400 High Signal Region +18 45291700 45314300 Low Mappability +18 45357300 45364700 Low Mappability +18 45392200 45397700 High Signal Region +18 45506800 45513400 High Signal Region +18 45998300 46038000 Low Mappability +18 46082000 46101400 High Signal Region +18 46439100 46444100 Low Mappability +18 46791400 46793400 Low Mappability +18 47648600 47654100 Low Mappability +18 47769900 47783100 Low Mappability +18 48009500 48011400 High Signal Region +18 48208100 48220300 High Signal Region +18 48705800 48713100 Low Mappability +18 48831300 48836100 High Signal Region +18 49387700 49397800 High Signal Region 
+18 49669200 49695600 High Signal Region +18 50253400 50268700 High Signal Region +18 50632100 50700200 Low Mappability +18 51072000 51077600 Low Mappability +18 51658600 51698300 High Signal Region +18 52020200 52059300 High Signal Region +18 52256200 52262200 High Signal Region +18 52378900 52395000 Low Mappability +18 52876200 52883200 High Signal Region +18 53828800 53839900 Low Mappability +18 53869300 53876600 Low Mappability +18 54023900 54030000 High Signal Region +18 54288100 54335900 Low Mappability +18 54698000 54707800 High Signal Region +18 55222400 55224400 Low Mappability +18 55311000 55321100 Low Mappability +18 55414800 55436200 Low Mappability +18 55899800 55901700 High Signal Region +18 55938500 55954100 High Signal Region +18 56273000 56276900 High Signal Region +18 56302600 56304500 High Signal Region +18 56341200 56346000 High Signal Region +18 56826900 56830200 Low Mappability +18 57560400 57562500 Low Mappability +18 58992700 58999300 Low Mappability +18 59496300 59511000 High Signal Region +18 59929900 59955000 High Signal Region +18 60042400 60044400 Low Mappability +18 60206100 60238100 High Signal Region +18 60525200 60533800 Low Mappability +18 62237400 62247700 High Signal Region +18 62273700 62292800 Low Mappability +18 62752700 62755100 High Signal Region +18 64131300 64132600 High Signal Region +18 64448400 64454900 Low Mappability +18 65103100 65105000 High Signal Region +18 65385700 65405100 Low Mappability +18 65492400 65494700 Low Mappability +18 65716300 65719400 Low Mappability +18 66543200 66548900 High Signal Region +18 66750000 66759900 Low Mappability +18 66881200 66887200 High Signal Region +18 68381300 68387800 High Signal Region +18 68412100 68425800 Low Mappability +18 68461300 68489000 High Signal Region +18 68691100 68693200 High Signal Region +18 69759300 69761300 Low Mappability +18 70489500 70515400 High Signal Region +18 70775600 70791900 High Signal Region +18 70842100 70849200 Low Mappability +18 71032500 
71038800 High Signal Region +18 71139200 71145200 High Signal Region +18 71208200 71211300 Low Mappability +18 71267000 71273300 Low Mappability +18 71630400 71641100 Low Mappability +18 72753900 72794900 High Signal Region +18 72987900 72991000 High Signal Region +18 73259600 73264100 Low Mappability +18 74553100 74566400 High Signal Region +18 74745500 74758500 Low Mappability +18 74880300 74882000 High Signal Region +18 76177900 76184300 Low Mappability +18 76579700 76586300 Low Mappability +18 77264400 77271000 High Signal Region +18 78197300 78199300 High Signal Region +18 78407800 78428500 Low Mappability +18 78861400 78867900 High Signal Region +18 80021700 80028900 Low Mappability +18 80307500 80309600 Low Mappability +18 80455500 80518400 Low Mappability +18 81299700 81306200 Low Mappability +18 82052100 82058200 High Signal Region +18 82160100 82227800 High Signal Region +18 82319500 82339900 High Signal Region +18 82692900 82717900 Low Mappability +18 83171100 83178400 Low Mappability +18 83700500 83707900 Low Mappability +18 84828700 84833000 High Signal Region +18 85035000 85080600 High Signal Region +18 85105800 85112200 High Signal Region +18 85169900 85175900 High Signal Region +18 85377800 85382800 Low Mappability +18 85697000 85699200 High Signal Region +18 85783600 85789900 High Signal Region +18 86508300 86510200 High Signal Region +18 86560600 86586100 High Signal Region +18 86828500 86849500 High Signal Region +18 87006300 87009800 High Signal Region +18 87141500 87161200 High Signal Region +18 87568300 87574300 High Signal Region +18 88149300 88155400 High Signal Region +18 89030400 89036400 High Signal Region +18 89615900 89650500 Low Mappability +18 89983200 89989700 Low Mappability +18 90055500 90092500 High Signal Region +18 90113400 90125400 Low Mappability +18 90464100 90501300 High Signal Region +18 90601200 90702600 High Signal Region +19 0 3140800 High Signal Region +19 3161400 3248600 High Signal Region +19 4061100 4066400 Low 
Mappability +19 6581000 6594300 High Signal Region +19 7713600 7774800 High Signal Region +19 7810700 7843900 Low Mappability +19 8203200 8285500 Low Mappability +19 9250500 9357700 High Signal Region +19 9502000 9565000 Low Mappability +19 9745800 9803300 High Signal Region +19 9823500 9837700 High Signal Region +19 10507900 10510300 High Signal Region +19 10954500 10960300 Low Mappability +19 11199700 11239800 High Signal Region +19 12447200 12454600 Low Mappability +19 13203500 13216400 High Signal Region +19 13330600 13357100 High Signal Region +19 13685000 13693300 High Signal Region +19 13760500 13777200 High Signal Region +19 15256700 15263000 High Signal Region +19 15433400 15438100 High Signal Region +19 15711800 15719800 High Signal Region +19 15839200 15846600 High Signal Region +19 15956500 15958500 Low Mappability +19 16670500 16673100 High Signal Region +19 18358000 18364200 High Signal Region +19 18532700 18535600 High Signal Region +19 19132200 19161200 High Signal Region +19 19509000 19514900 High Signal Region +19 19870300 19876900 Low Mappability +19 20080700 20081800 High Signal Region +19 20140700 20144100 Low Mappability +19 20288200 20297900 Low Mappability +19 20455400 20462700 Low Mappability +19 20839700 20843900 Low Mappability +19 21218200 21243800 High Signal Region +19 21532400 21534400 Low Mappability +19 22644100 22651700 High Signal Region +19 22722400 22728400 Low Mappability +19 23356500 23358400 High Signal Region +19 23739200 23754000 High Signal Region +19 24040300 24042300 Low Mappability +19 24911900 24919200 High Signal Region +19 25741800 25770100 High Signal Region +19 25917500 25920000 High Signal Region +19 27751400 27758100 High Signal Region +19 28149600 28156600 High Signal Region +19 30907400 30908700 High Signal Region +19 30963600 30968000 Low Mappability +19 31722800 31735800 High Signal Region +19 32203200 32211600 Low Mappability +19 32441800 32449100 Low Mappability +19 32822000 32824000 Low Mappability +19 
33439100 33446100 Low Mappability +19 33864200 33877900 High Signal Region +19 33949100 33958200 High Signal Region +19 34131200 34161200 Low Mappability +19 34581900 34613000 High Signal Region +19 35076400 35079800 High Signal Region +19 35650200 35673500 High Signal Region +19 36702500 36723400 High Signal Region +19 37298800 37301800 Low Mappability +19 37617300 37624600 Low Mappability +19 38490200 38495300 Low Mappability +19 39078100 39079500 High Signal Region +19 39106700 39156300 High Signal Region +19 39244700 39270400 High Signal Region +19 39331700 39424100 High Signal Region +19 39599900 39607200 Low Mappability +19 39658700 39695100 Low Mappability +19 40020400 40026800 Low Mappability +19 40094100 40153300 High Signal Region +19 40328500 40330000 Low Mappability +19 41142700 41150000 Low Mappability +19 41424200 41473100 Low Mappability +19 42346000 42350500 Low Mappability +19 42647600 42649700 Low Mappability +19 43118800 43124600 High Signal Region +19 43236000 43238000 Low Mappability +19 43321500 43323700 High Signal Region +19 44145700 44171700 Low Mappability +19 44218500 44225000 Low Mappability +19 44862100 44864300 High Signal Region +19 45004900 45096500 Low Mappability +19 45182300 45190200 High Signal Region +19 45649000 45661500 High Signal Region +19 45699400 45706300 Low Mappability +19 47590300 47602700 Low Mappability +19 48484600 48496700 High Signal Region +19 48743800 48746300 High Signal Region +19 50107900 50114400 Low Mappability +19 50309700 50311600 High Signal Region +19 50754100 50755900 Low Mappability +19 50828900 50835600 High Signal Region +19 51649700 51655800 High Signal Region +19 51949000 51955700 Low Mappability +19 52303100 52309700 Low Mappability +19 52927900 52932300 Low Mappability +19 52967800 52991100 Low Mappability +19 53522200 53527100 High Signal Region +19 53767900 53777800 High Signal Region +19 54235200 54236600 High Signal Region +19 54884700 54936800 High Signal Region +19 54994900 55001700 Low 
Mappability +19 55976700 55984000 Low Mappability +19 56248700 56259000 Low Mappability +19 56846600 56849100 High Signal Region +19 57514200 57520700 Low Mappability +19 57634000 57635600 Low Mappability +19 57827000 57832700 Low Mappability +19 58012500 58014600 Low Mappability +19 58112400 58114500 High Signal Region +19 58481300 58483200 High Signal Region +19 59221800 59240400 High Signal Region +19 59763100 59779900 High Signal Region +19 60082500 60089900 High Signal Region +19 60906900 60934000 High Signal Region +19 61162600 61174300 Low Mappability +19 61197700 61268100 High Signal Region +19 61330300 61431500 High Signal Region +1 8628600 8719100 High Signal Region +1 12038300 12041400 High Signal Region +1 14958600 14992600 High Signal Region +1 17466800 17479900 High Signal Region +1 18872500 18901300 High Signal Region +1 19175300 19177200 High Signal Region +1 22555000 22556900 High Signal Region +1 24610600 24617100 High Signal Region +1 24683100 24685100 High Signal Region +1 26685100 26689200 High Signal Region +1 43776800 43779800 High Signal Region +1 44198000 44202200 High Signal Region +1 46701700 46756600 High Signal Region +1 48880600 48882500 High Signal Region +1 56119600 56143500 High Signal Region +1 56772200 56783300 High Signal Region +1 58613000 58614900 High Signal Region +1 63629100 63631600 High Signal Region +1 69455800 69457800 High Signal Region +1 71078400 71085500 High Signal Region +1 71250600 71256700 High Signal Region +1 73549100 73555300 High Signal Region +1 73832600 73902400 High Signal Region +1 78572900 78575400 High Signal Region +1 84953500 85663200 High Signal Region +1 88209400 88311700 High Signal Region +1 94093800 94109400 High Signal Region +1 95451000 95452900 High Signal Region +1 95783900 95789700 High Signal Region +1 95810200 95851700 High Signal Region +1 100737900 100760500 High Signal Region +1 101040100 101046300 High Signal Region +1 102627300 102644300 High Signal Region +1 105226800 105230700 High 
Signal Region +1 110170400 110188300 High Signal Region +1 113602700 113604800 High Signal Region +1 114557300 114579100 High Signal Region +1 114643300 114660500 High Signal Region +1 115447500 115482800 High Signal Region +1 122356200 122358200 High Signal Region +1 133593600 133611300 High Signal Region +1 142651800 142672300 High Signal Region +1 145444500 145449100 High Signal Region +1 146120600 146128200 High Signal Region +1 151181600 151212000 High Signal Region +1 165862800 165864700 Low Mappability +1 171033000 171112400 High Signal Region +1 172716800 172738200 High Signal Region +1 172878700 172885100 High Signal Region +1 178538700 178540700 High Signal Region +1 181742100 181752400 High Signal Region +1 182628900 182630800 High Signal Region +1 183298200 183300500 High Signal Region +1 190299400 190304600 High Signal Region +1 192453100 192471800 High Signal Region +1 193226900 193228800 High Signal Region +1 195239800 195257400 High Signal Region +1 195278100 195280200 High Signal Region +1 195320700 195471900 High Signal Region +2 0 3086300 High Signal Region +2 3474900 3488800 High Signal Region +2 3932700 3939100 Low Mappability +2 3963500 3986100 High Signal Region +2 4515100 4518600 High Signal Region +2 4600600 4620300 High Signal Region +2 5378100 5394600 High Signal Region +2 5545900 5561600 High Signal Region +2 6078200 6095300 High Signal Region +2 6773100 6777500 Low Mappability +2 6832200 6846700 High Signal Region +2 7137500 7139600 High Signal Region +2 7404000 7458100 High Signal Region +2 7571700 7609800 High Signal Region +2 7656300 7669700 Low Mappability +2 7752800 7758500 High Signal Region +2 8034600 8042900 High Signal Region +2 8266200 8275600 High Signal Region +2 8528400 8535700 High Signal Region +2 8938000 8940500 High Signal Region +2 9212600 9219300 High Signal Region +2 10177100 10183400 Low Mappability +2 10483200 10501500 Low Mappability +2 10677000 10697600 Low Mappability +2 12605500 12668600 High Signal Region +2 
13824000 13869200 High Signal Region +2 13946300 13948900 High Signal Region +2 14014100 14035300 High Signal Region +2 14359100 14386600 High Signal Region +2 14919000 14924500 High Signal Region +2 15301300 15334700 High Signal Region +2 15430100 15435500 Low Mappability +2 15575900 15602800 High Signal Region +2 15716700 15721100 High Signal Region +2 15768300 15770500 High Signal Region +2 16192400 16198500 High Signal Region +2 16320200 16326500 Low Mappability +2 16762800 16787000 High Signal Region +2 17383200 17385100 High Signal Region +2 17612500 17654500 Low Mappability +2 17747200 17753000 High Signal Region +2 19209900 19212900 High Signal Region +2 19498400 19510300 High Signal Region +2 19707900 19712200 High Signal Region +2 20038500 20067400 Low Mappability +2 20426800 20433300 Low Mappability +2 20898900 20901100 High Signal Region +2 21062600 21082200 Low Mappability +2 22049700 22087700 High Signal Region +2 22137300 22165500 High Signal Region +2 22389900 22608700 High Signal Region +2 22737300 22745800 High Signal Region +2 23009600 23015000 Low Mappability +2 23274600 23304900 High Signal Region +2 23693700 23707900 High Signal Region +2 24193300 24199000 High Signal Region +2 26333100 26351900 Low Mappability +2 26759100 26763600 High Signal Region +2 26998200 27004400 Low Mappability +2 28183200 28205000 High Signal Region +2 30204600 30239600 Low Mappability +2 32381300 32488200 Low Mappability +2 33933000 33935300 High Signal Region +2 34049900 34051800 High Signal Region +2 34903900 34935900 Low Mappability +2 35090800 35109900 High Signal Region +2 35505000 35526700 Low Mappability +2 36008600 36019300 Low Mappability +2 36401900 36413100 High Signal Region +2 36508600 36515200 High Signal Region +2 36542800 36549100 High Signal Region +2 36761000 36766500 High Signal Region +2 36951900 36970700 High Signal Region +2 37156900 37185900 High Signal Region +2 37339700 37359400 Low Mappability +2 38564700 38566600 Low Mappability +2 
39225400 39293200 High Signal Region +2 39360600 39367900 Low Mappability +2 39517800 39534800 High Signal Region +2 39778500 39785700 Low Mappability +2 39887500 39915800 High Signal Region +2 40131200 40240800 High Signal Region +2 40262500 40268600 High Signal Region +2 40766400 40794000 High Signal Region +2 41059500 41070200 Low Mappability +2 41168700 41171400 High Signal Region +2 41692800 41694800 High Signal Region +2 41744300 41751600 Low Mappability +2 41775100 41781500 High Signal Region +2 41895300 41897200 High Signal Region +2 42044500 42051600 High Signal Region +2 42200300 42240700 High Signal Region +2 42950100 42956600 High Signal Region +2 43347900 43356400 High Signal Region +2 44936600 44942400 High Signal Region +2 46224800 46226700 High Signal Region +2 46343100 46348100 Low Mappability +2 46574200 46579600 Low Mappability +2 47008600 47023500 High Signal Region +2 47196300 47199300 High Signal Region +2 47533600 47642600 High Signal Region +2 47942200 47943800 High Signal Region +2 48483000 48491000 Low Mappability +2 50543200 50545500 High Signal Region +2 50679600 50686800 Low Mappability +2 51552600 51555600 High Signal Region +2 51750900 51756000 High Signal Region +2 51881600 51890600 Low Mappability +2 51945900 51948400 High Signal Region +2 52695900 52718600 High Signal Region +2 52786800 52796300 High Signal Region +2 53317700 53321600 Low Mappability +2 53347800 53367000 High Signal Region +2 53633400 53642900 High Signal Region +2 53745700 53799800 High Signal Region +2 54252600 54258500 High Signal Region +2 54698000 54747900 High Signal Region +2 54862600 54895300 High Signal Region +2 55197500 55216400 High Signal Region +2 55308300 55353700 High Signal Region +2 55823800 55829000 High Signal Region +2 55860200 55874300 Low Mappability +2 55942000 55947800 High Signal Region +2 56192800 56194600 High Signal Region +2 56298700 56304900 High Signal Region +2 56465200 56471900 High Signal Region +2 56834300 56879100 High Signal 
Region +2 56988500 56990600 Low Mappability +2 57166400 57172900 Low Mappability +2 57214400 57223500 Low Mappability +2 57417400 57446500 High Signal Region +2 57628500 57633800 High Signal Region +2 57726600 57728500 High Signal Region +2 58212900 58263100 High Signal Region +2 58648300 58691900 High Signal Region +2 58881200 58902500 High Signal Region +2 59971300 59972800 Low Mappability +2 61038200 61042700 High Signal Region +2 61959600 61965300 High Signal Region +2 62022900 62040100 High Signal Region +2 62861100 62867200 High Signal Region +2 63297300 63302700 Low Mappability +2 63368100 63403900 High Signal Region +2 63462300 63483800 High Signal Region +2 63641200 63654600 High Signal Region +2 63718200 63725400 High Signal Region +2 63838100 63845300 Low Mappability +2 64309200 64319600 High Signal Region +2 64608400 64633400 Low Mappability +2 64698700 64703300 High Signal Region +2 65592500 65602200 High Signal Region +2 65737700 65781500 Low Mappability +2 66721600 66750400 High Signal Region +2 66845100 66852300 High Signal Region +2 67408400 67414500 High Signal Region +2 67939700 67946000 High Signal Region +2 68770400 68776700 High Signal Region +2 68917800 68924100 Low Mappability +2 69353900 69356600 High Signal Region +2 70263100 70270000 Low Mappability +2 70880100 70892900 High Signal Region +2 71054700 71071300 Low Mappability +2 71942000 71949500 Low Mappability +2 72270200 72275700 Low Mappability +2 73867000 73868900 High Signal Region +2 74364300 74402600 Low Mappability +2 74437600 74444900 Low Mappability +2 75499500 75504600 High Signal Region +2 77224000 77230500 Low Mappability +2 78318000 78339500 High Signal Region +2 79437700 79441900 High Signal Region +2 79936500 79943700 High Signal Region +2 80119000 80121500 High Signal Region +2 80220600 80257700 Low Mappability +2 80795600 80838700 High Signal Region +2 80879000 80880200 High Signal Region +2 80956500 81006000 High Signal Region +2 81069000 81075100 High Signal Region +2 
81639400 81644800 High Signal Region +2 81750800 81756800 High Signal Region +2 81790000 81795900 High Signal Region +2 82329800 82340100 High Signal Region +2 82673800 82679900 High Signal Region +2 82714300 82728500 High Signal Region +2 82783900 82789500 High Signal Region +2 82868800 82887900 High Signal Region +2 82916300 82936800 High Signal Region +2 83120100 83146100 High Signal Region +2 83185100 83193200 High Signal Region +2 83325900 83328200 High Signal Region +2 83413500 83587500 High Signal Region +2 83865600 83893100 High Signal Region +2 83931600 83995800 Low Mappability +2 84080900 84085600 High Signal Region +2 84505000 84510500 Low Mappability +2 84532500 84534600 Low Mappability +2 84564800 84576000 Low Mappability +2 85685600 85701800 Low Mappability +2 85874000 85896300 High Signal Region +2 86018200 86021700 Low Mappability +2 86303400 86317700 High Signal Region +2 86339600 86346900 Low Mappability +2 86612700 86617500 High Signal Region +2 87381000 87382800 High Signal Region +2 87875700 87941300 High Signal Region +2 88167400 88212600 High Signal Region +2 88776200 88780800 High Signal Region +2 89206600 89277100 Low Mappability +2 89345700 89350400 High Signal Region +2 89761200 89775100 High Signal Region +2 89856400 89920100 High Signal Region +2 90127200 90132700 High Signal Region +2 90157100 90249100 High Signal Region +2 90273200 90279100 High Signal Region +2 90309300 90396100 High Signal Region +2 92092600 92094700 High Signal Region +2 92167200 92169100 High Signal Region +2 93824700 93850200 High Signal Region +2 94602800 94607800 Low Mappability +2 94633900 94656500 High Signal Region +2 94801000 94809400 Low Mappability +2 94852800 94891200 High Signal Region +2 95064700 95093500 Low Mappability +2 95148000 95167800 High Signal Region +2 95215900 95320600 High Signal Region +2 95414700 95420600 High Signal Region +2 95536400 95538400 Low Mappability +2 95647900 95654300 High Signal Region +2 95794500 95799200 High Signal 
Region +2 95929300 95934400 High Signal Region +2 96191400 96208900 High Signal Region +2 96547800 96566800 Low Mappability +2 96954700 96977300 High Signal Region +2 97021000 97034600 High Signal Region +2 97308000 97327600 High Signal Region +2 97671600 97686300 High Signal Region +2 97760700 97765800 High Signal Region +2 97872400 97958200 High Signal Region +2 98361700 98449600 High Signal Region +2 98659400 98668200 High Signal Region +2 98796500 98801900 High Signal Region +2 99020000 99057500 High Signal Region +2 99300200 99320300 High Signal Region +2 99944600 99970200 High Signal Region +2 100112000 100114300 High Signal Region +2 100223900 100238300 High Signal Region +2 100418400 100777900 Low Mappability +2 101127200 101153600 Low Mappability +2 101313100 101350600 High Signal Region +2 102828400 102830400 High Signal Region +2 103231300 103232300 High Signal Region +2 103852300 103872800 High Signal Region +2 104684900 104697300 High Signal Region +2 105249300 105259000 High Signal Region +2 105539300 105563200 Low Mappability +2 105825900 105865100 High Signal Region +2 106555100 106569300 High Signal Region +2 107134100 107140900 High Signal Region +2 107593900 107601200 Low Mappability +2 107710100 107712400 High Signal Region +2 108608600 108614000 High Signal Region +2 108945100 108972800 High Signal Region +2 109629400 109636000 High Signal Region +2 110016800 110025500 High Signal Region +2 110091100 110128700 High Signal Region +2 110157100 110163300 High Signal Region +2 110292700 110294600 High Signal Region +2 110545800 110583400 High Signal Region +2 110752400 110780100 High Signal Region +2 111007400 111018600 High Signal Region +2 111042000 111046600 High Signal Region +2 111172700 111179800 High Signal Region +2 111281500 111287900 Low Mappability +2 111545600 111553300 Low Mappability +2 111716900 111722900 High Signal Region +2 111844900 111866400 High Signal Region +2 111890900 111898900 High Signal Region +2 112053900 112086000 High 
Signal Region +2 112319700 112326200 Low Mappability +2 112522900 112570500 High Signal Region +2 112602800 112605100 High Signal Region +2 112701400 112707900 High Signal Region +2 113095800 113102400 Low Mappability +2 113330900 113333000 Low Mappability +2 113518400 113524900 Low Mappability +2 113564300 113565700 High Signal Region +2 113659300 113673200 High Signal Region +2 114180800 114187400 Low Mappability +2 114242400 114244000 High Signal Region +2 114469200 114504000 High Signal Region +2 116454300 116524000 High Signal Region +2 117829600 117835500 High Signal Region +2 118017700 118020200 High Signal Region +2 120608600 120650200 High Signal Region +2 120810300 120821000 High Signal Region +2 121435600 121523600 High Signal Region +2 121938800 121957600 High Signal Region +2 122680400 122683200 High Signal Region +2 123288000 123294300 Low Mappability +2 123496800 123525300 High Signal Region +2 123785200 123790700 High Signal Region +2 124002700 124004600 High Signal Region +2 124798800 124835800 High Signal Region +2 125625000 125635900 Low Mappability +2 126217400 126263800 High Signal Region +2 126445400 126447400 Low Mappability +2 126964900 126972100 Low Mappability +2 127720400 127734000 Low Mappability +2 128050800 128053200 High Signal Region +2 128480400 128486900 Low Mappability +2 128772500 128774500 Low Mappability +2 129499400 129523400 High Signal Region +2 129602700 129613700 Low Mappability +2 131791800 131793800 High Signal Region +2 131908300 131931100 Low Mappability +2 131963900 131983700 High Signal Region +2 132885700 132890400 High Signal Region +2 132952400 132954500 Low Mappability +2 133053200 133083400 High Signal Region +2 133239300 133261800 High Signal Region +2 133934000 133937500 High Signal Region +2 134560100 134577900 High Signal Region +2 134661800 134673000 High Signal Region +2 134746600 134751100 High Signal Region +2 135146800 135151900 High Signal Region +2 135987600 135989700 High Signal Region +2 136234300 
136286800 Low Mappability +2 137028200 137037000 High Signal Region +2 137345900 137369900 High Signal Region +2 137394500 137405600 High Signal Region +2 137640000 137642300 High Signal Region +2 137890200 137895000 High Signal Region +2 138035000 138056400 Low Mappability +2 138573700 138580400 High Signal Region +2 138621500 138624200 High Signal Region +2 138833600 138853100 High Signal Region +2 138904300 138935000 High Signal Region +2 139433200 139476200 High Signal Region +2 140345800 140352400 Low Mappability +2 142197000 142204400 Low Mappability +2 142464200 142483300 Low Mappability +2 142789100 142795600 Low Mappability +2 143275500 143290300 High Signal Region +2 143725900 143764700 High Signal Region +2 144627800 144636700 Low Mappability +2 144975200 144977100 High Signal Region +2 145001300 145003200 High Signal Region +2 145118300 145146300 Low Mappability +2 145236800 145242600 Low Mappability +2 145625100 145630800 Low Mappability +2 145732700 145734600 High Signal Region +2 146135700 146176900 High Signal Region +2 146995700 147013200 Low Mappability +2 147675300 147677500 High Signal Region +2 147864800 147871300 High Signal Region +2 147918800 147925100 Low Mappability +2 148410500 148416000 Low Mappability +2 148459900 148473800 High Signal Region +2 148612700 148620200 Low Mappability +2 148939300 148984200 High Signal Region +2 149049800 149056000 High Signal Region +2 149269400 149292700 High Signal Region +2 150413500 150452500 High Signal Region +2 150728300 150749700 Low Mappability +2 151029700 151385300 High Signal Region +2 151408800 151496700 High Signal Region +2 152157000 152159000 Low Mappability +2 152206800 152227500 High Signal Region +2 152263400 152269900 Low Mappability +2 153674800 153693100 Low Mappability +2 154174200 154180000 High Signal Region +2 154353800 154359700 Low Mappability +2 155016300 155051500 High Signal Region +2 155235400 155258100 High Signal Region +2 156185100 156214400 Low Mappability +2 157566000 
157655300 Low Mappability +2 157833200 157835600 High Signal Region +2 158286300 158292800 High Signal Region +2 159455200 159469500 High Signal Region +2 160620300 160638500 High Signal Region +2 161368800 161376200 High Signal Region +2 161984900 161990900 High Signal Region +2 162369100 162376700 High Signal Region +2 162594500 162602700 High Signal Region +2 162843800 162847600 High Signal Region +2 163519100 163533100 Low Mappability +2 163644500 163655100 High Signal Region +2 163788900 163796100 Low Mappability +2 163833800 163849200 Low Mappability +2 163958100 163963000 Low Mappability +2 164201000 164202700 High Signal Region +2 165477300 165529900 Low Mappability +2 165675100 165679500 Low Mappability +2 165848700 165953000 Low Mappability +2 166530600 166535100 Low Mappability +2 166780500 166832200 Low Mappability +2 167269400 167291100 High Signal Region +2 167407900 167423000 Low Mappability +2 170315100 170320000 High Signal Region +2 170503800 170509800 High Signal Region +2 171814300 171816700 High Signal Region +2 171912800 171932200 Low Mappability +2 172007100 172014300 High Signal Region +2 172743600 172751100 Low Mappability +2 173098700 173101000 Low Mappability +2 173706700 173708800 High Signal Region +2 174961800 176745500 High Signal Region +2 176767100 177166600 High Signal Region +2 177232400 177490200 High Signal Region +2 177526700 177841000 High Signal Region +2 178775000 178794400 High Signal Region +2 180025600 180093500 Low Mappability +2 181169900 181188000 Low Mappability +2 181285900 181298800 High Signal Region +2 181739800 181745800 High Signal Region +2 181885000 181933400 High Signal Region +2 182003800 182113200 High Signal Region +3 0 3052500 High Signal Region +3 3084100 3098300 High Signal Region +3 3123200 3150800 High Signal Region +3 3443300 3493700 High Signal Region +3 4698100 4725500 High Signal Region +3 5517700 5525000 Low Mappability +3 5859400 5863500 High Signal Region +3 6115100 6117100 High Signal Region 
+3 6601900 6627400 High Signal Region +3 6900700 6916400 High Signal Region +3 6941100 6946600 High Signal Region +3 7178300 7223900 High Signal Region +3 7477600 7482500 High Signal Region +3 7910300 7916600 High Signal Region +3 8225200 8247500 High Signal Region +3 8574000 8589900 High Signal Region +3 8815300 8838700 High Signal Region +3 9091900 9096900 Low Mappability +3 9777500 9778500 High Signal Region +3 9904100 9910700 High Signal Region +3 9952100 9967100 High Signal Region +3 10453800 10464500 High Signal Region +3 10961700 10971700 High Signal Region +3 11050200 11070500 High Signal Region +3 11120700 11143300 High Signal Region +3 11518700 11524700 High Signal Region +3 11779200 11806000 High Signal Region +3 11933500 11938400 High Signal Region +3 11961500 11973100 High Signal Region +3 12107500 12131400 High Signal Region +3 12221200 12262000 High Signal Region +3 12336000 12339700 High Signal Region +3 12814500 12857800 Low Mappability +3 12906200 12907300 High Signal Region +3 13219400 13222800 High Signal Region +3 13821100 13826600 Low Mappability +3 13965800 13972000 High Signal Region +3 14272100 14336300 High Signal Region +3 14449600 14478500 High Signal Region +3 14593200 14597400 High Signal Region +3 14668900 14744700 High Signal Region +3 15028800 15045100 High Signal Region +3 15079500 15087400 High Signal Region +3 15451600 15872400 High Signal Region +3 15964200 15967200 High Signal Region +3 16351400 16357100 High Signal Region +3 16626000 16633700 High Signal Region +3 16995700 17021400 High Signal Region +3 17419700 17447600 High Signal Region +3 17679600 17682100 High Signal Region +3 17954200 17997400 High Signal Region +3 18379800 18395100 High Signal Region +3 18432100 18437500 High Signal Region +3 18966900 18983600 High Signal Region +3 19357600 19359300 High Signal Region +3 19594900 19601100 High Signal Region +3 19917700 19940300 High Signal Region +3 21247500 21250200 High Signal Region +3 21317800 21324600 High Signal 
Region +3 21383700 21389000 High Signal Region +3 21512900 21519300 High Signal Region +3 21661800 21663700 Low Mappability +3 21685300 21709500 High Signal Region +3 22069200 22070500 High Signal Region +3 22240800 22250100 High Signal Region +3 22362000 22377000 High Signal Region +3 22517600 22521100 High Signal Region +3 22612100 22759200 High Signal Region +3 22933800 23015000 High Signal Region +3 23077300 23099800 High Signal Region +3 23173700 23180900 Low Mappability +3 23302200 23321100 High Signal Region +3 23353500 23360000 High Signal Region +3 23463300 23468200 High Signal Region +3 23579500 23584900 High Signal Region +3 23841700 23843800 Low Mappability +3 24624400 24627900 High Signal Region +3 24655200 24661300 High Signal Region +3 25210800 25228800 Low Mappability +3 25277500 25310400 High Signal Region +3 25416900 25421600 Low Mappability +3 25472900 25478900 High Signal Region +3 26089400 26113400 High Signal Region +3 26346800 26369700 High Signal Region +3 26724600 26737000 High Signal Region +3 26944500 26950800 High Signal Region +3 27010100 27023300 High Signal Region +3 27309300 27319800 Low Mappability +3 28198300 28201300 Low Mappability +3 28513900 28535500 High Signal Region +3 28983500 29014200 High Signal Region +3 29461500 29492300 High Signal Region +3 29675900 29680600 High Signal Region +3 31176300 31188900 Low Mappability +3 31340700 31364500 Low Mappability +3 31651800 31680100 High Signal Region +3 31819800 31826900 High Signal Region +3 33696500 33708400 High Signal Region +3 33768300 33798500 High Signal Region +3 33930000 33948800 Low Mappability +3 34516200 34518200 High Signal Region +3 35285400 35292700 High Signal Region +3 35707000 35713500 Low Mappability +3 35743300 35744600 High Signal Region +3 36106500 36109400 High Signal Region +3 36285400 36291100 High Signal Region +3 36847300 36853900 High Signal Region +3 39026800 39030900 High Signal Region +3 39183300 39189800 High Signal Region +3 40151300 40157700 High 
Signal Region +3 40347600 40352600 High Signal Region +3 40549300 40651700 High Signal Region +3 41871900 41887800 High Signal Region +3 41993500 41999500 High Signal Region +3 42170000 42187300 High Signal Region +3 42682100 42722800 High Signal Region +3 42820200 42827400 High Signal Region +3 43108100 43197200 High Signal Region +3 43466400 43492100 High Signal Region +3 43538900 43557700 High Signal Region +3 44185900 44191600 High Signal Region +3 44241200 44260000 High Signal Region +3 44401500 44407500 High Signal Region +3 44559600 44565200 High Signal Region +3 44884400 44890700 High Signal Region +3 45579200 45591900 High Signal Region +3 45848500 45863400 Low Mappability +3 45986000 45990700 High Signal Region +3 46141000 46148200 High Signal Region +3 46338200 46340300 Low Mappability +3 46735000 46741900 High Signal Region +3 46795400 46805400 High Signal Region +3 46910900 46936200 High Signal Region +3 47592800 47598000 High Signal Region +3 47798300 47799600 High Signal Region +3 47966600 47968700 High Signal Region +3 48437800 48462000 High Signal Region +3 49443600 49482800 High Signal Region +3 49727200 49734400 High Signal Region +3 50464900 50474400 High Signal Region +3 50763700 50814900 High Signal Region +3 50957300 50963000 High Signal Region +3 51233600 51245400 Low Mappability +3 51616000 51623700 Low Mappability +3 51765300 51784900 High Signal Region +3 52230000 52233400 High Signal Region +3 53426900 53431000 High Signal Region +3 54849100 54874300 Low Mappability +3 56069700 56075200 High Signal Region +3 56210900 56215900 High Signal Region +3 56513600 56576700 High Signal Region +3 56903800 56943000 High Signal Region +3 57059400 57070200 High Signal Region +3 57349800 57379400 High Signal Region +3 58051100 58081600 Low Mappability +3 59370700 59412200 High Signal Region +3 59565300 59632700 High Signal Region +3 59684600 59689200 High Signal Region +3 59791800 59804200 Low Mappability +3 59887400 59889300 High Signal Region +3 
59919200 59921100 High Signal Region +3 60044300 60046800 High Signal Region +3 60489700 60495200 Low Mappability +3 61150800 61177900 High Signal Region +3 61260700 61275000 Low Mappability +3 61495400 61499700 High Signal Region +3 61672300 61678300 High Signal Region +3 61707600 61726600 Low Mappability +3 61853900 61858900 High Signal Region +3 62032400 62038600 High Signal Region +3 62108300 62160100 High Signal Region +3 62356900 62367700 High Signal Region +3 62543000 62549200 High Signal Region +3 62873000 62879300 High Signal Region +3 63515500 63530100 High Signal Region +3 63590100 63591500 High Signal Region +3 64171000 64172900 High Signal Region +3 64237900 64245700 High Signal Region +3 64453100 64512800 High Signal Region +3 64609600 64665300 Low Mappability +3 64697900 64730500 High Signal Region +3 67027900 67054100 High Signal Region +3 67262400 67264000 High Signal Region +3 67411100 67419400 High Signal Region +3 67747300 67752800 High Signal Region +3 67786800 67793600 High Signal Region +3 68114300 68119700 Low Mappability +3 68519400 68525100 High Signal Region +3 69228600 69230500 High Signal Region +3 69848400 69854900 High Signal Region +3 69944400 69949800 High Signal Region +3 71117300 71122800 High Signal Region +3 71369600 71447800 High Signal Region +3 72273600 72293700 High Signal Region +3 72698100 72704800 High Signal Region +3 73088300 73098500 High Signal Region +3 73733100 73738500 Low Mappability +3 74583300 74598400 Low Mappability +3 74865000 74881800 High Signal Region +3 75348300 75378700 Low Mappability +3 75409000 75424100 High Signal Region +3 76598800 76604700 High Signal Region +3 76886600 76892900 Low Mappability +3 77597400 77604300 Low Mappability +3 77667400 77711400 High Signal Region +3 77926800 77931400 High Signal Region +3 78281900 78283900 Low Mappability +3 79012700 79014900 High Signal Region +3 79046300 79052800 Low Mappability +3 79763800 79780000 High Signal Region +3 79959500 79965700 High Signal 
Region +3 80465400 80472000 High Signal Region +3 82283300 82288700 Low Mappability +3 82462100 82508600 Low Mappability +3 82589000 82616700 Low Mappability +3 82921400 82924800 High Signal Region +3 83123200 83125100 High Signal Region +3 83330900 83343400 High Signal Region +3 83845100 83867000 High Signal Region +3 84142200 84149700 Low Mappability +3 84359000 84366300 Low Mappability +3 85305200 85326800 Low Mappability +3 85622200 85629500 Low Mappability +3 87424200 87426100 High Signal Region +3 87469300 87474600 High Signal Region +3 88044000 88066500 High Signal Region +3 88666500 88673500 Low Mappability +3 88716700 88873000 Low Mappability +3 90761500 90810400 High Signal Region +3 90991100 90996800 Low Mappability +3 91856700 91898200 High Signal Region +3 92185400 92291300 High Signal Region +3 93059200 93107000 High Signal Region +3 93168500 93172800 High Signal Region +3 93203900 93229100 High Signal Region +3 93323700 93331700 Low Mappability +3 93860300 94093700 High Signal Region +3 94136200 94152300 High Signal Region +3 94658300 94665700 Low Mappability +3 94690000 94730800 High Signal Region +3 94757600 94765200 Low Mappability +3 96043600 96058900 High Signal Region +3 96196200 96288300 High Signal Region +3 96313200 96388900 Low Mappability +3 96446800 96463800 Low Mappability +3 96485600 96514300 High Signal Region +3 96840000 96863800 High Signal Region +3 97245200 97251500 High Signal Region +3 98396100 98411400 High Signal Region +3 98443100 98597600 Low Mappability +3 98709300 98778900 High Signal Region +3 98986000 99034100 High Signal Region +3 99406000 99434100 High Signal Region +3 99882900 99908100 High Signal Region +3 99980200 99982200 High Signal Region +3 100315500 100330900 High Signal Region +3 100484400 100486300 High Signal Region +3 102813400 102839300 High Signal Region +3 102983600 102989900 High Signal Region +3 103134600 103136000 High Signal Region +3 103427600 103447900 High Signal Region +3 103555000 103557000 Low 
Mappability +3 104116800 104123100 High Signal Region +3 104194200 104198800 High Signal Region +3 104588100 104595500 Low Mappability +3 105028200 105030500 High Signal Region +3 106118500 106311800 High Signal Region +3 106777900 106779700 High Signal Region +3 109258500 109277300 High Signal Region +3 109458000 109462700 High Signal Region +3 110319800 110325700 High Signal Region +3 110416300 110421800 High Signal Region +3 111256100 111268600 High Signal Region +3 111578400 111605200 Low Mappability +3 111794100 111799000 Low Mappability +3 111830400 111836300 High Signal Region +3 112274500 112287300 High Signal Region +3 112315500 112337400 High Signal Region +3 112561900 112586900 High Signal Region +3 112863500 112869300 High Signal Region +3 112913800 112918000 High Signal Region +3 113186300 113189100 High Signal Region +3 113250900 113527800 High Signal Region +3 113709900 113719000 High Signal Region +3 113742300 113748300 High Signal Region +3 114272600 114279400 High Signal Region +3 114472100 114499300 Low Mappability +3 114587900 114595900 High Signal Region +3 114976700 114982800 High Signal Region +3 115020700 115027100 Low Mappability +3 115367700 115372200 Low Mappability +3 115905900 115922900 High Signal Region +3 116817400 116843900 Low Mappability +3 117267200 117292400 High Signal Region +3 117379100 117386400 Low Mappability +3 118055100 118060000 High Signal Region +3 119211800 119212900 High Signal Region +3 120735000 120742200 High Signal Region +3 120825200 120851500 High Signal Region +3 121248900 121250900 High Signal Region +3 121694400 121696100 High Signal Region +3 122294000 122329300 High Signal Region +3 122654100 122657300 High Signal Region +3 122804300 122806600 High Signal Region +3 123471600 123476200 Low Mappability +3 123729200 123743200 High Signal Region +3 123924800 123957700 High Signal Region +3 124282300 124288300 High Signal Region +3 125902800 125908900 High Signal Region +3 126127300 126136000 Low Mappability 
+3 126905300 126910600 High Signal Region +3 127522400 127523700 Low Mappability +3 127771600 127780600 High Signal Region +3 128203600 128211000 High Signal Region +3 128440100 128446100 High Signal Region +3 128935800 128937700 High Signal Region +3 129020900 129032100 High Signal Region +3 129393000 129394900 High Signal Region +3 133123600 133130800 Low Mappability +3 133566400 133568700 High Signal Region +3 133636000 133642800 High Signal Region +3 133837100 133859400 High Signal Region +3 134007400 134026700 Low Mappability +3 134685700 134690700 High Signal Region +3 134862500 134888400 High Signal Region +3 135148300 135163000 High Signal Region +3 136173700 136181000 Low Mappability +3 137407500 137413500 High Signal Region +3 137469200 137470300 High Signal Region +3 138200900 138207900 High Signal Region +3 139365700 139417700 High Signal Region +3 140376900 140384200 Low Mappability +3 142190700 142192800 High Signal Region +3 142513000 142517200 High Signal Region +3 143840800 143847000 High Signal Region +3 144030200 144036300 High Signal Region +3 144655600 144660600 High Signal Region +3 145040500 145061800 High Signal Region +3 145109000 145114400 Low Mappability +3 145188100 145190400 High Signal Region +3 145301600 145303100 High Signal Region +3 146073300 146102400 High Signal Region +3 146358800 146362600 High Signal Region +3 146476200 146479000 High Signal Region +3 146918900 146924200 High Signal Region +3 147107400 147113000 High Signal Region +3 147769500 147781800 High Signal Region +3 147874500 147877600 High Signal Region +3 148704800 148716900 High Signal Region +3 148750100 148757400 Low Mappability +3 148797800 148799700 High Signal Region +3 149051500 149053800 High Signal Region +3 150120900 150123800 High Signal Region +3 150336900 150341400 Low Mappability +3 151028900 151031200 High Signal Region +3 151657500 151679800 High Signal Region +3 152313800 152332200 High Signal Region +3 152700700 152702700 High Signal Region +3 
153090100 153109400 High Signal Region +3 154640300 154646700 High Signal Region +3 154931700 154932800 High Signal Region +3 155515800 155517600 High Signal Region +3 155765900 155771900 High Signal Region +3 156256900 156262800 Low Mappability +3 156285600 156322500 High Signal Region +3 156799400 156804900 Low Mappability +3 157646900 157678300 High Signal Region +3 157946200 157969400 High Signal Region +3 158095300 158119200 High Signal Region +3 158698600 158756800 High Signal Region +3 159165900 159179700 High Signal Region +3 159225800 159239300 Low Mappability +3 159478300 159479700 High Signal Region +3 159748800 159826500 High Signal Region +3 159938500 160039600 High Signal Region +4 0 3114800 High Signal Region +4 3139700 3333100 High Signal Region +4 18476200 18498400 High Signal Region +4 20168700 20213200 High Signal Region +4 20804100 20808300 High Signal Region +4 20982300 20983700 High Signal Region +4 21281300 21287700 High Signal Region +4 22535900 22542300 High Signal Region +4 24193400 24201100 High Signal Region +4 25471300 25473200 High Signal Region +4 28175900 28177900 High Signal Region +4 31353200 31355200 High Signal Region +4 34934800 34936700 High Signal Region +4 35042700 35048900 High Signal Region +4 38305900 38322000 High Signal Region +4 57979700 57981800 High Signal Region +4 64454600 64499000 High Signal Region +4 68427300 68447900 High Signal Region +4 70367200 70379200 High Signal Region +4 73196300 73209300 High Signal Region +4 80001800 80004900 High Signal Region +4 83536900 83541900 High Signal Region +4 90725600 90727500 High Signal Region +4 92230800 92236500 High Signal Region +4 93843500 93853100 High Signal Region +4 99380500 99382400 High Signal Region +4 110469700 110505300 High Signal Region +4 118546100 118549600 High Signal Region +4 131222500 131229300 High Signal Region +4 145404200 147840400 High Signal Region +4 149809200 149811700 High Signal Region +4 153152100 153154100 High Signal Region +4 156256000 
156508100 High Signal Region +5 3175400 3186000 High Signal Region +5 12489500 12490600 High Signal Region +5 14899000 15726800 High Signal Region +5 17466700 17481500 High Signal Region +5 36629400 36662500 High Signal Region +5 46434800 46436700 High Signal Region +5 49722200 49755700 High Signal Region +5 60041900 60043900 Low Mappability +5 80499900 80501900 High Signal Region +5 93288700 93351800 High Signal Region +5 106126300 106177800 High Signal Region +5 110063700 110075500 High Signal Region +5 114921500 114923500 High Signal Region +5 137148800 137153800 High Signal Region +5 146260000 146262300 High Signal Region +5 151733600 151834600 High Signal Region +6 0 3255700 High Signal Region +6 3280700 3340300 High Signal Region +6 4922900 4925100 High Signal Region +6 5608000 5657900 High Signal Region +6 5704400 5706800 High Signal Region +6 6400000 6442800 High Signal Region +6 6700000 6727600 High Signal Region +6 8729200 8731100 High Signal Region +6 8906700 8932300 High Signal Region +6 9519200 9529100 High Signal Region +6 9580600 9610100 High Signal Region +6 9646900 9663400 High Signal Region +6 9720400 9733100 High Signal Region +6 9889000 9891100 High Signal Region +6 10228400 10269900 High Signal Region +6 10559100 10588400 High Signal Region +6 10623400 10633900 High Signal Region +6 11251100 11256800 High Signal Region +6 11406400 11457900 High Signal Region +6 11813900 11897100 High Signal Region +6 12671100 12680300 High Signal Region +6 13390500 13394900 High Signal Region +6 13700500 13743100 High Signal Region +6 14085000 14092300 Low Mappability +6 14793800 14805500 High Signal Region +6 14929200 14935100 High Signal Region +6 16299700 16310100 High Signal Region +6 16922600 16924800 High Signal Region +6 17004600 17042000 High Signal Region +6 17391200 17397900 High Signal Region +6 17981700 17983400 High Signal Region +6 18264800 18267200 High Signal Region +6 18836700 18848600 High Signal Region +6 19068900 19075400 High Signal Region 
+6 20113900 20143500 High Signal Region +6 21452400 21458100 High Signal Region +6 21801300 21803200 High Signal Region +6 21841300 21845300 High Signal Region +6 21873300 21876800 High Signal Region +6 22107700 22131800 High Signal Region +6 22479600 22483900 High Signal Region +6 22516700 22534300 High Signal Region +6 25505600 25566400 Low Mappability +6 26049500 26072100 High Signal Region +6 26247700 26278000 High Signal Region +6 26834800 26840700 High Signal Region +6 26988500 26992000 High Signal Region +6 27199000 27228400 High Signal Region +6 28924100 28929500 Low Mappability +6 29746800 29750000 High Signal Region +6 29974300 29978200 High Signal Region +6 30752800 30806400 High Signal Region +6 30929300 30936100 Low Mappability +6 31594900 31597200 High Signal Region +6 32740700 32746800 High Signal Region +6 32867600 32869000 High Signal Region +6 33490300 33495000 High Signal Region +6 33650500 33665400 High Signal Region +6 33743900 33749000 High Signal Region +6 36224300 36230500 High Signal Region +6 40535500 40559800 Low Mappability +6 40716600 40723700 High Signal Region +6 42122800 42174200 High Signal Region +6 42492600 42516600 High Signal Region +6 42617600 42620900 High Signal Region +6 44265200 44270800 High Signal Region +6 44497000 44513300 High Signal Region +6 44785200 44794100 High Signal Region +6 44836300 44837500 High Signal Region +6 46381300 46402000 High Signal Region +6 46678600 46685300 High Signal Region +6 47639000 47779200 High Signal Region +6 48120300 48122300 High Signal Region +6 48149300 48172900 High Signal Region +6 48231500 48292600 High Signal Region +6 48320300 48347000 High Signal Region +6 49235500 49237500 High Signal Region +6 50601400 50636700 Low Mappability +6 51046500 51048400 High Signal Region +6 53464100 53487500 Low Mappability +6 54976500 54993700 High Signal Region +6 56232700 56257500 High Signal Region +6 56455900 56465300 High Signal Region +6 57425200 57455700 High Signal Region +6 57588900 
57634500 High Signal Region +6 57919500 57925700 High Signal Region +6 58068500 58073500 High Signal Region +6 58588700 58612800 High Signal Region +6 59123600 59130100 High Signal Region +6 59199600 59230600 High Signal Region +6 59584300 59598000 High Signal Region +6 59676000 59698200 High Signal Region +6 60622400 60625600 High Signal Region +6 60668000 60688200 High Signal Region +6 61023100 61029400 High Signal Region +6 61088400 61094600 High Signal Region +6 62525500 62527300 High Signal Region +6 64331600 64338900 Low Mappability +6 64778500 64812500 High Signal Region +6 64882100 64930500 High Signal Region +6 65100600 65106700 High Signal Region +6 65184300 65261600 High Signal Region +6 66070200 66095900 High Signal Region +6 66815600 66831600 High Signal Region +6 67311500 67312900 High Signal Region +6 67494800 67522100 Low Mappability +6 67576400 67630800 High Signal Region +6 67658300 67710900 High Signal Region +6 68011000 68012900 High Signal Region +6 68221900 68252400 Low Mappability +6 68641400 68661300 High Signal Region +6 68971900 68996400 High Signal Region +6 69017600 69035700 High Signal Region +6 70000300 70053000 High Signal Region +6 70187800 70213700 High Signal Region +6 70620700 70648600 High Signal Region +6 73105700 73113400 High Signal Region +6 73502200 73521000 High Signal Region +6 73671400 73672600 High Signal Region +6 74191700 74194400 High Signal Region +6 74365900 74386400 High Signal Region +6 74700100 74705300 High Signal Region +6 75054000 75083000 High Signal Region +6 76645400 76649100 High Signal Region +6 76847200 76854100 High Signal Region +6 78352900 78359500 High Signal Region +6 78456200 78491700 Low Mappability +6 78637400 78639700 High Signal Region +6 78716700 78722400 High Signal Region +6 79627500 79635200 High Signal Region +6 79817300 79819200 High Signal Region +6 79898900 79922800 Low Mappability +6 79959800 79967500 Low Mappability +6 81012200 81036700 High Signal Region +6 81829400 81875000 High 
Signal Region +6 81997000 82011600 High Signal Region +6 82213400 82218800 High Signal Region +6 84662700 84688200 High Signal Region +6 84712600 84720200 High Signal Region +6 89723500 89735600 High Signal Region +6 91768300 91770200 High Signal Region +6 92321600 92328300 High Signal Region +6 94988600 94990700 Low Mappability +6 95030100 95043800 Low Mappability +6 95475600 95479900 High Signal Region +6 95980800 95987100 High Signal Region +6 96877800 96896100 High Signal Region +6 97356800 97379400 High Signal Region +6 101571200 101621400 High Signal Region +6 102379600 102384100 High Signal Region +6 102483000 102505700 High Signal Region +6 102767600 102791400 High Signal Region +6 103313700 103315600 High Signal Region +6 103647900 103650200 High Signal Region +6 103750700 103752000 High Signal Region +6 105194700 105199600 High Signal Region +6 105253400 105257600 Low Mappability +6 105306000 105337600 High Signal Region +6 107141500 107146300 High Signal Region +6 107284300 107299800 High Signal Region +6 107860500 107920500 High Signal Region +6 109498200 109506200 High Signal Region +6 109641800 109648100 High Signal Region +6 109984000 110013000 High Signal Region +6 114340600 114343000 High Signal Region +6 114492200 114643400 High Signal Region +6 116021200 116043900 High Signal Region +6 116238700 116252600 High Signal Region +6 116566200 116593800 High Signal Region +6 117087400 117094300 High Signal Region +6 118209000 118234000 High Signal Region +6 119419600 119431100 High Signal Region +6 121690100 121703800 High Signal Region +6 122614200 122616600 High Signal Region +6 123132100 123179400 High Signal Region +6 123204800 123242900 High Signal Region +6 126135200 126137300 Low Mappability +6 128680200 128693700 High Signal Region +6 128861200 128865300 High Signal Region +6 129857800 129863300 High Signal Region +6 129935700 129948400 High Signal Region +6 131088300 131114900 High Signal Region +6 131208300 131252100 High Signal Region +6 
131495900 131505900 High Signal Region +6 132497200 132523000 Low Mappability +6 132597000 132598700 High Signal Region +6 132635400 132642000 High Signal Region +6 133169000 133170900 High Signal Region +6 133891500 133899800 High Signal Region +6 134689500 134692700 High Signal Region +6 138216100 138221900 High Signal Region +6 138647300 138649100 High Signal Region +6 138685400 138700700 High Signal Region +6 142060700 142079300 High Signal Region +6 142396700 142400200 Low Mappability +6 142433400 142439400 High Signal Region +6 143014400 143016300 High Signal Region +6 143466500 143481400 High Signal Region +6 143883500 143886900 High Signal Region +6 144655200 144670000 High Signal Region +6 145784700 145787000 High Signal Region +6 145931800 145933900 Low Mappability +6 146018900 146080500 High Signal Region +6 147077200 147079900 High Signal Region +6 147459800 147465000 Low Mappability +6 147549600 147555000 Low Mappability +6 147881900 147908400 High Signal Region +6 148013100 148038400 High Signal Region +6 148121800 148124500 High Signal Region +6 148635700 148640300 Low Mappability +6 148662900 148665000 Low Mappability +6 149585500 149736500 High Signal Region +7 4558200 4594300 High Signal Region +7 4648600 4651500 High Signal Region +7 5153200 5244900 High Signal Region +7 5588700 5591600 High Signal Region +7 6050500 6056000 High Signal Region +7 6249400 6251400 High Signal Region +7 6590800 6597400 High Signal Region +7 7209500 7231000 High Signal Region +7 7273500 7327400 High Signal Region +7 7527500 7533900 High Signal Region +7 7556800 8278400 High Signal Region +7 8490800 9968800 High Signal Region +7 9992100 9998900 High Signal Region +7 10314900 10320900 High Signal Region +7 11097700 11123700 High Signal Region +7 11271100 11438600 High Signal Region +7 12009500 12084600 High Signal Region +7 12379600 12385400 High Signal Region +7 12526600 12548100 High Signal Region +7 13112300 13118100 High Signal Region +7 13591200 13620200 High 
Signal Region +7 14051300 14055900 High Signal Region +7 14767700 14823800 High Signal Region +7 14930100 15023000 High Signal Region +7 15128800 15623000 High Signal Region +7 16661400 16667800 High Signal Region +7 17112200 17123900 High Signal Region +7 17215800 17323400 High Signal Region +7 17800000 17806700 High Signal Region +7 17829700 17862600 High Signal Region +7 18487100 18493200 High Signal Region +7 19032600 19034500 High Signal Region +7 20799700 21103900 High Signal Region +7 21135700 23286800 High Signal Region +7 23494700 23503600 High Signal Region +7 24026200 24031700 High Signal Region +7 24103800 24108200 High Signal Region +7 24729400 24731300 High Signal Region +7 26022700 26066900 High Signal Region +7 26779000 26780900 High Signal Region +7 27082300 27098300 High Signal Region +7 27712800 27732500 High Signal Region +7 31365500 31387000 High Signal Region +7 31818200 31876700 High Signal Region +7 31934500 32043100 High Signal Region +7 32215700 32235200 High Signal Region +7 32629300 33098700 High Signal Region +7 33124200 33198000 High Signal Region +7 33949500 34004800 High Signal Region +7 34957200 34959100 High Signal Region +7 38396600 38787200 High Signal Region +7 38839800 39181000 High Signal Region +7 39227600 39404100 High Signal Region +7 39874600 39875900 High Signal Region +7 41791900 41851900 High Signal Region +7 43123800 43220300 High Signal Region +7 44737800 44739900 High Signal Region +7 47175100 47188600 High Signal Region +7 47414400 47519700 High Signal Region +7 48102600 48135800 High Signal Region +7 50940400 50986800 High Signal Region +7 51329800 51335900 High Signal Region +7 51800300 51812600 High Signal Region +7 51909200 51911200 High Signal Region +7 52095700 52104400 High Signal Region +7 52283300 52288900 High Signal Region +7 53677100 53683100 High Signal Region +7 53977800 54027400 High Signal Region +7 54336000 54351800 High Signal Region +7 54808900 54810100 High Signal Region +7 54923000 54971200 High 
Signal Region +7 55011500 55016500 High Signal Region +7 55080000 55086300 High Signal Region +7 55115400 55141000 High Signal Region +7 55657400 55667100 High Signal Region +7 56062300 56081700 High Signal Region +7 56160100 56163400 Low Mappability +7 56660300 56693600 High Signal Region +7 57367200 57374700 High Signal Region +7 58040300 58077100 High Signal Region +7 58161700 58177900 High Signal Region +7 59673100 59910900 High Signal Region +7 60209400 60215600 High Signal Region +7 60676300 60682800 High Signal Region +7 61320100 61395400 High Signal Region +7 62135200 62137500 High Signal Region +7 62651400 62693400 High Signal Region +7 63272500 63287100 High Signal Region +7 63431300 63432400 High Signal Region +7 63803700 63810800 High Signal Region +7 63908200 63910100 High Signal Region +7 64072600 64134600 High Signal Region +7 64465300 64496400 High Signal Region +7 64601000 64617900 High Signal Region +7 65187500 65198300 High Signal Region +7 68534700 68537900 High Signal Region +7 68775900 68778100 High Signal Region +7 69086500 69102900 High Signal Region +7 69785300 69792200 High Signal Region +7 70757900 70765000 High Signal Region +7 71971100 71984500 High Signal Region +7 72317400 72337900 High Signal Region +7 72630000 72679900 High Signal Region +7 73212000 73218800 High Signal Region +7 73671700 73680000 High Signal Region +7 75003200 75007700 High Signal Region +7 76067800 76079300 High Signal Region +7 76556000 76573000 High Signal Region +7 76703900 76708400 High Signal Region +7 77520600 77526000 High Signal Region +7 78416900 78422400 High Signal Region +7 80708100 80730100 Low Mappability +7 80787500 80813800 High Signal Region +7 81756100 81760500 High Signal Region +7 82770300 82772800 High Signal Region +7 85017700 85023600 High Signal Region +7 85757200 85768800 High Signal Region +7 86118700 86125800 High Signal Region +7 86497400 86503500 High Signal Region +7 86532600 86534000 High Signal Region +7 86805600 86807500 High 
Signal Region +7 87989300 88000600 High Signal Region +7 89683300 89704600 High Signal Region +7 90087300 90089400 High Signal Region +7 90441000 90442900 High Signal Region +7 91741500 91747500 High Signal Region +7 93259400 93278100 High Signal Region +7 93699600 93717500 High Signal Region +7 93744000 93766100 High Signal Region +7 93969600 93973700 High Signal Region +7 94293000 94299300 High Signal Region +7 94822500 94848800 High Signal Region +7 95177200 95193600 High Signal Region +7 95527400 95533200 High Signal Region +7 97795000 97797300 High Signal Region +7 103100800 103115000 High Signal Region +7 103195500 103202100 High Signal Region +7 103483000 103487500 High Signal Region +7 104097400 104126600 High Signal Region +7 104476800 104477900 High Signal Region +7 104770000 104801200 High Signal Region +7 105830300 106325300 High Signal Region +7 106979000 106984900 High Signal Region +7 107245200 107271400 High Signal Region +7 108780600 108789800 High Signal Region +7 110058500 110061600 High Signal Region +7 111228400 111230600 High Signal Region +7 112636600 112639800 High Signal Region +7 116432200 116453400 High Signal Region +7 119739900 119742100 High Signal Region +7 119795700 119797700 High Signal Region +7 119998800 120015100 High Signal Region +7 124522300 124528300 High Signal Region +7 125009800 125016600 High Signal Region +7 128171000 128189300 High Signal Region +7 130054200 130055700 High Signal Region +7 130591400 130596900 High Signal Region +7 130833500 130835600 High Signal Region +7 134100500 134107200 High Signal Region +7 134329200 134335200 High Signal Region +7 135006900 135008800 High Signal Region +7 135337800 135340900 High Signal Region +7 138590500 138594500 High Signal Region +7 139447400 139448900 High Signal Region +7 140288200 140307300 High Signal Region +7 140551100 140558800 High Signal Region +7 140580500 140585700 High Signal Region +7 141637000 141640700 High Signal Region +7 142828900 142845000 High Signal 
Region +7 145340000 145441400 High Signal Region +8 3753500 3779100 High Signal Region +8 14305800 14308200 High Signal Region +8 15508900 15521000 High Signal Region +8 19671800 19937800 High Signal Region +8 19960800 20868000 High Signal Region +8 20945500 20963700 High Signal Region +8 23085600 23096700 High Signal Region +8 35134000 35135900 High Signal Region +8 39132400 39157700 High Signal Region +8 55111200 55397300 High Signal Region +8 69416700 69597900 High Signal Region +8 71432100 71434100 High Signal Region +8 71796100 71863300 High Signal Region +8 73318700 73320700 High Signal Region +8 83755800 83757900 High Signal Region +8 114436000 114437900 High Signal Region +8 123537300 123638300 High Signal Region +8 125778100 125780100 High Signal Region +8 129272900 129401200 High Signal Region +9 0 3053100 High Signal Region +9 3240200 3259800 High Signal Region +9 3302000 3336000 High Signal Region +9 3461000 3466600 Low Mappability +9 3627400 3699700 Low Mappability +9 3802100 3806700 High Signal Region +9 3881100 3887600 High Signal Region +9 4238700 4245700 Low Mappability +9 4375700 4406800 High Signal Region +9 5248000 5254100 High Signal Region +9 5276200 5284600 Low Mappability +9 6431500 6467200 High Signal Region +9 6742900 6806200 Low Mappability +9 7294600 7300700 High Signal Region +9 7370900 7412600 Low Mappability +9 7520900 7525900 High Signal Region +9 8029400 8067100 Low Mappability +9 8275900 8292300 Low Mappability +9 8447200 8483700 High Signal Region +9 8628200 8633700 Low Mappability +9 8859900 8865500 High Signal Region +9 9598800 9626700 High Signal Region +9 9846900 9891900 Low Mappability +9 10193200 10198800 Low Mappability +9 10701300 10707400 High Signal Region +9 10964200 10970600 High Signal Region +9 11341900 11345100 High Signal Region +9 11722300 11747100 High Signal Region +9 11792800 11798400 Low Mappability +9 11821400 11845400 High Signal Region +9 12282000 12287500 High Signal Region +9 12364900 12379600 High Signal 
Region +9 12469100 12472900 Low Mappability +9 12768200 12773800 High Signal Region +9 12840100 12851100 High Signal Region +9 12917600 12922300 High Signal Region +9 12998400 13045600 Low Mappability +9 13324200 13426100 High Signal Region +9 13533500 13535700 High Signal Region +9 13994600 13996700 High Signal Region +9 14410500 14429300 Low Mappability +9 15123900 15136900 High Signal Region +9 16607400 16691900 Low Mappability +9 16833700 16861000 High Signal Region +9 16939400 16950500 Low Mappability +9 17059000 17088000 High Signal Region +9 17197900 17207600 High Signal Region +9 17261400 17263400 Low Mappability +9 17387200 17406200 High Signal Region +9 17525800 17527700 High Signal Region +9 17632000 17636100 High Signal Region +9 17916200 17919600 High Signal Region +9 18010000 18015600 High Signal Region +9 18117000 18162200 Low Mappability +9 18235100 18270100 High Signal Region +9 18893800 18900100 High Signal Region +9 18980400 18994100 High Signal Region +9 19268700 19294700 High Signal Region +9 19595400 19638400 High Signal Region +9 19720500 19725500 Low Mappability +9 19901400 19906100 High Signal Region +9 20183600 20196700 Low Mappability +9 20322100 20407900 High Signal Region +9 21879200 21928200 High Signal Region +9 22116600 22191600 High Signal Region +9 22699500 22731700 High Signal Region +9 22892700 22926500 Low Mappability +9 22947900 22956900 High Signal Region +9 23508700 23526900 High Signal Region +9 24523300 24576000 High Signal Region +9 25596700 25602700 High Signal Region +9 25842900 25863600 High Signal Region +9 26096100 26103500 Low Mappability +9 26700800 26708000 High Signal Region +9 26904600 26911000 High Signal Region +9 27212200 27232300 High Signal Region +9 27974400 27981700 High Signal Region +9 29739800 29741800 Low Mappability +9 30604400 30606300 Low Mappability +9 30641800 30696800 Low Mappability +9 30929800 30931100 High Signal Region +9 32059200 32083600 Low Mappability +9 32353900 32356500 High Signal 
Region +9 32839200 32846600 Low Mappability +9 32888700 32896000 Low Mappability +9 32953000 32958100 Low Mappability +9 33127100 33161100 Low Mappability +9 33392400 33402700 High Signal Region +9 33949500 33961900 Low Mappability +9 35071200 35091800 High Signal Region +9 35304300 35306500 High Signal Region +9 36235800 36241900 High Signal Region +9 36555000 36569100 High Signal Region +9 37331400 37349500 Low Mappability +9 37441700 37448100 High Signal Region +9 39330900 39359100 High Signal Region +9 39444100 39449600 High Signal Region +9 39835400 39899000 Low Mappability +9 44214200 44235400 Low Mappability +9 44305700 44408400 Low Mappability +9 47957400 47959300 High Signal Region +9 50082000 50088400 High Signal Region +9 51667400 51673700 High Signal Region +9 52601800 52617200 High Signal Region +9 52749000 52756100 High Signal Region +9 53089800 53107000 High Signal Region +9 53804100 53805400 High Signal Region +9 54916200 54928900 High Signal Region +9 55070600 55078000 Low Mappability +9 55150300 55152300 High Signal Region +9 55936900 55972500 High Signal Region +9 56222700 56224800 High Signal Region +9 56259500 56284300 High Signal Region +9 56991700 56993700 Low Mappability +9 57408000 57434800 High Signal Region +9 58766500 58785800 High Signal Region +9 59046200 59052700 Low Mappability +9 59103800 59125000 High Signal Region +9 60538500 60551200 High Signal Region +9 60726100 60733500 High Signal Region +9 61721500 61723400 High Signal Region +9 62811600 62868300 Low Mappability +9 64236700 64255000 Low Mappability +9 64410400 64417700 Low Mappability +9 65292600 65314200 High Signal Region +9 65867400 65909400 High Signal Region +9 67198600 67205000 Low Mappability +9 68451200 68461200 High Signal Region +9 68527100 68534600 High Signal Region +9 71080600 71120800 Low Mappability +9 71421100 71434600 High Signal Region +9 72895800 72900800 Low Mappability +9 72957900 72985700 Low Mappability +9 73285500 73311300 High Signal Region +9 
73396800 73412500 Low Mappability +9 73861400 73863500 Low Mappability +9 73935600 73946700 High Signal Region +9 74615600 74641300 Low Mappability +9 74664800 74690900 High Signal Region +9 74768600 74774600 High Signal Region +9 75709200 75736000 Low Mappability +9 77079900 77082800 High Signal Region +9 77152800 77158800 High Signal Region +9 77972400 77974300 High Signal Region +9 78175200 78182700 Low Mappability +9 78230500 78296900 High Signal Region +9 78554700 78589200 Low Mappability +9 78755200 78757800 High Signal Region +9 78819200 78830500 Low Mappability +9 80234500 80235700 High Signal Region +9 80660700 80665600 High Signal Region +9 81251500 81303200 High Signal Region +9 81614000 81620700 High Signal Region +9 81906400 81937200 High Signal Region +9 83278800 83288100 High Signal Region +9 83558300 83560200 High Signal Region +9 83935500 83950000 High Signal Region +9 83992400 83998900 High Signal Region +9 84211900 84226800 High Signal Region +9 85898900 85918900 High Signal Region +9 86062600 86070000 Low Mappability +9 86120100 86137500 High Signal Region +9 86458200 86463100 High Signal Region +9 87098700 87112200 High Signal Region +9 87481400 87500900 High Signal Region +9 87576700 87594000 High Signal Region +9 87945600 87952400 High Signal Region +9 88011000 88013900 High Signal Region +9 88592100 88829800 High Signal Region +9 89031300 89075400 Low Mappability +9 89321400 89361800 High Signal Region +9 90147100 90149100 High Signal Region +9 90285200 90395300 High Signal Region +9 90455400 90456800 High Signal Region +9 90808100 90821900 Low Mappability +9 90857200 90876300 Low Mappability +9 91222100 91268200 High Signal Region +9 91598800 91647400 High Signal Region +9 92032700 92035300 High Signal Region +9 92075300 92113200 High Signal Region +9 92239700 92242900 High Signal Region +9 92624800 92654500 High Signal Region +9 93013300 93035300 High Signal Region +9 93286500 93296500 High Signal Region +9 93360800 93442100 Low 
Mappability +9 93618000 93668500 Low Mappability +9 94821700 94828100 Low Mappability +9 95245800 95299600 High Signal Region +9 95425000 95426900 High Signal Region +9 95829400 95831300 High Signal Region +9 96104900 96111400 Low Mappability +9 96852000 96854100 High Signal Region +9 98343300 98345700 Low Mappability +9 98451100 98458500 Low Mappability +9 98747700 98771800 Low Mappability +9 99266600 99273100 Low Mappability +9 99735800 99763300 High Signal Region +9 99922800 99937600 High Signal Region +9 100073800 100080700 High Signal Region +9 100516900 100519200 High Signal Region +9 100920400 100922300 High Signal Region +9 101085500 101110600 High Signal Region +9 101292500 101326600 Low Mappability +9 102277400 102283800 Low Mappability +9 102764700 102766800 Low Mappability +9 102812800 102815000 High Signal Region +9 102956300 102970000 Low Mappability +9 103296200 103305600 High Signal Region +9 103352800 103367100 Low Mappability +9 103988500 103990400 High Signal Region +9 104524500 104525700 High Signal Region +9 104848800 104850600 High Signal Region +9 105086200 105119300 High Signal Region +9 105818400 105820400 High Signal Region +9 107207900 107219900 High Signal Region +9 109036600 109083500 High Signal Region +9 109245000 109252200 High Signal Region +9 109272900 109374100 High Signal Region +9 110280300 110306700 High Signal Region +9 110443100 110455100 High Signal Region +9 110970300 110976000 High Signal Region +9 111661900 111668700 High Signal Region +9 112330100 112336900 High Signal Region +9 112956300 112990600 High Signal Region +9 113260500 113262400 High Signal Region +9 113535400 113541300 High Signal Region +9 114101400 114149500 Low Mappability +9 114172400 114322200 High Signal Region +9 114970100 114974700 Low Mappability +9 115077900 115085200 Low Mappability +9 115349900 115351800 High Signal Region +9 115496100 115498100 Low Mappability +9 116981500 116988600 High Signal Region +9 118088300 118151400 High Signal Region +9 
118674000 118675900 High Signal Region +9 119861200 119895000 Low Mappability +9 120265300 120288700 High Signal Region +9 120633900 120641200 Low Mappability +9 121024600 121042700 Low Mappability +9 121178300 121184500 High Signal Region +9 121220100 121247600 High Signal Region +9 121313700 121385800 Low Mappability +9 121406300 121418400 Low Mappability +9 122161300 122163200 High Signal Region +9 122277700 122334500 Low Mappability +9 122401500 122441900 Low Mappability +9 122660600 122667200 Low Mappability +9 122703400 122730400 Low Mappability +9 122903900 122906600 High Signal Region +9 123190700 123197500 Low Mappability +9 123460900 123463100 High Signal Region +9 123742600 123753500 Low Mappability +9 123851700 123929500 High Signal Region +9 123966100 124009300 High Signal Region +9 124161300 124282600 High Signal Region +9 124494100 124595100 High Signal Region +X 3286700 4493800 High Signal Region +X 4524500 5370300 High Signal Region +X 8346400 8348200 High Signal Region +X 8550300 8557800 High Signal Region +X 8818900 8824300 High Signal Region +X 9345800 9395300 High Signal Region +X 9500200 9595700 High Signal Region +X 14739100 14741000 High Signal Region +X 21466500 21472700 High Signal Region +X 21846900 21896100 High Signal Region +X 26459300 26505100 High Signal Region +X 26907100 29639200 High Signal Region +X 29660500 35508900 High Signal Region +X 37612500 37669100 High Signal Region +X 39073800 39075700 High Signal Region +X 41482500 41489500 High Signal Region +X 42676200 42688100 High Signal Region +X 44239900 44293300 High Signal Region +X 44732600 44738600 High Signal Region +X 48699000 48771100 High Signal Region +X 54269300 55286000 High Signal Region +X 55716700 55807400 High Signal Region +X 58475000 58478700 High Signal Region +X 59773000 59796900 High Signal Region +X 61868200 61874000 High Signal Region +X 62065700 62084900 High Signal Region +X 63509200 63515900 High Signal Region +X 63634600 63640900 High Signal Region +X 
64125800 64132200 High Signal Region +X 65962800 65999900 High Signal Region +X 66067900 66084000 High Signal Region +X 66143100 66145700 High Signal Region +X 66316400 66356900 High Signal Region +X 67662500 67708500 High Signal Region +X 70055300 70072000 High Signal Region +X 72800000 72818700 High Signal Region +X 75582400 75709000 High Signal Region +X 76589100 76607100 High Signal Region +X 79135300 79150400 High Signal Region +X 81153100 81154600 High Signal Region +X 82475800 82481000 High Signal Region +X 84290800 84296100 High Signal Region +X 87222400 87262500 High Signal Region +X 87838600 87845200 High Signal Region +X 88230200 88246900 High Signal Region +X 89182800 89232600 High Signal Region +X 89914800 89916600 High Signal Region +X 90308600 90336600 High Signal Region +X 92765200 92767900 High Signal Region +X 94795400 94980600 High Signal Region +X 95265900 95291700 High Signal Region +X 97728000 97734800 High Signal Region +X 98008600 98033000 High Signal Region +X 98585800 98612400 High Signal Region +X 101111300 101113600 High Signal Region +X 102560800 102585100 High Signal Region +X 103455000 103457100 High Signal Region +X 104959400 104966000 High Signal Region +X 105523800 105529900 High Signal Region +X 108202600 108222500 High Signal Region +X 108567500 108585200 High Signal Region +X 109871000 109876200 High Signal Region +X 110976700 110997000 High Signal Region +X 112369800 112402300 High Signal Region +X 114412500 114421300 High Signal Region +X 118100900 118102900 High Signal Region +X 118901200 118905100 Low Mappability +X 119137300 119142400 High Signal Region +X 119247400 119264800 High Signal Region +X 119335000 119339300 High Signal Region +X 120351000 120355400 High Signal Region +X 121511200 121514500 High Signal Region +X 122901700 122908000 High Signal Region +X 123686000 124042000 High Signal Region +X 126695300 126778800 High Signal Region +X 127935800 127964600 High Signal Region +X 128512700 128514400 High Signal Region 
+X 128959800 128965900 High Signal Region +X 129055600 129072400 High Signal Region +X 129429300 129448000 High Signal Region +X 130696000 130702200 High Signal Region +X 131802300 131832800 High Signal Region +X 132024200 132026400 High Signal Region +X 132158700 132160800 High Signal Region +X 134149100 134151200 High Signal Region +X 135040100 135056700 High Signal Region +X 136459400 136503800 High Signal Region +X 136897900 136925800 High Signal Region +X 138302200 138324600 High Signal Region +X 143471300 143484000 High Signal Region +X 144699500 144723900 High Signal Region +X 145709800 145739800 High Signal Region +X 146582500 146588700 High Signal Region +X 146758100 146761900 High Signal Region +X 147619400 147620700 High Signal Region +X 153994800 154073200 High Signal Region +X 154242800 154244800 High Signal Region +X 158443900 158460500 High Signal Region +X 159120000 159154900 High Signal Region +X 161179200 161185600 High Signal Region +X 162381600 162384600 High Signal Region +X 164615100 164622200 High Signal Region +X 166063200 166084500 High Signal Region +X 167213400 167220200 High Signal Region +X 167246000 167252200 High Signal Region +X 169968900 171031200 High Signal Region +Y 0 806800 High Signal Region +Y 924800 1005300 High Signal Region +Y 1276400 1813700 High Signal Region +Y 1834500 1940700 High Signal Region +Y 1973200 1996400 High Signal Region +Y 2017200 2068000 Low Mappability +Y 2104700 2210800 High Signal Region +Y 2280300 2288900 Low Mappability +Y 2471300 3819300 High Signal Region +Y 3880300 4177100 High Signal Region +Y 4249500 4289100 High Signal Region +Y 4432000 4956300 High Signal Region +Y 5062400 5227700 High Signal Region +Y 6376700 6382700 High Signal Region +Y 6530200 6663200 High Signal Region +Y 6760200 6835800 High Signal Region +Y 6984100 8985400 High Signal Region +Y 10638500 41003800 High Signal Region +Y 41159200 91744600 High Signal Region diff --git a/assets/blacklists/v2.0/ce10-blacklist.v2.bed 
b/assets/blacklists/v2.0/ce10-blacklist.v2.bed new file mode 100644 index 0000000000000000000000000000000000000000..99566cec99db29fb13bdd6a756bacd149644705e --- /dev/null +++ b/assets/blacklists/v2.0/ce10-blacklist.v2.bed @@ -0,0 +1,100 @@ +chrIII 449300 453600 High Signal Region +chrIII 930200 932600 High Signal Region +chrIII 1016500 1021400 Low Mappability +chrIII 1293500 1303900 High Signal Region +chrIII 5352200 5359400 High Signal Region +chrIII 7404700 7452200 High Signal Region +chrIII 7593800 7603300 High Signal Region +chrIII 8861100 8864700 High Signal Region +chrIII 10215600 10228400 High Signal Region +chrIII 13775100 14905900 High Signal Region +chrII 0 1300 High Signal Region +chrII 2569700 2571600 High Signal Region +chrII 3464700 3469600 High Signal Region +chrII 3795600 3798100 High Signal Region +chrII 3993700 3995700 High Signal Region +chrII 4640300 4645900 High Signal Region +chrII 5143800 5147500 High Signal Region +chrII 6504400 6509800 High Signal Region +chrII 8286700 8293600 High Signal Region +chrII 8975300 8977400 High Signal Region +chrII 9631400 9633700 High Signal Region +chrII 10335100 10339700 High Signal Region +chrII 11527100 11530900 High Signal Region +chrII 12842800 12846900 Low Mappability +chrII 13597700 13600700 Low Mappability +chrII 13983900 13987500 Low Mappability +chrII 14323700 14340100 High Signal Region +chrII 14992100 14994300 High Signal Region +chrI 669000 679400 High Signal Region +chrI 931700 935600 High Signal Region +chrI 3170700 3173300 High Signal Region +chrI 3989200 3991600 High Signal Region +chrI 4535300 4549400 High Signal Region +chrI 5151000 5154700 High Signal Region +chrI 10203200 10220100 High Signal Region +chrI 10265700 10277700 High Signal Region +chrI 10945100 10953900 High Signal Region +chrI 15059400 15373800 High Signal Region +chrIV 2821200 2831500 High Signal Region +chrIV 3205500 3210300 High Signal Region +chrIV 3365800 3368900 Low Mappability +chrIV 4415600 4422900 High Signal Region 
+chrIV 6357100 6361700 High Signal Region +chrIV 6468100 6470600 Low Mappability +chrIV 6682800 6704100 High Signal Region +chrIV 6709900 6734000 High Signal Region +chrIV 7590900 7600100 High Signal Region +chrIV 8563000 8582700 High Signal Region +chrIV 9045100 9049600 High Signal Region +chrIV 10942600 10951900 High Signal Region +chrIV 11070000 11076700 High Signal Region +chrIV 12313600 12325600 High Signal Region +chrIV 12637100 12639400 Low Mappability +chrIV 13359700 13362800 High Signal Region +chrIV 13548500 13550400 High Signal Region +chrIV 14056400 14059900 High Signal Region +chrIV 14775600 14782300 Low Mappability +chrIV 15408400 15424900 Low Mappability +chrV 264100 268100 High Signal Region +chrV 1103700 1106100 Low Mappability +chrV 1637000 1639900 High Signal Region +chrV 3098000 3100300 High Signal Region +chrV 3434000 3441600 High Signal Region +chrV 5072400 5084800 High Signal Region +chrV 5278300 5286700 High Signal Region +chrV 6171000 6183700 High Signal Region +chrV 6936700 6943800 High Signal Region +chrV 7442400 7445100 High Signal Region +chrV 7912200 7925700 High Signal Region +chrV 7988100 7993900 High Signal Region +chrV 8698600 8715600 High Signal Region +chrV 9423500 9436100 High Signal Region +chrV 10604900 10613200 High Signal Region +chrV 12509100 12511300 High Signal Region +chrV 14765800 14770600 High Signal Region +chrV 15425800 15436300 High Signal Region +chrV 16706300 16710300 High Signal Region +chrV 17114500 17133000 High Signal Region +chrV 17307700 17312400 High Signal Region +chrV 17383300 17395200 Low Mappability +chrV 18399500 18402500 High Signal Region +chrX 108300 115100 High Signal Region +chrX 273800 296200 High Signal Region +chrX 1635300 1645200 High Signal Region +chrX 1747400 1755900 High Signal Region +chrX 3006400 3008800 High Signal Region +chrX 4025200 4056900 High Signal Region +chrX 5045000 5058100 High Signal Region +chrX 7076500 7081700 High Signal Region +chrX 9184100 9189500 High Signal Region 
+chrX 9437700 9440000 High Signal Region +chrX 10360200 10369900 High Signal Region +chrX 11784800 11790300 High Signal Region +chrX 11885600 11889600 High Signal Region +chrX 12275900 12280400 High Signal Region +chrX 14384000 14390400 High Signal Region +chrX 14907200 14910200 High Signal Region +chrX 15226100 15229500 High Signal Region +chrX 15806300 15812000 Low Mappability +chrX 16757900 16761600 High Signal Region diff --git a/assets/blacklists/v2.0/ce11-blacklist.v2.bed b/assets/blacklists/v2.0/ce11-blacklist.v2.bed new file mode 100644 index 0000000000000000000000000000000000000000..66f9d730b69aa9b96e181947461e3c356ebe57de --- /dev/null +++ b/assets/blacklists/v2.0/ce11-blacklist.v2.bed @@ -0,0 +1,97 @@ +chrIII 449400 453600 High Signal Region +chrIII 930300 932600 High Signal Region +chrIII 1016500 1021500 Low Mappability +chrIII 1293500 1303900 High Signal Region +chrIII 5352200 5359400 High Signal Region +chrIII 7404700 7452200 High Signal Region +chrIII 7593800 7598100 High Signal Region +chrIII 8861200 8864800 High Signal Region +chrIII 10216000 10228500 High Signal Region +chrIII 13775200 13783800 High Signal Region +chrII 0 1300 High Signal Region +chrII 2569700 2571800 High Signal Region +chrII 3464700 3469600 High Signal Region +chrII 3795600 3798100 High Signal Region +chrII 3993700 3995700 High Signal Region +chrII 4640300 4645900 High Signal Region +chrII 5143800 5147500 High Signal Region +chrII 6504500 6509800 High Signal Region +chrII 8286700 8293600 High Signal Region +chrII 8975300 8977500 High Signal Region +chrII 9631500 9633800 High Signal Region +chrII 10335100 10339700 High Signal Region +chrII 11527200 11530900 High Signal Region +chrII 12842900 12846900 Low Mappability +chrII 13597800 13600800 High Signal Region +chrII 13984000 13987600 Low Mappability +chrII 14323700 14340100 High Signal Region +chrII 14992200 14994400 High Signal Region +chrI 669000 679200 High Signal Region +chrI 931700 935600 High Signal Region +chrI 3170700 
3173300 High Signal Region +chrI 3989200 3991600 High Signal Region +chrI 4535300 4549400 High Signal Region +chrI 5151000 5154700 High Signal Region +chrI 10203200 10220100 High Signal Region +chrI 10265700 10277700 High Signal Region +chrI 10945100 10953900 High Signal Region +chrI 15059400 15072400 High Signal Region +chrIV 2821100 2831500 High Signal Region +chrIV 3205500 3210300 High Signal Region +chrIV 3365800 3368800 Low Mappability +chrIV 4415600 4422800 High Signal Region +chrIV 6357100 6361700 High Signal Region +chrIV 6468100 6470600 Low Mappability +chrIV 6682700 6704200 High Signal Region +chrIV 7590800 7600100 High Signal Region +chrIV 8563000 8582600 High Signal Region +chrIV 9045100 9049600 High Signal Region +chrIV 10942600 10951900 High Signal Region +chrIV 11070000 11076700 High Signal Region +chrIV 12023000 12025800 High Signal Region +chrIV 12313600 12325600 High Signal Region +chrIV 12637100 12639500 Low Mappability +chrIV 13359700 13362800 High Signal Region +chrIV 13548500 13550500 High Signal Region +chrIV 14056400 14059900 High Signal Region +chrIV 14775600 14782300 Low Mappability +chrIV 15076800 15082900 High Signal Region +chrIV 15408500 15424900 Low Mappability +chrV 264100 268100 High Signal Region +chrV 1103700 1106100 Low Mappability +chrV 1637000 1639900 High Signal Region +chrV 3098000 3100400 High Signal Region +chrV 3434100 3441600 High Signal Region +chrV 5278300 5286800 High Signal Region +chrV 6171000 6183700 High Signal Region +chrV 6936700 6943900 High Signal Region +chrV 7912300 7925700 High Signal Region +chrV 7988100 7993700 High Signal Region +chrV 8698700 8715700 High Signal Region +chrV 9423500 9436100 High Signal Region +chrV 10604900 10613200 High Signal Region +chrV 12509100 12511300 High Signal Region +chrV 14765800 14770600 High Signal Region +chrV 16706300 16710300 High Signal Region +chrV 17114600 17133000 High Signal Region +chrV 17307800 17312400 High Signal Region +chrV 17383300 17395200 High Signal Region 
+chrV 18399500 18402500 High Signal Region +chrX 108300 115100 High Signal Region +chrX 273800 296200 High Signal Region +chrX 1635300 1645200 High Signal Region +chrX 1747400 1755900 High Signal Region +chrX 3006400 3008800 High Signal Region +chrX 4025200 4056900 High Signal Region +chrX 5045000 5058200 High Signal Region +chrX 7076600 7081600 High Signal Region +chrX 9184200 9189600 High Signal Region +chrX 9437700 9440000 High Signal Region +chrX 10360300 10370000 High Signal Region +chrX 11784800 11790400 High Signal Region +chrX 11885700 11889700 High Signal Region +chrX 12275900 12280400 High Signal Region +chrX 14907200 14910300 High Signal Region +chrX 15226200 15229600 High Signal Region +chrX 15806400 15812100 Low Mappability +chrX 16758000 16761600 High Signal Region diff --git a/assets/blacklists/v2.0/dm3-blacklist.v2.bed b/assets/blacklists/v2.0/dm3-blacklist.v2.bed new file mode 100644 index 0000000000000000000000000000000000000000..45f1a2017f3a2018c765f7fb30de63b628bbc64b --- /dev/null +++ b/assets/blacklists/v2.0/dm3-blacklist.v2.bed @@ -0,0 +1,271 @@ +chr2LHet 43400 150800 High Signal Region +chr2LHet 350800 368800 Low Mappability +chr2L 47200 52500 High Signal Region +chr2L 66000 74500 High Signal Region +chr2L 154400 167500 High Signal Region +chr2L 221100 223300 High Signal Region +chr2L 471400 491700 High Signal Region +chr2L 2191400 2201000 High Signal Region +chr2L 2749200 2756400 High Signal Region +chr2L 2884100 2889800 High Signal Region +chr2L 3161500 3164300 High Signal Region +chr2L 4937900 4941000 High Signal Region +chr2L 5206500 5210500 High Signal Region +chr2L 5943200 5949200 High Signal Region +chr2L 5976600 5987500 High Signal Region +chr2L 6991400 6998500 Low Mappability +chr2L 7343400 7350800 High Signal Region +chr2L 9898700 9903100 High Signal Region +chr2L 9973800 9980900 Low Mappability +chr2L 10333600 10335600 High Signal Region +chr2L 11992000 12013100 High Signal Region +chr2L 12558300 12565400 Low Mappability +chr2L 
13522100 13527800 Low Mappability +chr2L 14489600 14491600 High Signal Region +chr2L 16267500 16271800 High Signal Region +chr2L 16283800 16289200 High Signal Region +chr2L 16512100 16526900 Low Mappability +chr2L 18942900 18945500 High Signal Region +chr2L 19570700 19588100 High Signal Region +chr2L 20647200 20649100 High Signal Region +chr2L 21019900 21024300 Low Mappability +chr2L 21236700 21238800 High Signal Region +chr2L 21416300 21544000 High Signal Region +chr2L 22378300 22389200 High Signal Region +chr2L 22657900 22670900 High Signal Region +chr2RHet 674900 692100 Low Mappability +chr2RHet 1142300 1263300 Low Mappability +chr2RHet 1422500 1435900 High Signal Region +chr2RHet 2823000 2830800 High Signal Region +chr2RHet 2924700 2989000 High Signal Region +chr2RHet 3179900 3183800 High Signal Region +chr2RHet 3269500 3288700 Low Mappability +chr2R 101000 118400 High Signal Region +chr2R 201300 207900 High Signal Region +chr2R 934100 944600 High Signal Region +chr2R 992900 997600 High Signal Region +chr2R 2217200 2303300 High Signal Region +chr2R 2548500 2551600 High Signal Region +chr2R 3123000 3137600 High Signal Region +chr2R 3322500 3326700 Low Mappability +chr2R 3495700 3501600 Low Mappability +chr2R 3692900 3720700 High Signal Region +chr2R 3902800 3905600 Low Mappability +chr2R 4552800 4555900 High Signal Region +chr2R 5367700 5378100 Low Mappability +chr2R 5430900 5442200 High Signal Region +chr2R 5615200 5621600 High Signal Region +chr2R 6311300 6318700 High Signal Region +chr2R 6364800 6368500 High Signal Region +chr2R 6420800 6430300 High Signal Region +chr2R 6835600 6843000 High Signal Region +chr2R 7538100 7540600 High Signal Region +chr2R 8473600 8481800 High Signal Region +chr2R 8706700 8712000 Low Mappability +chr2R 8867500 8873800 High Signal Region +chr2R 8883000 8885600 High Signal Region +chr2R 9981000 9997000 Low Mappability +chr2R 10076400 10082900 High Signal Region +chr2R 10776800 10785300 Low Mappability +chr2R 11985200 11992700 High 
Signal Region +chr2R 13034800 13040100 Low Mappability +chr2R 13344900 13346800 High Signal Region +chr2R 13569800 13571700 High Signal Region +chr2R 14243000 14260400 High Signal Region +chr2R 14463000 14469700 Low Mappability +chr2R 14745600 14747800 Low Mappability +chr2R 15616900 15653700 High Signal Region +chr2R 15663700 15667500 Low Mappability +chr2R 16667200 16675500 Low Mappability +chr2R 16882700 16885400 High Signal Region +chr2R 17038700 17049200 High Signal Region +chr2R 17532800 17535100 High Signal Region +chr2R 18413300 18417100 Low Mappability +chr2R 19865400 19867600 High Signal Region +chr2R 20897600 20900500 High Signal Region +chr2R 21144900 21146700 Low Mappability +chr3LHet 1345100 1347600 High Signal Region +chr3LHet 2162700 2187800 High Signal Region +chr3LHet 2202300 2209300 Low Mappability +chr3LHet 2244300 2253000 Low Mappability +chr3L 130400 133100 High Signal Region +chr3L 223200 226000 High Signal Region +chr3L 270300 272600 High Signal Region +chr3L 318400 320800 High Signal Region +chr3L 539500 544400 High Signal Region +chr3L 748200 750200 High Signal Region +chr3L 1334000 1336300 High Signal Region +chr3L 1545400 1547500 High Signal Region +chr3L 1567700 1570700 High Signal Region +chr3L 1793900 1796200 High Signal Region +chr3L 2063400 2069800 High Signal Region +chr3L 2585300 2590700 High Signal Region +chr3L 3059100 3071600 High Signal Region +chr3L 3147900 3150300 High Signal Region +chr3L 3218400 3227200 High Signal Region +chr3L 3896400 3905700 High Signal Region +chr3L 4133100 4139800 High Signal Region +chr3L 5104000 5117900 Low Mappability +chr3L 7349700 7355100 High Signal Region +chr3L 7662100 7684600 High Signal Region +chr3L 7972100 7981100 Low Mappability +chr3L 8012400 8017600 High Signal Region +chr3L 8784200 8790000 Low Mappability +chr3L 9385600 9395900 High Signal Region +chr3L 9415500 9424500 High Signal Region +chr3L 9569100 9574400 Low Mappability +chr3L 10729500 10731200 High Signal Region +chr3L 11238800 
11246200 High Signal Region +chr3L 11479900 11481800 High Signal Region +chr3L 11545300 11547300 High Signal Region +chr3L 11605800 11612700 High Signal Region +chr3L 11955900 11966000 High Signal Region +chr3L 14747500 14755700 High Signal Region +chr3L 14773500 14784000 High Signal Region +chr3L 15134000 15138400 High Signal Region +chr3L 15818200 15820000 High Signal Region +chr3L 16038000 16046600 High Signal Region +chr3L 16639900 16641900 High Signal Region +chr3L 16653500 16655400 High Signal Region +chr3L 17003800 17006300 High Signal Region +chr3L 17636700 17638700 High Signal Region +chr3L 18827000 18834000 High Signal Region +chr3L 19887800 19909000 High Signal Region +chr3L 20391900 20395600 High Signal Region +chr3L 20466800 20477400 High Signal Region +chr3L 20510900 20513100 High Signal Region +chr3L 20808600 20818200 High Signal Region +chr3L 21000400 21022000 High Signal Region +chr3L 21358800 21371300 Low Mappability +chr3L 22092600 22099100 Low Mappability +chr3L 22734700 22738400 Low Mappability +chr3L 22754300 22760100 Low Mappability +chr3L 23817900 23833100 High Signal Region +chr3L 23935400 23941300 Low Mappability +chr3L 24447200 24484400 Low Mappability +chr3RHet 43700 62000 High Signal Region +chr3RHet 1342700 1348400 Low Mappability +chr3RHet 1508900 1524800 Low Mappability +chr3RHet 1816700 1834500 High Signal Region +chr3RHet 1947200 1958800 High Signal Region +chr3RHet 2307400 2309300 Low Mappability +chr3R 228600 238600 Low Mappability +chr3R 509000 510400 High Signal Region +chr3R 564200 566200 High Signal Region +chr3R 777000 779000 High Signal Region +chr3R 828100 833700 High Signal Region +chr3R 871500 878800 Low Mappability +chr3R 911600 914500 High Signal Region +chr3R 1076200 1078200 High Signal Region +chr3R 1424600 1427600 High Signal Region +chr3R 1448500 1451200 High Signal Region +chr3R 2230300 2234000 High Signal Region +chr3R 2645100 2649200 High Signal Region +chr3R 2899200 2916800 High Signal Region +chr3R 2933000 
2935200 High Signal Region +chr3R 3176400 3180900 High Signal Region +chr3R 3917000 3932800 Low Mappability +chr3R 4396100 4400900 Low Mappability +chr3R 4872100 4884300 High Signal Region +chr3R 5242900 5245900 High Signal Region +chr3R 5335200 5343100 High Signal Region +chr3R 5376200 5378000 High Signal Region +chr3R 5415800 5418000 High Signal Region +chr3R 5454700 5457000 High Signal Region +chr3R 5510100 5537700 High Signal Region +chr3R 5697900 5701200 High Signal Region +chr3R 6080700 6091100 Low Mappability +chr3R 6167400 6182800 High Signal Region +chr3R 6207000 6215100 High Signal Region +chr3R 7583800 7590800 High Signal Region +chr3R 7779600 7786900 High Signal Region +chr3R 8228800 8230500 High Signal Region +chr3R 8290300 8337000 High Signal Region +chr3R 8452400 8454700 High Signal Region +chr3R 9509100 9511200 High Signal Region +chr3R 10109000 10113200 High Signal Region +chr3R 10548800 10550700 High Signal Region +chr3R 10920100 10922000 High Signal Region +chr3R 10956400 10966900 High Signal Region +chr3R 11112800 11118600 High Signal Region +chr3R 11798100 11800500 High Signal Region +chr3R 12054400 12069200 High Signal Region +chr3R 12074500 12081400 High Signal Region +chr3R 12813500 12821900 High Signal Region +chr3R 13506900 13509400 High Signal Region +chr3R 13542200 13544300 High Signal Region +chr3R 13751200 13753400 Low Mappability +chr3R 14022000 14025900 High Signal Region +chr3R 14962500 14964700 Low Mappability +chr3R 17121500 17134300 High Signal Region +chr3R 17173800 17176100 High Signal Region +chr3R 17430600 17445700 Low Mappability +chr3R 17456100 17459600 High Signal Region +chr3R 18275900 18279300 High Signal Region +chr3R 19358600 19360500 High Signal Region +chr3R 19383900 19386200 Low Mappability +chr3R 19715100 19717300 High Signal Region +chr3R 19902300 19904500 Low Mappability +chr3R 19929900 19934800 High Signal Region +chr3R 20407300 20409000 High Signal Region +chr3R 20874800 20877200 High Signal Region +chr3R 
22922500 22923900 High Signal Region +chr3R 22966200 22977000 High Signal Region +chr3R 23406600 23408200 High Signal Region +chr3R 23551000 23556900 Low Mappability +chr3R 23682500 23694400 Low Mappability +chr3R 23894000 23899900 Low Mappability +chr3R 24151200 24153300 High Signal Region +chr3R 24889000 24891200 High Signal Region +chr3R 25563700 25565800 Low Mappability +chr3R 25910200 25912400 Low Mappability +chr3R 26900000 26906600 High Signal Region +chr3R 27041200 27048400 High Signal Region +chr3R 27238900 27243600 High Signal Region +chr3R 27433400 27437700 High Signal Region +chr3R 27572500 27575000 High Signal Region +chr3R 27893200 27905000 High Signal Region +chr4 97100 102500 Low Mappability +chr4 1278500 1351800 High Signal Region +chrXHet 32400 43100 Low Mappability +chrXHet 87400 132300 High Signal Region +chrX 0 18800 High Signal Region +chrX 322300 328700 Low Mappability +chrX 1251600 1275300 High Signal Region +chrX 2012900 2033200 High Signal Region +chrX 2504400 2514500 Low Mappability +chrX 2683600 2687100 High Signal Region +chrX 2964000 2975200 Low Mappability +chrX 3308900 3315500 Low Mappability +chrX 3620500 3624500 Low Mappability +chrX 3684200 3699100 High Signal Region +chrX 3834600 3844200 High Signal Region +chrX 4812700 4831100 High Signal Region +chrX 4884100 4891200 High Signal Region +chrX 6065700 6073000 High Signal Region +chrX 7019400 7028400 High Signal Region +chrX 7374800 7376400 High Signal Region +chrX 7791300 7793200 High Signal Region +chrX 7949800 7957700 High Signal Region +chrX 8186900 8190800 High Signal Region +chrX 8821300 8824300 Low Mappability +chrX 9517700 9520200 High Signal Region +chrX 10657000 10663600 High Signal Region +chrX 10990100 10997200 Low Mappability +chrX 11206200 11212900 High Signal Region +chrX 11473900 11494400 High Signal Region +chrX 11527500 11542700 Low Mappability +chrX 11607000 11609700 High Signal Region +chrX 12824600 12831700 Low Mappability +chrX 15705000 15706900 High Signal 
Region +chrX 15907200 15953600 High Signal Region +chrX 18323300 18329000 Low Mappability +chrX 18676100 18682200 High Signal Region +chrX 18757000 18759500 High Signal Region +chrX 19247000 19252800 Low Mappability +chrX 20070100 20101800 High Signal Region +chrX 21611900 21630800 High Signal Region +chrX 21833900 21835900 High Signal Region +chrX 21924100 21927700 Low Mappability +chrYHet 125600 138300 Low Mappability +chrYHet 195300 225100 High Signal Region diff --git a/assets/blacklists/v2.0/dm6-blacklist.v2.bed b/assets/blacklists/v2.0/dm6-blacklist.v2.bed new file mode 100644 index 0000000000000000000000000000000000000000..65a220b82b9a021ca9219633f03e2d8d0b9acdf9 --- /dev/null +++ b/assets/blacklists/v2.0/dm6-blacklist.v2.bed @@ -0,0 +1,182 @@ +chr2L 154500 167500 High Signal Region +chr2L 348000 365800 High Signal Region +chr2L 471400 482200 High Signal Region +chr2L 2191400 2200900 High Signal Region +chr2L 2749200 2756400 High Signal Region +chr2L 3161500 3164200 High Signal Region +chr2L 4536500 4544800 High Signal Region +chr2L 4938100 4941000 High Signal Region +chr2L 5206500 5210500 High Signal Region +chr2L 5827800 5836700 High Signal Region +chr2L 5976700 5987500 High Signal Region +chr2L 7343400 7349300 High Signal Region +chr2L 9898700 9902900 High Signal Region +chr2L 11316200 11317600 High Signal Region +chr2L 11992200 12013200 High Signal Region +chr2L 16267500 16271800 High Signal Region +chr2L 16283900 16289200 High Signal Region +chr2L 18942900 18945600 High Signal Region +chr2L 20647200 20649100 High Signal Region +chr2L 21236600 21238800 High Signal Region +chr2L 21415900 21544200 High Signal Region +chr2L 21653300 21656300 High Signal Region +chr2L 22409400 22479400 High Signal Region +chr2L 22488500 22506800 High Signal Region +chr2L 22765200 22909700 High Signal Region +chr2L 23096400 23122300 High Signal Region +chr2L 23353200 23387900 High Signal Region +chr2L 23511900 23513700 High Signal Region +chr2R 0 14900 High Signal Region 
+chr2R 744200 878700 Low Mappability +chr2R 1492900 1530200 High Signal Region +chr2R 1818200 1840700 Low Mappability +chr2R 1931800 1949300 Low Mappability +chr2R 2158800 2169900 High Signal Region +chr2R 2218200 2238300 High Signal Region +chr2R 2652000 2665400 High Signal Region +chr2R 3601600 3603200 High Signal Region +chr2R 3718500 3775000 High Signal Region +chr2R 3943700 3998100 High Signal Region +chr2R 4274200 4275400 High Signal Region +chr2R 4863000 4884000 Low Mappability +chr2R 5046600 5057100 High Signal Region +chr2R 5105500 5110000 High Signal Region +chr2R 7235700 7250000 High Signal Region +chr2R 8015400 8018000 Low Mappability +chr2R 10177000 10186000 High Signal Region +chr2R 10948100 10955500 High Signal Region +chr2R 12586100 12594400 High Signal Region +chr2R 14188900 14195500 High Signal Region +chr2R 18575400 18582400 High Signal Region +chr2R 19715900 19766800 High Signal Region +chr2R 21151200 21153700 High Signal Region +chr2R 24177300 24184700 High Signal Region +chr2R 25257500 25286800 High Signal Region +chr3L 2063300 2069900 High Signal Region +chr3L 2447600 2456600 High Signal Region +chr3L 3899000 3903000 High Signal Region +chr3L 7669000 7691700 High Signal Region +chr3L 7978900 7987800 Low Mappability +chr3L 8019300 8024500 High Signal Region +chr3L 11968300 11972800 High Signal Region +chr3L 16596900 16607900 High Signal Region +chr3L 18833900 18840800 High Signal Region +chr3L 20473700 20484300 High Signal Region +chr3L 20815500 20825000 High Signal Region +chr3L 22099400 22106200 Low Mappability +chr3L 22761300 22767100 High Signal Region +chr3L 23111800 23118300 High Signal Region +chr3L 23825700 23839600 High Signal Region +chr3L 24384500 24445600 High Signal Region +chr3L 24576600 24669400 High Signal Region +chr3L 25051000 25054100 High Signal Region +chr3L 25129300 25135900 High Signal Region +chr3L 25962100 25964900 High Signal Region +chr3L 26877500 27082600 High Signal Region +chr3L 27137300 27140300 Low Mappability 
+chr3L 27471600 27649900 High Signal Region +chr3R 0 32600 High Signal Region +chr3R 43000 82600 High Signal Region +chr3R 236900 285000 High Signal Region +chr3R 499300 529400 High Signal Region +chr3R 1271100 1279000 Low Mappability +chr3R 1369500 1390500 Low Mappability +chr3R 2619300 2623900 High Signal Region +chr3R 2749700 2768300 High Signal Region +chr3R 2775800 2782000 Low Mappability +chr3R 3032500 3058100 High Signal Region +chr3R 3087400 3136500 High Signal Region +chr3R 3168900 3171300 Low Mappability +chr3R 3697900 3702100 High Signal Region +chr3R 4738400 4740500 High Signal Region +chr3R 4951200 4953300 High Signal Region +chr3R 5002300 5009200 High Signal Region +chr3R 5045500 5053200 Low Mappability +chr3R 5085900 5088400 High Signal Region +chr3R 5598800 5602000 High Signal Region +chr3R 5622700 5625500 High Signal Region +chr3R 6404600 6408200 High Signal Region +chr3R 6819500 6823400 High Signal Region +chr3R 7073500 7076300 High Signal Region +chr3R 7107300 7109400 High Signal Region +chr3R 8091300 8107100 Low Mappability +chr3R 8570200 8575500 Low Mappability +chr3R 9046400 9058600 High Signal Region +chr3R 9351800 9354100 High Signal Region +chr3R 9417100 9420200 High Signal Region +chr3R 9509400 9517300 High Signal Region +chr3R 9550400 9552400 High Signal Region +chr3R 9590000 9592300 High Signal Region +chr3R 9629000 9631300 High Signal Region +chr3R 9684500 9712000 High Signal Region +chr3R 10255100 10265400 Low Mappability +chr3R 10341700 10357100 High Signal Region +chr3R 10377600 10389400 High Signal Region +chr3R 11758100 11765200 High Signal Region +chr3R 11948900 11961000 High Signal Region +chr3R 12464500 12511200 High Signal Region +chr3R 12626600 12629000 High Signal Region +chr3R 13683300 13685500 High Signal Region +chr3R 14283200 14287500 High Signal Region +chr3R 14696300 14698100 High Signal Region +chr3R 14723000 14725100 High Signal Region +chr3R 15094300 15096400 High Signal Region +chr3R 15130700 15135000 High Signal 
Region +chr3R 15286000 15292900 High Signal Region +chr3R 15972300 15974800 High Signal Region +chr3R 16224000 16243500 High Signal Region +chr3R 16248800 16255800 High Signal Region +chr3R 16987900 16996000 High Signal Region +chr3R 17681200 17683700 High Signal Region +chr3R 17716400 17718700 High Signal Region +chr3R 17925400 17927800 High Signal Region +chr3R 18196300 18200300 High Signal Region +chr3R 21295600 21308400 High Signal Region +chr3R 21348100 21350400 High Signal Region +chr3R 21604900 21620100 Low Mappability +chr3R 21630200 21633900 High Signal Region +chr3R 22450200 22453500 High Signal Region +chr3R 23889300 23891600 High Signal Region +chr3R 24076500 24078900 Low Mappability +chr3R 24104200 24109100 High Signal Region +chr3R 24581500 24590000 High Signal Region +chr3R 25049000 25051400 High Signal Region +chr3R 27140500 27151400 High Signal Region +chr3R 27580600 27582500 High Signal Region +chr3R 27856800 27868700 Low Mappability +chr3R 28325400 28327600 High Signal Region +chr3R 29063200 29065500 High Signal Region +chr3R 29737800 29740200 Low Mappability +chr3R 30084400 30086800 Low Mappability +chr3R 31215500 31222700 High Signal Region +chr3R 31413100 31417800 High Signal Region +chr3R 31607700 31611900 High Signal Region +chr3R 31746800 31749300 High Signal Region +chr3R 32067500 32079300 Low Mappability +chr4 1274800 1348100 High Signal Region +chrX 0 122400 High Signal Region +chrX 201200 246300 High Signal Region +chrX 2610300 2617700 High Signal Region +chrX 4921800 4937000 High Signal Region +chrX 4990100 4997200 High Signal Region +chrX 7125300 7134300 High Signal Region +chrX 8292900 8296800 High Signal Region +chrX 11487700 11494000 High Signal Region +chrX 16013200 16059600 High Signal Region +chrX 19907800 19958400 High Signal Region +chrX 22257600 22401900 High Signal Region +chrX 22432100 22434100 High Signal Region +chrX 22996400 23003500 High Signal Region +chrX 23019600 23022700 Low Mappability +chrX 23204900 23285000 High 
Signal Region +chrX 23290700 23442900 High Signal Region +chrX 23450200 23465000 Low Mappability +chrX 23471400 23489900 Low Mappability +chrX 23512700 23539400 Low Mappability +chrY 113900 125600 High Signal Region +chrY 131500 155700 High Signal Region +chrY 199800 248700 High Signal Region +chrY 313600 325400 High Signal Region +chrY 641400 654100 Low Mappability +chrY 1456900 1693500 High Signal Region +chrY 3641100 3667300 High Signal Region diff --git a/assets/blacklists/v2.0/hg19-blacklist.v2.bed b/assets/blacklists/v2.0/hg19-blacklist.v2.bed new file mode 100644 index 0000000000000000000000000000000000000000..03688d91e446d569772a90cddb3b54c858004bcf --- /dev/null +++ b/assets/blacklists/v2.0/hg19-blacklist.v2.bed @@ -0,0 +1,834 @@ +chr10 38726200 42489100 High Signal Region +chr10 42524900 42819200 High Signal Region +chr10 98560400 98562500 High Signal Region +chr10 135437600 135534700 High Signal Region +chr11 0 196300 High Signal Region +chr11 584400 586500 High Signal Region +chr11 964000 966100 Low Mappability +chr11 1015700 1019100 High Signal Region +chr11 1088800 1094300 High Signal Region +chr11 1141100 1214300 High Signal Region +chr11 3674100 3676900 Low Mappability +chr11 6830800 6832700 High Signal Region +chr11 10528500 10532700 Low Mappability +chr11 11267200 11269500 High Signal Region +chr11 48700000 48964800 High Signal Region +chr11 50505600 50523400 High Signal Region +chr11 50635500 51200100 High Signal Region +chr11 51244400 51289000 High Signal Region +chr11 51566300 54834600 High Signal Region +chr11 54876800 55028400 High Signal Region +chr11 62606300 62651300 High Signal Region +chr11 77596600 77601800 High Signal Region +chr11 85172700 85196400 High Signal Region +chr11 93965500 93984500 High Signal Region +chr11 100156600 100162500 High Signal Region +chr11 102239800 102246000 High Signal Region +chr11 129208700 129234600 High Signal Region +chr12 0 187000 High Signal Region +chr12 479900 531700 High Signal Region +chr12 2364000 
2366100 High Signal Region +chr12 2628700 2649700 Low Mappability +chr12 4618500 4624000 High Signal Region +chr12 6037400 6042400 Low Mappability +chr12 7705200 7717600 High Signal Region +chr12 19881600 19887000 High Signal Region +chr12 20703400 20705400 High Signal Region +chr12 20921400 20928000 High Signal Region +chr12 34371700 34400000 High Signal Region +chr12 34574500 34576400 Low Mappability +chr12 34761600 37887400 High Signal Region +chr12 37989200 38259900 High Signal Region +chr12 38330900 38375800 Low Mappability +chr12 38443400 38503500 High Signal Region +chr12 38534900 38537700 High Signal Region +chr12 41756500 41758400 Low Mappability +chr12 54205000 54206900 High Signal Region +chr12 66867700 66872600 High Signal Region +chr12 69385000 69391000 High Signal Region +chr12 70167100 70204100 High Signal Region +chr12 75903800 75916900 High Signal Region +chr12 93771900 93808100 High Signal Region +chr12 97117400 97122300 High Signal Region +chr12 101540100 101549000 High Signal Region +chr12 113517400 113519300 High Signal Region +chr12 125394300 125426400 Low Mappability +chr12 126072900 126074800 Low Mappability +chr12 127649500 127651900 High Signal Region +chr12 130863600 130878600 High Signal Region +chr12 132060500 132074200 High Signal Region +chr12 133343000 133345000 High Signal Region +chr12 133825400 133851800 Low Mappability +chr13 0 19194200 High Signal Region +chr13 19344200 19447900 High Signal Region +chr13 19641600 19652000 High Signal Region +chr13 19677800 19683400 High Signal Region +chr13 19711000 19713300 High Signal Region +chr13 20051500 20077000 Low Mappability +chr13 20150200 20228600 High Signal Region +chr13 20352400 20372400 High Signal Region +chr13 20966500 20984700 High Signal Region +chr13 21068500 21072900 High Signal Region +chr13 21816000 21826300 High Signal Region +chr13 21950600 21952600 High Signal Region +chr13 22125500 22129800 High Signal Region +chr13 22429700 22436000 High Signal Region +chr13 23095400 
23108300 Low Mappability +chr13 24900500 24932100 High Signal Region +chr13 25122300 25128600 High Signal Region +chr13 26467000 26472000 Low Mappability +chr13 27977100 28035000 High Signal Region +chr13 28710500 28733700 High Signal Region +chr13 29767400 29769600 High Signal Region +chr13 30215700 30247400 Low Mappability +chr13 30397900 30426100 High Signal Region +chr13 30787000 30790100 High Signal Region +chr13 30819100 30845000 High Signal Region +chr13 31412800 31440600 High Signal Region +chr13 31521900 31523400 High Signal Region +chr13 31916200 31920700 High Signal Region +chr13 31970100 31971800 High Signal Region +chr13 33109900 33114000 High Signal Region +chr13 33149400 33182100 High Signal Region +chr13 33441700 33443500 Low Mappability +chr13 34163100 34164900 High Signal Region +chr13 34558900 34565000 High Signal Region +chr13 35054300 35073100 High Signal Region +chr13 35656000 35664300 High Signal Region +chr13 35977500 36001800 High Signal Region +chr13 36531200 36553700 High Signal Region +chr13 36582200 36588400 High Signal Region +chr13 37723900 37730200 High Signal Region +chr13 38396200 38402300 Low Mappability +chr13 38640900 38645800 High Signal Region +chr13 38687300 38721000 High Signal Region +chr13 40422400 40427800 High Signal Region +chr13 40560400 40580700 Low Mappability +chr13 40920400 40936600 Low Mappability +chr13 41309000 41315200 Low Mappability +chr13 41343800 41416000 High Signal Region +chr13 41438500 41477300 High Signal Region +chr13 41530500 41640400 High Signal Region +chr13 42108700 42114800 High Signal Region +chr13 42165400 42243300 High Signal Region +chr13 42321000 42324400 High Signal Region +chr13 42445300 42448800 High Signal Region +chr13 42479700 42497900 High Signal Region +chr13 42928200 42961000 Low Mappability +chr13 42999000 43005200 Low Mappability +chr13 43128800 43132300 High Signal Region +chr13 43734900 43740400 High Signal Region +chr13 44391900 44409800 Low Mappability +chr13 44540800 44550400 
High Signal Region +chr13 45491200 45494100 High Signal Region +chr13 46190900 46244300 High Signal Region +chr13 47322400 47347500 Low Mappability +chr13 47795600 47799800 Low Mappability +chr13 48288000 48379400 High Signal Region +chr13 48551900 48636200 High Signal Region +chr13 48776900 48781200 High Signal Region +chr13 48955700 49045800 High Signal Region +chr13 49587600 49593700 High Signal Region +chr13 49726300 49750700 High Signal Region +chr13 50655600 50674000 High Signal Region +chr13 50739500 50760400 High Signal Region +chr13 50804000 50831700 High Signal Region +chr13 51045000 51047900 Low Mappability +chr13 51069400 51148800 High Signal Region +chr13 51538700 51562300 High Signal Region +chr13 51643200 51654900 High Signal Region +chr13 52056600 52177400 High Signal Region +chr13 52209900 52311100 High Signal Region +chr13 52628400 52634200 Low Mappability +chr13 52767000 52908600 High Signal Region +chr13 53056600 53198500 High Signal Region +chr13 53667700 53672500 Low Mappability +chr13 54170800 54195600 High Signal Region +chr13 55314400 55316200 Low Mappability +chr13 55924900 55928900 Low Mappability +chr13 56386000 56387700 Low Mappability +chr13 57149500 57152400 High Signal Region +chr13 57613800 57615800 Low Mappability +chr13 57713200 57748000 High Signal Region +chr13 57793400 57794800 High Signal Region +chr13 57929500 57933400 Low Mappability +chr13 58055700 58068300 High Signal Region +chr13 58756200 58759000 High Signal Region +chr13 59246600 59252600 Low Mappability +chr13 60399100 60401600 Low Mappability +chr13 60558600 60561900 Low Mappability +chr13 60819900 60825800 High Signal Region +chr13 60868000 60870400 Low Mappability +chr13 61508300 61510400 High Signal Region +chr13 62142000 62143900 Low Mappability +chr13 62379800 62381800 High Signal Region +chr13 62407700 62419700 High Signal Region +chr13 63602300 63649700 High Signal Region +chr13 64291000 64343600 Low Mappability +chr13 64395200 64410800 Low Mappability +chr13 
66567000 66569000 Low Mappability +chr13 66827800 66833700 High Signal Region +chr13 67311600 67317700 Low Mappability +chr13 67350200 67352500 High Signal Region +chr13 68136600 68139600 Low Mappability +chr13 68254200 68260100 High Signal Region +chr13 68566600 68570000 High Signal Region +chr13 68901600 68915400 High Signal Region +chr13 70357600 70362900 Low Mappability +chr13 70783800 70789000 High Signal Region +chr13 71751300 71752700 High Signal Region +chr13 71958500 71963800 High Signal Region +chr13 72799900 72802600 High Signal Region +chr13 73184400 73190500 High Signal Region +chr13 74027000 74033300 High Signal Region +chr13 74202200 74220800 High Signal Region +chr13 74809900 74816100 High Signal Region +chr13 75111000 75116700 High Signal Region +chr13 75606300 75608100 Low Mappability +chr13 75653600 75655800 High Signal Region +chr13 75815200 75821400 High Signal Region +chr13 76251800 76322500 High Signal Region +chr13 76528000 76532300 High Signal Region +chr13 76841900 76843700 High Signal Region +chr13 77119100 77122400 Low Mappability +chr13 77179200 77192700 Low Mappability +chr13 77773200 77779300 High Signal Region +chr13 78250500 78260900 Low Mappability +chr13 78453800 78455300 High Signal Region +chr13 78857200 78859700 High Signal Region +chr13 79087100 79105000 High Signal Region +chr13 79590600 79592200 High Signal Region +chr13 79809800 79811600 High Signal Region +chr13 80391100 80420000 High Signal Region +chr13 80726100 80730600 Low Mappability +chr13 81490500 81492700 Low Mappability +chr13 81638000 81651500 High Signal Region +chr13 82132700 82135500 Low Mappability +chr13 82322400 82327400 High Signal Region +chr13 82619500 82625600 Low Mappability +chr13 82805900 82809300 High Signal Region +chr13 83315600 83317200 Low Mappability +chr13 84095600 84097700 High Signal Region +chr13 84535300 84540600 High Signal Region +chr13 85075900 85078300 Low Mappability +chr13 85299500 85302300 High Signal Region +chr13 85695400 85703500 
High Signal Region +chr13 86143500 86147200 Low Mappability +chr13 86485900 86502200 Low Mappability +chr13 86572000 86573600 High Signal Region +chr13 87297900 87303700 High Signal Region +chr13 88351600 88353900 High Signal Region +chr13 89335000 89337000 Low Mappability +chr13 89482500 89486800 Low Mappability +chr13 89740300 89746300 Low Mappability +chr13 91181200 91182800 Low Mappability +chr13 91305800 91322300 Low Mappability +chr13 92256000 92259400 High Signal Region +chr13 92622900 92628600 High Signal Region +chr13 93127200 93129400 Low Mappability +chr13 93170900 93175000 High Signal Region +chr13 94140200 94148600 High Signal Region +chr13 95024400 95030600 Low Mappability +chr13 95471000 95472500 Low Mappability +chr13 95561600 95563600 High Signal Region +chr13 96217900 96220200 High Signal Region +chr13 96377900 96393100 High Signal Region +chr13 96481000 96493700 High Signal Region +chr13 96556500 96572100 High Signal Region +chr13 96616700 96633300 Low Mappability +chr13 96699300 96705200 High Signal Region +chr13 97807500 97812400 High Signal Region +chr13 97873500 98016000 High Signal Region +chr13 98083600 98086200 High Signal Region +chr13 98256400 98266100 Low Mappability +chr13 99386700 99407200 High Signal Region +chr13 100970400 100973100 High Signal Region +chr13 101327900 101356500 Low Mappability +chr13 102191500 102196900 High Signal Region +chr13 102250800 102254200 High Signal Region +chr13 102293700 102296000 High Signal Region +chr13 102560800 102562600 High Signal Region +chr13 103174700 103180600 High Signal Region +chr13 103770000 103772400 High Signal Region +chr13 104155400 104159600 High Signal Region +chr13 105306100 105307700 Low Mappability +chr13 105609500 105613300 High Signal Region +chr13 105951400 105953800 Low Mappability +chr13 106035100 106040800 Low Mappability +chr13 106536800 106542400 High Signal Region +chr13 106651900 106669100 High Signal Region +chr13 106866200 106872100 High Signal Region +chr13 107430500 
107436700 High Signal Region +chr13 108868800 108909300 Low Mappability +chr13 109162700 109168900 High Signal Region +chr13 110075500 110098100 Low Mappability +chr13 110691900 110705300 High Signal Region +chr13 111036900 111039000 High Signal Region +chr13 111107500 111163000 High Signal Region +chr13 111512100 111527200 High Signal Region +chr13 111959100 111964000 High Signal Region +chr13 111992000 111994500 Low Mappability +chr13 112148400 112153300 High Signal Region +chr13 112628400 112630400 High Signal Region +chr13 112668600 112670300 High Signal Region +chr13 112931200 112973400 High Signal Region +chr13 113179900 113244400 High Signal Region +chr13 113319400 113321900 High Signal Region +chr13 113440500 113444300 High Signal Region +chr13 113526200 113540600 High Signal Region +chr13 113765500 113767700 High Signal Region +chr13 113916300 113951000 Low Mappability +chr13 114089500 114102600 High Signal Region +chr13 114191600 114218700 High Signal Region +chr13 114247100 114280800 High Signal Region +chr13 114452900 114520000 High Signal Region +chr13 114553300 114571100 High Signal Region +chr13 114601800 114772500 High Signal Region +chr13 114848900 114852600 High Signal Region +chr14 0 20303800 High Signal Region +chr14 27098600 27104100 High Signal Region +chr14 32263100 32280800 High Signal Region +chr14 32350600 32352500 High Signal Region +chr14 32934800 32955200 Low Mappability +chr14 35006400 35031800 Low Mappability +chr14 36416700 36419200 High Signal Region +chr14 39980200 39995800 High Signal Region +chr14 54700600 54706600 High Signal Region +chr14 67508000 67534600 High Signal Region +chr14 80556900 80561000 High Signal Region +chr14 86540300 86577300 High Signal Region +chr14 87058300 87078200 High Signal Region +chr14 87879900 87894500 High Signal Region +chr14 88236600 88243300 High Signal Region +chr14 90340400 90342300 High Signal Region +chr14 102140800 102142700 High Signal Region +chr14 105681400 105707200 High Signal Region 
+chr14 106034900 106185200 High Signal Region +chr14 107151000 107176900 High Signal Region +chr15 0 20166200 High Signal Region +chr15 20200400 22365100 High Signal Region +chr15 22387400 22749100 Low Mappability +chr15 23266700 23612800 High Signal Region +chr15 26002700 26004600 Low Mappability +chr15 28538400 28956300 High Signal Region +chr15 30358500 30919300 High Signal Region +chr15 31136500 31143800 Low Mappability +chr15 32445900 32915200 High Signal Region +chr15 56603300 56608500 High Signal Region +chr15 69255900 69257800 High Signal Region +chr15 72085400 72090500 High Signal Region +chr15 72923800 72979000 Low Mappability +chr15 74357800 74398000 High Signal Region +chr15 75546200 75592100 High Signal Region +chr15 77991000 77993000 High Signal Region +chr15 82582300 83213900 High Signal Region +chr15 84835000 85142500 High Signal Region +chr15 85732700 85814600 High Signal Region +chr15 102283600 102305300 Low Mappability +chr15 102411600 102531300 High Signal Region +chr16 32923000 33427100 High Signal Region +chr16 33726300 34197900 High Signal Region +chr16 35191100 46501300 High Signal Region +chr16 90155800 90354700 Low Mappability +chr17 66700 167600 High Signal Region +chr17 964700 969400 High Signal Region +chr17 1210900 1236400 Low Mappability +chr17 4734800 4736700 Low Mappability +chr17 18928600 19140800 High Signal Region +chr17 21492100 21686000 High Signal Region +chr17 21901400 21908600 High Signal Region +chr17 22019700 22024900 High Signal Region +chr17 22207000 25341300 High Signal Region +chr17 30264500 30277600 High Signal Region +chr17 31148500 31150800 High Signal Region +chr17 33477200 33479300 High Signal Region +chr17 34476000 34812200 Low Mappability +chr17 36253600 36406900 High Signal Region +chr17 41378900 41402100 High Signal Region +chr17 41432200 41467700 High Signal Region +chr17 43588700 43718700 High Signal Region +chr17 45108700 45130400 Low Mappability +chr17 45211900 45283300 High Signal Region +chr17 45612500 
45671300 Low Mappability +chr17 51182300 51184600 Low Mappability +chr17 64794200 64796200 Low Mappability +chr17 78717100 78719200 Low Mappability +chr17 81151700 81195200 Low Mappability +chr18 0 127000 High Signal Region +chr18 952000 976900 High Signal Region +chr18 2247200 2253300 High Signal Region +chr18 2841300 2866100 Low Mappability +chr18 6687500 6705800 High Signal Region +chr18 12134400 12227800 High Signal Region +chr18 14163200 14270800 High Signal Region +chr18 15139700 15271400 High Signal Region +chr18 15293900 18552900 High Signal Region +chr18 19792100 19813600 High Signal Region +chr18 20109800 20115600 High Signal Region +chr18 20388600 20400600 High Signal Region +chr18 27088800 27090300 High Signal Region +chr18 28927900 28933700 High Signal Region +chr18 30436500 30442100 High Signal Region +chr18 32114600 32137900 High Signal Region +chr18 32924100 32938700 High Signal Region +chr18 33196300 33213600 High Signal Region +chr18 33342300 33346200 High Signal Region +chr18 38424600 38428200 High Signal Region +chr18 42024800 42028200 High Signal Region +chr18 42607900 42611000 High Signal Region +chr18 44125300 44127400 High Signal Region +chr18 44503000 44515000 High Signal Region +chr18 44541400 44558200 High Signal Region +chr18 45378700 45380700 Low Mappability +chr18 46175800 46204100 High Signal Region +chr18 46572200 46634900 High Signal Region +chr18 47297100 47302900 High Signal Region +chr18 50318200 50320200 High Signal Region +chr18 52710600 52712900 High Signal Region +chr18 53382700 53388400 High Signal Region +chr18 54391800 54393600 High Signal Region +chr18 60853700 60886800 High Signal Region +chr18 61530100 61533700 High Signal Region +chr18 68386500 68419400 High Signal Region +chr18 74678000 74695900 High Signal Region +chr18 76196600 76198900 Low Mappability +chr18 76272400 76275200 High Signal Region +chr18 76773800 76800400 High Signal Region +chr18 77031200 77124400 High Signal Region +chr18 77233300 77236000 High 
Signal Region +chr18 77377700 77394500 High Signal Region +chr18 77679100 77681700 High Signal Region +chr18 77772000 77796400 High Signal Region +chr19 7514300 7516900 High Signal Region +chr19 8850900 8910700 High Signal Region +chr19 24182700 24198600 High Signal Region +chr19 24501500 27995100 High Signal Region +chr19 35349800 35357000 High Signal Region +chr19 36065600 36067700 High Signal Region +chr19 37756400 37795100 High Signal Region +chr19 44912700 44921200 High Signal Region +chr19 44958200 44964700 Low Mappability +chr19 48406200 48463100 High Signal Region +chr19 50593500 50643700 High Signal Region +chr1 0 750100 High Signal Region +chr1 814500 845200 High Signal Region +chr1 2052400 2056000 High Signal Region +chr1 2582800 2693900 High Signal Region +chr1 4362200 4364300 High Signal Region +chr1 5714800 5736800 High Signal Region +chr1 16821600 17301500 High Signal Region +chr1 38076400 38078300 Low Mappability +chr1 91836500 91854100 High Signal Region +chr1 120531600 120896300 High Signal Region +chr1 120926100 121149300 High Signal Region +chr1 121341500 145396500 High Signal Region +chr1 147424800 147731700 High Signal Region +chr1 147832000 149058800 High Signal Region +chr1 152185700 152191100 High Signal Region +chr1 156185300 156187600 High Signal Region +chr1 161392300 161442700 High Signal Region +chr1 168317300 168322800 High Signal Region +chr1 203888700 203890700 High Signal Region +chr1 224175500 224213600 High Signal Region +chr1 228743700 228782800 High Signal Region +chr1 236876100 236879100 Low Mappability +chr1 237765500 237767500 High Signal Region +chr1 246980600 246982700 High Signal Region +chr1 249225300 249250600 High Signal Region +chr20 25733100 25945000 Low Mappability +chr20 25984200 26150000 Low Mappability +chr20 26184300 29519700 High Signal Region +chr20 29546800 29853000 High Signal Region +chr20 46521400 46531600 High Signal Region +chr20 47130700 47133900 High Signal Region +chr20 62887700 63025500 Low 
Mappability +chr21 9594900 10366000 High Signal Region +chr21 10491900 10494000 Low Mappability +chr21 10646000 10861500 High Signal Region +chr21 11004200 14370200 High Signal Region +chr22 0 16962100 High Signal Region +chr22 17348200 17393500 Low Mappability +chr22 17494600 17519200 Low Mappability +chr22 18358700 18361200 High Signal Region +chr22 18657400 18889800 High Signal Region +chr22 20304800 20708400 High Signal Region +chr22 21466100 21916600 High Signal Region +chr22 23826900 23829900 Low Mappability +chr22 33517600 33519500 High Signal Region +chr22 36280800 36282700 Low Mappability +chr22 51058600 51083400 High Signal Region +chr22 51220000 51304500 High Signal Region +chr2 2298400 2300500 Low Mappability +chr2 3183400 3185800 High Signal Region +chr2 13858600 13877800 High Signal Region +chr2 33140400 33143500 High Signal Region +chr2 49455800 49457900 Low Mappability +chr2 62956900 62981700 High Signal Region +chr2 70656600 70659500 High Signal Region +chr2 86882000 86896800 High Signal Region +chr2 87441300 88290400 High Signal Region +chr2 89534800 89985900 High Signal Region +chr2 90267000 95326200 High Signal Region +chr2 95471500 95565900 Low Mappability +chr2 97718400 98232300 High Signal Region +chr2 109814800 109817200 High Signal Region +chr2 114147600 114441900 High Signal Region +chr2 132763200 132836700 Low Mappability +chr2 132946300 133122100 High Signal Region +chr2 149638400 149640300 Low Mappability +chr2 162134100 162148700 High Signal Region +chr2 230044500 230046500 High Signal Region +chr2 243052100 243199300 High Signal Region +chr3 612200 662600 Low Mappability +chr3 3762200 3767100 High Signal Region +chr3 4958300 4964200 High Signal Region +chr3 8414500 8434100 High Signal Region +chr3 15009100 15010800 Low Mappability +chr3 15228200 15245300 High Signal Region +chr3 16995500 17013700 Low Mappability +chr3 25740700 25759100 Low Mappability +chr3 26426200 26445700 High Signal Region +chr3 39913800 39931100 High Signal 
Region +chr3 43527700 43530900 Low Mappability +chr3 51490400 51496100 High Signal Region +chr3 63719200 63725000 High Signal Region +chr3 73159000 73161500 High Signal Region +chr3 75678100 75917700 High Signal Region +chr3 75982800 75999500 High Signal Region +chr3 78995600 78999800 High Signal Region +chr3 80490200 80492100 Low Mappability +chr3 80916400 80946200 High Signal Region +chr3 90205400 90224700 High Signal Region +chr3 90312300 93518500 High Signal Region +chr3 93957200 93959600 Low Mappability +chr3 96335200 96338100 Low Mappability +chr3 96457300 96459000 High Signal Region +chr3 98184700 98186900 High Signal Region +chr3 100827300 100833600 Low Mappability +chr3 107053900 107058800 Low Mappability +chr3 118633600 118639100 Low Mappability +chr3 135154400 135158200 High Signal Region +chr3 135304100 135329200 High Signal Region +chr3 139309800 139333000 High Signal Region +chr3 155996600 156002700 High Signal Region +chr3 157599200 157620600 High Signal Region +chr3 160658900 160666400 Low Mappability +chr3 169397300 169454100 High Signal Region +chr3 173977500 173983300 Low Mappability +chr3 175499400 175504900 High Signal Region +chr3 182734900 182736900 Low Mappability +chr3 183673200 183676500 High Signal Region +chr3 183796800 183798700 High Signal Region +chr3 185265900 185305500 Low Mappability +chr3 189237500 189238900 Low Mappability +chr3 195201400 195233900 Low Mappability +chr3 195341900 195476900 High Signal Region +chr3 195502200 195519800 High Signal Region +chr3 195640700 195745500 High Signal Region +chr3 196624700 196639200 High Signal Region +chr3 196757600 196762600 High Signal Region +chr3 197110400 197187600 High Signal Region +chr3 197325200 197407700 High Signal Region +chr3 197798000 198022400 High Signal Region +chr4 0 69600 High Signal Region +chr4 1420500 1478600 High Signal Region +chr4 9199300 9371400 High Signal Region +chr4 40293300 40341800 High Signal Region +chr4 49073900 52683800 High Signal Region +chr4 68263300 
68273300 High Signal Region +chr4 70294400 70297700 High Signal Region +chr4 76806200 76808200 High Signal Region +chr4 80272500 80275600 High Signal Region +chr4 114909000 114911500 High Signal Region +chr4 120158500 120222800 High Signal Region +chr4 153843200 153846400 High Signal Region +chr4 167475600 167502500 Low Mappability +chr4 190153700 190157200 High Signal Region +chr4 190190600 190230500 High Signal Region +chr4 190469400 190685100 High Signal Region +chr4 190756300 190770700 High Signal Region +chr4 190795300 191154200 High Signal Region +chr5 0 85500 High Signal Region +chr5 629900 651800 High Signal Region +chr5 1326400 1334600 High Signal Region +chr5 2144800 2147800 High Signal Region +chr5 2490000 2491700 High Signal Region +chr5 3322200 3325200 High Signal Region +chr5 6967500 6971800 High Signal Region +chr5 14633500 14653400 Low Mappability +chr5 16335700 16341500 High Signal Region +chr5 17516900 17600400 High Signal Region +chr5 17631100 17633300 Low Mappability +chr5 21458600 21581100 High Signal Region +chr5 25360400 25384600 High Signal Region +chr5 32369500 32391600 High Signal Region +chr5 34177800 34246500 High Signal Region +chr5 45523000 45550600 High Signal Region +chr5 45932400 45978600 High Signal Region +chr5 46072400 46096800 High Signal Region +chr5 46239900 46241800 Low Mappability +chr5 46265500 49594200 High Signal Region +chr5 60055500 60058300 Low Mappability +chr5 68830000 70669400 High Signal Region +chr5 71145800 71149800 High Signal Region +chr5 73981300 74008300 High Signal Region +chr5 79945000 79949100 High Signal Region +chr5 80324700 80351700 High Signal Region +chr5 84936300 84958500 High Signal Region +chr5 90445100 90458900 High Signal Region +chr5 93283200 93284600 High Signal Region +chr5 93903700 93906100 Low Mappability +chr5 99381200 99426800 High Signal Region +chr5 113477000 113496900 High Signal Region +chr5 126439200 126461500 High Signal Region +chr5 130208300 130210400 High Signal Region +chr5 
134258200 134265100 High Signal Region +chr5 136835200 136886000 High Signal Region +chr5 137304800 137310300 High Signal Region +chr5 138341100 138347500 High Signal Region +chr5 142677200 142690000 Low Mappability +chr5 143013900 143015800 High Signal Region +chr5 155138700 155189100 High Signal Region +chr5 156085200 156093100 High Signal Region +chr5 170510900 170517200 High Signal Region +chr5 173440700 173444600 High Signal Region +chr5 174540800 174565800 High Signal Region +chr5 175331400 175545200 High Signal Region +chr5 176017900 176019800 Low Mappability +chr5 177061900 177360500 High Signal Region +chr5 177387600 177408100 Low Mappability +chr5 178011600 178013600 High Signal Region +chr5 180599700 180915200 High Signal Region +chr6 0 162100 Low Mappability +chr6 256600 382800 High Signal Region +chr6 519000 521400 Low Mappability +chr6 851500 864200 High Signal Region +chr6 1428700 1434800 Low Mappability +chr6 2200300 2202700 Low Mappability +chr6 4809700 4840300 Low Mappability +chr6 5886400 5892500 Low Mappability +chr6 6141100 6143900 Low Mappability +chr6 6212500 6217900 Low Mappability +chr6 8770600 8776400 Low Mappability +chr6 9966100 9971700 High Signal Region +chr6 10984500 10987800 Low Mappability +chr6 14480600 14486400 Low Mappability +chr6 15189400 15190800 High Signal Region +chr6 20079900 20093100 Low Mappability +chr6 20615000 20619200 Low Mappability +chr6 22166500 22181200 Low Mappability +chr6 23232900 23235900 Low Mappability +chr6 26668800 26830200 High Signal Region +chr6 26850500 26925900 Low Mappability +chr6 30027900 30071800 Low Mappability +chr6 31783300 31806300 Low Mappability +chr6 33451700 33454300 High Signal Region +chr6 34038400 34041700 High Signal Region +chr6 37096000 37117300 High Signal Region +chr6 38241900 38269500 High Signal Region +chr6 44011400 44047700 High Signal Region +chr6 44148500 44150800 High Signal Region +chr6 45637100 45683300 Low Mappability +chr6 45814800 45817800 Low Mappability +chr6 
45963800 45965300 Low Mappability +chr6 48331100 48336800 Low Mappability +chr6 48705800 48711100 Low Mappability +chr6 49759100 49764900 Low Mappability +chr6 50999100 51004700 High Signal Region +chr6 51531300 51535800 Low Mappability +chr6 54270500 54273400 High Signal Region +chr6 54364700 54372500 Low Mappability +chr6 54826700 54832300 Low Mappability +chr6 56911200 56913200 Low Mappability +chr6 56954700 56956700 Low Mappability +chr6 57133300 57608800 High Signal Region +chr6 57671300 57673300 Low Mappability +chr6 58061300 58288100 High Signal Region +chr6 58724800 58738300 Low Mappability +chr6 58772700 61920700 High Signal Region +chr6 62283100 62285000 Low Mappability +chr6 62371500 62383900 Low Mappability +chr6 62770600 62781900 High Signal Region +chr6 63265300 63298700 Low Mappability +chr6 65966100 65967700 Low Mappability +chr6 70193400 70231500 Low Mappability +chr6 71454100 71514000 Low Mappability +chr6 71981600 71986300 High Signal Region +chr6 72027300 72029200 Low Mappability +chr6 72875000 72876900 High Signal Region +chr6 73680200 73704400 Low Mappability +chr6 74417700 74420300 Low Mappability +chr6 74707400 74738700 Low Mappability +chr6 77455300 77457000 Low Mappability +chr6 77670600 77687700 Low Mappability +chr6 77752900 77797700 Low Mappability +chr6 78426700 78455800 Low Mappability +chr6 78508100 78509800 Low Mappability +chr6 79681400 79687300 Low Mappability +chr6 80401000 80403400 High Signal Region +chr6 81193300 81207500 Low Mappability +chr6 83257400 83275700 Low Mappability +chr6 86694600 86736600 Low Mappability +chr6 87552300 87637100 Low Mappability +chr6 89091200 89122300 Low Mappability +chr6 90764500 90769800 High Signal Region +chr6 91272000 91298000 High Signal Region +chr6 94341300 94347000 High Signal Region +chr6 95516600 95540000 Low Mappability +chr6 96310100 96313200 Low Mappability +chr6 97430400 97437100 Low Mappability +chr6 97824400 97828600 High Signal Region +chr6 99151500 99156200 Low Mappability +chr6 
99314300 99316400 Low Mappability +chr6 100802600 100817600 High Signal Region +chr6 101028300 101034500 Low Mappability +chr6 101633800 101663000 Low Mappability +chr6 102617900 102623600 High Signal Region +chr6 102983200 102985100 Low Mappability +chr6 103200700 103206700 High Signal Region +chr6 104937300 104943400 Low Mappability +chr6 105185700 105210800 Low Mappability +chr6 107045300 107046900 Low Mappability +chr6 109454700 109471400 Low Mappability +chr6 109566300 109571600 Low Mappability +chr6 112224100 112229600 Low Mappability +chr6 112853400 112873000 Low Mappability +chr6 114754200 114756900 Low Mappability +chr6 115121100 115123800 Low Mappability +chr6 115496600 115502400 Low Mappability +chr6 115575100 115578000 High Signal Region +chr6 116960800 116966000 High Signal Region +chr6 117134700 117144000 Low Mappability +chr6 117413300 117429300 Low Mappability +chr6 119557600 119559600 High Signal Region +chr6 121732200 121734100 Low Mappability +chr6 121887100 121892400 Low Mappability +chr6 123793600 123799300 Low Mappability +chr6 125028000 125052900 High Signal Region +chr6 125126000 125131800 Low Mappability +chr6 129226700 129244600 Low Mappability +chr6 131556000 131561800 Low Mappability +chr6 132019100 132037100 Low Mappability +chr6 132177400 132179000 Low Mappability +chr6 133341700 133347800 Low Mappability +chr6 133593100 133595000 High Signal Region +chr6 136492700 136494600 Low Mappability +chr6 138120400 138136600 Low Mappability +chr6 142456500 142469200 Low Mappability +chr6 144117700 144122900 High Signal Region +chr6 145393200 145395000 Low Mappability +chr6 145824200 145826400 Low Mappability +chr6 145984700 146002900 Low Mappability +chr6 146291400 146318300 Low Mappability +chr6 148276600 148278600 Low Mappability +chr6 148480500 148484700 Low Mappability +chr6 150782100 150797500 Low Mappability +chr6 156062900 156064800 High Signal Region +chr6 156355300 156361300 High Signal Region +chr6 156646100 156651900 High Signal 
Region +chr6 156803000 156804800 Low Mappability +chr6 157730500 157736300 High Signal Region +chr6 160073000 160134300 Low Mappability +chr6 161032400 161068500 Low Mappability +chr6 165716800 165720000 Low Mappability +chr6 165782200 165787800 Low Mappability +chr6 166828700 166843000 Low Mappability +chr6 167196600 167208400 Low Mappability +chr6 167745800 167752500 Low Mappability +chr6 167786100 167802900 Low Mappability +chr6 168635100 168638700 Low Mappability +chr6 168961200 168963300 Low Mappability +chr6 169054200 169061300 High Signal Region +chr6 169239700 169241700 Low Mappability +chr6 170460500 170462500 Low Mappability +chr6 170528700 170531000 Low Mappability +chr6 170686000 170710200 High Signal Region +chr6 170774700 170777400 Low Mappability +chr6 170803900 170839700 Low Mappability +chr6 170915300 171115000 Low Mappability +chr7 0 49700 High Signal Region +chr7 1311000 1313200 High Signal Region +chr7 45290700 45292600 Low Mappability +chr7 56437000 56447500 High Signal Region +chr7 57544900 57557600 High Signal Region +chr7 57597800 57782700 High Signal Region +chr7 57884200 62120800 High Signal Region +chr7 62403000 62404900 High Signal Region +chr7 64929600 65063200 High Signal Region +chr7 84878700 84884900 High Signal Region +chr7 100549000 100611600 High Signal Region +chr7 100634800 100648100 High Signal Region +chr7 101981900 102013400 High Signal Region +chr7 102114900 102445700 High Signal Region +chr7 121919200 121925000 High Signal Region +chr7 140761800 140784200 High Signal Region +chr7 142373000 142376300 Low Mappability +chr7 145693500 145735200 Low Mappability +chr7 152072600 152132400 High Signal Region +chr7 157924100 157945100 High Signal Region +chr7 158387000 158388900 High Signal Region +chr7 158685900 158710600 High Signal Region +chr8 0 185300 High Signal Region +chr8 7012600 8066200 High Signal Region +chr8 11994400 12230100 High Signal Region +chr8 12252000 12466300 High Signal Region +chr8 13501500 13503800 High 
Signal Region +chr8 43091800 43118200 High Signal Region +chr8 43758900 46908900 High Signal Region +chr8 46946900 46959100 High Signal Region +chr8 47367600 47369500 High Signal Region +chr8 48792700 48794600 High Signal Region +chr8 51581600 51584700 High Signal Region +chr8 52729900 52737900 High Signal Region +chr8 58117400 58128700 High Signal Region +chr8 59283300 59288700 High Signal Region +chr8 60782300 60800500 High Signal Region +chr8 70600600 70603500 High Signal Region +chr8 82753700 82764200 High Signal Region +chr8 86554300 86841600 High Signal Region +chr8 100501000 100509100 High Signal Region +chr8 104795400 104807700 High Signal Region +chr8 106801200 106807000 High Signal Region +chr8 127325400 127331100 High Signal Region +chr8 142501600 142503600 High Signal Region +chr8 144743300 144752700 High Signal Region +chr9 6593800 6595700 High Signal Region +chr9 35903000 35915300 High Signal Region +chr9 40815000 43489000 High Signal Region +chr9 43684600 44102400 High Signal Region +chr9 44852100 44881200 High Signal Region +chr9 44908300 66250200 High Signal Region +chr9 66344100 68143800 High Signal Region +chr9 68306800 69121200 High Signal Region +chr9 69141700 70957900 High Signal Region +chr9 72652100 72654500 High Signal Region +chr9 78789200 78791100 High Signal Region +chr9 79185700 79187900 High Signal Region +chr9 87779800 87780900 High Signal Region +chr9 140221300 140223800 High Signal Region +chr9 141053300 141213400 Low Mappability +chrX 0 290100 High Signal Region +chrX 392200 529200 Low Mappability +chrX 1006400 1334000 High Signal Region +chrX 7505600 7509900 High Signal Region +chrX 9371800 9400200 High Signal Region +chrX 49164900 49386300 High Signal Region +chrX 55207100 55210900 Low Mappability +chrX 58329700 58433500 High Signal Region +chrX 58461000 61920100 High Signal Region +chrX 62005100 62007000 High Signal Region +chrX 78057800 78060000 High Signal Region +chrX 99512200 99516600 High Signal Region +chrX 101446100 
101744100 High Signal Region +chrX 108258600 108312300 High Signal Region +chrX 111555900 111595100 High Signal Region +chrX 114959100 115006100 High Signal Region +chrX 125595300 125608200 Low Mappability +chrX 132242600 132250600 High Signal Region +chrX 134852300 134971100 High Signal Region +chrX 136518800 136521500 High Signal Region +chrX 154528900 154616300 High Signal Region +chrX 155038500 155270500 High Signal Region +chrY 7432700 13491000 High Signal Region +chrY 13633400 14289000 High Signal Region +chrY 28783400 59373500 High Signal Region diff --git a/assets/blacklists/v2.0/hg38-blacklist.v2.bed b/assets/blacklists/v2.0/hg38-blacklist.v2.bed new file mode 100644 index 0000000000000000000000000000000000000000..3852ac0c5f32a5c37546b83f3986d29785e65154 --- /dev/null +++ b/assets/blacklists/v2.0/hg38-blacklist.v2.bed @@ -0,0 +1,636 @@ +chr10 0 45700 Low Mappability +chr10 38481300 38596500 High Signal Region +chr10 38782600 38967900 High Signal Region +chr10 39901300 41712900 High Signal Region +chr10 41838900 42107300 High Signal Region +chr10 42279400 42322500 High Signal Region +chr10 126946300 126953400 Low Mappability +chr10 133625800 133797400 High Signal Region +chr11 0 194500 Low Mappability +chr11 518900 520700 Low Mappability +chr11 584400 586500 High Signal Region +chr11 964100 966000 Low Mappability +chr11 1015700 1019300 High Signal Region +chr11 1091000 1098200 Low Mappability +chr11 3652800 3655600 High Signal Region +chr11 10506900 10511100 High Signal Region +chr11 28206300 28236700 High Signal Region +chr11 50813600 54383000 High Signal Region +chr11 61084500 61130400 High Signal Region +chr11 70370400 70372400 High Signal Region +chr11 73509800 73511700 High Signal Region +chr11 77885600 77887600 High Signal Region +chr11 93417500 93427700 High Signal Region +chr11 94232700 94240400 High Signal Region +chr11 103408700 103410600 High Signal Region +chr11 121175000 121187000 High Signal Region +chr11 131679500 131681500 High Signal Region 
+chr11 135075600 135086600 High Signal Region +chr12 0 77800 High Signal Region +chr12 371800 422400 High Signal Region +chr12 2254900 2257000 High Signal Region +chr12 2519800 2540500 Low Mappability +chr12 5928900 5933000 Low Mappability +chr12 20550500 20552400 Low Mappability +chr12 20768400 20770300 High Signal Region +chr12 29790400 29834600 High Signal Region +chr12 34715400 37269100 High Signal Region +chr12 41362700 41364600 High Signal Region +chr12 61471100 61473000 High Signal Region +chr12 66473900 66475800 High Signal Region +chr12 101147000 101155000 High Signal Region +chr12 113079600 113081500 High Signal Region +chr12 124430500 124440300 High Signal Region +chr12 124905900 124941800 High Signal Region +chr12 130386400 130394100 High Signal Region +chr12 131475300 131478600 High Signal Region +chr12 131576000 131589700 High Signal Region +chr12 132223300 132243400 High Signal Region +chr12 132455100 132465200 High Signal Region +chr12 133249000 133275300 High Signal Region +chr13 16087600 16165300 High Signal Region +chr13 16226300 18171400 High Signal Region +chr13 18211000 18216100 High Signal Region +chr13 57140500 57172500 High Signal Region +chr13 109423200 109425200 High Signal Region +chr13 114353300 114364300 Low Mappability +chr14 0 18670900 High Signal Region +chr14 18695400 19724300 High Signal Region +chr14 23033300 23098600 High Signal Region +chr14 26629300 26634900 High Signal Region +chr14 31793800 31798100 High Signal Region +chr14 32483400 32486000 High Signal Region +chr14 34537100 34562600 High Signal Region +chr14 35947200 35950000 High Signal Region +chr14 37351000 37356700 High Signal Region +chr14 44025100 44027200 High Signal Region +chr14 44705100 44709900 High Signal Region +chr14 45477100 45482500 High Signal Region +chr14 46865300 46866500 High Signal Region +chr14 54235600 54240000 High Signal Region +chr14 57112100 57118100 High Signal Region +chr14 74711700 74729000 High Signal Region +chr14 86074000 86076000 High 
Signal Region +chr14 86593300 86595200 High Signal Region +chr14 88443700 88458100 High Signal Region +chr14 100525900 100527800 High Signal Region +chr14 101267600 101272200 High Signal Region +chr14 101674400 101676400 High Signal Region +chr14 104288100 104290200 High Signal Region +chr14 105215000 105240900 High Signal Region +chr14 105568500 105583900 High Signal Region +chr14 105616500 105618600 High Signal Region +chr14 106326900 106367700 High Signal Region +chr15 0 17035000 High Signal Region +chr15 17058500 19790100 High Signal Region +chr15 20005600 22606300 High Signal Region +chr15 23125400 23357400 High Signal Region +chr15 25757700 25759100 Low Mappability +chr15 28304900 28683400 High Signal Region +chr15 30066300 30627500 High Signal Region +chr15 30844100 30859900 High Signal Region +chr15 32153700 32626200 High Signal Region +chr15 54925700 54932200 High Signal Region +chr15 56311200 56314600 High Signal Region +chr15 72635200 72687100 High Signal Region +chr15 74068100 74102000 High Signal Region +chr15 75254100 75299800 High Signal Region +chr15 77698600 77700600 High Signal Region +chr15 82321000 82374600 High Signal Region +chr15 82421200 82541700 High Signal Region +chr15 84405300 84524700 High Signal Region +chr15 101752300 101764800 Low Mappability +chr15 101892700 101991100 High Signal Region +chr16 29430800 29566900 Low Mappability +chr16 34061400 34121400 High Signal Region +chr16 34272000 34633100 High Signal Region +chr16 34657200 34672500 High Signal Region +chr16 34694600 34772000 High Signal Region +chr16 34832600 34922100 High Signal Region +chr16 34945600 35072500 Low Mappability +chr16 36166300 36202400 High Signal Region +chr16 36225200 46423000 High Signal Region +chr16 46449700 46467000 High Signal Region +chr16 90100500 90338300 Low Mappability +chr17 0 137600 High Signal Region +chr17 294900 317900 High Signal Region +chr17 448200 510900 High Signal Region +chr17 1061500 1066100 High Signal Region +chr17 1307700 1312000 Low 
Mappability +chr17 19025700 19237400 High Signal Region +chr17 21783300 22054000 High Signal Region +chr17 22520400 22527300 High Signal Region +chr17 22745200 26629800 High Signal Region +chr17 26766800 26987200 High Signal Region +chr17 43227600 43324300 High Signal Region +chr17 45511500 45641300 Low Mappability +chr17 53104900 53107300 High Signal Region +chr18 0 64600 High Signal Region +chr18 105200 113200 High Signal Region +chr18 971000 976500 High Signal Region +chr18 2841300 2861500 High Signal Region +chr18 15367200 20940300 High Signal Region +chr18 46961600 47031700 High Signal Region +chr18 47852300 47854300 Low Mappability +chr18 52791800 52793800 High Signal Region +chr18 74615900 74618100 High Signal Region +chr18 76966200 76968500 High Signal Region +chr18 78436900 78438700 Low Mappability +chr18 79013800 79040300 High Signal Region +chr18 79617800 79621500 High Signal Region +chr18 80257400 80373200 High Signal Region +chr19 0 271200 High Signal Region +chr19 7019100 7061300 High Signal Region +chr19 7449400 7452000 High Signal Region +chr19 8740100 8800500 High Signal Region +chr19 24330100 27274500 High Signal Region +chr19 27337600 27427400 High Signal Region +chr19 34386800 34393500 High Signal Region +chr19 34860600 34866200 High Signal Region +chr19 36267900 36313700 High Signal Region +chr19 37264900 37304300 High Signal Region +chr19 44393300 44416700 High Signal Region +chr19 47903000 47959700 High Signal Region +chr19 50090500 50140400 High Signal Region +chr19 58538700 58617600 High Signal Region +chr1 0 792500 High Signal Region +chr1 91386300 91388400 Low Mappability +chr1 103594400 103760600 High Signal Region +chr1 121605200 124938900 High Signal Region +chr1 125067600 125086000 High Signal Region +chr1 125130200 143562200 High Signal Region +chr1 161423100 161472400 High Signal Region +chr1 168348600 168349900 High Signal Region +chr1 224010800 224017000 High Signal Region +chr1 236713000 236715600 Low Mappability +chr1 248932700 
248956400 High Signal Region +chr20 0 67900 High Signal Region +chr20 26364200 28916900 High Signal Region +chr20 28939400 29264700 High Signal Region +chr20 30995400 31246000 High Signal Region +chr20 47893800 47900200 High Signal Region +chr21 0 8679600 High Signal Region +chr21 9159900 9735300 High Signal Region +chr21 10013900 10069600 High Signal Region +chr21 10094700 10505100 High Signal Region +chr21 10650900 12965800 High Signal Region +chr21 43212400 43280900 High Signal Region +chr21 46682700 46709900 High Signal Region +chr22 10687700 11428100 High Signal Region +chr22 11496900 11873100 High Signal Region +chr22 11976900 15154400 High Signal Region +chr22 16258000 16385800 High Signal Region +chr22 18175900 18947300 High Signal Region +chr22 20337400 20343300 High Signal Region +chr22 21113500 21554000 High Signal Region +chr22 49972700 49975300 High Signal Region +chr22 50642800 50644900 High Signal Region +chr22 50786600 50818400 High Signal Region +chr2 1221700 1223900 High Signal Region +chr2 1594700 1605200 High Signal Region +chr2 3179600 3182100 High Signal Region +chr2 4643800 4648800 High Signal Region +chr2 10952800 10955000 High Signal Region +chr2 13718700 13737700 High Signal Region +chr2 21903500 21906400 High Signal Region +chr2 32865900 32869900 High Signal Region +chr2 32915300 32918400 High Signal Region +chr2 33766500 33768400 High Signal Region +chr2 36183000 36184500 High Signal Region +chr2 49228700 49230700 High Signal Region +chr2 64359300 64377000 High Signal Region +chr2 86655300 86661100 High Signal Region +chr2 86900700 87078100 Low Mappability +chr2 87119300 87189800 Low Mappability +chr2 87217000 87866200 High Signal Region +chr2 88771000 88806500 High Signal Region +chr2 89235300 89947100 High Signal Region +chr2 90246300 91735500 High Signal Region +chr2 91783000 91924800 Low Mappability +chr2 91969000 94569500 High Signal Region +chr2 95849400 96067900 High Signal Region +chr2 97106300 97615800 High Signal Region +chr2 
109198400 109200700 High Signal Region +chr2 109744600 110095200 High Signal Region +chr2 110229200 110633400 Low Mappability +chr2 111253600 111500500 Low Mappability +chr2 112346200 112441300 Low Mappability +chr2 113370100 113662700 High Signal Region +chr2 130496800 130716400 High Signal Region +chr2 132201000 132288900 High Signal Region +chr2 132353600 132364500 High Signal Region +chr2 148880800 148882800 High Signal Region +chr2 161277700 161283400 High Signal Region +chr2 181274800 181276800 High Signal Region +chr2 226108500 226110400 High Signal Region +chr2 234889800 234894400 High Signal Region +chr2 239642200 239645600 High Signal Region +chr2 240308100 240310300 High Signal Region +chr2 241589300 241591800 High Signal Region +chr2 242005900 242011100 High Signal Region +chr2 242110100 242193500 High Signal Region +chr3 0 11600 High Signal Region +chr3 3895200 3896700 High Signal Region +chr3 4916700 4922500 High Signal Region +chr3 14091000 14092500 High Signal Region +chr3 15187200 15207800 High Signal Region +chr3 15592100 15603300 High Signal Region +chr3 16176800 16179200 High Signal Region +chr3 16679700 16682500 High Signal Region +chr3 19499700 19504000 High Signal Region +chr3 19624000 19627100 High Signal Region +chr3 21983200 21988100 High Signal Region +chr3 24053500 24054900 High Signal Region +chr3 26384800 26404100 High Signal Region +chr3 29993900 29999900 High Signal Region +chr3 36987500 36995000 High Signal Region +chr3 38083400 38085400 High Signal Region +chr3 38406100 38430900 High Signal Region +chr3 39366700 39386000 High Signal Region +chr3 40219400 40240500 High Signal Region +chr3 49671000 49696700 High Signal Region +chr3 51457800 51462000 High Signal Region +chr3 57326800 57328500 High Signal Region +chr3 65124100 65126100 High Signal Region +chr3 65510000 65513900 High Signal Region +chr3 65697400 65699300 High Signal Region +chr3 66273800 66275200 High Signal Region +chr3 68076400 68077800 High Signal Region +chr3 
69047300 69053600 High Signal Region +chr3 69475300 69479700 High Signal Region +chr3 75630100 75707800 High Signal Region +chr3 75736400 75754600 High Signal Region +chr3 78948800 78950500 High Signal Region +chr3 80876000 80894000 High Signal Region +chr3 89345600 89370500 High Signal Region +chr3 90156400 90175500 High Signal Region +chr3 90455400 91297100 High Signal Region +chr3 91516200 93749200 High Signal Region +chr3 96616300 96619300 High Signal Region +chr3 97905100 97923200 High Signal Region +chr3 101674800 101698400 High Signal Region +chr3 103224300 103236500 High Signal Region +chr3 106665700 106669700 High Signal Region +chr3 106975900 106979600 High Signal Region +chr3 108751100 108755100 High Signal Region +chr3 111019500 111024600 High Signal Region +chr3 121933800 121936400 High Signal Region +chr3 122414300 122417500 High Signal Region +chr3 122735500 122796600 High Signal Region +chr3 122837000 122838700 High Signal Region +chr3 133177100 133179800 High Signal Region +chr3 133551500 133579500 High Signal Region +chr3 135437200 135439100 High Signal Region +chr3 136954600 136969200 High Signal Region +chr3 137168400 137169900 High Signal Region +chr3 138575800 138595900 High Signal Region +chr3 139190800 139194700 High Signal Region +chr3 153236200 153241300 High Signal Region +chr3 155544100 155546700 High Signal Region +chr3 156279000 156283500 High Signal Region +chr3 157080800 157093400 High Signal Region +chr3 158511300 158513100 High Signal Region +chr3 160941200 160948700 High Signal Region +chr3 161001900 161014100 High Signal Region +chr3 165573100 165591000 High Signal Region +chr3 166228200 166232400 High Signal Region +chr3 168012100 168016800 High Signal Region +chr3 170567000 170569900 High Signal Region +chr3 170864300 170881400 High Signal Region +chr3 171626600 171637700 High Signal Region +chr3 174829200 174831800 High Signal Region +chr3 176828700 176833000 High Signal Region +chr3 177660600 177664000 High Signal Region 
+chr3 178926800 178941300 High Signal Region +chr3 183016900 183019100 High Signal Region +chr3 183955400 183958700 High Signal Region +chr3 187893900 187896100 High Signal Region +chr3 192739300 192742700 High Signal Region +chr3 194323600 194334900 High Signal Region +chr3 195477900 195507300 High Signal Region +chr3 195616000 195750100 High Signal Region +chr3 195775500 195791400 High Signal Region +chr3 195914100 196028300 High Signal Region +chr3 196249400 196251900 High Signal Region +chr3 196897800 196899800 High Signal Region +chr3 197030600 197035800 High Signal Region +chr3 197383400 197428800 High Signal Region +chr3 197454700 197460800 High Signal Region +chr3 197598400 197680900 High Signal Region +chr3 198099800 198295500 High Signal Region +chr4 0 69200 High Signal Region +chr4 554100 556500 High Signal Region +chr4 1427000 1468900 High Signal Region +chr4 6002700 6005700 High Signal Region +chr4 7863000 7865000 High Signal Region +chr4 9212700 9369600 High Signal Region +chr4 40291700 40318200 High Signal Region +chr4 49077200 51816100 High Signal Region +chr4 55327200 55329200 High Signal Region +chr4 77994000 78009600 High Signal Region +chr4 119274400 119301700 High Signal Region +chr4 146285100 146305300 High Signal Region +chr4 162420500 162422400 High Signal Region +chr4 166554300 166581300 Low Mappability +chr4 181238800 181242300 Low Mappability +chr4 189232500 189236300 High Signal Region +chr4 189834900 189849700 High Signal Region +chr4 189877500 190023700 High Signal Region +chr4 190048600 190214500 High Signal Region +chr5 0 44100 High Signal Region +chr5 548300 564100 High Signal Region +chr5 647600 651700 High Signal Region +chr5 1326100 1334600 High Signal Region +chr5 2144600 2147800 High Signal Region +chr5 2489800 2491700 High Signal Region +chr5 3322100 3325100 High Signal Region +chr5 6967700 6971700 High Signal Region +chr5 17516800 17600200 High Signal Region +chr5 21477600 21497600 High Signal Region +chr5 25381400 25384300 
High Signal Region +chr5 34177900 34244800 High Signal Region +chr5 45522900 45525200 High Signal Region +chr5 45743000 45744800 High Signal Region +chr5 46433900 46687700 High Signal Region +chr5 46708100 50165300 High Signal Region +chr5 60759700 60762500 High Signal Region +chr5 63320900 63335500 High Signal Region +chr5 69540700 71359500 High Signal Region +chr5 71850000 71852800 High Signal Region +chr5 74685400 74712400 High Signal Region +chr5 78452400 78457600 High Signal Region +chr5 78848400 78872800 High Signal Region +chr5 80649100 80653100 High Signal Region +chr5 85641800 85662700 High Signal Region +chr5 93947500 93948900 High Signal Region +chr5 94567100 94570400 High Signal Region +chr5 100045500 100076300 High Signal Region +chr5 106425500 106429500 High Signal Region +chr5 109259500 109265400 High Signal Region +chr5 111302100 111308300 High Signal Region +chr5 114156700 114158300 High Signal Region +chr5 119904000 119905600 High Signal Region +chr5 123760300 123762200 High Signal Region +chr5 134922500 134929400 High Signal Region +chr5 139005500 139011600 High Signal Region +chr5 146610000 146615500 High Signal Region +chr5 153071100 153077000 High Signal Region +chr5 156658300 156665400 High Signal Region +chr5 161606000 161611700 High Signal Region +chr5 171083900 171090200 High Signal Region +chr5 175904500 176118000 High Signal Region +chr5 176590700 176593000 High Signal Region +chr5 177636700 177684700 High Signal Region +chr5 177960500 177981400 High Signal Region +chr5 178584600 178586600 High Signal Region +chr5 181172600 181538200 High Signal Region +chr6 256500 382800 High Signal Region +chr6 861500 864200 High Signal Region +chr6 1052800 1054800 High Signal Region +chr6 26669200 26832300 High Signal Region +chr6 33484600 33486400 High Signal Region +chr6 34070600 34074000 High Signal Region +chr6 38262000 38301600 High Signal Region +chr6 39455800 39460500 High Signal Region +chr6 44043600 44080000 High Signal Region +chr6 44180600 
44182900 High Signal Region +chr6 51874900 51901300 High Signal Region +chr6 54961900 54967400 High Signal Region +chr6 58432200 60242300 High Signal Region +chr6 61321800 61493000 High Signal Region +chr6 61573200 61575100 Low Mappability +chr6 61661900 61673400 High Signal Region +chr6 103709000 103715100 High Signal Region +chr6 115254900 115256800 High Signal Region +chr6 143799900 143801800 High Signal Region +chr6 156035300 156040100 High Signal Region +chr6 157309500 157324800 High Signal Region +chr6 160611700 160647400 High Signal Region +chr6 170145300 170147200 High Signal Region +chr6 170376900 170401000 High Signal Region +chr6 170465400 170468400 High Signal Region +chr7 0 49600 High Signal Region +chr7 224500 241300 High Signal Region +chr7 904700 907100 Low Mappability +chr7 1271400 1273500 High Signal Region +chr7 45251000 45253000 High Signal Region +chr7 56369500 56375600 High Signal Region +chr7 57485300 57497800 High Signal Region +chr7 57611600 57637700 Low Mappability +chr7 58031800 60997400 High Signal Region +chr7 61017800 61075200 High Signal Region +chr7 61102900 61725200 Low Mappability +chr7 62265700 62409500 High Signal Region +chr7 62430000 62520600 High Signal Region +chr7 65488000 65496500 High Signal Region +chr7 100951400 100968300 High Signal Region +chr7 100991500 101004600 High Signal Region +chr7 102474700 102686400 High Signal Region +chr7 142665100 142668500 High Signal Region +chr7 144180800 144377300 Low Mappability +chr7 145996400 146018600 High Signal Region +chr7 152375800 152435100 High Signal Region +chr7 158131400 158156200 High Signal Region +chr7 158594300 158596200 High Signal Region +chr7 158893100 158918100 High Signal Region +chr7 159334900 159345900 High Signal Region +chr8 7209800 7914700 High Signal Region +chr8 7940500 8075700 High Signal Region +chr8 8128200 8204600 High Signal Region +chr8 12136900 12614300 High Signal Region +chr8 43236700 43262600 High Signal Region +chr8 43937900 45969600 High Signal 
Region +chr8 46829400 46832000 High Signal Region +chr8 57204900 57216100 High Signal Region +chr8 59168700 59170400 High Signal Region +chr8 67584500 67592700 High Signal Region +chr8 69688400 69691100 High Signal Region +chr8 71406700 71412400 High Signal Region +chr8 75444100 75448200 High Signal Region +chr8 81841500 81851900 High Signal Region +chr8 85642100 85829300 High Signal Region +chr8 88685900 88691700 High Signal Region +chr8 96171200 96173100 High Signal Region +chr8 99494900 99496800 High Signal Region +chr8 105789200 105793800 High Signal Region +chr8 141491400 141493500 High Signal Region +chr8 141871100 141875200 High Signal Region +chr8 143641400 143670500 High Signal Region +chr8 144124800 144137600 High Signal Region +chr9 319900 322400 High Signal Region +chr9 33656600 33660000 High Signal Region +chr9 35912600 35915300 High Signal Region +chr9 38824200 39089400 High Signal Region +chr9 39846200 40771100 High Signal Region +chr9 40792500 41323100 High Signal Region +chr9 41492300 41635600 High Signal Region +chr9 41661300 42119600 Low Mappability +chr9 42364000 42410600 High Signal Region +chr9 42899400 42901300 High Signal Region +chr9 43263100 61518900 High Signal Region +chr9 61735300 63548000 High Signal Region +chr9 63761400 64027300 High Signal Region +chr9 64135000 65390600 High Signal Region +chr9 65579400 66874600 High Signal Region +chr9 66959000 68398100 High Signal Region +chr9 70037200 70039600 High Signal Region +chr9 76174300 76176200 High Signal Region +chr9 83222900 83226900 High Signal Region +chr9 85071600 85075100 High Signal Region +chr9 85164800 85166100 High Signal Region +chr9 108502000 108506600 High Signal Region +chr9 134164500 134185500 High Signal Region +chr9 137326800 137330600 High Signal Region +chr9 137715200 137722200 Low Mappability +chr9 137841200 137846800 Low Mappability +chr9 138222000 138394700 High Signal Region +chrX 0 329300 High Signal Region +chrX 362400 388500 High Signal Region +chrX 456500 
531800 High Signal Region +chrX 723800 739500 High Signal Region +chrX 864500 930400 High Signal Region +chrX 1049100 1054300 High Signal Region +chrX 1085100 1175500 High Signal Region +chrX 1200600 1209400 High Signal Region +chrX 1249200 1269000 High Signal Region +chrX 1289500 1298900 High Signal Region +chrX 1365300 1458700 High Signal Region +chrX 1480900 1492800 High Signal Region +chrX 1816200 1820600 High Signal Region +chrX 2223900 2521900 High Signal Region +chrX 2580600 2751300 High Signal Region +chrX 3966700 3968700 High Signal Region +chrX 5481200 5486100 High Signal Region +chrX 6933400 6938700 High Signal Region +chrX 7587600 7591800 High Signal Region +chrX 9403600 9415100 High Signal Region +chrX 10785000 10809700 High Signal Region +chrX 10966600 10976800 High Signal Region +chrX 11218800 11221100 Low Mappability +chrX 11840900 11848000 High Signal Region +chrX 14085100 14109500 High Signal Region +chrX 14286500 14289300 High Signal Region +chrX 16361200 16366000 High Signal Region +chrX 16498100 16503400 High Signal Region +chrX 19940200 19946300 High Signal Region +chrX 21340600 21345700 High Signal Region +chrX 25773300 25776000 High Signal Region +chrX 26176400 26181400 High Signal Region +chrX 30767800 30772600 High Signal Region +chrX 31077600 31082600 High Signal Region +chrX 31511400 31535800 High Signal Region +chrX 34416800 34425900 High Signal Region +chrX 36465200 36471200 High Signal Region +chrX 37628400 37633500 High Signal Region +chrX 42872300 42910700 High Signal Region +chrX 49317500 49623500 High Signal Region +chrX 50019400 50033700 High Signal Region +chrX 50056700 50066100 High Signal Region +chrX 51202300 51268100 High Signal Region +chrX 51427500 51432400 High Signal Region +chrX 52175000 52228100 High Signal Region +chrX 52442800 52538100 High Signal Region +chrX 53761700 53789500 High Signal Region +chrX 55180400 55184500 High Signal Region +chrX 56754900 56781100 Low Mappability +chrX 57712300 57719700 High Signal 
Region +chrX 58467900 62522800 High Signal Region +chrX 63129600 63290600 Low Mappability +chrX 67311800 67323800 High Signal Region +chrX 67626800 67632300 High Signal Region +chrX 68217300 68230200 High Signal Region +chrX 70600000 70603800 High Signal Region +chrX 70640600 70645000 High Signal Region +chrX 70963600 70964900 High Signal Region +chrX 71978800 71980500 High Signal Region +chrX 72489400 72490800 High Signal Region +chrX 72743200 73035800 High Signal Region +chrX 73381000 73387000 High Signal Region +chrX 73887000 73891300 High Signal Region +chrX 74660000 74718100 High Signal Region +chrX 74789000 74794000 High Signal Region +chrX 74952200 74995200 High Signal Region +chrX 78802400 78804500 High Signal Region +chrX 79765500 79789600 High Signal Region +chrX 80534100 80537000 High Signal Region +chrX 82849700 82859300 Low Mappability +chrX 83752100 83756900 High Signal Region +chrX 86046600 86076600 High Signal Region +chrX 86395500 86398100 High Signal Region +chrX 86970000 86975600 High Signal Region +chrX 87220500 87222100 High Signal Region +chrX 89060200 89062700 High Signal Region +chrX 89202500 89208400 High Signal Region +chrX 91332900 91336600 High Signal Region +chrX 93618000 93633400 High Signal Region +chrX 94863600 94868300 High Signal Region +chrX 97509600 97515000 High Signal Region +chrX 100135800 100141000 High Signal Region +chrX 100257100 100261600 High Signal Region +chrX 101471700 101474900 High Signal Region +chrX 102188700 102489200 High Signal Region +chrX 103851800 103897800 High Signal Region +chrX 106755500 106769400 High Signal Region +chrX 106813900 106830900 High Signal Region +chrX 107515800 107517200 High Signal Region +chrX 109034800 109069100 High Signal Region +chrX 109114900 109119400 High Signal Region +chrX 109520800 109525700 High Signal Region +chrX 109985900 109987300 High Signal Region +chrX 110816700 110833400 High Signal Region +chrX 111416100 111418000 High Signal Region +chrX 113141700 113143600 High 
Signal Region +chrX 114701600 114724300 High Signal Region +chrX 115725600 115889600 High Signal Region +chrX 116557600 116595600 High Signal Region +chrX 117874100 117880000 High Signal Region +chrX 118009000 118037800 High Signal Region +chrX 118070900 118072700 High Signal Region +chrX 121263700 121268100 High Signal Region +chrX 121299200 121300600 High Signal Region +chrX 122528400 122550000 High Signal Region +chrX 124584300 124588400 High Signal Region +chrX 125927600 125937100 High Signal Region +chrX 126463700 126474200 High Signal Region +chrX 127116700 127122600 High Signal Region +chrX 127362200 127368300 High Signal Region +chrX 128785000 128788700 High Signal Region +chrX 129337600 129357900 High Signal Region +chrX 129388400 129408400 High Signal Region +chrX 130567700 130572000 High Signal Region +chrX 131152200 131157400 High Signal Region +chrX 131378300 131383300 High Signal Region +chrX 131664300 131670000 High Signal Region +chrX 132284600 132320400 High Signal Region +chrX 133108600 133116500 High Signal Region +chrX 135718600 135888700 High Signal Region +chrX 137074700 137079100 High Signal Region +chrX 137436600 137439300 High Signal Region +chrX 138300600 138302200 High Signal Region +chrX 139437600 139446800 High Signal Region +chrX 139621500 139622800 High Signal Region +chrX 140722400 140726100 High Signal Region +chrX 141000400 141108300 High Signal Region +chrX 142478000 142483800 High Signal Region +chrX 142892300 142911600 High Signal Region +chrX 143352000 143356500 High Signal Region +chrX 144404500 144475900 Low Mappability +chrX 147281700 147287100 High Signal Region +chrX 147653800 147659900 High Signal Region +chrX 148123500 148129000 High Signal Region +chrX 148347100 148378700 High Signal Region +chrX 149437900 149441900 High Signal Region +chrX 150024800 150026200 High Signal Region +chrX 152173800 152175100 High Signal Region +chrX 153251200 153316400 High Signal Region +chrX 154870000 154890200 High Signal Region +chrX 
154938900 154945100 High Signal Region +chrX 155299600 155305100 High Signal Region +chrX 155454000 155522000 High Signal Region +chrX 155700400 155727500 High Signal Region +chrX 155983500 156040800 High Signal Region +chrY 4343800 4345800 High Signal Region +chrY 10246200 11041200 High Signal Region +chrY 11072100 11335300 High Signal Region +chrY 11486600 11757800 High Signal Region +chrY 26637300 57227400 High Signal Region diff --git a/assets/blacklists/v2.0/mm10-blacklist.v2.bed b/assets/blacklists/v2.0/mm10-blacklist.v2.bed new file mode 100644 index 0000000000000000000000000000000000000000..e8ff4cc1142dce1e1d8839c4594296aadf91bacd --- /dev/null +++ b/assets/blacklists/v2.0/mm10-blacklist.v2.bed @@ -0,0 +1,3435 @@ +chr10 0 3135400 High Signal Region +chr10 3218900 3276600 Low Mappability +chr10 3576900 3627700 Low Mappability +chr10 4191100 4197600 Low Mappability +chr10 4613500 4615400 High Signal Region +chr10 4761300 4763900 High Signal Region +chr10 5080800 5096600 Low Mappability +chr10 5580100 5586600 Low Mappability +chr10 6281200 6286700 High Signal Region +chr10 6740200 6742100 High Signal Region +chr10 7396300 7429800 High Signal Region +chr10 7633600 7636600 Low Mappability +chr10 7889700 7897500 High Signal Region +chr10 8144900 8153000 High Signal Region +chr10 8264000 8269200 High Signal Region +chr10 8382400 8404400 High Signal Region +chr10 8599200 8606400 Low Mappability +chr10 10012200 10033400 High Signal Region +chr10 10566900 10593500 High Signal Region +chr10 11218400 11224800 Low Mappability +chr10 11351800 11406300 Low Mappability +chr10 11491200 11493100 High Signal Region +chr10 11612300 11642500 High Signal Region +chr10 11692500 11701300 Low Mappability +chr10 12266500 12273000 High Signal Region +chr10 12385800 12396000 High Signal Region +chr10 13401200 13403100 High Signal Region +chr10 14559900 14577100 High Signal Region +chr10 14646300 14664500 Low Mappability +chr10 14923800 14928300 High Signal Region +chr10 15047600 
15083100 High Signal Region +chr10 15528600 15534200 High Signal Region +chr10 15567000 15641800 High Signal Region +chr10 16967500 16971600 High Signal Region +chr10 17499600 17501700 High Signal Region +chr10 18555500 18558100 High Signal Region +chr10 19427600 19429100 High Signal Region +chr10 19538800 19546100 Low Mappability +chr10 19772200 19801600 High Signal Region +chr10 20458900 20460800 High Signal Region +chr10 21208600 21216600 Low Mappability +chr10 21278500 21313500 High Signal Region +chr10 21642200 21649600 Low Mappability +chr10 21727800 21736400 Low Mappability +chr10 22031300 22063500 High Signal Region +chr10 22127200 22164500 High Signal Region +chr10 22186700 22290500 High Signal Region +chr10 22369100 22472300 High Signal Region +chr10 22683100 22690600 Low Mappability +chr10 22935900 22941800 High Signal Region +chr10 24687500 24691700 Low Mappability +chr10 25091400 25106900 Low Mappability +chr10 25622900 25629400 Low Mappability +chr10 25968400 25973400 Low Mappability +chr10 26641500 26662800 Low Mappability +chr10 27403200 27407600 High Signal Region +chr10 27904000 27909500 High Signal Region +chr10 28908500 28940600 High Signal Region +chr10 29243900 29249600 High Signal Region +chr10 29924300 29930700 Low Mappability +chr10 29954000 29971900 High Signal Region +chr10 30553000 30577100 High Signal Region +chr10 31054900 31095900 Low Mappability +chr10 31406500 31411100 High Signal Region +chr10 31750000 31757100 Low Mappability +chr10 31878400 31885800 High Signal Region +chr10 31980100 32000400 Low Mappability +chr10 32039700 32045000 High Signal Region +chr10 32176100 32182400 High Signal Region +chr10 32499200 32529900 High Signal Region +chr10 32816400 32857200 High Signal Region +chr10 33315300 33319800 High Signal Region +chr10 33492300 33508900 High Signal Region +chr10 33886600 33901100 Low Mappability +chr10 34739400 34749100 Low Mappability +chr10 35669300 35725500 High Signal Region +chr10 36130200 36135500 High Signal 
Region +chr10 36160700 36166700 High Signal Region +chr10 36594500 36597500 Low Mappability +chr10 36942200 36948800 Low Mappability +chr10 37186500 37189300 High Signal Region +chr10 37799700 37821400 High Signal Region +chr10 37964600 37970100 High Signal Region +chr10 38590100 38606100 High Signal Region +chr10 38637900 38644200 High Signal Region +chr10 38729400 38782700 High Signal Region +chr10 38933500 38956500 High Signal Region +chr10 39126700 39129400 High Signal Region +chr10 39760700 39764700 High Signal Region +chr10 41185700 41195800 High Signal Region +chr10 41840500 41859100 Low Mappability +chr10 43769400 43773800 High Signal Region +chr10 44206300 44254100 High Signal Region +chr10 45515000 45588000 Low Mappability +chr10 45624800 45628400 High Signal Region +chr10 46136500 46139300 High Signal Region +chr10 46468300 46472100 High Signal Region +chr10 46500500 46538800 High Signal Region +chr10 46789300 46812500 High Signal Region +chr10 46966700 47009000 High Signal Region +chr10 47048600 47074700 Low Mappability +chr10 47663600 47683500 High Signal Region +chr10 47743600 47758500 High Signal Region +chr10 47875400 47881600 High Signal Region +chr10 48032400 48058800 High Signal Region +chr10 48677400 48682800 High Signal Region +chr10 49823500 49842200 High Signal Region +chr10 50029200 50035300 High Signal Region +chr10 50109900 50115500 High Signal Region +chr10 50178500 50184800 High Signal Region +chr10 50253700 50296500 High Signal Region +chr10 50333400 50335300 High Signal Region +chr10 50524000 50553900 High Signal Region +chr10 51126200 51132900 High Signal Region +chr10 51436800 51448000 High Signal Region +chr10 51470300 51474900 High Signal Region +chr10 51882900 51888000 Low Mappability +chr10 52052600 52059000 Low Mappability +chr10 52089600 52148500 High Signal Region +chr10 52522600 52599800 High Signal Region +chr10 53073900 53081100 High Signal Region +chr10 53569600 53576000 Low Mappability +chr10 54216200 54222900 High Signal 
Region +chr10 54588800 54619900 Low Mappability +chr10 55080400 55090500 High Signal Region +chr10 55654500 55659600 High Signal Region +chr10 55715600 55751000 High Signal Region +chr10 55841700 55847900 High Signal Region +chr10 56250200 56293900 High Signal Region +chr10 56701000 56728000 High Signal Region +chr10 56894100 56897300 High Signal Region +chr10 57099200 57153200 High Signal Region +chr10 57239100 57245400 High Signal Region +chr10 57326900 57333900 High Signal Region +chr10 57434000 57456500 High Signal Region +chr10 57678600 57684900 High Signal Region +chr10 57862800 58240900 High Signal Region +chr10 58566200 58570900 High Signal Region +chr10 59381400 59396800 Low Mappability +chr10 59850500 59922300 Low Mappability +chr10 60444900 60446800 High Signal Region +chr10 60546600 60553100 Low Mappability +chr10 61373100 61375000 High Signal Region +chr10 63103900 63111200 Low Mappability +chr10 63508800 63519000 High Signal Region +chr10 63833800 63835000 High Signal Region +chr10 64418600 64420000 High Signal Region +chr10 65166300 65172600 High Signal Region +chr10 65450400 65477700 High Signal Region +chr10 65638900 65670200 High Signal Region +chr10 65938900 65956300 Low Mappability +chr10 66422900 66431000 High Signal Region +chr10 66662400 66678300 High Signal Region +chr10 69030100 69065800 High Signal Region +chr10 70657500 70668500 High Signal Region +chr10 70785400 70798600 Low Mappability +chr10 71012700 71019200 Low Mappability +chr10 71111600 71114200 Low Mappability +chr10 71510600 71637800 High Signal Region +chr10 71691300 71698600 Low Mappability +chr10 72292400 72314300 High Signal Region +chr10 72359200 72360700 High Signal Region +chr10 72493500 72499200 High Signal Region +chr10 72590700 72591900 High Signal Region +chr10 72690900 72709500 High Signal Region +chr10 73378200 73380100 High Signal Region +chr10 73576400 73601900 High Signal Region +chr10 74433300 74439500 High Signal Region +chr10 74655700 74672200 High Signal 
Region +chr10 74715300 74746600 High Signal Region +chr10 74857500 74888000 High Signal Region +chr10 76835100 76852400 High Signal Region +chr10 77950600 77979500 Low Mappability +chr10 78008300 78028800 Low Mappability +chr10 78637000 78696000 High Signal Region +chr10 78731500 78735800 High Signal Region +chr10 78803500 78823100 Low Mappability +chr10 79207800 79259400 High Signal Region +chr10 79314000 79354000 Low Mappability +chr10 80102300 80116000 High Signal Region +chr10 80928600 80996300 Low Mappability +chr10 81167600 81199400 High Signal Region +chr10 81600900 81997900 High Signal Region +chr10 82517500 82538800 High Signal Region +chr10 82571100 82575200 High Signal Region +chr10 82939800 82956300 High Signal Region +chr10 83386600 83392400 Low Mappability +chr10 83670800 83678100 Low Mappability +chr10 83768200 83792700 Low Mappability +chr10 84155900 84180800 Low Mappability +chr10 84436900 84473700 Low Mappability +chr10 84744500 84750100 Low Mappability +chr10 85413200 85419700 Low Mappability +chr10 85696600 85732800 High Signal Region +chr10 85840200 85872500 High Signal Region +chr10 86561700 86565700 High Signal Region +chr10 88628700 88658500 Low Mappability +chr10 88963900 88968200 Low Mappability +chr10 89398700 89400100 High Signal Region +chr10 89949700 89964500 High Signal Region +chr10 90249000 90255300 High Signal Region +chr10 90324500 90329800 Low Mappability +chr10 90471200 90474200 Low Mappability +chr10 91252200 91256900 High Signal Region +chr10 91928900 91944500 High Signal Region +chr10 92909200 92915800 High Signal Region +chr10 94362500 94369300 Low Mappability +chr10 94591500 94610000 High Signal Region +chr10 94871200 94873100 High Signal Region +chr10 96068700 96078800 High Signal Region +chr10 96157200 96162600 Low Mappability +chr10 96192400 96199800 Low Mappability +chr10 97320500 97329700 High Signal Region +chr10 97525500 97534200 Low Mappability +chr10 97755000 97761200 High Signal Region +chr10 97896600 97920300 
High Signal Region +chr10 98337800 98343700 High Signal Region +chr10 98433100 98444100 High Signal Region +chr10 100310500 100395900 High Signal Region +chr10 102667700 102669600 High Signal Region +chr10 102859800 102861500 High Signal Region +chr10 103500200 103519100 High Signal Region +chr10 103547000 103548600 High Signal Region +chr10 103569600 103575200 High Signal Region +chr10 103600400 103684400 High Signal Region +chr10 103936700 103942500 High Signal Region +chr10 104380700 104382300 High Signal Region +chr10 104493600 104499800 High Signal Region +chr10 104539700 104562500 Low Mappability +chr10 104748100 104771500 High Signal Region +chr10 104819400 104862500 Low Mappability +chr10 104966900 105001700 Low Mappability +chr10 105177000 105181900 Low Mappability +chr10 105672500 105678000 Low Mappability +chr10 106166900 106235700 High Signal Region +chr10 106382800 106403000 High Signal Region +chr10 106427100 106453600 High Signal Region +chr10 106529600 106535200 Low Mappability +chr10 107125500 107136900 Low Mappability +chr10 107551800 107560700 High Signal Region +chr10 107845300 107863900 High Signal Region +chr10 107978900 108006700 Low Mappability +chr10 109212600 109216800 High Signal Region +chr10 109315100 109322400 Low Mappability +chr10 109941600 109948000 High Signal Region +chr10 110104900 110111300 Low Mappability +chr10 110504500 110516000 High Signal Region +chr10 110667700 110700900 Low Mappability +chr10 111217500 111219000 High Signal Region +chr10 112013700 112021700 High Signal Region +chr10 112053500 112058400 Low Mappability +chr10 112540600 112542100 High Signal Region +chr10 112587000 112611100 High Signal Region +chr10 112682400 112722100 Low Mappability +chr10 113722600 113729800 Low Mappability +chr10 114167300 114174900 High Signal Region +chr10 114736400 114738300 High Signal Region +chr10 114860600 114866900 High Signal Region +chr10 115641300 115643100 High Signal Region +chr10 116606200 116613400 Low Mappability 
+chr10 116762000 116764200 High Signal Region +chr10 116878000 116879900 High Signal Region +chr10 117476200 117491000 High Signal Region +chr10 118014300 118033200 High Signal Region +chr10 118054000 118076600 High Signal Region +chr10 118199900 118279700 Low Mappability +chr10 118910200 118917100 High Signal Region +chr10 118937400 118953000 Low Mappability +chr10 119698800 119701600 Low Mappability +chr10 120974800 120977500 High Signal Region +chr10 121136000 121143400 Low Mappability +chr10 121164700 121169300 Low Mappability +chr10 121566100 121580200 High Signal Region +chr10 121707800 121713500 High Signal Region +chr10 121762300 121769400 High Signal Region +chr10 122141100 122166000 High Signal Region +chr10 122346900 122371300 Low Mappability +chr10 122632400 122638000 High Signal Region +chr10 122832900 122839300 High Signal Region +chr10 123792900 123797100 High Signal Region +chr10 124412900 124433300 High Signal Region +chr10 124576300 124583500 Low Mappability +chr10 124605700 124611000 Low Mappability +chr10 124680500 124686200 Low Mappability +chr10 124760500 124788800 High Signal Region +chr10 125819500 125825700 High Signal Region +chr10 125869000 125871400 High Signal Region +chr10 126262200 126291600 Low Mappability +chr10 127779500 127797900 High Signal Region +chr10 129189500 129217200 High Signal Region +chr10 129388700 129419600 Low Mappability +chr10 129443000 129454800 High Signal Region +chr10 129734500 129736400 High Signal Region +chr10 129925300 129940600 Low Mappability +chr10 130039500 130052900 High Signal Region +chr10 130396900 130408000 High Signal Region +chr10 130542000 130694900 High Signal Region +chr11 0 3201000 High Signal Region +chr11 5167600 5182600 High Signal Region +chr11 5361500 5365400 Low Mappability +chr11 5552700 5558200 Low Mappability +chr11 6141300 6148700 Low Mappability +chr11 7489400 7492300 High Signal Region +chr11 7752300 7774500 Low Mappability +chr11 8058600 8083100 Low Mappability +chr11 8354900 
8370700 High Signal Region +chr11 8907200 8936100 Low Mappability +chr11 9707900 9715100 Low Mappability +chr11 9807600 9814200 Low Mappability +chr11 10252000 10266800 High Signal Region +chr11 10760200 10770800 Low Mappability +chr11 11287200 11295100 High Signal Region +chr11 12129400 12163100 High Signal Region +chr11 12507200 12512700 Low Mappability +chr11 12561900 12569100 Low Mappability +chr11 12750500 12802700 High Signal Region +chr11 12856200 12863700 High Signal Region +chr11 12953900 12960700 Low Mappability +chr11 14896500 14922100 High Signal Region +chr11 15227600 15235000 Low Mappability +chr11 16022400 16029000 High Signal Region +chr11 16326500 16331700 High Signal Region +chr11 16418200 16419600 High Signal Region +chr11 16567100 16573100 High Signal Region +chr11 17401400 17407800 High Signal Region +chr11 18330900 18342700 High Signal Region +chr11 18773800 18780100 High Signal Region +chr11 19566100 19570600 Low Mappability +chr11 19788600 19809400 Low Mappability +chr11 20310000 20312000 High Signal Region +chr11 20377900 20380400 High Signal Region +chr11 22322000 22340700 Low Mappability +chr11 22395200 22432900 Low Mappability +chr11 22534700 22537000 Low Mappability +chr11 23218500 23258100 Low Mappability +chr11 23522600 23552900 High Signal Region +chr11 24527400 24529500 Low Mappability +chr11 25196800 25217300 High Signal Region +chr11 25796400 25802200 Low Mappability +chr11 26898500 26900500 High Signal Region +chr11 27525200 27541400 High Signal Region +chr11 28097200 28104500 Low Mappability +chr11 29064100 29129900 Low Mappability +chr11 29259900 29291300 High Signal Region +chr11 29586000 29592400 Low Mappability +chr11 30511100 30535400 High Signal Region +chr11 31343800 31345700 Low Mappability +chr11 33062300 33068800 Low Mappability +chr11 34541000 34683100 High Signal Region +chr11 37482400 37484900 High Signal Region +chr11 40230800 40248400 High Signal Region +chr11 40625500 40640300 Low Mappability +chr11 40796600 
40860600 High Signal Region +chr11 40887700 40915600 High Signal Region +chr11 41631700 41633600 High Signal Region +chr11 43237300 43239300 Low Mappability +chr11 43286400 43329800 High Signal Region +chr11 43454800 43462300 Low Mappability +chr11 43659700 43682100 Low Mappability +chr11 45584200 45655700 Low Mappability +chr11 46412300 46415000 Low Mappability +chr11 46492800 46514400 Low Mappability +chr11 47847500 47860600 High Signal Region +chr11 48451800 48536100 High Signal Region +chr11 48929800 49060400 Low Mappability +chr11 50445100 50469600 High Signal Region +chr11 51437600 51456700 High Signal Region +chr11 51664900 51690400 Low Mappability +chr11 54135500 54141600 High Signal Region +chr11 54576500 54583300 Low Mappability +chr11 55240500 55248100 Low Mappability +chr11 56588500 56594500 High Signal Region +chr11 57301700 57303600 High Signal Region +chr11 60558900 60699000 Low Mappability +chr11 61407400 61427800 Low Mappability +chr11 61593700 61596500 Low Mappability +chr11 62879300 62901500 High Signal Region +chr11 63467600 63475000 Low Mappability +chr11 64568100 64574200 High Signal Region +chr11 64681700 64683600 Low Mappability +chr11 64791900 64827100 Low Mappability +chr11 65451700 65458800 Low Mappability +chr11 66629900 66634100 High Signal Region +chr11 66947700 66958600 Low Mappability +chr11 67866400 67872800 Low Mappability +chr11 70155800 70162400 Low Mappability +chr11 71505700 71512100 Low Mappability +chr11 71875200 71881700 Low Mappability +chr11 73436900 73439100 Low Mappability +chr11 74128800 74136200 Low Mappability +chr11 74199900 74226800 Low Mappability +chr11 74301700 74319600 High Signal Region +chr11 74540000 74548400 Low Mappability +chr11 74884300 74899000 Low Mappability +chr11 76828100 76868600 Low Mappability +chr11 77255000 77257100 Low Mappability +chr11 79845100 79847300 Low Mappability +chr11 79872400 79877100 Low Mappability +chr11 79917300 79920800 Low Mappability +chr11 81545400 81552800 Low Mappability 
+chr11 82123300 82144400 High Signal Region +chr11 82333900 82338400 Low Mappability +chr11 83050300 83093600 High Signal Region +chr11 83126000 83172300 Low Mappability +chr11 85046500 85067800 High Signal Region +chr11 85285400 85292700 High Signal Region +chr11 88910900 88917600 Low Mappability +chr11 88965900 88971900 High Signal Region +chr11 89080800 89101300 High Signal Region +chr11 90504000 90510500 High Signal Region +chr11 90829400 90835000 Low Mappability +chr11 90901700 90908400 Low Mappability +chr11 90958500 91026800 Low Mappability +chr11 91047200 91049300 Low Mappability +chr11 92099000 92108200 High Signal Region +chr11 93409300 93428900 High Signal Region +chr11 94622900 94629900 Low Mappability +chr11 96065000 96093900 High Signal Region +chr11 98586900 98673900 Low Mappability +chr11 99712600 99717300 High Signal Region +chr11 100662800 100669700 Low Mappability +chr11 101731800 101741400 High Signal Region +chr11 102992300 103049900 Low Mappability +chr11 104239000 104242600 Low Mappability +chr11 106028100 106037400 High Signal Region +chr11 106254800 106297600 High Signal Region +chr11 106943500 106950100 Low Mappability +chr11 107188200 107200400 High Signal Region +chr11 107281300 107283200 High Signal Region +chr11 108377600 108404500 Low Mappability +chr11 108649800 108655400 Low Mappability +chr11 109010700 109024400 High Signal Region +chr11 109998500 110024600 Low Mappability +chr11 110421300 110423200 High Signal Region +chr11 111182400 111189800 Low Mappability +chr11 111215500 111234900 Low Mappability +chr11 111353300 111360000 Low Mappability +chr11 111855400 111857100 High Signal Region +chr11 112010600 112016400 High Signal Region +chr11 114456300 114462800 Low Mappability +chr11 115014300 115046900 Low Mappability +chr11 115611200 115665700 High Signal Region +chr11 115754800 115766900 Low Mappability +chr11 116389300 116395200 Low Mappability +chr11 116742700 116792800 Low Mappability +chr11 117499800 117505100 Low 
Mappability +chr11 119299800 119340300 Low Mappability +chr11 120305300 120357300 Low Mappability +chr11 120515100 120644700 High Signal Region +chr11 121069800 121075100 High Signal Region +chr11 121203000 121207500 Low Mappability +chr11 121396100 121422700 Low Mappability +chr11 121611900 121614000 Low Mappability +chr11 121981400 122082500 High Signal Region +chr12 0 3070900 High Signal Region +chr12 3102800 3111000 High Signal Region +chr12 4110500 4112400 High Signal Region +chr12 4218500 4235300 High Signal Region +chr12 4751600 4790100 High Signal Region +chr12 5050300 5065400 High Signal Region +chr12 6514000 6525100 High Signal Region +chr12 6606500 6612600 High Signal Region +chr12 7447300 7449900 High Signal Region +chr12 7801900 7808600 High Signal Region +chr12 7925300 7939600 High Signal Region +chr12 8572000 8640600 High Signal Region +chr12 10693000 10704200 High Signal Region +chr12 10961300 11004600 High Signal Region +chr12 11187600 11194100 High Signal Region +chr12 11642900 11658000 High Signal Region +chr12 12092500 12097600 High Signal Region +chr12 14844600 14848200 High Signal Region +chr12 15026600 15032400 High Signal Region +chr12 15252700 15259600 High Signal Region +chr12 15866100 15871800 High Signal Region +chr12 16746900 16748800 High Signal Region +chr12 17116400 17129400 High Signal Region +chr12 17243500 17248500 High Signal Region +chr12 18340700 18354800 High Signal Region +chr12 18856500 18909700 High Signal Region +chr12 19312600 19413500 High Signal Region +chr12 19442600 19590100 High Signal Region +chr12 19627700 19633600 High Signal Region +chr12 19777500 19781600 High Signal Region +chr12 19879300 19901200 High Signal Region +chr12 19931800 19948600 High Signal Region +chr12 20031900 20205100 High Signal Region +chr12 20225600 20298300 High Signal Region +chr12 21914300 21916000 Low Mappability +chr12 21972100 21987900 High Signal Region +chr12 22021600 22680500 Low Mappability +chr12 22896100 22902300 High Signal 
Region +chr12 23140700 23225200 High Signal Region +chr12 23283500 24030600 High Signal Region +chr12 24295300 24365100 Low Mappability +chr12 24692300 24727100 High Signal Region +chr12 25591800 25595300 Low Mappability +chr12 25840400 25842100 High Signal Region +chr12 27556800 27592000 High Signal Region +chr12 28491400 28494000 High Signal Region +chr12 28954800 28964000 High Signal Region +chr12 29379500 29400800 High Signal Region +chr12 30965100 31016300 High Signal Region +chr12 32020400 32032500 Low Mappability +chr12 32217700 32219200 High Signal Region +chr12 33388100 33410100 Low Mappability +chr12 33748900 33771800 High Signal Region +chr12 33869500 33880600 High Signal Region +chr12 34056800 34074100 High Signal Region +chr12 34128700 34139700 High Signal Region +chr12 34623000 34629000 Low Mappability +chr12 35783900 35814400 High Signal Region +chr12 36099400 36107200 High Signal Region +chr12 36679100 36700200 Low Mappability +chr12 36952200 36957900 High Signal Region +chr12 38746900 38749300 High Signal Region +chr12 41363500 41385500 High Signal Region +chr12 41502600 41516100 High Signal Region +chr12 41860000 41870200 High Signal Region +chr12 42124500 42126300 High Signal Region +chr12 42437900 42443400 High Signal Region +chr12 42666800 42690800 High Signal Region +chr12 43335600 43349300 High Signal Region +chr12 43659100 43675300 High Signal Region +chr12 43953900 43986900 High Signal Region +chr12 44064500 44070600 High Signal Region +chr12 44765600 44795900 Low Mappability +chr12 45768700 45773700 High Signal Region +chr12 45949200 45962200 High Signal Region +chr12 46707000 46709200 High Signal Region +chr12 47027300 47039300 High Signal Region +chr12 47280500 47286800 High Signal Region +chr12 47328600 47331300 High Signal Region +chr12 47646800 47648300 High Signal Region +chr12 47833000 47834900 High Signal Region +chr12 47995600 47997600 High Signal Region +chr12 48842900 48849500 High Signal Region +chr12 49124800 49155700 High 
Signal Region +chr12 49245200 49272100 High Signal Region +chr12 49606200 49612000 High Signal Region +chr12 50784600 50789900 High Signal Region +chr12 51486000 51492000 High Signal Region +chr12 52157900 52176400 High Signal Region +chr12 52200400 52223200 High Signal Region +chr12 52579600 52581200 High Signal Region +chr12 52730000 52735400 Low Mappability +chr12 52906200 52952300 High Signal Region +chr12 54358500 54369200 High Signal Region +chr12 54705400 54743600 High Signal Region +chr12 55079600 55267300 Low Mappability +chr12 56104100 56110600 Low Mappability +chr12 56423700 56425000 High Signal Region +chr12 56747800 56752200 High Signal Region +chr12 56911000 56914000 High Signal Region +chr12 58294800 58339800 High Signal Region +chr12 58659000 58692900 High Signal Region +chr12 58858800 58867600 High Signal Region +chr12 59034800 59039300 Low Mappability +chr12 59112800 59124700 High Signal Region +chr12 59270000 59276700 High Signal Region +chr12 59297800 59323200 High Signal Region +chr12 59601000 59605800 High Signal Region +chr12 60069500 60084400 High Signal Region +chr12 60501200 60506200 High Signal Region +chr12 61044200 61045300 High Signal Region +chr12 61289100 61293700 High Signal Region +chr12 61892600 61896100 High Signal Region +chr12 61964500 61971300 High Signal Region +chr12 62035300 62090200 High Signal Region +chr12 62959800 62999500 High Signal Region +chr12 63041800 63048200 High Signal Region +chr12 63289500 63322400 High Signal Region +chr12 63728400 63745100 High Signal Region +chr12 63838200 63840100 High Signal Region +chr12 65260100 65292400 High Signal Region +chr12 65784500 65808300 High Signal Region +chr12 66103800 66127200 High Signal Region +chr12 67058200 67060800 High Signal Region +chr12 67433500 67459300 High Signal Region +chr12 67519200 67571500 High Signal Region +chr12 67828900 67836600 High Signal Region +chr12 68696500 68711800 High Signal Region +chr12 68745100 68750600 Low Mappability +chr12 69059900 
69061300 High Signal Region +chr12 69653100 69657800 High Signal Region +chr12 70641800 70668400 Low Mappability +chr12 71077100 71093600 Low Mappability +chr12 71589600 71596000 High Signal Region +chr12 72203000 72209300 High Signal Region +chr12 72634700 72641300 High Signal Region +chr12 74620800 74642100 High Signal Region +chr12 74775800 74778200 High Signal Region +chr12 74803000 74805400 High Signal Region +chr12 74857200 74862700 High Signal Region +chr12 75241800 75248400 High Signal Region +chr12 77160700 77166000 High Signal Region +chr12 77383500 77411300 High Signal Region +chr12 77547200 77553900 High Signal Region +chr12 78260000 78373200 High Signal Region +chr12 78462400 78468500 High Signal Region +chr12 80417200 80449700 High Signal Region +chr12 80894500 80916600 High Signal Region +chr12 81550400 81555100 High Signal Region +chr12 81985400 82064000 Low Mappability +chr12 83093000 83094900 High Signal Region +chr12 85401000 85408600 High Signal Region +chr12 87585600 87771500 Low Mappability +chr12 87802800 88006400 High Signal Region +chr12 88119800 88169700 Low Mappability +chr12 88229600 88312400 High Signal Region +chr12 88493200 88516700 Low Mappability +chr12 91221400 91256000 High Signal Region +chr12 91439200 91475500 High Signal Region +chr12 92393800 92395800 Low Mappability +chr12 92839700 92892700 High Signal Region +chr12 93233800 93265600 High Signal Region +chr12 93564200 93590500 High Signal Region +chr12 93915400 93951600 High Signal Region +chr12 94268500 94273900 High Signal Region +chr12 94550200 94556100 High Signal Region +chr12 94694300 94713700 High Signal Region +chr12 95976100 96021400 High Signal Region +chr12 97038100 97062700 High Signal Region +chr12 97616600 97622400 High Signal Region +chr12 98173700 98176600 High Signal Region +chr12 99644200 99649400 High Signal Region +chr12 100490600 100492300 High Signal Region +chr12 100766900 100825300 High Signal Region +chr12 101427900 101453500 High Signal Region +chr12 
101839700 101849500 High Signal Region +chr12 102892000 102893900 High Signal Region +chr12 103458100 103472900 High Signal Region +chr12 103776900 103813700 High Signal Region +chr12 105300300 105307000 High Signal Region +chr12 105435200 105437100 High Signal Region +chr12 105523800 105525700 High Signal Region +chr12 105628200 105631400 High Signal Region +chr12 108078800 108084400 High Signal Region +chr12 109901900 109909200 Low Mappability +chr12 110011800 110013700 High Signal Region +chr12 111388200 111417100 High Signal Region +chr12 112542200 112548700 High Signal Region +chr12 112775700 112830900 Low Mappability +chr12 113423500 113461500 High Signal Region +chr12 114584600 114597100 High Signal Region +chr12 114941500 114943900 High Signal Region +chr12 115725800 115748700 High Signal Region +chr12 116796500 116853000 High Signal Region +chr12 118341100 118358400 High Signal Region +chr12 118794900 118797400 High Signal Region +chr12 119013600 119018100 High Signal Region +chr12 119554500 119598100 High Signal Region +chr12 119659100 119670900 High Signal Region +chr12 120023800 120129000 High Signal Region +chr13 0 3038200 High Signal Region +chr13 3350900 3378900 High Signal Region +chr13 3404500 3438200 High Signal Region +chr13 3901100 3903100 Low Mappability +chr13 4762900 4770300 High Signal Region +chr13 5171400 5178400 High Signal Region +chr13 7601300 7604100 High Signal Region +chr13 7806100 7810900 High Signal Region +chr13 7893500 7899700 High Signal Region +chr13 9828900 9855900 High Signal Region +chr13 10174800 10181100 Low Mappability +chr13 12684400 13073000 High Signal Region +chr13 13752100 13774000 High Signal Region +chr13 13859900 13907900 High Signal Region +chr13 13981000 13983000 High Signal Region +chr13 14690600 14777500 Low Mappability +chr13 18932700 18963600 Low Mappability +chr13 21753300 21847200 Low Mappability +chr13 23620800 23647900 Low Mappability +chr13 25006900 25051500 High Signal Region +chr13 26440600 26448200 
High Signal Region +chr13 27164600 27169100 High Signal Region +chr13 27875800 27888500 High Signal Region +chr13 29880700 29886800 Low Mappability +chr13 32889400 32895200 High Signal Region +chr13 33280200 33319400 High Signal Region +chr13 33350500 33491800 High Signal Region +chr13 35687400 35695700 High Signal Region +chr13 36794200 36797400 High Signal Region +chr13 37036700 37043900 High Signal Region +chr13 38633900 38659300 Low Mappability +chr13 42435800 42437700 High Signal Region +chr13 44868600 44870900 High Signal Region +chr13 46316600 46324000 High Signal Region +chr13 50633400 50741800 High Signal Region +chr13 53269000 53270900 High Signal Region +chr13 60675600 60682600 High Signal Region +chr13 62291600 62346800 Low Mappability +chr13 62409800 62426300 High Signal Region +chr13 63142500 63184600 High Signal Region +chr13 64878100 64885300 High Signal Region +chr13 65352900 66254300 Low Mappability +chr13 71381400 71387500 High Signal Region +chr13 74521500 74565200 High Signal Region +chr13 74684000 74712200 High Signal Region +chr13 76472300 76501300 High Signal Region +chr13 77304000 77305900 High Signal Region +chr13 77430600 77440000 High Signal Region +chr13 79563400 79570800 High Signal Region +chr13 80276300 80279400 High Signal Region +chr13 80489100 80491400 High Signal Region +chr13 83419000 83444300 High Signal Region +chr13 85125800 85145900 High Signal Region +chr13 86149500 86190600 High Signal Region +chr13 86502700 86511700 High Signal Region +chr13 88324900 88345400 High Signal Region +chr13 92599100 92625400 Low Mappability +chr13 93279200 93294800 High Signal Region +chr13 93650100 93651500 High Signal Region +chr13 93940300 93955300 High Signal Region +chr13 94016300 94020800 High Signal Region +chr13 97189600 97206100 High Signal Region +chr13 98418200 98420500 Low Mappability +chr13 99774000 99792100 High Signal Region +chr13 102381900 102387900 High Signal Region +chr13 105123500 105128600 Low Mappability +chr13 107839000 
107860300 Low Mappability +chr13 110602100 110615800 High Signal Region +chr13 110729600 110745400 High Signal Region +chr13 111187700 111189500 High Signal Region +chr13 111499700 111515900 Low Mappability +chr13 112577200 112595200 High Signal Region +chr13 113171200 113173100 High Signal Region +chr13 113272600 113310700 High Signal Region +chr13 115498200 115504200 High Signal Region +chr13 115741300 115743200 Low Mappability +chr13 116191900 116193900 High Signal Region +chr13 119188100 119230700 High Signal Region +chr13 119486800 119618500 High Signal Region +chr13 119660800 119674100 High Signal Region +chr13 119899200 120147600 Low Mappability +chr13 120320500 120421600 High Signal Region +chr14 0 4323000 High Signal Region +chr14 4372100 4741400 High Signal Region +chr14 4762800 5839200 High Signal Region +chr14 5959700 6479300 High Signal Region +chr14 6500100 6791800 High Signal Region +chr14 6993800 7734200 High Signal Region +chr14 7869900 7872200 High Signal Region +chr14 8005200 8018900 High Signal Region +chr14 8285700 8287800 High Signal Region +chr14 8652200 8658800 Low Mappability +chr14 10086500 10118400 High Signal Region +chr14 10178800 10198700 Low Mappability +chr14 11046200 11050200 High Signal Region +chr14 12536700 12538700 High Signal Region +chr14 14333600 14340200 High Signal Region +chr14 15460700 15467200 High Signal Region +chr14 16907800 16914000 High Signal Region +chr14 16937900 16941100 High Signal Region +chr14 18487900 18494100 High Signal Region +chr14 19251900 19255700 High Signal Region +chr14 19277200 19279100 High Signal Region +chr14 19414800 19633500 High Signal Region +chr14 21360400 21366100 High Signal Region +chr14 21878600 21884500 High Signal Region +chr14 22542900 22570000 High Signal Region +chr14 22902100 22934800 High Signal Region +chr14 25875200 26292200 High Signal Region +chr14 26946900 26948800 High Signal Region +chr14 29001300 29003200 Low Mappability +chr14 29343900 29345700 Low Mappability +chr14 
30748800 30754700 High Signal Region +chr14 31919300 31923900 High Signal Region +chr14 32115300 32120500 Low Mappability +chr14 33667700 33670000 Low Mappability +chr14 33981000 33987500 Low Mappability +chr14 35275300 35281500 High Signal Region +chr14 35709400 35722200 High Signal Region +chr14 36429100 36440100 High Signal Region +chr14 37229100 37260800 Low Mappability +chr14 37619400 37635200 Low Mappability +chr14 38086800 38116800 High Signal Region +chr14 38280800 38283100 High Signal Region +chr14 38455100 38462200 Low Mappability +chr14 39580800 39607200 High Signal Region +chr14 39731900 39737200 High Signal Region +chr14 39905500 39911100 High Signal Region +chr14 41053200 41061900 Low Mappability +chr14 41326900 43109000 High Signal Region +chr14 43132400 43668900 High Signal Region +chr14 43803900 43850200 High Signal Region +chr14 44149300 44152100 High Signal Region +chr14 44273800 44343500 High Signal Region +chr14 44514200 44516000 Low Mappability +chr14 45726200 45753500 High Signal Region +chr14 45811900 45813800 High Signal Region +chr14 46269900 46274300 High Signal Region +chr14 47609500 47630400 High Signal Region +chr14 50538900 50606000 High Signal Region +chr14 50626200 50638500 High Signal Region +chr14 51472000 51515400 High Signal Region +chr14 51730700 51768100 High Signal Region +chr14 51814200 51837200 High Signal Region +chr14 52821200 53035800 Low Mappability +chr14 53146700 53340000 High Signal Region +chr14 53475200 53479600 High Signal Region +chr14 53515600 53530500 Low Mappability +chr14 56447800 56455700 High Signal Region +chr14 56693100 56695000 High Signal Region +chr14 58052600 58059800 Low Mappability +chr14 58462700 58464600 Low Mappability +chr14 58657800 58659700 High Signal Region +chr14 58831400 58833300 High Signal Region +chr14 59250300 59270000 High Signal Region +chr14 59488900 59490800 High Signal Region +chr14 59980800 59995700 High Signal Region +chr14 60328300 60357300 High Signal Region +chr14 60960000 
60961900 Low Mappability +chr14 61580500 61586700 High Signal Region +chr14 61855000 61856300 High Signal Region +chr14 62107300 62126200 High Signal Region +chr14 64290100 64292500 High Signal Region +chr14 64463300 64478500 Low Mappability +chr14 65128900 65135300 Low Mappability +chr14 66427000 66428400 High Signal Region +chr14 68232600 68278200 High Signal Region +chr14 69161000 69163400 High Signal Region +chr14 70974500 70975600 High Signal Region +chr14 71121300 71126700 High Signal Region +chr14 71449700 71453700 High Signal Region +chr14 71783600 71804000 High Signal Region +chr14 72900100 72921400 High Signal Region +chr14 73644600 73679900 High Signal Region +chr14 73847900 73861200 High Signal Region +chr14 74039300 74066900 High Signal Region +chr14 74124400 74138500 High Signal Region +chr14 74435600 74447800 High Signal Region +chr14 75425300 75440500 High Signal Region +chr14 78162300 78168200 High Signal Region +chr14 78401700 78403200 High Signal Region +chr14 79145300 79196400 High Signal Region +chr14 80148100 80150800 High Signal Region +chr14 80422800 80439400 High Signal Region +chr14 80622600 80627700 High Signal Region +chr14 81333200 81337500 High Signal Region +chr14 81495300 81519300 High Signal Region +chr14 82077600 82084900 High Signal Region +chr14 82846900 82867200 High Signal Region +chr14 82958700 82964100 High Signal Region +chr14 83292900 83306500 High Signal Region +chr14 83507000 83512600 High Signal Region +chr14 84354700 84409800 High Signal Region +chr14 84855100 84881600 Low Mappability +chr14 85177800 85203300 Low Mappability +chr14 85521200 85535200 Low Mappability +chr14 86198000 86200000 High Signal Region +chr14 86590500 86614400 High Signal Region +chr14 87354600 87373000 High Signal Region +chr14 87671400 87677500 High Signal Region +chr14 87790500 87852200 High Signal Region +chr14 88450200 88453600 High Signal Region +chr14 88478400 88480300 High Signal Region +chr14 90018300 90019500 High Signal Region +chr14 
90294700 90301800 High Signal Region +chr14 90910200 90912200 High Signal Region +chr14 91415900 91418400 High Signal Region +chr14 91510800 91514900 High Signal Region +chr14 91672700 91694800 High Signal Region +chr14 91951700 91976400 High Signal Region +chr14 92032500 92040900 High Signal Region +chr14 92383600 92389900 High Signal Region +chr14 92411600 92432900 High Signal Region +chr14 92792600 92798500 High Signal Region +chr14 92921100 92953200 High Signal Region +chr14 93017600 93020400 High Signal Region +chr14 93355600 93360200 High Signal Region +chr14 94319700 94327000 High Signal Region +chr14 95561600 95567600 High Signal Region +chr14 96048000 96054300 High Signal Region +chr14 96093600 96116100 High Signal Region +chr14 97323800 97326500 High Signal Region +chr14 98226800 98237000 High Signal Region +chr14 98731900 98757200 High Signal Region +chr14 99207100 99208200 High Signal Region +chr14 99649700 99655500 High Signal Region +chr14 101076400 101098900 Low Mappability +chr14 101404800 101414800 High Signal Region +chr14 102548900 102565300 High Signal Region +chr14 102755800 102762600 High Signal Region +chr14 103300300 103302400 High Signal Region +chr14 103858600 103872900 High Signal Region +chr14 103999500 104025500 High Signal Region +chr14 104104800 104128100 Low Mappability +chr14 104704500 104716800 High Signal Region +chr14 105758200 105764900 Low Mappability +chr14 105911400 105978300 High Signal Region +chr14 106002700 106005700 Low Mappability +chr14 106301000 106352700 High Signal Region +chr14 106444800 106483100 Low Mappability +chr14 106722600 106728700 High Signal Region +chr14 106895300 106897000 Low Mappability +chr14 108115100 108174900 Low Mappability +chr14 108283900 108303500 High Signal Region +chr14 109675300 109681200 High Signal Region +chr14 109911500 109917800 High Signal Region +chr14 110057000 110108200 Low Mappability +chr14 110356200 110373800 High Signal Region +chr14 110492000 110495700 Low Mappability +chr14 
110906100 110908200 High Signal Region +chr14 110992800 110994500 High Signal Region +chr14 111903200 111909800 High Signal Region +chr14 112074600 112092300 High Signal Region +chr14 112210500 112215800 High Signal Region +chr14 112285400 112291900 High Signal Region +chr14 112332800 112340000 Low Mappability +chr14 112517900 112519900 High Signal Region +chr14 112627800 112663100 Low Mappability +chr14 114505900 114512900 High Signal Region +chr14 114822000 114823900 Low Mappability +chr14 115109700 115117400 High Signal Region +chr14 115272500 115280200 High Signal Region +chr14 115379200 115385600 High Signal Region +chr14 115911100 115912900 High Signal Region +chr14 115958100 115965000 High Signal Region +chr14 116402700 116407700 High Signal Region +chr14 116817000 116822900 High Signal Region +chr14 117285800 117292800 High Signal Region +chr14 118144700 118168500 Low Mappability +chr14 119286000 119287900 High Signal Region +chr14 120180000 120202600 High Signal Region +chr14 120742600 120749700 High Signal Region +chr14 120777500 120802300 High Signal Region +chr14 121007000 121010900 Low Mappability +chr14 122502500 122534800 High Signal Region +chr14 123349400 123351300 Low Mappability +chr14 123412000 123452600 High Signal Region +chr14 123674600 123695600 High Signal Region +chr14 124334000 124340200 High Signal Region +chr14 124415600 124436400 High Signal Region +chr14 124491600 124497700 High Signal Region +chr14 124739500 124902200 High Signal Region +chr15 0 3125600 High Signal Region +chr15 3150900 3170400 High Signal Region +chr15 3313900 3336200 High Signal Region +chr15 3360500 3363700 High Signal Region +chr15 3538600 3551000 High Signal Region +chr15 3712200 3732700 High Signal Region +chr15 3793500 3823000 High Signal Region +chr15 4155900 4160900 High Signal Region +chr15 4278500 4284100 High Signal Region +chr15 4852000 4894600 Low Mappability +chr15 4980200 4987600 Low Mappability +chr15 5369000 5385500 High Signal Region +chr15 5681700 
5690400 High Signal Region +chr15 5910000 5911700 High Signal Region +chr15 5993500 5995400 High Signal Region +chr15 6074100 6087100 Low Mappability +chr15 6192800 6200000 Low Mappability +chr15 6316000 6317900 High Signal Region +chr15 6510500 6539100 High Signal Region +chr15 6674800 6701400 High Signal Region +chr15 6801200 6808300 High Signal Region +chr15 7539900 7548600 Low Mappability +chr15 7800800 7803000 Low Mappability +chr15 7849400 7855600 High Signal Region +chr15 7904400 7929500 Low Mappability +chr15 8517500 8520400 High Signal Region +chr15 8548000 8576100 Low Mappability +chr15 8800200 8808700 High Signal Region +chr15 8985200 9054800 High Signal Region +chr15 9219000 9224900 Low Mappability +chr15 9293200 9333300 High Signal Region +chr15 9379300 9409100 High Signal Region +chr15 9437100 9443600 High Signal Region +chr15 9536500 9554100 High Signal Region +chr15 9992700 10045700 High Signal Region +chr15 10579600 10591500 Low Mappability +chr15 10753400 10810200 High Signal Region +chr15 10835200 10854700 Low Mappability +chr15 11921000 11933300 High Signal Region +chr15 12055800 12063200 Low Mappability +chr15 12526800 12531900 Low Mappability +chr15 12872000 12873900 High Signal Region +chr15 12932300 12934200 Low Mappability +chr15 13919500 13948300 High Signal Region +chr15 14414600 14439100 Low Mappability +chr15 14722200 14732900 High Signal Region +chr15 14873900 14902400 High Signal Region +chr15 15043600 15059700 High Signal Region +chr15 15525500 15551900 High Signal Region +chr15 16168200 16186400 High Signal Region +chr15 16303700 16309500 High Signal Region +chr15 16716400 16717500 High Signal Region +chr15 16901300 16907100 High Signal Region +chr15 16939800 16955100 Low Mappability +chr15 17139000 17169100 High Signal Region +chr15 17562100 17581400 High Signal Region +chr15 18314600 18325000 High Signal Region +chr15 19038400 19063800 Low Mappability +chr15 19402600 19405500 High Signal Region +chr15 19448100 19453900 High Signal 
Region +chr15 19557200 19578000 High Signal Region +chr15 19626800 19631800 High Signal Region +chr15 19678400 19685800 High Signal Region +chr15 20063000 20067500 High Signal Region +chr15 20155100 20170700 Low Mappability +chr15 20474900 20510100 High Signal Region +chr15 20531400 20537100 High Signal Region +chr15 20821500 20826700 High Signal Region +chr15 20972700 20978300 Low Mappability +chr15 21114000 21115900 High Signal Region +chr15 21262100 21268500 Low Mappability +chr15 21423200 21487200 High Signal Region +chr15 21655500 21657500 High Signal Region +chr15 21815500 21820800 High Signal Region +chr15 21853700 21892400 High Signal Region +chr15 22268700 22293500 High Signal Region +chr15 22751400 22756700 Low Mappability +chr15 22799300 22809700 Low Mappability +chr15 23240200 23255600 Low Mappability +chr15 23465300 23467800 High Signal Region +chr15 23886000 23887900 Low Mappability +chr15 23926900 23939700 High Signal Region +chr15 24309300 24325700 Low Mappability +chr15 24761100 24766700 High Signal Region +chr15 24801600 24837300 High Signal Region +chr15 24880900 24898600 Low Mappability +chr15 25051400 25065200 Low Mappability +chr15 26112700 26118900 High Signal Region +chr15 26905000 26919300 Low Mappability +chr15 27286100 27326800 High Signal Region +chr15 27384100 27390300 Low Mappability +chr15 27638200 27640500 High Signal Region +chr15 28564400 28578800 High Signal Region +chr15 29285200 29291500 Low Mappability +chr15 29347600 29395600 High Signal Region +chr15 29463900 29470200 High Signal Region +chr15 29969800 30001400 High Signal Region +chr15 30117700 30126200 High Signal Region +chr15 30441400 30448200 Low Mappability +chr15 30747900 30755000 High Signal Region +chr15 30996700 31016300 High Signal Region +chr15 31066700 31083700 High Signal Region +chr15 32783900 32806700 High Signal Region +chr15 32832800 32880300 High Signal Region +chr15 33138700 33140800 Low Mappability +chr15 33308700 33310800 Low Mappability +chr15 33444200 
33454100 High Signal Region +chr15 33710200 33745700 High Signal Region +chr15 33781400 33849400 High Signal Region +chr15 33869800 33884700 High Signal Region +chr15 34494500 34502100 Low Mappability +chr15 34763100 34769400 High Signal Region +chr15 34987600 34992800 High Signal Region +chr15 35013200 35015400 High Signal Region +chr15 35366800 35406000 High Signal Region +chr15 36715200 36737400 High Signal Region +chr15 36966700 36997400 Low Mappability +chr15 37072900 37150800 Low Mappability +chr15 38462300 38484300 Low Mappability +chr15 39172900 39178300 Low Mappability +chr15 39335600 39348800 Low Mappability +chr15 39496100 39499100 High Signal Region +chr15 39695600 39718600 Low Mappability +chr15 40049600 40056000 High Signal Region +chr15 40086800 40101400 High Signal Region +chr15 41531400 41533200 High Signal Region +chr15 41890400 41896900 Low Mappability +chr15 42354900 42361100 High Signal Region +chr15 42925300 42942800 High Signal Region +chr15 43287300 43346300 High Signal Region +chr15 44469100 44476400 High Signal Region +chr15 44649000 44659600 Low Mappability +chr15 44723200 44728200 Low Mappability +chr15 44769700 44796100 High Signal Region +chr15 45005100 45009300 High Signal Region +chr15 45194600 45197100 High Signal Region +chr15 45577500 45590900 High Signal Region +chr15 45635600 45650500 High Signal Region +chr15 45774400 45779700 High Signal Region +chr15 45890700 45932500 High Signal Region +chr15 46255700 46257800 Low Mappability +chr15 46355600 46368400 High Signal Region +chr15 46502200 46506800 Low Mappability +chr15 46562500 46566200 Low Mappability +chr15 47232800 47256000 High Signal Region +chr15 47356500 47363700 Low Mappability +chr15 47539000 47555300 High Signal Region +chr15 48666900 48671000 High Signal Region +chr15 49283300 49299700 High Signal Region +chr15 49322600 49327300 Low Mappability +chr15 50426100 50442800 High Signal Region +chr15 50557700 50642600 High Signal Region +chr15 51113200 51117800 High Signal 
Region +chr15 51531900 51533900 Low Mappability +chr15 52125800 52131200 High Signal Region +chr15 52329800 52353100 High Signal Region +chr15 53039200 53044200 Low Mappability +chr15 53831000 53834900 High Signal Region +chr15 53870700 53872700 High Signal Region +chr15 53918300 53929500 High Signal Region +chr15 54180700 54211500 Low Mappability +chr15 56032900 56038200 High Signal Region +chr15 56175800 56183100 Low Mappability +chr15 56363800 56367900 High Signal Region +chr15 56400500 56402200 High Signal Region +chr15 56941600 56993500 High Signal Region +chr15 57279500 57285000 High Signal Region +chr15 57412200 57433600 High Signal Region +chr15 57889500 57913700 Low Mappability +chr15 58437200 58441100 High Signal Region +chr15 59421400 59435400 Low Mappability +chr15 59850100 59875200 Low Mappability +chr15 60153100 60203900 High Signal Region +chr15 60592000 60594300 Low Mappability +chr15 60931800 60986500 High Signal Region +chr15 61148600 61150700 High Signal Region +chr15 61903100 61915500 High Signal Region +chr15 62367600 62370100 High Signal Region +chr15 62553200 62555200 High Signal Region +chr15 62686500 62693700 High Signal Region +chr15 63329400 63346600 Low Mappability +chr15 63626000 63627900 High Signal Region +chr15 63791700 63796000 High Signal Region +chr15 63837600 63922800 High Signal Region +chr15 64591700 64598200 Low Mappability +chr15 64673500 64681900 High Signal Region +chr15 65115600 65123500 Low Mappability +chr15 65598500 65604500 High Signal Region +chr15 65666600 65673800 High Signal Region +chr15 65714400 65753500 High Signal Region +chr15 66045100 66065700 High Signal Region +chr15 66208300 66210200 High Signal Region +chr15 68136300 68137800 Low Mappability +chr15 68980000 68986500 High Signal Region +chr15 69122300 69164500 High Signal Region +chr15 69264900 69268800 High Signal Region +chr15 69390300 69409400 High Signal Region +chr15 69642000 69646000 High Signal Region +chr15 70083000 70088800 High Signal Region 
+chr15 70609300 70611100 High Signal Region +chr15 70896600 70914000 High Signal Region +chr15 71104600 71112200 High Signal Region +chr15 71206600 71237500 Low Mappability +chr15 73060200 73087900 Low Mappability +chr15 73373200 73378200 Low Mappability +chr15 73873000 73880400 Low Mappability +chr15 74360700 74368000 Low Mappability +chr15 74814300 74826700 Low Mappability +chr15 74992000 75104600 High Signal Region +chr15 75205600 75212800 Low Mappability +chr15 75298000 75299500 High Signal Region +chr15 75437000 75440500 High Signal Region +chr15 75523600 75529700 High Signal Region +chr15 76102000 76106500 High Signal Region +chr15 76559900 76577900 Low Mappability +chr15 76964600 76971400 Low Mappability +chr15 77336200 77439100 High Signal Region +chr15 77718300 77735600 Low Mappability +chr15 77895000 77934800 Low Mappability +chr15 79685000 79775700 Low Mappability +chr15 79869700 79892600 Low Mappability +chr15 79974400 79978400 Low Mappability +chr15 80232400 80267100 High Signal Region +chr15 81145400 81152000 Low Mappability +chr15 81492300 81523600 High Signal Region +chr15 82338000 82368000 Low Mappability +chr15 82590700 82608900 Low Mappability +chr15 82675500 82677200 High Signal Region +chr15 83172100 83202200 Low Mappability +chr15 84746600 84753000 Low Mappability +chr15 85176800 85196600 Low Mappability +chr15 85541200 85543100 High Signal Region +chr15 86193800 86196100 High Signal Region +chr15 86312100 86326400 Low Mappability +chr15 87293900 87301200 Low Mappability +chr15 87967000 87969000 High Signal Region +chr15 88779400 88783900 Low Mappability +chr15 88974800 88976800 High Signal Region +chr15 89597900 89621300 High Signal Region +chr15 89808500 89809700 High Signal Region +chr15 89943000 89982000 Low Mappability +chr15 90636400 90643600 Low Mappability +chr15 91115900 91134800 Low Mappability +chr15 91419400 91422200 High Signal Region +chr15 91720600 91723200 Low Mappability +chr15 91905900 91911200 High Signal Region +chr15 
92470100 92475100 Low Mappability +chr15 92613700 92618300 Low Mappability +chr15 92722600 92730100 Low Mappability +chr15 92796100 92820000 Low Mappability +chr15 93044100 93062000 High Signal Region +chr15 93467800 93469500 Low Mappability +chr15 93867100 93873600 High Signal Region +chr15 94088400 94124100 High Signal Region +chr15 94150500 94156800 High Signal Region +chr15 94373000 94379600 High Signal Region +chr15 95087600 95092100 High Signal Region +chr15 95306000 95312300 High Signal Region +chr15 95729500 95756400 High Signal Region +chr15 96551700 96559500 Low Mappability +chr15 96977900 96983600 Low Mappability +chr15 97082100 97084300 High Signal Region +chr15 97472900 97487400 Low Mappability +chr15 99168800 99171900 High Signal Region +chr15 99552100 99553900 Low Mappability +chr15 100331500 100339800 Low Mappability +chr15 100360000 100379700 Low Mappability +chr15 100541700 100617400 Low Mappability +chr15 101655700 101662100 High Signal Region +chr15 102596800 102603200 High Signal Region +chr15 103271900 103277100 High Signal Region +chr15 103406700 103418500 High Signal Region +chr15 103606700 103611400 High Signal Region +chr15 103814500 104043600 High Signal Region +chr16 0 3427800 High Signal Region +chr16 3450300 3519700 Low Mappability +chr16 4300400 4366800 Low Mappability +chr16 4585000 4591300 High Signal Region +chr16 5708200 5710200 High Signal Region +chr16 7460800 7463600 High Signal Region +chr16 7937100 7958400 Low Mappability +chr16 8256700 8286200 High Signal Region +chr16 9577100 9579600 Low Mappability +chr16 10631200 10633200 Low Mappability +chr16 10974100 11013900 High Signal Region +chr16 11134600 11145200 High Signal Region +chr16 11248000 11249900 Low Mappability +chr16 11679900 11687500 Low Mappability +chr16 12327300 12345900 Low Mappability +chr16 12417900 12423400 High Signal Region +chr16 12829200 12831000 High Signal Region +chr16 12976200 12981700 Low Mappability +chr16 13087700 13107000 Low Mappability +chr16 
13903200 13925900 Low Mappability +chr16 14316200 14341200 Low Mappability +chr16 15502700 15510100 Low Mappability +chr16 15741400 15757700 Low Mappability +chr16 17199900 17236000 High Signal Region +chr16 17751400 17761300 High Signal Region +chr16 17910400 17955500 High Signal Region +chr16 18532200 18534200 High Signal Region +chr16 18957500 18979200 High Signal Region +chr16 19334200 19375100 High Signal Region +chr16 19581200 19602400 Low Mappability +chr16 19711900 19748700 High Signal Region +chr16 19928600 19946300 Low Mappability +chr16 22923300 22929100 High Signal Region +chr16 26419300 26421200 High Signal Region +chr16 26808500 26814800 High Signal Region +chr16 27071900 27087600 High Signal Region +chr16 27212200 27218300 High Signal Region +chr16 28170600 28197500 High Signal Region +chr16 30828600 30830500 High Signal Region +chr16 31223800 31234300 Low Mappability +chr16 31339100 31358900 High Signal Region +chr16 31818700 31825200 Low Mappability +chr16 32147700 32153500 Low Mappability +chr16 32489700 32520100 Low Mappability +chr16 32579100 32598800 Low Mappability +chr16 33847200 33852600 Low Mappability +chr16 34581100 34591200 Low Mappability +chr16 34742000 34744000 High Signal Region +chr16 35980600 35983300 High Signal Region +chr16 36764900 36770500 Low Mappability +chr16 38714200 38721600 Low Mappability +chr16 39563700 39568200 High Signal Region +chr16 41270700 41273100 High Signal Region +chr16 42657300 42661200 High Signal Region +chr16 42773100 42779900 High Signal Region +chr16 42931600 42950000 High Signal Region +chr16 43764000 43771600 Low Mappability +chr16 44040400 44063900 Low Mappability +chr16 44709800 44726400 Low Mappability +chr16 44920200 44950700 Low Mappability +chr16 45292600 45293900 High Signal Region +chr16 45352100 45354000 High Signal Region +chr16 46364600 46369100 High Signal Region +chr16 47099100 47147300 High Signal Region +chr16 47552300 47564100 Low Mappability +chr16 48579900 48581300 Low Mappability 
+chr16 49024900 49031400 Low Mappability +chr16 49148400 49150300 Low Mappability +chr16 49447700 49489300 High Signal Region +chr16 50084900 50101400 Low Mappability +chr16 50909100 50926800 Low Mappability +chr16 51087100 51094300 Low Mappability +chr16 51945800 51980200 High Signal Region +chr16 53412000 53428900 High Signal Region +chr16 53571500 53595400 Low Mappability +chr16 54298300 54307600 Low Mappability +chr16 54861600 54869000 High Signal Region +chr16 54959000 54965200 High Signal Region +chr16 55647800 55681600 Low Mappability +chr16 56038100 56065100 Low Mappability +chr16 56988400 57008400 High Signal Region +chr16 57085500 57095800 High Signal Region +chr16 57390200 57392600 High Signal Region +chr16 57792800 57811700 Low Mappability +chr16 58310800 58343000 High Signal Region +chr16 58632300 58670400 Low Mappability +chr16 59121800 59129100 Low Mappability +chr16 59310100 59378100 High Signal Region +chr16 60921200 60970900 High Signal Region +chr16 61312500 61325200 Low Mappability +chr16 62564300 62599200 High Signal Region +chr16 62875900 62880400 Low Mappability +chr16 63114300 63151200 High Signal Region +chr16 63301300 63313600 High Signal Region +chr16 64384600 64425600 High Signal Region +chr16 65176900 65181400 Low Mappability +chr16 66229300 66247600 Low Mappability +chr16 67328200 67334700 High Signal Region +chr16 68272300 68274300 High Signal Region +chr16 70542300 70558300 Low Mappability +chr16 70633900 70639700 Low Mappability +chr16 70892400 70898400 High Signal Region +chr16 70976900 70982900 High Signal Region +chr16 71687000 71691500 Low Mappability +chr16 72019300 72023900 Low Mappability +chr16 72056200 72062100 High Signal Region +chr16 72724800 72730900 Low Mappability +chr16 73656700 73688600 High Signal Region +chr16 74771800 74781500 Low Mappability +chr16 76057000 76065000 Low Mappability +chr16 76487100 76519600 High Signal Region +chr16 76988700 76991600 High Signal Region +chr16 77116900 77121900 Low Mappability 
+chr16 78977100 79013600 High Signal Region +chr16 79368600 79376000 Low Mappability +chr16 79782000 79786700 High Signal Region +chr16 79943000 79948600 Low Mappability +chr16 80269400 80309700 Low Mappability +chr16 81071700 81079200 Low Mappability +chr16 81779900 81782000 High Signal Region +chr16 81859300 81865600 High Signal Region +chr16 82079700 82099600 High Signal Region +chr16 82237800 82243200 Low Mappability +chr16 82828200 82845600 High Signal Region +chr16 83077300 83081800 High Signal Region +chr16 83360600 83368000 Low Mappability +chr16 84260500 84283300 High Signal Region +chr16 84380600 84407600 High Signal Region +chr16 84440100 84446000 High Signal Region +chr16 85671600 85673000 High Signal Region +chr16 85713500 85720100 High Signal Region +chr16 86333000 86354300 High Signal Region +chr16 86539500 86570300 High Signal Region +chr16 86819800 86822100 High Signal Region +chr16 87055400 87060300 High Signal Region +chr16 87287400 87302500 Low Mappability +chr16 87372300 87391700 Low Mappability +chr16 88022900 88029900 High Signal Region +chr16 88790600 88797900 Low Mappability +chr16 88957900 88967800 High Signal Region +chr16 89145200 89196100 Low Mappability +chr16 89431800 89448400 Low Mappability +chr16 89636000 89642900 High Signal Region +chr16 89877500 89879700 High Signal Region +chr16 90056200 90072300 Low Mappability +chr16 90341200 90350100 Low Mappability +chr16 91533700 91551800 High Signal Region +chr16 92254500 92259400 Low Mappability +chr16 93581500 93622800 High Signal Region +chr16 93685800 93711200 High Signal Region +chr16 93785700 93790200 High Signal Region +chr16 93991400 93997900 High Signal Region +chr16 94258100 94282000 Low Mappability +chr16 95782000 95788900 High Signal Region +chr16 95991000 96010400 Low Mappability +chr16 97996400 98207700 High Signal Region +chr17 0 3039300 High Signal Region +chr17 3075400 3085400 High Signal Region +chr17 3378900 3380800 High Signal Region +chr17 5863900 5885100 High Signal 
Region +chr17 6219100 6717500 High Signal Region +chr17 6877300 7037900 High Signal Region +chr17 7302300 7430200 High Signal Region +chr17 7615300 7617200 High Signal Region +chr17 7950200 8052300 High Signal Region +chr17 11097900 11105100 High Signal Region +chr17 13018500 13469100 High Signal Region +chr17 13492200 13555800 High Signal Region +chr17 13584800 13656200 High Signal Region +chr17 14961200 15054300 Low Mappability +chr17 20859400 20865200 High Signal Region +chr17 23426600 23537000 High Signal Region +chr17 23730600 23732500 High Signal Region +chr17 24095300 24097300 High Signal Region +chr17 29101000 29109600 High Signal Region +chr17 31569500 31571400 High Signal Region +chr17 35367400 35480300 Low Mappability +chr17 36230300 36232500 High Signal Region +chr17 38498200 38500800 High Signal Region +chr17 39842000 39849700 High Signal Region +chr17 40422500 40427000 High Signal Region +chr17 50569500 50571400 High Signal Region +chr17 53034300 53056100 High Signal Region +chr17 53151500 53153500 High Signal Region +chr17 53807400 53820300 High Signal Region +chr17 54112300 54134200 High Signal Region +chr17 57368400 57399900 High Signal Region +chr17 62736600 62738500 High Signal Region +chr17 66798500 66800400 High Signal Region +chr17 67740400 67742500 High Signal Region +chr17 70962200 70964800 High Signal Region +chr17 82975900 82991600 High Signal Region +chr17 84458800 84464500 Low Mappability +chr17 85264100 85266000 High Signal Region +chr17 93017000 93047400 High Signal Region +chr17 93623500 93646700 High Signal Region +chr17 94886200 94987200 High Signal Region +chr18 0 3063700 High Signal Region +chr18 3085500 3142600 High Signal Region +chr18 3568100 3570100 Low Mappability +chr18 3619800 3652100 Low Mappability +chr18 3779700 3785600 High Signal Region +chr18 3815100 3819300 High Signal Region +chr18 3873200 3889000 High Signal Region +chr18 4194700 4199900 High Signal Region +chr18 4456700 4504600 High Signal Region +chr18 4658000 
4664400 Low Mappability +chr18 4695200 4701800 Low Mappability +chr18 5499400 5502000 Low Mappability +chr18 5895900 5900400 Low Mappability +chr18 6043700 6046600 Low Mappability +chr18 6343100 6376400 Low Mappability +chr18 6663800 6669200 High Signal Region +chr18 6796200 6803600 Low Mappability +chr18 6853600 6868500 Low Mappability +chr18 7032800 7035500 High Signal Region +chr18 7527500 7534800 High Signal Region +chr18 7782300 7798400 High Signal Region +chr18 7998000 8018800 Low Mappability +chr18 8164900 8183000 High Signal Region +chr18 8243000 8271800 High Signal Region +chr18 8292000 8294000 Low Mappability +chr18 8721900 8747000 High Signal Region +chr18 9095200 9127300 High Signal Region +chr18 9248500 9269200 Low Mappability +chr18 9420000 9426100 High Signal Region +chr18 9890700 9915900 High Signal Region +chr18 11168900 11192100 High Signal Region +chr18 11247700 11293200 High Signal Region +chr18 11626000 11648000 Low Mappability +chr18 12945100 12956300 High Signal Region +chr18 13030000 13041900 High Signal Region +chr18 13161400 13180500 High Signal Region +chr18 13241200 13251100 Low Mappability +chr18 13296400 13300000 High Signal Region +chr18 13513200 13517200 High Signal Region +chr18 14732900 14739600 Low Mappability +chr18 15225500 15232800 High Signal Region +chr18 15366900 15382100 High Signal Region +chr18 15695100 15737600 High Signal Region +chr18 16283100 16288900 High Signal Region +chr18 16988600 17013600 Low Mappability +chr18 17116100 17119600 High Signal Region +chr18 17346100 17352400 High Signal Region +chr18 17425100 17480600 High Signal Region +chr18 17513300 17517900 High Signal Region +chr18 17541300 17559000 High Signal Region +chr18 17593300 17598500 High Signal Region +chr18 17938300 17951600 Low Mappability +chr18 18816600 18823800 High Signal Region +chr18 18916300 18917900 High Signal Region +chr18 18976900 18992400 High Signal Region +chr18 19240600 19289100 High Signal Region +chr18 19345800 19352600 Low 
Mappability +chr18 19430400 19448100 High Signal Region +chr18 19679600 19681600 Low Mappability +chr18 19812100 19836500 High Signal Region +chr18 20352500 20369800 High Signal Region +chr18 20896200 20910000 Low Mappability +chr18 21261800 21268900 Low Mappability +chr18 21528200 21541600 High Signal Region +chr18 21943200 21945200 Low Mappability +chr18 22297400 22304000 High Signal Region +chr18 23186200 23215300 High Signal Region +chr18 25045100 25047300 High Signal Region +chr18 25253000 25259500 High Signal Region +chr18 25905600 25928600 High Signal Region +chr18 26003000 26008100 Low Mappability +chr18 26829800 26837100 Low Mappability +chr18 26998200 27005600 Low Mappability +chr18 27062000 27068200 High Signal Region +chr18 28151300 28167300 High Signal Region +chr18 28441700 28446600 Low Mappability +chr18 28482900 28484900 High Signal Region +chr18 28814100 28816900 High Signal Region +chr18 28960100 28966000 Low Mappability +chr18 29014700 29022000 High Signal Region +chr18 29557800 29559800 High Signal Region +chr18 29713000 29719200 High Signal Region +chr18 31281100 31294300 High Signal Region +chr18 32758400 32793400 High Signal Region +chr18 33212800 33221500 Low Mappability +chr18 33275100 33331000 High Signal Region +chr18 33697400 33722600 Low Mappability +chr18 34083600 34087300 Low Mappability +chr18 34397100 34409800 Low Mappability +chr18 35318500 35320400 Low Mappability +chr18 36454200 36494600 Low Mappability +chr18 36981500 36988700 Low Mappability +chr18 37031800 37045800 High Signal Region +chr18 37364600 37398900 Low Mappability +chr18 37545500 37645000 High Signal Region +chr18 39598600 39604900 High Signal Region +chr18 40306300 40309300 High Signal Region +chr18 40708500 40713600 Low Mappability +chr18 41381600 41387500 High Signal Region +chr18 41465300 41471500 High Signal Region +chr18 41820100 41826100 High Signal Region +chr18 41960600 41966100 High Signal Region +chr18 42556800 42559800 High Signal Region +chr18 42913000 
42914900 High Signal Region +chr18 43335500 43337900 High Signal Region +chr18 43889500 43900400 High Signal Region +chr18 44033600 44050200 High Signal Region +chr18 44228000 44263100 High Signal Region +chr18 44291600 44295600 High Signal Region +chr18 44361600 44380500 High Signal Region +chr18 44873100 44875100 Low Mappability +chr18 44981000 45032700 High Signal Region +chr18 45131400 45133400 High Signal Region +chr18 45291700 45314300 Low Mappability +chr18 45357300 45364700 Low Mappability +chr18 45392200 45397700 High Signal Region +chr18 45506800 45513400 High Signal Region +chr18 45998300 46038000 Low Mappability +chr18 46082000 46101400 High Signal Region +chr18 46439100 46444100 Low Mappability +chr18 46791400 46793400 Low Mappability +chr18 47648600 47654100 Low Mappability +chr18 47769900 47783100 Low Mappability +chr18 48009500 48011400 High Signal Region +chr18 48208100 48220300 High Signal Region +chr18 48705800 48713100 Low Mappability +chr18 48831300 48836100 High Signal Region +chr18 49387700 49397800 High Signal Region +chr18 49669200 49695600 High Signal Region +chr18 50253400 50268700 High Signal Region +chr18 50632100 50700200 Low Mappability +chr18 51072000 51077600 Low Mappability +chr18 51658600 51698300 High Signal Region +chr18 52020200 52059300 High Signal Region +chr18 52256200 52262200 High Signal Region +chr18 52378900 52395000 Low Mappability +chr18 52876200 52883200 High Signal Region +chr18 53828800 53839900 Low Mappability +chr18 53869300 53876600 Low Mappability +chr18 54023900 54030000 High Signal Region +chr18 54288100 54335900 Low Mappability +chr18 54698000 54707800 High Signal Region +chr18 55222400 55224400 Low Mappability +chr18 55311000 55321100 Low Mappability +chr18 55414800 55436200 Low Mappability +chr18 55899800 55901700 High Signal Region +chr18 55938500 55954100 High Signal Region +chr18 56273000 56276900 High Signal Region +chr18 56302600 56304500 High Signal Region +chr18 56341200 56346000 High Signal Region 
+chr18 56826900 56830200 Low Mappability +chr18 57560400 57562500 Low Mappability +chr18 58992700 58999300 Low Mappability +chr18 59496300 59511000 High Signal Region +chr18 59929900 59955000 High Signal Region +chr18 60042400 60044400 Low Mappability +chr18 60206100 60238100 High Signal Region +chr18 60525200 60533800 Low Mappability +chr18 62237400 62247700 High Signal Region +chr18 62273700 62292800 Low Mappability +chr18 62752700 62755100 High Signal Region +chr18 64131300 64132600 High Signal Region +chr18 64448400 64454900 Low Mappability +chr18 65103100 65105000 High Signal Region +chr18 65385700 65405100 Low Mappability +chr18 65492400 65494700 Low Mappability +chr18 65716300 65719400 Low Mappability +chr18 66543200 66548900 High Signal Region +chr18 66750000 66759900 Low Mappability +chr18 66881200 66887200 High Signal Region +chr18 68381300 68387800 High Signal Region +chr18 68412100 68425800 Low Mappability +chr18 68461300 68489000 High Signal Region +chr18 68691100 68693200 High Signal Region +chr18 69759300 69761300 Low Mappability +chr18 70489500 70515400 High Signal Region +chr18 70775600 70791900 High Signal Region +chr18 70842100 70849200 Low Mappability +chr18 71032500 71038800 High Signal Region +chr18 71139200 71145200 High Signal Region +chr18 71208200 71211300 Low Mappability +chr18 71267000 71273300 Low Mappability +chr18 71630400 71641100 Low Mappability +chr18 72753900 72794900 High Signal Region +chr18 72987900 72991000 High Signal Region +chr18 73259600 73264100 Low Mappability +chr18 74553100 74566400 High Signal Region +chr18 74745500 74758500 Low Mappability +chr18 74880300 74882000 High Signal Region +chr18 76177900 76184300 Low Mappability +chr18 76579700 76586300 Low Mappability +chr18 77264400 77271000 High Signal Region +chr18 78197300 78199300 High Signal Region +chr18 78407800 78428500 Low Mappability +chr18 78861400 78867900 High Signal Region +chr18 80021700 80028900 Low Mappability +chr18 80307500 80309600 Low Mappability 
+chr18 80455500 80518400 Low Mappability +chr18 81299700 81306200 Low Mappability +chr18 82052100 82058200 High Signal Region +chr18 82160100 82227800 High Signal Region +chr18 82319500 82339900 High Signal Region +chr18 82692900 82717900 Low Mappability +chr18 83171100 83178400 Low Mappability +chr18 83700500 83707900 Low Mappability +chr18 84828700 84833000 High Signal Region +chr18 85035000 85080600 High Signal Region +chr18 85105800 85112200 High Signal Region +chr18 85169900 85175900 High Signal Region +chr18 85377800 85382800 Low Mappability +chr18 85697000 85699200 High Signal Region +chr18 85783600 85789900 High Signal Region +chr18 86508300 86510200 High Signal Region +chr18 86560600 86586100 High Signal Region +chr18 86828500 86849500 High Signal Region +chr18 87006300 87009800 High Signal Region +chr18 87141500 87161200 High Signal Region +chr18 87568300 87574300 High Signal Region +chr18 88149300 88155400 High Signal Region +chr18 89030400 89036400 High Signal Region +chr18 89615900 89650500 Low Mappability +chr18 89983200 89989700 Low Mappability +chr18 90055500 90092500 High Signal Region +chr18 90113400 90125400 Low Mappability +chr18 90464100 90501300 High Signal Region +chr18 90601200 90702600 High Signal Region +chr19 0 3140800 High Signal Region +chr19 3161400 3248600 High Signal Region +chr19 4061100 4066400 Low Mappability +chr19 6581000 6594300 High Signal Region +chr19 7713600 7774800 High Signal Region +chr19 7810700 7843900 Low Mappability +chr19 8203200 8285500 Low Mappability +chr19 9250500 9357700 High Signal Region +chr19 9502000 9565000 Low Mappability +chr19 9745800 9803300 High Signal Region +chr19 9823500 9837700 High Signal Region +chr19 10507900 10510300 High Signal Region +chr19 10954500 10960300 Low Mappability +chr19 11199700 11239800 High Signal Region +chr19 12447200 12454600 Low Mappability +chr19 13203500 13216400 High Signal Region +chr19 13330600 13357100 High Signal Region +chr19 13685000 13693300 High Signal Region 
+chr19 13760500 13777200 High Signal Region +chr19 15256700 15263000 High Signal Region +chr19 15433400 15438100 High Signal Region +chr19 15711800 15719800 High Signal Region +chr19 15839200 15846600 High Signal Region +chr19 15956500 15958500 Low Mappability +chr19 16670500 16673100 High Signal Region +chr19 18358000 18364200 High Signal Region +chr19 18532700 18535600 High Signal Region +chr19 19132200 19161200 High Signal Region +chr19 19509000 19514900 High Signal Region +chr19 19870300 19876900 Low Mappability +chr19 20080700 20081800 High Signal Region +chr19 20140700 20144100 Low Mappability +chr19 20288200 20297900 Low Mappability +chr19 20455400 20462700 Low Mappability +chr19 20839700 20843900 Low Mappability +chr19 21218200 21243800 High Signal Region +chr19 21532400 21534400 Low Mappability +chr19 22644100 22651700 High Signal Region +chr19 22722400 22728400 Low Mappability +chr19 23356500 23358400 High Signal Region +chr19 23739200 23754000 High Signal Region +chr19 24040300 24042300 Low Mappability +chr19 24911900 24919200 High Signal Region +chr19 25741800 25770100 High Signal Region +chr19 25917500 25920000 High Signal Region +chr19 27751400 27758100 High Signal Region +chr19 28149600 28156600 High Signal Region +chr19 30907400 30908700 High Signal Region +chr19 30963600 30968000 Low Mappability +chr19 31722800 31735800 High Signal Region +chr19 32203200 32211600 Low Mappability +chr19 32441800 32449100 Low Mappability +chr19 32822000 32824000 Low Mappability +chr19 33439100 33446100 Low Mappability +chr19 33864200 33877900 High Signal Region +chr19 33949100 33958200 High Signal Region +chr19 34131200 34161200 Low Mappability +chr19 34581900 34613000 High Signal Region +chr19 35076400 35079800 High Signal Region +chr19 35650200 35673500 High Signal Region +chr19 36702500 36723400 High Signal Region +chr19 37298800 37301800 Low Mappability +chr19 37617300 37624600 Low Mappability +chr19 38490200 38495300 Low Mappability +chr19 39078100 39079500 High 
Signal Region +chr19 39106700 39156300 High Signal Region +chr19 39244700 39270400 High Signal Region +chr19 39331700 39424100 High Signal Region +chr19 39599900 39607200 Low Mappability +chr19 39658700 39695100 Low Mappability +chr19 40020400 40026800 Low Mappability +chr19 40094100 40153300 High Signal Region +chr19 40328500 40330000 Low Mappability +chr19 41142700 41150000 Low Mappability +chr19 41424200 41473100 Low Mappability +chr19 42346000 42350500 Low Mappability +chr19 42647600 42649700 Low Mappability +chr19 43118800 43124600 High Signal Region +chr19 43236000 43238000 Low Mappability +chr19 43321500 43323700 High Signal Region +chr19 44145700 44171700 Low Mappability +chr19 44218500 44225000 Low Mappability +chr19 44862100 44864300 High Signal Region +chr19 45004900 45096500 Low Mappability +chr19 45182300 45190200 High Signal Region +chr19 45649000 45661500 High Signal Region +chr19 45699400 45706300 Low Mappability +chr19 47590300 47602700 Low Mappability +chr19 48484600 48496700 High Signal Region +chr19 48743800 48746300 High Signal Region +chr19 50107900 50114400 Low Mappability +chr19 50309700 50311600 High Signal Region +chr19 50754100 50755900 Low Mappability +chr19 50828900 50835600 High Signal Region +chr19 51649700 51655800 High Signal Region +chr19 51949000 51955700 Low Mappability +chr19 52303100 52309700 Low Mappability +chr19 52927900 52932300 Low Mappability +chr19 52967800 52991100 Low Mappability +chr19 53522200 53527100 High Signal Region +chr19 53767900 53777800 High Signal Region +chr19 54235200 54236600 High Signal Region +chr19 54884700 54936800 High Signal Region +chr19 54994900 55001700 Low Mappability +chr19 55976700 55984000 Low Mappability +chr19 56248700 56259000 Low Mappability +chr19 56846600 56849100 High Signal Region +chr19 57514200 57520700 Low Mappability +chr19 57634000 57635600 Low Mappability +chr19 57827000 57832700 Low Mappability +chr19 58012500 58014600 Low Mappability +chr19 58112400 58114500 High Signal 
Region +chr19 58481300 58483200 High Signal Region +chr19 59221800 59240400 High Signal Region +chr19 59763100 59779900 High Signal Region +chr19 60082500 60089900 High Signal Region +chr19 60906900 60934000 High Signal Region +chr19 61162600 61174300 Low Mappability +chr19 61197700 61268100 High Signal Region +chr19 61330300 61431500 High Signal Region +chr1 8628600 8719100 High Signal Region +chr1 12038300 12041400 High Signal Region +chr1 14958600 14992600 High Signal Region +chr1 17466800 17479900 High Signal Region +chr1 18872500 18901300 High Signal Region +chr1 19175300 19177200 High Signal Region +chr1 22555000 22556900 High Signal Region +chr1 24610600 24617100 High Signal Region +chr1 24683100 24685100 High Signal Region +chr1 26685100 26689200 High Signal Region +chr1 43776800 43779800 High Signal Region +chr1 44198000 44202200 High Signal Region +chr1 46701700 46756600 High Signal Region +chr1 48880600 48882500 High Signal Region +chr1 56119600 56143500 High Signal Region +chr1 56772200 56783300 High Signal Region +chr1 58613000 58614900 High Signal Region +chr1 63629100 63631600 High Signal Region +chr1 69455800 69457800 High Signal Region +chr1 71078400 71085500 High Signal Region +chr1 71250600 71256700 High Signal Region +chr1 73549100 73555300 High Signal Region +chr1 73832600 73902400 High Signal Region +chr1 78572900 78575400 High Signal Region +chr1 84953500 85663200 High Signal Region +chr1 88209400 88311700 High Signal Region +chr1 94093800 94109400 High Signal Region +chr1 95451000 95452900 High Signal Region +chr1 95783900 95789700 High Signal Region +chr1 95810200 95851700 High Signal Region +chr1 100737900 100760500 High Signal Region +chr1 101040100 101046300 High Signal Region +chr1 102627300 102644300 High Signal Region +chr1 105226800 105230700 High Signal Region +chr1 110170400 110188300 High Signal Region +chr1 113602700 113604800 High Signal Region +chr1 114557300 114579100 High Signal Region +chr1 114643300 114660500 High Signal 
Region +chr1 115447500 115482800 High Signal Region +chr1 122356200 122358200 High Signal Region +chr1 133593600 133611300 High Signal Region +chr1 142651800 142672300 High Signal Region +chr1 145444500 145449100 High Signal Region +chr1 146120600 146128200 High Signal Region +chr1 151181600 151212000 High Signal Region +chr1 165862800 165864700 Low Mappability +chr1 171033000 171112400 High Signal Region +chr1 172716800 172738200 High Signal Region +chr1 172878700 172885100 High Signal Region +chr1 178538700 178540700 High Signal Region +chr1 181742100 181752400 High Signal Region +chr1 182628900 182630800 High Signal Region +chr1 183298200 183300500 High Signal Region +chr1 190299400 190304600 High Signal Region +chr1 192453100 192471800 High Signal Region +chr1 193226900 193228800 High Signal Region +chr1 195239800 195257400 High Signal Region +chr1 195278100 195280200 High Signal Region +chr1 195320700 195471900 High Signal Region +chr2 0 3086300 High Signal Region +chr2 3474900 3488800 High Signal Region +chr2 3932700 3939100 Low Mappability +chr2 3963500 3986100 High Signal Region +chr2 4515100 4518600 High Signal Region +chr2 4600600 4620300 High Signal Region +chr2 5378100 5394600 High Signal Region +chr2 5545900 5561600 High Signal Region +chr2 6078200 6095300 High Signal Region +chr2 6773100 6777500 Low Mappability +chr2 6832200 6846700 High Signal Region +chr2 7137500 7139600 High Signal Region +chr2 7404000 7458100 High Signal Region +chr2 7571700 7609800 High Signal Region +chr2 7656300 7669700 Low Mappability +chr2 7752800 7758500 High Signal Region +chr2 8034600 8042900 High Signal Region +chr2 8266200 8275600 High Signal Region +chr2 8528400 8535700 High Signal Region +chr2 8938000 8940500 High Signal Region +chr2 9212600 9219300 High Signal Region +chr2 10177100 10183400 Low Mappability +chr2 10483200 10501500 Low Mappability +chr2 10677000 10697600 Low Mappability +chr2 12605500 12668600 High Signal Region +chr2 13824000 13869200 High Signal 
Region +chr2 13946300 13948900 High Signal Region +chr2 14014100 14035300 High Signal Region +chr2 14359100 14386600 High Signal Region +chr2 14919000 14924500 High Signal Region +chr2 15301300 15334700 High Signal Region +chr2 15430100 15435500 Low Mappability +chr2 15575900 15602800 High Signal Region +chr2 15716700 15721100 High Signal Region +chr2 15768300 15770500 High Signal Region +chr2 16192400 16198500 High Signal Region +chr2 16320200 16326500 Low Mappability +chr2 16762800 16787000 High Signal Region +chr2 17383200 17385100 High Signal Region +chr2 17612500 17654500 Low Mappability +chr2 17747200 17753000 High Signal Region +chr2 19209900 19212900 High Signal Region +chr2 19498400 19510300 High Signal Region +chr2 19707900 19712200 High Signal Region +chr2 20038500 20067400 Low Mappability +chr2 20426800 20433300 Low Mappability +chr2 20898900 20901100 High Signal Region +chr2 21062600 21082200 Low Mappability +chr2 22049700 22087700 High Signal Region +chr2 22137300 22165500 High Signal Region +chr2 22389900 22608700 High Signal Region +chr2 22737300 22745800 High Signal Region +chr2 23009600 23015000 Low Mappability +chr2 23274600 23304900 High Signal Region +chr2 23693700 23707900 High Signal Region +chr2 24193300 24199000 High Signal Region +chr2 26333100 26351900 Low Mappability +chr2 26759100 26763600 High Signal Region +chr2 26998200 27004400 Low Mappability +chr2 28183200 28205000 High Signal Region +chr2 30204600 30239600 Low Mappability +chr2 32381300 32488200 Low Mappability +chr2 33933000 33935300 High Signal Region +chr2 34049900 34051800 High Signal Region +chr2 34903900 34935900 Low Mappability +chr2 35090800 35109900 High Signal Region +chr2 35505000 35526700 Low Mappability +chr2 36008600 36019300 Low Mappability +chr2 36401900 36413100 High Signal Region +chr2 36508600 36515200 High Signal Region +chr2 36542800 36549100 High Signal Region +chr2 36761000 36766500 High Signal Region +chr2 36951900 36970700 High Signal Region +chr2 
37156900 37185900 High Signal Region +chr2 37339700 37359400 Low Mappability +chr2 38564700 38566600 Low Mappability +chr2 39225400 39293200 High Signal Region +chr2 39360600 39367900 Low Mappability +chr2 39517800 39534800 High Signal Region +chr2 39778500 39785700 Low Mappability +chr2 39887500 39915800 High Signal Region +chr2 40131200 40240800 High Signal Region +chr2 40262500 40268600 High Signal Region +chr2 40766400 40794000 High Signal Region +chr2 41059500 41070200 Low Mappability +chr2 41168700 41171400 High Signal Region +chr2 41692800 41694800 High Signal Region +chr2 41744300 41751600 Low Mappability +chr2 41775100 41781500 High Signal Region +chr2 41895300 41897200 High Signal Region +chr2 42044500 42051600 High Signal Region +chr2 42200300 42240700 High Signal Region +chr2 42950100 42956600 High Signal Region +chr2 43347900 43356400 High Signal Region +chr2 44936600 44942400 High Signal Region +chr2 46224800 46226700 High Signal Region +chr2 46343100 46348100 Low Mappability +chr2 46574200 46579600 Low Mappability +chr2 47008600 47023500 High Signal Region +chr2 47196300 47199300 High Signal Region +chr2 47533600 47642600 High Signal Region +chr2 47942200 47943800 High Signal Region +chr2 48483000 48491000 Low Mappability +chr2 50543200 50545500 High Signal Region +chr2 50679600 50686800 Low Mappability +chr2 51552600 51555600 High Signal Region +chr2 51750900 51756000 High Signal Region +chr2 51881600 51890600 Low Mappability +chr2 51945900 51948400 High Signal Region +chr2 52695900 52718600 High Signal Region +chr2 52786800 52796300 High Signal Region +chr2 53317700 53321600 Low Mappability +chr2 53347800 53367000 High Signal Region +chr2 53633400 53642900 High Signal Region +chr2 53745700 53799800 High Signal Region +chr2 54252600 54258500 High Signal Region +chr2 54698000 54747900 High Signal Region +chr2 54862600 54895300 High Signal Region +chr2 55197500 55216400 High Signal Region +chr2 55308300 55353700 High Signal Region +chr2 55823800 
55829000 High Signal Region +chr2 55860200 55874300 Low Mappability +chr2 55942000 55947800 High Signal Region +chr2 56192800 56194600 High Signal Region +chr2 56298700 56304900 High Signal Region +chr2 56465200 56471900 High Signal Region +chr2 56834300 56879100 High Signal Region +chr2 56988500 56990600 Low Mappability +chr2 57166400 57172900 Low Mappability +chr2 57214400 57223500 Low Mappability +chr2 57417400 57446500 High Signal Region +chr2 57628500 57633800 High Signal Region +chr2 57726600 57728500 High Signal Region +chr2 58212900 58263100 High Signal Region +chr2 58648300 58691900 High Signal Region +chr2 58881200 58902500 High Signal Region +chr2 59971300 59972800 Low Mappability +chr2 61038200 61042700 High Signal Region +chr2 61959600 61965300 High Signal Region +chr2 62022900 62040100 High Signal Region +chr2 62861100 62867200 High Signal Region +chr2 63297300 63302700 Low Mappability +chr2 63368100 63403900 High Signal Region +chr2 63462300 63483800 High Signal Region +chr2 63641200 63654600 High Signal Region +chr2 63718200 63725400 High Signal Region +chr2 63838100 63845300 Low Mappability +chr2 64309200 64319600 High Signal Region +chr2 64608400 64633400 Low Mappability +chr2 64698700 64703300 High Signal Region +chr2 65592500 65602200 High Signal Region +chr2 65737700 65781500 Low Mappability +chr2 66721600 66750400 High Signal Region +chr2 66845100 66852300 High Signal Region +chr2 67408400 67414500 High Signal Region +chr2 67939700 67946000 High Signal Region +chr2 68770400 68776700 High Signal Region +chr2 68917800 68924100 Low Mappability +chr2 69353900 69356600 High Signal Region +chr2 70263100 70270000 Low Mappability +chr2 70880100 70892900 High Signal Region +chr2 71054700 71071300 Low Mappability +chr2 71942000 71949500 Low Mappability +chr2 72270200 72275700 Low Mappability +chr2 73867000 73868900 High Signal Region +chr2 74364300 74402600 Low Mappability +chr2 74437600 74444900 Low Mappability +chr2 75499500 75504600 High Signal 
Region +chr2 77224000 77230500 Low Mappability +chr2 78318000 78339500 High Signal Region +chr2 79437700 79441900 High Signal Region +chr2 79936500 79943700 High Signal Region +chr2 80119000 80121500 High Signal Region +chr2 80220600 80257700 Low Mappability +chr2 80795600 80838700 High Signal Region +chr2 80879000 80880200 High Signal Region +chr2 80956500 81006000 High Signal Region +chr2 81069000 81075100 High Signal Region +chr2 81639400 81644800 High Signal Region +chr2 81750800 81756800 High Signal Region +chr2 81790000 81795900 High Signal Region +chr2 82329800 82340100 High Signal Region +chr2 82673800 82679900 High Signal Region +chr2 82714300 82728500 High Signal Region +chr2 82783900 82789500 High Signal Region +chr2 82868800 82887900 High Signal Region +chr2 82916300 82936800 High Signal Region +chr2 83120100 83146100 High Signal Region +chr2 83185100 83193200 High Signal Region +chr2 83325900 83328200 High Signal Region +chr2 83413500 83587500 High Signal Region +chr2 83865600 83893100 High Signal Region +chr2 83931600 83995800 Low Mappability +chr2 84080900 84085600 High Signal Region +chr2 84505000 84510500 Low Mappability +chr2 84532500 84534600 Low Mappability +chr2 84564800 84576000 Low Mappability +chr2 85685600 85701800 Low Mappability +chr2 85874000 85896300 High Signal Region +chr2 86018200 86021700 Low Mappability +chr2 86303400 86317700 High Signal Region +chr2 86339600 86346900 Low Mappability +chr2 86612700 86617500 High Signal Region +chr2 87381000 87382800 High Signal Region +chr2 87875700 87941300 High Signal Region +chr2 88167400 88212600 High Signal Region +chr2 88776200 88780800 High Signal Region +chr2 89206600 89277100 Low Mappability +chr2 89345700 89350400 High Signal Region +chr2 89761200 89775100 High Signal Region +chr2 89856400 89920100 High Signal Region +chr2 90127200 90132700 High Signal Region +chr2 90157100 90249100 High Signal Region +chr2 90273200 90279100 High Signal Region +chr2 90309300 90396100 High Signal Region 
+chr2 92092600 92094700 High Signal Region +chr2 92167200 92169100 High Signal Region +chr2 93824700 93850200 High Signal Region +chr2 94602800 94607800 Low Mappability +chr2 94633900 94656500 High Signal Region +chr2 94801000 94809400 Low Mappability +chr2 94852800 94891200 High Signal Region +chr2 95064700 95093500 Low Mappability +chr2 95148000 95167800 High Signal Region +chr2 95215900 95320600 High Signal Region +chr2 95414700 95420600 High Signal Region +chr2 95536400 95538400 Low Mappability +chr2 95647900 95654300 High Signal Region +chr2 95794500 95799200 High Signal Region +chr2 95929300 95934400 High Signal Region +chr2 96191400 96208900 High Signal Region +chr2 96547800 96566800 Low Mappability +chr2 96954700 96977300 High Signal Region +chr2 97021000 97034600 High Signal Region +chr2 97308000 97327600 High Signal Region +chr2 97671600 97686300 High Signal Region +chr2 97760700 97765800 High Signal Region +chr2 97872400 97958200 High Signal Region +chr2 98361700 98449600 High Signal Region +chr2 98659400 98668200 High Signal Region +chr2 98796500 98801900 High Signal Region +chr2 99020000 99057500 High Signal Region +chr2 99300200 99320300 High Signal Region +chr2 99944600 99970200 High Signal Region +chr2 100112000 100114300 High Signal Region +chr2 100223900 100238300 High Signal Region +chr2 100418400 100777900 Low Mappability +chr2 101127200 101153600 Low Mappability +chr2 101313100 101350600 High Signal Region +chr2 102828400 102830400 High Signal Region +chr2 103231300 103232300 High Signal Region +chr2 103852300 103872800 High Signal Region +chr2 104684900 104697300 High Signal Region +chr2 105249300 105259000 High Signal Region +chr2 105539300 105563200 Low Mappability +chr2 105825900 105865100 High Signal Region +chr2 106555100 106569300 High Signal Region +chr2 107134100 107140900 High Signal Region +chr2 107593900 107601200 Low Mappability +chr2 107710100 107712400 High Signal Region +chr2 108608600 108614000 High Signal Region +chr2 
108945100 108972800 High Signal Region +chr2 109629400 109636000 High Signal Region +chr2 110016800 110025500 High Signal Region +chr2 110091100 110128700 High Signal Region +chr2 110157100 110163300 High Signal Region +chr2 110292700 110294600 High Signal Region +chr2 110545800 110583400 High Signal Region +chr2 110752400 110780100 High Signal Region +chr2 111007400 111018600 High Signal Region +chr2 111042000 111046600 High Signal Region +chr2 111172700 111179800 High Signal Region +chr2 111281500 111287900 Low Mappability +chr2 111545600 111553300 Low Mappability +chr2 111716900 111722900 High Signal Region +chr2 111844900 111866400 High Signal Region +chr2 111890900 111898900 High Signal Region +chr2 112053900 112086000 High Signal Region +chr2 112319700 112326200 Low Mappability +chr2 112522900 112570500 High Signal Region +chr2 112602800 112605100 High Signal Region +chr2 112701400 112707900 High Signal Region +chr2 113095800 113102400 Low Mappability +chr2 113330900 113333000 Low Mappability +chr2 113518400 113524900 Low Mappability +chr2 113564300 113565700 High Signal Region +chr2 113659300 113673200 High Signal Region +chr2 114180800 114187400 Low Mappability +chr2 114242400 114244000 High Signal Region +chr2 114469200 114504000 High Signal Region +chr2 116454300 116524000 High Signal Region +chr2 117829600 117835500 High Signal Region +chr2 118017700 118020200 High Signal Region +chr2 120608600 120650200 High Signal Region +chr2 120810300 120821000 High Signal Region +chr2 121435600 121523600 High Signal Region +chr2 121938800 121957600 High Signal Region +chr2 122680400 122683200 High Signal Region +chr2 123288000 123294300 Low Mappability +chr2 123496800 123525300 High Signal Region +chr2 123785200 123790700 High Signal Region +chr2 124002700 124004600 High Signal Region +chr2 124798800 124835800 High Signal Region +chr2 125625000 125635900 Low Mappability +chr2 126217400 126263800 High Signal Region +chr2 126445400 126447400 Low Mappability +chr2 
126964900 126972100 Low Mappability +chr2 127720400 127734000 Low Mappability +chr2 128050800 128053200 High Signal Region +chr2 128480400 128486900 Low Mappability +chr2 128772500 128774500 Low Mappability +chr2 129499400 129523400 High Signal Region +chr2 129602700 129613700 Low Mappability +chr2 131791800 131793800 High Signal Region +chr2 131908300 131931100 Low Mappability +chr2 131963900 131983700 High Signal Region +chr2 132885700 132890400 High Signal Region +chr2 132952400 132954500 Low Mappability +chr2 133053200 133083400 High Signal Region +chr2 133239300 133261800 High Signal Region +chr2 133934000 133937500 High Signal Region +chr2 134560100 134577900 High Signal Region +chr2 134661800 134673000 High Signal Region +chr2 134746600 134751100 High Signal Region +chr2 135146800 135151900 High Signal Region +chr2 135987600 135989700 High Signal Region +chr2 136234300 136286800 Low Mappability +chr2 137028200 137037000 High Signal Region +chr2 137345900 137369900 High Signal Region +chr2 137394500 137405600 High Signal Region +chr2 137640000 137642300 High Signal Region +chr2 137890200 137895000 High Signal Region +chr2 138035000 138056400 Low Mappability +chr2 138573700 138580400 High Signal Region +chr2 138621500 138624200 High Signal Region +chr2 138833600 138853100 High Signal Region +chr2 138904300 138935000 High Signal Region +chr2 139433200 139476200 High Signal Region +chr2 140345800 140352400 Low Mappability +chr2 142197000 142204400 Low Mappability +chr2 142464200 142483300 Low Mappability +chr2 142789100 142795600 Low Mappability +chr2 143275500 143290300 High Signal Region +chr2 143725900 143764700 High Signal Region +chr2 144627800 144636700 Low Mappability +chr2 144975200 144977100 High Signal Region +chr2 145001300 145003200 High Signal Region +chr2 145118300 145146300 Low Mappability +chr2 145236800 145242600 Low Mappability +chr2 145625100 145630800 Low Mappability +chr2 145732700 145734600 High Signal Region +chr2 146135700 146176900 High 
Signal Region +chr2 146995700 147013200 Low Mappability +chr2 147675300 147677500 High Signal Region +chr2 147864800 147871300 High Signal Region +chr2 147918800 147925100 Low Mappability +chr2 148410500 148416000 Low Mappability +chr2 148459900 148473800 High Signal Region +chr2 148612700 148620200 Low Mappability +chr2 148939300 148984200 High Signal Region +chr2 149049800 149056000 High Signal Region +chr2 149269400 149292700 High Signal Region +chr2 150413500 150452500 High Signal Region +chr2 150728300 150749700 Low Mappability +chr2 151029700 151385300 High Signal Region +chr2 151408800 151496700 High Signal Region +chr2 152157000 152159000 Low Mappability +chr2 152206800 152227500 High Signal Region +chr2 152263400 152269900 Low Mappability +chr2 153674800 153693100 Low Mappability +chr2 154174200 154180000 High Signal Region +chr2 154353800 154359700 Low Mappability +chr2 155016300 155051500 High Signal Region +chr2 155235400 155258100 High Signal Region +chr2 156185100 156214400 Low Mappability +chr2 157566000 157655300 Low Mappability +chr2 157833200 157835600 High Signal Region +chr2 158286300 158292800 High Signal Region +chr2 159455200 159469500 High Signal Region +chr2 160620300 160638500 High Signal Region +chr2 161368800 161376200 High Signal Region +chr2 161984900 161990900 High Signal Region +chr2 162369100 162376700 High Signal Region +chr2 162594500 162602700 High Signal Region +chr2 162843800 162847600 High Signal Region +chr2 163519100 163533100 Low Mappability +chr2 163644500 163655100 High Signal Region +chr2 163788900 163796100 Low Mappability +chr2 163833800 163849200 Low Mappability +chr2 163958100 163963000 Low Mappability +chr2 164201000 164202700 High Signal Region +chr2 165477300 165529900 Low Mappability +chr2 165675100 165679500 Low Mappability +chr2 165848700 165953000 Low Mappability +chr2 166530600 166535100 Low Mappability +chr2 166780500 166832200 Low Mappability +chr2 167269400 167291100 High Signal Region +chr2 167407900 
167423000 Low Mappability +chr2 170315100 170320000 High Signal Region +chr2 170503800 170509800 High Signal Region +chr2 171814300 171816700 High Signal Region +chr2 171912800 171932200 Low Mappability +chr2 172007100 172014300 High Signal Region +chr2 172743600 172751100 Low Mappability +chr2 173098700 173101000 Low Mappability +chr2 173706700 173708800 High Signal Region +chr2 174961800 176745500 High Signal Region +chr2 176767100 177166600 High Signal Region +chr2 177232400 177490200 High Signal Region +chr2 177526700 177841000 High Signal Region +chr2 178775000 178794400 High Signal Region +chr2 180025600 180093500 Low Mappability +chr2 181169900 181188000 Low Mappability +chr2 181285900 181298800 High Signal Region +chr2 181739800 181745800 High Signal Region +chr2 181885000 181933400 High Signal Region +chr2 182003800 182113200 High Signal Region +chr3 0 3052500 High Signal Region +chr3 3084100 3098300 High Signal Region +chr3 3123200 3150800 High Signal Region +chr3 3443300 3493700 High Signal Region +chr3 4698100 4725500 High Signal Region +chr3 5517700 5525000 Low Mappability +chr3 5859400 5863500 High Signal Region +chr3 6115100 6117100 High Signal Region +chr3 6601900 6627400 High Signal Region +chr3 6900700 6916400 High Signal Region +chr3 6941100 6946600 High Signal Region +chr3 7178300 7223900 High Signal Region +chr3 7477600 7482500 High Signal Region +chr3 7910300 7916600 High Signal Region +chr3 8225200 8247500 High Signal Region +chr3 8574000 8589900 High Signal Region +chr3 8815300 8838700 High Signal Region +chr3 9091900 9096900 Low Mappability +chr3 9777500 9778500 High Signal Region +chr3 9904100 9910700 High Signal Region +chr3 9952100 9967100 High Signal Region +chr3 10453800 10464500 High Signal Region +chr3 10961700 10971700 High Signal Region +chr3 11050200 11070500 High Signal Region +chr3 11120700 11143300 High Signal Region +chr3 11518700 11524700 High Signal Region +chr3 11779200 11806000 High Signal Region +chr3 11933500 11938400 
High Signal Region +chr3 11961500 11973100 High Signal Region +chr3 12107500 12131400 High Signal Region +chr3 12221200 12262000 High Signal Region +chr3 12336000 12339700 High Signal Region +chr3 12814500 12857800 Low Mappability +chr3 12906200 12907300 High Signal Region +chr3 13219400 13222800 High Signal Region +chr3 13821100 13826600 Low Mappability +chr3 13965800 13972000 High Signal Region +chr3 14272100 14336300 High Signal Region +chr3 14449600 14478500 High Signal Region +chr3 14593200 14597400 High Signal Region +chr3 14668900 14744700 High Signal Region +chr3 15028800 15045100 High Signal Region +chr3 15079500 15087400 High Signal Region +chr3 15451600 15872400 High Signal Region +chr3 15964200 15967200 High Signal Region +chr3 16351400 16357100 High Signal Region +chr3 16626000 16633700 High Signal Region +chr3 16995700 17021400 High Signal Region +chr3 17419700 17447600 High Signal Region +chr3 17679600 17682100 High Signal Region +chr3 17954200 17997400 High Signal Region +chr3 18379800 18395100 High Signal Region +chr3 18432100 18437500 High Signal Region +chr3 18966900 18983600 High Signal Region +chr3 19357600 19359300 High Signal Region +chr3 19594900 19601100 High Signal Region +chr3 19917700 19940300 High Signal Region +chr3 21247500 21250200 High Signal Region +chr3 21317800 21324600 High Signal Region +chr3 21383700 21389000 High Signal Region +chr3 21512900 21519300 High Signal Region +chr3 21661800 21663700 Low Mappability +chr3 21685300 21709500 High Signal Region +chr3 22069200 22070500 High Signal Region +chr3 22240800 22250100 High Signal Region +chr3 22362000 22377000 High Signal Region +chr3 22517600 22521100 High Signal Region +chr3 22612100 22759200 High Signal Region +chr3 22933800 23015000 High Signal Region +chr3 23077300 23099800 High Signal Region +chr3 23173700 23180900 Low Mappability +chr3 23302200 23321100 High Signal Region +chr3 23353500 23360000 High Signal Region +chr3 23463300 23468200 High Signal Region +chr3 23579500 
23584900 High Signal Region +chr3 23841700 23843800 Low Mappability +chr3 24624400 24627900 High Signal Region +chr3 24655200 24661300 High Signal Region +chr3 25210800 25228800 Low Mappability +chr3 25277500 25310400 High Signal Region +chr3 25416900 25421600 Low Mappability +chr3 25472900 25478900 High Signal Region +chr3 26089400 26113400 High Signal Region +chr3 26346800 26369700 High Signal Region +chr3 26724600 26737000 High Signal Region +chr3 26944500 26950800 High Signal Region +chr3 27010100 27023300 High Signal Region +chr3 27309300 27319800 Low Mappability +chr3 28198300 28201300 Low Mappability +chr3 28513900 28535500 High Signal Region +chr3 28983500 29014200 High Signal Region +chr3 29461500 29492300 High Signal Region +chr3 29675900 29680600 High Signal Region +chr3 31176300 31188900 Low Mappability +chr3 31340700 31364500 Low Mappability +chr3 31651800 31680100 High Signal Region +chr3 31819800 31826900 High Signal Region +chr3 33696500 33708400 High Signal Region +chr3 33768300 33798500 High Signal Region +chr3 33930000 33948800 Low Mappability +chr3 34516200 34518200 High Signal Region +chr3 35285400 35292700 High Signal Region +chr3 35707000 35713500 Low Mappability +chr3 35743300 35744600 High Signal Region +chr3 36106500 36109400 High Signal Region +chr3 36285400 36291100 High Signal Region +chr3 36847300 36853900 High Signal Region +chr3 39026800 39030900 High Signal Region +chr3 39183300 39189800 High Signal Region +chr3 40151300 40157700 High Signal Region +chr3 40347600 40352600 High Signal Region +chr3 40549300 40651700 High Signal Region +chr3 41871900 41887800 High Signal Region +chr3 41993500 41999500 High Signal Region +chr3 42170000 42187300 High Signal Region +chr3 42682100 42722800 High Signal Region +chr3 42820200 42827400 High Signal Region +chr3 43108100 43197200 High Signal Region +chr3 43466400 43492100 High Signal Region +chr3 43538900 43557700 High Signal Region +chr3 44185900 44191600 High Signal Region +chr3 44241200 
44260000 High Signal Region +chr3 44401500 44407500 High Signal Region +chr3 44559600 44565200 High Signal Region +chr3 44884400 44890700 High Signal Region +chr3 45579200 45591900 High Signal Region +chr3 45848500 45863400 Low Mappability +chr3 45986000 45990700 High Signal Region +chr3 46141000 46148200 High Signal Region +chr3 46338200 46340300 Low Mappability +chr3 46735000 46741900 High Signal Region +chr3 46795400 46805400 High Signal Region +chr3 46910900 46936200 High Signal Region +chr3 47592800 47598000 High Signal Region +chr3 47798300 47799600 High Signal Region +chr3 47966600 47968700 High Signal Region +chr3 48437800 48462000 High Signal Region +chr3 49443600 49482800 High Signal Region +chr3 49727200 49734400 High Signal Region +chr3 50464900 50474400 High Signal Region +chr3 50763700 50814900 High Signal Region +chr3 50957300 50963000 High Signal Region +chr3 51233600 51245400 Low Mappability +chr3 51616000 51623700 Low Mappability +chr3 51765300 51784900 High Signal Region +chr3 52230000 52233400 High Signal Region +chr3 53426900 53431000 High Signal Region +chr3 54849100 54874300 Low Mappability +chr3 56069700 56075200 High Signal Region +chr3 56210900 56215900 High Signal Region +chr3 56513600 56576700 High Signal Region +chr3 56903800 56943000 High Signal Region +chr3 57059400 57070200 High Signal Region +chr3 57349800 57379400 High Signal Region +chr3 58051100 58081600 Low Mappability +chr3 59370700 59412200 High Signal Region +chr3 59565300 59632700 High Signal Region +chr3 59684600 59689200 High Signal Region +chr3 59791800 59804200 Low Mappability +chr3 59887400 59889300 High Signal Region +chr3 59919200 59921100 High Signal Region +chr3 60044300 60046800 High Signal Region +chr3 60489700 60495200 Low Mappability +chr3 61150800 61177900 High Signal Region +chr3 61260700 61275000 Low Mappability +chr3 61495400 61499700 High Signal Region +chr3 61672300 61678300 High Signal Region +chr3 61707600 61726600 Low Mappability +chr3 61853900 61858900 
High Signal Region +chr3 62032400 62038600 High Signal Region +chr3 62108300 62160100 High Signal Region +chr3 62356900 62367700 High Signal Region +chr3 62543000 62549200 High Signal Region +chr3 62873000 62879300 High Signal Region +chr3 63515500 63530100 High Signal Region +chr3 63590100 63591500 High Signal Region +chr3 64171000 64172900 High Signal Region +chr3 64237900 64245700 High Signal Region +chr3 64453100 64512800 High Signal Region +chr3 64609600 64665300 Low Mappability +chr3 64697900 64730500 High Signal Region +chr3 67027900 67054100 High Signal Region +chr3 67262400 67264000 High Signal Region +chr3 67411100 67419400 High Signal Region +chr3 67747300 67752800 High Signal Region +chr3 67786800 67793600 High Signal Region +chr3 68114300 68119700 Low Mappability +chr3 68519400 68525100 High Signal Region +chr3 69228600 69230500 High Signal Region +chr3 69848400 69854900 High Signal Region +chr3 69944400 69949800 High Signal Region +chr3 71117300 71122800 High Signal Region +chr3 71369600 71447800 High Signal Region +chr3 72273600 72293700 High Signal Region +chr3 72698100 72704800 High Signal Region +chr3 73088300 73098500 High Signal Region +chr3 73733100 73738500 Low Mappability +chr3 74583300 74598400 Low Mappability +chr3 74865000 74881800 High Signal Region +chr3 75348300 75378700 Low Mappability +chr3 75409000 75424100 High Signal Region +chr3 76598800 76604700 High Signal Region +chr3 76886600 76892900 Low Mappability +chr3 77597400 77604300 Low Mappability +chr3 77667400 77711400 High Signal Region +chr3 77926800 77931400 High Signal Region +chr3 78281900 78283900 Low Mappability +chr3 79012700 79014900 High Signal Region +chr3 79046300 79052800 Low Mappability +chr3 79763800 79780000 High Signal Region +chr3 79959500 79965700 High Signal Region +chr3 80465400 80472000 High Signal Region +chr3 82283300 82288700 Low Mappability +chr3 82462100 82508600 Low Mappability +chr3 82589000 82616700 Low Mappability +chr3 82921400 82924800 High Signal 
Region +chr3 83123200 83125100 High Signal Region +chr3 83330900 83343400 High Signal Region +chr3 83845100 83867000 High Signal Region +chr3 84142200 84149700 Low Mappability +chr3 84359000 84366300 Low Mappability +chr3 85305200 85326800 Low Mappability +chr3 85622200 85629500 Low Mappability +chr3 87424200 87426100 High Signal Region +chr3 87469300 87474600 High Signal Region +chr3 88044000 88066500 High Signal Region +chr3 88666500 88673500 Low Mappability +chr3 88716700 88873000 Low Mappability +chr3 90761500 90810400 High Signal Region +chr3 90991100 90996800 Low Mappability +chr3 91856700 91898200 High Signal Region +chr3 92185400 92291300 High Signal Region +chr3 93059200 93107000 High Signal Region +chr3 93168500 93172800 High Signal Region +chr3 93203900 93229100 High Signal Region +chr3 93323700 93331700 Low Mappability +chr3 93860300 94093700 High Signal Region +chr3 94136200 94152300 High Signal Region +chr3 94658300 94665700 Low Mappability +chr3 94690000 94730800 High Signal Region +chr3 94757600 94765200 Low Mappability +chr3 96043600 96058900 High Signal Region +chr3 96196200 96288300 High Signal Region +chr3 96313200 96388900 Low Mappability +chr3 96446800 96463800 Low Mappability +chr3 96485600 96514300 High Signal Region +chr3 96840000 96863800 High Signal Region +chr3 97245200 97251500 High Signal Region +chr3 98396100 98411400 High Signal Region +chr3 98443100 98597600 Low Mappability +chr3 98709300 98778900 High Signal Region +chr3 98986000 99034100 High Signal Region +chr3 99406000 99434100 High Signal Region +chr3 99882900 99908100 High Signal Region +chr3 99980200 99982200 High Signal Region +chr3 100315500 100330900 High Signal Region +chr3 100484400 100486300 High Signal Region +chr3 102813400 102839300 High Signal Region +chr3 102983600 102989900 High Signal Region +chr3 103134600 103136000 High Signal Region +chr3 103427600 103447900 High Signal Region +chr3 103555000 103557000 Low Mappability +chr3 104116800 104123100 High Signal 
Region +chr3 104194200 104198800 High Signal Region +chr3 104588100 104595500 Low Mappability +chr3 105028200 105030500 High Signal Region +chr3 106118500 106311800 High Signal Region +chr3 106777900 106779700 High Signal Region +chr3 109258500 109277300 High Signal Region +chr3 109458000 109462700 High Signal Region +chr3 110319800 110325700 High Signal Region +chr3 110416300 110421800 High Signal Region +chr3 111256100 111268600 High Signal Region +chr3 111578400 111605200 Low Mappability +chr3 111794100 111799000 Low Mappability +chr3 111830400 111836300 High Signal Region +chr3 112274500 112287300 High Signal Region +chr3 112315500 112337400 High Signal Region +chr3 112561900 112586900 High Signal Region +chr3 112863500 112869300 High Signal Region +chr3 112913800 112918000 High Signal Region +chr3 113186300 113189100 High Signal Region +chr3 113250900 113527800 High Signal Region +chr3 113709900 113719000 High Signal Region +chr3 113742300 113748300 High Signal Region +chr3 114272600 114279400 High Signal Region +chr3 114472100 114499300 Low Mappability +chr3 114587900 114595900 High Signal Region +chr3 114976700 114982800 High Signal Region +chr3 115020700 115027100 Low Mappability +chr3 115367700 115372200 Low Mappability +chr3 115905900 115922900 High Signal Region +chr3 116817400 116843900 Low Mappability +chr3 117267200 117292400 High Signal Region +chr3 117379100 117386400 Low Mappability +chr3 118055100 118060000 High Signal Region +chr3 119211800 119212900 High Signal Region +chr3 120735000 120742200 High Signal Region +chr3 120825200 120851500 High Signal Region +chr3 121248900 121250900 High Signal Region +chr3 121694400 121696100 High Signal Region +chr3 122294000 122329300 High Signal Region +chr3 122654100 122657300 High Signal Region +chr3 122804300 122806600 High Signal Region +chr3 123471600 123476200 Low Mappability +chr3 123729200 123743200 High Signal Region +chr3 123924800 123957700 High Signal Region +chr3 124282300 124288300 High Signal 
Region +chr3 125902800 125908900 High Signal Region +chr3 126127300 126136000 Low Mappability +chr3 126905300 126910600 High Signal Region +chr3 127522400 127523700 Low Mappability +chr3 127771600 127780600 High Signal Region +chr3 128203600 128211000 High Signal Region +chr3 128440100 128446100 High Signal Region +chr3 128935800 128937700 High Signal Region +chr3 129020900 129032100 High Signal Region +chr3 129393000 129394900 High Signal Region +chr3 133123600 133130800 Low Mappability +chr3 133566400 133568700 High Signal Region +chr3 133636000 133642800 High Signal Region +chr3 133837100 133859400 High Signal Region +chr3 134007400 134026700 Low Mappability +chr3 134685700 134690700 High Signal Region +chr3 134862500 134888400 High Signal Region +chr3 135148300 135163000 High Signal Region +chr3 136173700 136181000 Low Mappability +chr3 137407500 137413500 High Signal Region +chr3 137469200 137470300 High Signal Region +chr3 138200900 138207900 High Signal Region +chr3 139365700 139417700 High Signal Region +chr3 140376900 140384200 Low Mappability +chr3 142190700 142192800 High Signal Region +chr3 142513000 142517200 High Signal Region +chr3 143840800 143847000 High Signal Region +chr3 144030200 144036300 High Signal Region +chr3 144655600 144660600 High Signal Region +chr3 145040500 145061800 High Signal Region +chr3 145109000 145114400 Low Mappability +chr3 145188100 145190400 High Signal Region +chr3 145301600 145303100 High Signal Region +chr3 146073300 146102400 High Signal Region +chr3 146358800 146362600 High Signal Region +chr3 146476200 146479000 High Signal Region +chr3 146918900 146924200 High Signal Region +chr3 147107400 147113000 High Signal Region +chr3 147769500 147781800 High Signal Region +chr3 147874500 147877600 High Signal Region +chr3 148704800 148716900 High Signal Region +chr3 148750100 148757400 Low Mappability +chr3 148797800 148799700 High Signal Region +chr3 149051500 149053800 High Signal Region +chr3 150120900 150123800 High 
Signal Region +chr3 150336900 150341400 Low Mappability +chr3 151028900 151031200 High Signal Region +chr3 151657500 151679800 High Signal Region +chr3 152313800 152332200 High Signal Region +chr3 152700700 152702700 High Signal Region +chr3 153090100 153109400 High Signal Region +chr3 154640300 154646700 High Signal Region +chr3 154931700 154932800 High Signal Region +chr3 155515800 155517600 High Signal Region +chr3 155765900 155771900 High Signal Region +chr3 156256900 156262800 Low Mappability +chr3 156285600 156322500 High Signal Region +chr3 156799400 156804900 Low Mappability +chr3 157646900 157678300 High Signal Region +chr3 157946200 157969400 High Signal Region +chr3 158095300 158119200 High Signal Region +chr3 158698600 158756800 High Signal Region +chr3 159165900 159179700 High Signal Region +chr3 159225800 159239300 Low Mappability +chr3 159478300 159479700 High Signal Region +chr3 159748800 159826500 High Signal Region +chr3 159938500 160039600 High Signal Region +chr4 0 3114800 High Signal Region +chr4 3139700 3333100 High Signal Region +chr4 18476200 18498400 High Signal Region +chr4 20168700 20213200 High Signal Region +chr4 20804100 20808300 High Signal Region +chr4 20982300 20983700 High Signal Region +chr4 21281300 21287700 High Signal Region +chr4 22535900 22542300 High Signal Region +chr4 24193400 24201100 High Signal Region +chr4 25471300 25473200 High Signal Region +chr4 28175900 28177900 High Signal Region +chr4 31353200 31355200 High Signal Region +chr4 34934800 34936700 High Signal Region +chr4 35042700 35048900 High Signal Region +chr4 38305900 38322000 High Signal Region +chr4 57979700 57981800 High Signal Region +chr4 64454600 64499000 High Signal Region +chr4 68427300 68447900 High Signal Region +chr4 70367200 70379200 High Signal Region +chr4 73196300 73209300 High Signal Region +chr4 80001800 80004900 High Signal Region +chr4 83536900 83541900 High Signal Region +chr4 90725600 90727500 High Signal Region +chr4 92230800 92236500 High 
Signal Region +chr4 93843500 93853100 High Signal Region +chr4 99380500 99382400 High Signal Region +chr4 110469700 110505300 High Signal Region +chr4 118546100 118549600 High Signal Region +chr4 131222500 131229300 High Signal Region +chr4 145404200 147840400 High Signal Region +chr4 149809200 149811700 High Signal Region +chr4 153152100 153154100 High Signal Region +chr4 156256000 156508100 High Signal Region +chr5 3175400 3186000 High Signal Region +chr5 12489500 12490600 High Signal Region +chr5 14899000 15726800 High Signal Region +chr5 17466700 17481500 High Signal Region +chr5 36629400 36662500 High Signal Region +chr5 46434800 46436700 High Signal Region +chr5 49722200 49755700 High Signal Region +chr5 60041900 60043900 Low Mappability +chr5 80499900 80501900 High Signal Region +chr5 93288700 93351800 High Signal Region +chr5 106126300 106177800 High Signal Region +chr5 110063700 110075500 High Signal Region +chr5 114921500 114923500 High Signal Region +chr5 137148800 137153800 High Signal Region +chr5 146260000 146262300 High Signal Region +chr5 151733600 151834600 High Signal Region +chr6 0 3255700 High Signal Region +chr6 3280700 3340300 High Signal Region +chr6 4922900 4925100 High Signal Region +chr6 5608000 5657900 High Signal Region +chr6 5704400 5706800 High Signal Region +chr6 6400000 6442800 High Signal Region +chr6 6700000 6727600 High Signal Region +chr6 8729200 8731100 High Signal Region +chr6 8906700 8932300 High Signal Region +chr6 9519200 9529100 High Signal Region +chr6 9580600 9610100 High Signal Region +chr6 9646900 9663400 High Signal Region +chr6 9720400 9733100 High Signal Region +chr6 9889000 9891100 High Signal Region +chr6 10228400 10269900 High Signal Region +chr6 10559100 10588400 High Signal Region +chr6 10623400 10633900 High Signal Region +chr6 11251100 11256800 High Signal Region +chr6 11406400 11457900 High Signal Region +chr6 11813900 11897100 High Signal Region +chr6 12671100 12680300 High Signal Region +chr6 13390500 
13394900 High Signal Region +chr6 13700500 13743100 High Signal Region +chr6 14085000 14092300 Low Mappability +chr6 14793800 14805500 High Signal Region +chr6 14929200 14935100 High Signal Region +chr6 16299700 16310100 High Signal Region +chr6 16922600 16924800 High Signal Region +chr6 17004600 17042000 High Signal Region +chr6 17391200 17397900 High Signal Region +chr6 17981700 17983400 High Signal Region +chr6 18264800 18267200 High Signal Region +chr6 18836700 18848600 High Signal Region +chr6 19068900 19075400 High Signal Region +chr6 20113900 20143500 High Signal Region +chr6 21452400 21458100 High Signal Region +chr6 21801300 21803200 High Signal Region +chr6 21841300 21845300 High Signal Region +chr6 21873300 21876800 High Signal Region +chr6 22107700 22131800 High Signal Region +chr6 22479600 22483900 High Signal Region +chr6 22516700 22534300 High Signal Region +chr6 25505600 25566400 Low Mappability +chr6 26049500 26072100 High Signal Region +chr6 26247700 26278000 High Signal Region +chr6 26834800 26840700 High Signal Region +chr6 26988500 26992000 High Signal Region +chr6 27199000 27228400 High Signal Region +chr6 28924100 28929500 Low Mappability +chr6 29746800 29750000 High Signal Region +chr6 29974300 29978200 High Signal Region +chr6 30752800 30806400 High Signal Region +chr6 30929300 30936100 Low Mappability +chr6 31594900 31597200 High Signal Region +chr6 32740700 32746800 High Signal Region +chr6 32867600 32869000 High Signal Region +chr6 33490300 33495000 High Signal Region +chr6 33650500 33665400 High Signal Region +chr6 33743900 33749000 High Signal Region +chr6 36224300 36230500 High Signal Region +chr6 40535500 40559800 Low Mappability +chr6 40716600 40723700 High Signal Region +chr6 42122800 42174200 High Signal Region +chr6 42492600 42516600 High Signal Region +chr6 42617600 42620900 High Signal Region +chr6 44265200 44270800 High Signal Region +chr6 44497000 44513300 High Signal Region +chr6 44785200 44794100 High Signal Region +chr6 
44836300 44837500 High Signal Region +chr6 46381300 46402000 High Signal Region +chr6 46678600 46685300 High Signal Region +chr6 47639000 47779200 High Signal Region +chr6 48120300 48122300 High Signal Region +chr6 48149300 48172900 High Signal Region +chr6 48231500 48292600 High Signal Region +chr6 48320300 48347000 High Signal Region +chr6 49235500 49237500 High Signal Region +chr6 50601400 50636700 Low Mappability +chr6 51046500 51048400 High Signal Region +chr6 53464100 53487500 Low Mappability +chr6 54976500 54993700 High Signal Region +chr6 56232700 56257500 High Signal Region +chr6 56455900 56465300 High Signal Region +chr6 57425200 57455700 High Signal Region +chr6 57588900 57634500 High Signal Region +chr6 57919500 57925700 High Signal Region +chr6 58068500 58073500 High Signal Region +chr6 58588700 58612800 High Signal Region +chr6 59123600 59130100 High Signal Region +chr6 59199600 59230600 High Signal Region +chr6 59584300 59598000 High Signal Region +chr6 59676000 59698200 High Signal Region +chr6 60622400 60625600 High Signal Region +chr6 60668000 60688200 High Signal Region +chr6 61023100 61029400 High Signal Region +chr6 61088400 61094600 High Signal Region +chr6 62525500 62527300 High Signal Region +chr6 64331600 64338900 Low Mappability +chr6 64778500 64812500 High Signal Region +chr6 64882100 64930500 High Signal Region +chr6 65100600 65106700 High Signal Region +chr6 65184300 65261600 High Signal Region +chr6 66070200 66095900 High Signal Region +chr6 66815600 66831600 High Signal Region +chr6 67311500 67312900 High Signal Region +chr6 67494800 67522100 Low Mappability +chr6 67576400 67630800 High Signal Region +chr6 67658300 67710900 High Signal Region +chr6 68011000 68012900 High Signal Region +chr6 68221900 68252400 Low Mappability +chr6 68641400 68661300 High Signal Region +chr6 68971900 68996400 High Signal Region +chr6 69017600 69035700 High Signal Region +chr6 70000300 70053000 High Signal Region +chr6 70187800 70213700 High Signal Region 
+chr6 70620700 70648600 High Signal Region +chr6 73105700 73113400 High Signal Region +chr6 73502200 73521000 High Signal Region +chr6 73671400 73672600 High Signal Region +chr6 74191700 74194400 High Signal Region +chr6 74365900 74386400 High Signal Region +chr6 74700100 74705300 High Signal Region +chr6 75054000 75083000 High Signal Region +chr6 76645400 76649100 High Signal Region +chr6 76847200 76854100 High Signal Region +chr6 78352900 78359500 High Signal Region +chr6 78456200 78491700 Low Mappability +chr6 78637400 78639700 High Signal Region +chr6 78716700 78722400 High Signal Region +chr6 79627500 79635200 High Signal Region +chr6 79817300 79819200 High Signal Region +chr6 79898900 79922800 Low Mappability +chr6 79959800 79967500 Low Mappability +chr6 81012200 81036700 High Signal Region +chr6 81829400 81875000 High Signal Region +chr6 81997000 82011600 High Signal Region +chr6 82213400 82218800 High Signal Region +chr6 84662700 84688200 High Signal Region +chr6 84712600 84720200 High Signal Region +chr6 89723500 89735600 High Signal Region +chr6 91768300 91770200 High Signal Region +chr6 92321600 92328300 High Signal Region +chr6 94988600 94990700 Low Mappability +chr6 95030100 95043800 Low Mappability +chr6 95475600 95479900 High Signal Region +chr6 95980800 95987100 High Signal Region +chr6 96877800 96896100 High Signal Region +chr6 97356800 97379400 High Signal Region +chr6 101571200 101621400 High Signal Region +chr6 102379600 102384100 High Signal Region +chr6 102483000 102505700 High Signal Region +chr6 102767600 102791400 High Signal Region +chr6 103313700 103315600 High Signal Region +chr6 103647900 103650200 High Signal Region +chr6 103750700 103752000 High Signal Region +chr6 105194700 105199600 High Signal Region +chr6 105253400 105257600 Low Mappability +chr6 105306000 105337600 High Signal Region +chr6 107141500 107146300 High Signal Region +chr6 107284300 107299800 High Signal Region +chr6 107860500 107920500 High Signal Region +chr6 
109498200 109506200 High Signal Region +chr6 109641800 109648100 High Signal Region +chr6 109984000 110013000 High Signal Region +chr6 114340600 114343000 High Signal Region +chr6 114492200 114643400 High Signal Region +chr6 116021200 116043900 High Signal Region +chr6 116238700 116252600 High Signal Region +chr6 116566200 116593800 High Signal Region +chr6 117087400 117094300 High Signal Region +chr6 118209000 118234000 High Signal Region +chr6 119419600 119431100 High Signal Region +chr6 121690100 121703800 High Signal Region +chr6 122614200 122616600 High Signal Region +chr6 123132100 123179400 High Signal Region +chr6 123204800 123242900 High Signal Region +chr6 126135200 126137300 Low Mappability +chr6 128680200 128693700 High Signal Region +chr6 128861200 128865300 High Signal Region +chr6 129857800 129863300 High Signal Region +chr6 129935700 129948400 High Signal Region +chr6 131088300 131114900 High Signal Region +chr6 131208300 131252100 High Signal Region +chr6 131495900 131505900 High Signal Region +chr6 132497200 132523000 Low Mappability +chr6 132597000 132598700 High Signal Region +chr6 132635400 132642000 High Signal Region +chr6 133169000 133170900 High Signal Region +chr6 133891500 133899800 High Signal Region +chr6 134689500 134692700 High Signal Region +chr6 138216100 138221900 High Signal Region +chr6 138647300 138649100 High Signal Region +chr6 138685400 138700700 High Signal Region +chr6 142060700 142079300 High Signal Region +chr6 142396700 142400200 Low Mappability +chr6 142433400 142439400 High Signal Region +chr6 143014400 143016300 High Signal Region +chr6 143466500 143481400 High Signal Region +chr6 143883500 143886900 High Signal Region +chr6 144655200 144670000 High Signal Region +chr6 145784700 145787000 High Signal Region +chr6 145931800 145933900 Low Mappability +chr6 146018900 146080500 High Signal Region +chr6 147077200 147079900 High Signal Region +chr6 147459800 147465000 Low Mappability +chr6 147549600 147555000 Low 
Mappability +chr6 147881900 147908400 High Signal Region +chr6 148013100 148038400 High Signal Region +chr6 148121800 148124500 High Signal Region +chr6 148635700 148640300 Low Mappability +chr6 148662900 148665000 Low Mappability +chr6 149585500 149736500 High Signal Region +chr7 4558200 4594300 High Signal Region +chr7 4648600 4651500 High Signal Region +chr7 5153200 5244900 High Signal Region +chr7 5588700 5591600 High Signal Region +chr7 6050500 6056000 High Signal Region +chr7 6249400 6251400 High Signal Region +chr7 6590800 6597400 High Signal Region +chr7 7209500 7231000 High Signal Region +chr7 7273500 7327400 High Signal Region +chr7 7527500 7533900 High Signal Region +chr7 7556800 8278400 High Signal Region +chr7 8490800 9968800 High Signal Region +chr7 9992100 9998900 High Signal Region +chr7 10314900 10320900 High Signal Region +chr7 11097700 11123700 High Signal Region +chr7 11271100 11438600 High Signal Region +chr7 12009500 12084600 High Signal Region +chr7 12379600 12385400 High Signal Region +chr7 12526600 12548100 High Signal Region +chr7 13112300 13118100 High Signal Region +chr7 13591200 13620200 High Signal Region +chr7 14051300 14055900 High Signal Region +chr7 14767700 14823800 High Signal Region +chr7 14930100 15023000 High Signal Region +chr7 15128800 15623000 High Signal Region +chr7 16661400 16667800 High Signal Region +chr7 17112200 17123900 High Signal Region +chr7 17215800 17323400 High Signal Region +chr7 17800000 17806700 High Signal Region +chr7 17829700 17862600 High Signal Region +chr7 18487100 18493200 High Signal Region +chr7 19032600 19034500 High Signal Region +chr7 20799700 21103900 High Signal Region +chr7 21135700 23286800 High Signal Region +chr7 23494700 23503600 High Signal Region +chr7 24026200 24031700 High Signal Region +chr7 24103800 24108200 High Signal Region +chr7 24729400 24731300 High Signal Region +chr7 26022700 26066900 High Signal Region +chr7 26779000 26780900 High Signal Region +chr7 27082300 27098300 High 
Signal Region +chr7 27712800 27732500 High Signal Region +chr7 31365500 31387000 High Signal Region +chr7 31818200 31876700 High Signal Region +chr7 31934500 32043100 High Signal Region +chr7 32215700 32235200 High Signal Region +chr7 32629300 33098700 High Signal Region +chr7 33124200 33198000 High Signal Region +chr7 33949500 34004800 High Signal Region +chr7 34957200 34959100 High Signal Region +chr7 38396600 38787200 High Signal Region +chr7 38839800 39181000 High Signal Region +chr7 39227600 39404100 High Signal Region +chr7 39874600 39875900 High Signal Region +chr7 41791900 41851900 High Signal Region +chr7 43123800 43220300 High Signal Region +chr7 44737800 44739900 High Signal Region +chr7 47175100 47188600 High Signal Region +chr7 47414400 47519700 High Signal Region +chr7 48102600 48135800 High Signal Region +chr7 50940400 50986800 High Signal Region +chr7 51329800 51335900 High Signal Region +chr7 51800300 51812600 High Signal Region +chr7 51909200 51911200 High Signal Region +chr7 52095700 52104400 High Signal Region +chr7 52283300 52288900 High Signal Region +chr7 53677100 53683100 High Signal Region +chr7 53977800 54027400 High Signal Region +chr7 54336000 54351800 High Signal Region +chr7 54808900 54810100 High Signal Region +chr7 54923000 54971200 High Signal Region +chr7 55011500 55016500 High Signal Region +chr7 55080000 55086300 High Signal Region +chr7 55115400 55141000 High Signal Region +chr7 55657400 55667100 High Signal Region +chr7 56062300 56081700 High Signal Region +chr7 56160100 56163400 Low Mappability +chr7 56660300 56693600 High Signal Region +chr7 57367200 57374700 High Signal Region +chr7 58040300 58077100 High Signal Region +chr7 58161700 58177900 High Signal Region +chr7 59673100 59910900 High Signal Region +chr7 60209400 60215600 High Signal Region +chr7 60676300 60682800 High Signal Region +chr7 61320100 61395400 High Signal Region +chr7 62135200 62137500 High Signal Region +chr7 62651400 62693400 High Signal Region +chr7 
63272500 63287100 High Signal Region +chr7 63431300 63432400 High Signal Region +chr7 63803700 63810800 High Signal Region +chr7 63908200 63910100 High Signal Region +chr7 64072600 64134600 High Signal Region +chr7 64465300 64496400 High Signal Region +chr7 64601000 64617900 High Signal Region +chr7 65187500 65198300 High Signal Region +chr7 68534700 68537900 High Signal Region +chr7 68775900 68778100 High Signal Region +chr7 69086500 69102900 High Signal Region +chr7 69785300 69792200 High Signal Region +chr7 70757900 70765000 High Signal Region +chr7 71971100 71984500 High Signal Region +chr7 72317400 72337900 High Signal Region +chr7 72630000 72679900 High Signal Region +chr7 73212000 73218800 High Signal Region +chr7 73671700 73680000 High Signal Region +chr7 75003200 75007700 High Signal Region +chr7 76067800 76079300 High Signal Region +chr7 76556000 76573000 High Signal Region +chr7 76703900 76708400 High Signal Region +chr7 77520600 77526000 High Signal Region +chr7 78416900 78422400 High Signal Region +chr7 80708100 80730100 Low Mappability +chr7 80787500 80813800 High Signal Region +chr7 81756100 81760500 High Signal Region +chr7 82770300 82772800 High Signal Region +chr7 85017700 85023600 High Signal Region +chr7 85757200 85768800 High Signal Region +chr7 86118700 86125800 High Signal Region +chr7 86497400 86503500 High Signal Region +chr7 86532600 86534000 High Signal Region +chr7 86805600 86807500 High Signal Region +chr7 87989300 88000600 High Signal Region +chr7 89683300 89704600 High Signal Region +chr7 90087300 90089400 High Signal Region +chr7 90441000 90442900 High Signal Region +chr7 91741500 91747500 High Signal Region +chr7 93259400 93278100 High Signal Region +chr7 93699600 93717500 High Signal Region +chr7 93744000 93766100 High Signal Region +chr7 93969600 93973700 High Signal Region +chr7 94293000 94299300 High Signal Region +chr7 94822500 94848800 High Signal Region +chr7 95177200 95193600 High Signal Region +chr7 95527400 95533200 High 
Signal Region +chr7 97795000 97797300 High Signal Region +chr7 103100800 103115000 High Signal Region +chr7 103195500 103202100 High Signal Region +chr7 103483000 103487500 High Signal Region +chr7 104097400 104126600 High Signal Region +chr7 104476800 104477900 High Signal Region +chr7 104770000 104801200 High Signal Region +chr7 105830300 106325300 High Signal Region +chr7 106979000 106984900 High Signal Region +chr7 107245200 107271400 High Signal Region +chr7 108780600 108789800 High Signal Region +chr7 110058500 110061600 High Signal Region +chr7 111228400 111230600 High Signal Region +chr7 112636600 112639800 High Signal Region +chr7 116432200 116453400 High Signal Region +chr7 119739900 119742100 High Signal Region +chr7 119795700 119797700 High Signal Region +chr7 119998800 120015100 High Signal Region +chr7 124522300 124528300 High Signal Region +chr7 125009800 125016600 High Signal Region +chr7 128171000 128189300 High Signal Region +chr7 130054200 130055700 High Signal Region +chr7 130591400 130596900 High Signal Region +chr7 130833500 130835600 High Signal Region +chr7 134100500 134107200 High Signal Region +chr7 134329200 134335200 High Signal Region +chr7 135006900 135008800 High Signal Region +chr7 135337800 135340900 High Signal Region +chr7 138590500 138594500 High Signal Region +chr7 139447400 139448900 High Signal Region +chr7 140288200 140307300 High Signal Region +chr7 140551100 140558800 High Signal Region +chr7 140580500 140585700 High Signal Region +chr7 141637000 141640700 High Signal Region +chr7 142828900 142845000 High Signal Region +chr7 145340000 145441400 High Signal Region +chr8 3753500 3779100 High Signal Region +chr8 14305800 14308200 High Signal Region +chr8 15508900 15521000 High Signal Region +chr8 19671800 19937800 High Signal Region +chr8 19960800 20868000 High Signal Region +chr8 20945500 20963700 High Signal Region +chr8 23085600 23096700 High Signal Region +chr8 35134000 35135900 High Signal Region +chr8 39132400 39157700 
High Signal Region +chr8 55111200 55397300 High Signal Region +chr8 69416700 69597900 High Signal Region +chr8 71432100 71434100 High Signal Region +chr8 71796100 71863300 High Signal Region +chr8 73318700 73320700 High Signal Region +chr8 83755800 83757900 High Signal Region +chr8 114436000 114437900 High Signal Region +chr8 123537300 123638300 High Signal Region +chr8 125778100 125780100 High Signal Region +chr8 129272900 129401200 High Signal Region +chr9 0 3053100 High Signal Region +chr9 3240200 3259800 High Signal Region +chr9 3302000 3336000 High Signal Region +chr9 3461000 3466600 Low Mappability +chr9 3627400 3699700 Low Mappability +chr9 3802100 3806700 High Signal Region +chr9 3881100 3887600 High Signal Region +chr9 4238700 4245700 Low Mappability +chr9 4375700 4406800 High Signal Region +chr9 5248000 5254100 High Signal Region +chr9 5276200 5284600 Low Mappability +chr9 6431500 6467200 High Signal Region +chr9 6742900 6806200 Low Mappability +chr9 7294600 7300700 High Signal Region +chr9 7370900 7412600 Low Mappability +chr9 7520900 7525900 High Signal Region +chr9 8029400 8067100 Low Mappability +chr9 8275900 8292300 Low Mappability +chr9 8447200 8483700 High Signal Region +chr9 8628200 8633700 Low Mappability +chr9 8859900 8865500 High Signal Region +chr9 9598800 9626700 High Signal Region +chr9 9846900 9891900 Low Mappability +chr9 10193200 10198800 Low Mappability +chr9 10701300 10707400 High Signal Region +chr9 10964200 10970600 High Signal Region +chr9 11341900 11345100 High Signal Region +chr9 11722300 11747100 High Signal Region +chr9 11792800 11798400 Low Mappability +chr9 11821400 11845400 High Signal Region +chr9 12282000 12287500 High Signal Region +chr9 12364900 12379600 High Signal Region +chr9 12469100 12472900 Low Mappability +chr9 12768200 12773800 High Signal Region +chr9 12840100 12851100 High Signal Region +chr9 12917600 12922300 High Signal Region +chr9 12998400 13045600 Low Mappability +chr9 13324200 13426100 High Signal Region 
+chr9 13533500 13535700 High Signal Region +chr9 13994600 13996700 High Signal Region +chr9 14410500 14429300 Low Mappability +chr9 15123900 15136900 High Signal Region +chr9 16607400 16691900 Low Mappability +chr9 16833700 16861000 High Signal Region +chr9 16939400 16950500 Low Mappability +chr9 17059000 17088000 High Signal Region +chr9 17197900 17207600 High Signal Region +chr9 17261400 17263400 Low Mappability +chr9 17387200 17406200 High Signal Region +chr9 17525800 17527700 High Signal Region +chr9 17632000 17636100 High Signal Region +chr9 17916200 17919600 High Signal Region +chr9 18010000 18015600 High Signal Region +chr9 18117000 18162200 Low Mappability +chr9 18235100 18270100 High Signal Region +chr9 18893800 18900100 High Signal Region +chr9 18980400 18994100 High Signal Region +chr9 19268700 19294700 High Signal Region +chr9 19595400 19638400 High Signal Region +chr9 19720500 19725500 Low Mappability +chr9 19901400 19906100 High Signal Region +chr9 20183600 20196700 Low Mappability +chr9 20322100 20407900 High Signal Region +chr9 21879200 21928200 High Signal Region +chr9 22116600 22191600 High Signal Region +chr9 22699500 22731700 High Signal Region +chr9 22892700 22926500 Low Mappability +chr9 22947900 22956900 High Signal Region +chr9 23508700 23526900 High Signal Region +chr9 24523300 24576000 High Signal Region +chr9 25596700 25602700 High Signal Region +chr9 25842900 25863600 High Signal Region +chr9 26096100 26103500 Low Mappability +chr9 26700800 26708000 High Signal Region +chr9 26904600 26911000 High Signal Region +chr9 27212200 27232300 High Signal Region +chr9 27974400 27981700 High Signal Region +chr9 29739800 29741800 Low Mappability +chr9 30604400 30606300 Low Mappability +chr9 30641800 30696800 Low Mappability +chr9 30929800 30931100 High Signal Region +chr9 32059200 32083600 Low Mappability +chr9 32353900 32356500 High Signal Region +chr9 32839200 32846600 Low Mappability +chr9 32888700 32896000 Low Mappability +chr9 32953000 32958100 
Low Mappability +chr9 33127100 33161100 Low Mappability +chr9 33392400 33402700 High Signal Region +chr9 33949500 33961900 Low Mappability +chr9 35071200 35091800 High Signal Region +chr9 35304300 35306500 High Signal Region +chr9 36235800 36241900 High Signal Region +chr9 36555000 36569100 High Signal Region +chr9 37331400 37349500 Low Mappability +chr9 37441700 37448100 High Signal Region +chr9 39330900 39359100 High Signal Region +chr9 39444100 39449600 High Signal Region +chr9 39835400 39899000 Low Mappability +chr9 44214200 44235400 Low Mappability +chr9 44305700 44408400 Low Mappability +chr9 47957400 47959300 High Signal Region +chr9 50082000 50088400 High Signal Region +chr9 51667400 51673700 High Signal Region +chr9 52601800 52617200 High Signal Region +chr9 52749000 52756100 High Signal Region +chr9 53089800 53107000 High Signal Region +chr9 53804100 53805400 High Signal Region +chr9 54916200 54928900 High Signal Region +chr9 55070600 55078000 Low Mappability +chr9 55150300 55152300 High Signal Region +chr9 55936900 55972500 High Signal Region +chr9 56222700 56224800 High Signal Region +chr9 56259500 56284300 High Signal Region +chr9 56991700 56993700 Low Mappability +chr9 57408000 57434800 High Signal Region +chr9 58766500 58785800 High Signal Region +chr9 59046200 59052700 Low Mappability +chr9 59103800 59125000 High Signal Region +chr9 60538500 60551200 High Signal Region +chr9 60726100 60733500 High Signal Region +chr9 61721500 61723400 High Signal Region +chr9 62811600 62868300 Low Mappability +chr9 64236700 64255000 Low Mappability +chr9 64410400 64417700 Low Mappability +chr9 65292600 65314200 High Signal Region +chr9 65867400 65909400 High Signal Region +chr9 67198600 67205000 Low Mappability +chr9 68451200 68461200 High Signal Region +chr9 68527100 68534600 High Signal Region +chr9 71080600 71120800 Low Mappability +chr9 71421100 71434600 High Signal Region +chr9 72895800 72900800 Low Mappability +chr9 72957900 72985700 Low Mappability +chr9 
73285500 73311300 High Signal Region +chr9 73396800 73412500 Low Mappability +chr9 73861400 73863500 Low Mappability +chr9 73935600 73946700 High Signal Region +chr9 74615600 74641300 Low Mappability +chr9 74664800 74690900 High Signal Region +chr9 74768600 74774600 High Signal Region +chr9 75709200 75736000 Low Mappability +chr9 77079900 77082800 High Signal Region +chr9 77152800 77158800 High Signal Region +chr9 77972400 77974300 High Signal Region +chr9 78175200 78182700 Low Mappability +chr9 78230500 78296900 High Signal Region +chr9 78554700 78589200 Low Mappability +chr9 78755200 78757800 High Signal Region +chr9 78819200 78830500 Low Mappability +chr9 80234500 80235700 High Signal Region +chr9 80660700 80665600 High Signal Region +chr9 81251500 81303200 High Signal Region +chr9 81614000 81620700 High Signal Region +chr9 81906400 81937200 High Signal Region +chr9 83278800 83288100 High Signal Region +chr9 83558300 83560200 High Signal Region +chr9 83935500 83950000 High Signal Region +chr9 83992400 83998900 High Signal Region +chr9 84211900 84226800 High Signal Region +chr9 85898900 85918900 High Signal Region +chr9 86062600 86070000 Low Mappability +chr9 86120100 86137500 High Signal Region +chr9 86458200 86463100 High Signal Region +chr9 87098700 87112200 High Signal Region +chr9 87481400 87500900 High Signal Region +chr9 87576700 87594000 High Signal Region +chr9 87945600 87952400 High Signal Region +chr9 88011000 88013900 High Signal Region +chr9 88592100 88829800 High Signal Region +chr9 89031300 89075400 Low Mappability +chr9 89321400 89361800 High Signal Region +chr9 90147100 90149100 High Signal Region +chr9 90285200 90395300 High Signal Region +chr9 90455400 90456800 High Signal Region +chr9 90808100 90821900 Low Mappability +chr9 90857200 90876300 Low Mappability +chr9 91222100 91268200 High Signal Region +chr9 91598800 91647400 High Signal Region +chr9 92032700 92035300 High Signal Region +chr9 92075300 92113200 High Signal Region +chr9 92239700 
92242900 High Signal Region +chr9 92624800 92654500 High Signal Region +chr9 93013300 93035300 High Signal Region +chr9 93286500 93296500 High Signal Region +chr9 93360800 93442100 Low Mappability +chr9 93618000 93668500 Low Mappability +chr9 94821700 94828100 Low Mappability +chr9 95245800 95299600 High Signal Region +chr9 95425000 95426900 High Signal Region +chr9 95829400 95831300 High Signal Region +chr9 96104900 96111400 Low Mappability +chr9 96852000 96854100 High Signal Region +chr9 98343300 98345700 Low Mappability +chr9 98451100 98458500 Low Mappability +chr9 98747700 98771800 Low Mappability +chr9 99266600 99273100 Low Mappability +chr9 99735800 99763300 High Signal Region +chr9 99922800 99937600 High Signal Region +chr9 100073800 100080700 High Signal Region +chr9 100516900 100519200 High Signal Region +chr9 100920400 100922300 High Signal Region +chr9 101085500 101110600 High Signal Region +chr9 101292500 101326600 Low Mappability +chr9 102277400 102283800 Low Mappability +chr9 102764700 102766800 Low Mappability +chr9 102812800 102815000 High Signal Region +chr9 102956300 102970000 Low Mappability +chr9 103296200 103305600 High Signal Region +chr9 103352800 103367100 Low Mappability +chr9 103988500 103990400 High Signal Region +chr9 104524500 104525700 High Signal Region +chr9 104848800 104850600 High Signal Region +chr9 105086200 105119300 High Signal Region +chr9 105818400 105820400 High Signal Region +chr9 107207900 107219900 High Signal Region +chr9 109036600 109083500 High Signal Region +chr9 109245000 109252200 High Signal Region +chr9 109272900 109374100 High Signal Region +chr9 110280300 110306700 High Signal Region +chr9 110443100 110455100 High Signal Region +chr9 110970300 110976000 High Signal Region +chr9 111661900 111668700 High Signal Region +chr9 112330100 112336900 High Signal Region +chr9 112956300 112990600 High Signal Region +chr9 113260500 113262400 High Signal Region +chr9 113535400 113541300 High Signal Region +chr9 114101400 
114149500 Low Mappability +chr9 114172400 114322200 High Signal Region +chr9 114970100 114974700 Low Mappability +chr9 115077900 115085200 Low Mappability +chr9 115349900 115351800 High Signal Region +chr9 115496100 115498100 Low Mappability +chr9 116981500 116988600 High Signal Region +chr9 118088300 118151400 High Signal Region +chr9 118674000 118675900 High Signal Region +chr9 119861200 119895000 Low Mappability +chr9 120265300 120288700 High Signal Region +chr9 120633900 120641200 Low Mappability +chr9 121024600 121042700 Low Mappability +chr9 121178300 121184500 High Signal Region +chr9 121220100 121247600 High Signal Region +chr9 121313700 121385800 Low Mappability +chr9 121406300 121418400 Low Mappability +chr9 122161300 122163200 High Signal Region +chr9 122277700 122334500 Low Mappability +chr9 122401500 122441900 Low Mappability +chr9 122660600 122667200 Low Mappability +chr9 122703400 122730400 Low Mappability +chr9 122903900 122906600 High Signal Region +chr9 123190700 123197500 Low Mappability +chr9 123460900 123463100 High Signal Region +chr9 123742600 123753500 Low Mappability +chr9 123851700 123929500 High Signal Region +chr9 123966100 124009300 High Signal Region +chr9 124161300 124282600 High Signal Region +chr9 124494100 124595100 High Signal Region +chrX 3286700 4493800 High Signal Region +chrX 4524500 5370300 High Signal Region +chrX 8346400 8348200 High Signal Region +chrX 8550300 8557800 High Signal Region +chrX 8818900 8824300 High Signal Region +chrX 9345800 9395300 High Signal Region +chrX 9500200 9595700 High Signal Region +chrX 14739100 14741000 High Signal Region +chrX 21466500 21472700 High Signal Region +chrX 21846900 21896100 High Signal Region +chrX 26459300 26505100 High Signal Region +chrX 26907100 29639200 High Signal Region +chrX 29660500 35508900 High Signal Region +chrX 37612500 37669100 High Signal Region +chrX 39073800 39075700 High Signal Region +chrX 41482500 41489500 High Signal Region +chrX 42676200 42688100 High Signal 
Region +chrX 44239900 44293300 High Signal Region +chrX 44732600 44738600 High Signal Region +chrX 48699000 48771100 High Signal Region +chrX 54269300 55286000 High Signal Region +chrX 55716700 55807400 High Signal Region +chrX 58475000 58478700 High Signal Region +chrX 59773000 59796900 High Signal Region +chrX 61868200 61874000 High Signal Region +chrX 62065700 62084900 High Signal Region +chrX 63509200 63515900 High Signal Region +chrX 63634600 63640900 High Signal Region +chrX 64125800 64132200 High Signal Region +chrX 65962800 65999900 High Signal Region +chrX 66067900 66084000 High Signal Region +chrX 66143100 66145700 High Signal Region +chrX 66316400 66356900 High Signal Region +chrX 67662500 67708500 High Signal Region +chrX 70055300 70072000 High Signal Region +chrX 72800000 72818700 High Signal Region +chrX 75582400 75709000 High Signal Region +chrX 76589100 76607100 High Signal Region +chrX 79135300 79150400 High Signal Region +chrX 81153100 81154600 High Signal Region +chrX 82475800 82481000 High Signal Region +chrX 84290800 84296100 High Signal Region +chrX 87222400 87262500 High Signal Region +chrX 87838600 87845200 High Signal Region +chrX 88230200 88246900 High Signal Region +chrX 89182800 89232600 High Signal Region +chrX 89914800 89916600 High Signal Region +chrX 90308600 90336600 High Signal Region +chrX 92765200 92767900 High Signal Region +chrX 94795400 94980600 High Signal Region +chrX 95265900 95291700 High Signal Region +chrX 97728000 97734800 High Signal Region +chrX 98008600 98033000 High Signal Region +chrX 98585800 98612400 High Signal Region +chrX 101111300 101113600 High Signal Region +chrX 102560800 102585100 High Signal Region +chrX 103455000 103457100 High Signal Region +chrX 104959400 104966000 High Signal Region +chrX 105523800 105529900 High Signal Region +chrX 108202600 108222500 High Signal Region +chrX 108567500 108585200 High Signal Region +chrX 109871000 109876200 High Signal Region +chrX 110976700 110997000 High Signal 
Region +chrX 112369800 112402300 High Signal Region +chrX 114412500 114421300 High Signal Region +chrX 118100900 118102900 High Signal Region +chrX 118901200 118905100 Low Mappability +chrX 119137300 119142400 High Signal Region +chrX 119247400 119264800 High Signal Region +chrX 119335000 119339300 High Signal Region +chrX 120351000 120355400 High Signal Region +chrX 121511200 121514500 High Signal Region +chrX 122901700 122908000 High Signal Region +chrX 123686000 124042000 High Signal Region +chrX 126695300 126778800 High Signal Region +chrX 127935800 127964600 High Signal Region +chrX 128512700 128514400 High Signal Region +chrX 128959800 128965900 High Signal Region +chrX 129055600 129072400 High Signal Region +chrX 129429300 129448000 High Signal Region +chrX 130696000 130702200 High Signal Region +chrX 131802300 131832800 High Signal Region +chrX 132024200 132026400 High Signal Region +chrX 132158700 132160800 High Signal Region +chrX 134149100 134151200 High Signal Region +chrX 135040100 135056700 High Signal Region +chrX 136459400 136503800 High Signal Region +chrX 136897900 136925800 High Signal Region +chrX 138302200 138324600 High Signal Region +chrX 143471300 143484000 High Signal Region +chrX 144699500 144723900 High Signal Region +chrX 145709800 145739800 High Signal Region +chrX 146582500 146588700 High Signal Region +chrX 146758100 146761900 High Signal Region +chrX 147619400 147620700 High Signal Region +chrX 153994800 154073200 High Signal Region +chrX 154242800 154244800 High Signal Region +chrX 158443900 158460500 High Signal Region +chrX 159120000 159154900 High Signal Region +chrX 161179200 161185600 High Signal Region +chrX 162381600 162384600 High Signal Region +chrX 164615100 164622200 High Signal Region +chrX 166063200 166084500 High Signal Region +chrX 167213400 167220200 High Signal Region +chrX 167246000 167252200 High Signal Region +chrX 169968900 171031200 High Signal Region +chrY 0 806800 High Signal Region +chrY 924800 1005300 High 
Signal Region +chrY 1276400 1813700 High Signal Region +chrY 1834500 1940700 High Signal Region +chrY 1973200 1996400 High Signal Region +chrY 2017200 2068000 Low Mappability +chrY 2104700 2210800 High Signal Region +chrY 2280300 2288900 Low Mappability +chrY 2471300 3819300 High Signal Region +chrY 3880300 4177100 High Signal Region +chrY 4249500 4289100 High Signal Region +chrY 4432000 4956300 High Signal Region +chrY 5062400 5227700 High Signal Region +chrY 6376700 6382700 High Signal Region +chrY 6530200 6663200 High Signal Region +chrY 6760200 6835800 High Signal Region +chrY 6984100 8985400 High Signal Region +chrY 10638500 41003800 High Signal Region +chrY 41159200 91744600 High Signal Region diff --git a/assets/blacklists/v3.0/GRCh38-blacklist.v3.bed b/assets/blacklists/v3.0/GRCh38-blacklist.v3.bed new file mode 100644 index 0000000000000000000000000000000000000000..a29072530a048767a48cbc1c59812deb974940ed --- /dev/null +++ b/assets/blacklists/v3.0/GRCh38-blacklist.v3.bed @@ -0,0 +1,910 @@ +1 628903 635104 +1 5850087 5850571 +1 8909610 8910014 +1 9574580 9574997 +1 32043823 32044203 +1 33818964 33819344 +1 38674335 38674715 +1 50017081 50017546 +1 52996949 52997329 +1 55372488 55372869 +1 67971776 67972156 +1 73258720 73259100 +1 76971068 76971595 +1 93936365 93936747 +1 93937447 93937827 +1 102160407 102160787 +1 103620975 103621378 +1 106803432 106803816 +1 106804021 106804224 +1 106804753 106805343 +1 121609948 125063427 +1 125166231 125184683 +1 143184599 143276861 +1 146992422 146992802 +1 158449073 158449453 +1 158872114 158872494 +1 159295111 159295493 +1 169473895 169474338 +1 170006204 170006584 +1 172710350 172710732 +1 181422611 181423158 +1 191961694 191962163 +1 195288048 195288429 +1 199487949 199488149 +1 214709795 214710175 +1 215499615 215500014 +1 226652017 226652398 +1 227699752 227700133 +1 229019365 229019745 +1 233139985 233140365 +1 235520204 235520404 +1 235537405 235537785 +1 235538899 235540112 +1 235540243 235540623 +1 235540886 
235541649 +1 235870625 235871005 +1 237940595 237940979 +1 237941045 237941514 +1 237941893 237942746 +1 237943028 237943416 +1 237943490 237945232 +1 237945285 237946507 +1 237948983 237949365 +1 237951294 237951802 +10 2235555 2235756 +10 19746628 19747247 +10 19747314 19748342 +10 25638376 25638756 +10 26873147 26873538 +10 30565118 30565501 +10 36432964 36433344 +10 36434047 36435188 +10 37600616 37601002 +10 37601246 37601787 +10 37601884 37602850 +10 38481300 38596500 +10 38782600 38967900 +10 39000365 41916630 +10 42066792 42104971 +10 45577925 45578305 +10 46706229 46706611 +10 47633790 47634172 +10 55597861 55600059 +10 55626794 55627174 +10 57668682 57669062 +10 59261793 59262173 +10 69590538 69590738 +10 69591475 69591858 +10 69592355 69592740 +10 69592776 69593482 +10 69594378 69594760 +10 69595141 69595573 +10 69595681 69596061 +10 77166388 77166768 +10 79411056 79411468 +10 89786504 89786889 +10 100057235 100058064 +10 112894488 112894870 +10 115056512 115056712 +10 123032371 123032751 +10 125819621 125820001 +10 133689373 133689523 +11 8023287 8023667 +11 10507706 10510499 +11 10768339 10768719 +11 10815184 10815384 +11 24839563 24839944 +11 27850562 27850942 +11 47323881 47324333 +11 50424039 50813393 +11 51081363 54424064 +11 64187168 64187556 +11 65069483 65069863 +11 73510500 73510992 +11 81551734 81551934 +11 81553835 81554282 +11 81556152 81556537 +11 81556717 81557101 +11 87813427 87814320 +11 87815683 87816063 +11 103270627 103271007 +11 103403270 103403650 +11 103404014 103404527 +11 103404779 103405289 +11 103405809 103406376 +11 103406653 103407036 +11 103407110 103407310 +11 103408089 103409893 +11 103410074 103411211 +11 110876919 110877308 +11 114021166 114021546 +11 123003425 123003857 +11 123139919 123140301 +12 9923 10481 +12 2539174 2539982 +12 3887955 3888335 +12 19795477 19795864 +12 20769413 20769432 +12 21052950 21053330 +12 22005655 22006093 +12 27925108 27925488 +12 31247541 31247923 +12 31247963 31248343 +12 34665177 37429869 
+12 40286245 40286625 +12 41363462 41363903 +12 41698591 41698971 +12 41699048 41699573 +12 49817252 49817634 +12 62773865 62774257 +12 80623983 80624183 +12 101486970 101487350 +12 123053921 123054301 +12 126583199 126583772 +12 126584137 126584530 +12 130315425 130315904 +13 16226300 18171400 +13 25984718 25984918 +13 31866923 31867303 +13 33516898 33517278 +13 36065385 36065836 +13 40768206 40768595 +13 53891451 53891831 +13 55971453 55971922 +13 56688341 56688749 +13 72344211 72344591 +13 75592084 75592468 +13 83688313 83688693 +13 84521524 84522274 +13 84522848 84523233 +13 88308157 88308357 +13 95692549 95692935 +13 95693013 95693215 +13 95694449 95695698 +13 95696145 95696512 +13 105488067 105488448 +13 107058662 107059042 +13 109423944 109424560 +14 16000600 18173660 +14 23426306 23426691 +14 32483953 32485298 +14 37490106 37490486 +14 40643840 40644220 +14 43116742 43117122 +14 45238635 45239016 +14 45430378 45430758 +14 46048457 46048837 +14 46847040 46847420 +14 51587295 51587847 +14 83587331 83587894 +14 83588229 83589060 +14 84171262 84171729 +14 84171838 84172846 +14 84173508 84173969 +14 84174279 84174691 +14 86498937 86499317 +15 17058500 19838644 +15 30477565 30477945 +15 32529779 32530159 +15 34715310 34715692 +15 35396110 35396495 +15 40133887 40134759 +15 41157028 41157408 +15 52100391 52100771 +15 54583731 54584111 +15 58152409 58153114 +15 58153292 58153690 +15 58155859 58156155 +15 67040730 67041122 +15 91960163 91960543 +16 3367430 3368546 +16 3369658 3370039 +16 3370150 3370542 +16 3370932 3371445 +16 3371688 3372222 +16 10719290 10720105 +16 10720417 10720781 +16 10721235 10721874 +16 10721900 10722280 +16 10723423 10723623 +16 10723815 10724200 +16 10724415 10724654 +16 20720929 20721312 +16 20721365 20721746 +16 20722103 20722552 +16 34071571 34071629 +16 34131996 34289269 +16 34571482 34597852 +16 34661168 34661267 +16 34919141 34919184 +16 35966577 38269112 +16 38275767 38280684 +16 46380676 46381095 +16 46386376 46386491 +16 46388622 
46389053 +16 46390180 46390788 +16 46394471 46395088 +16 46398828 46401647 +16 60470624 60471006 +16 65701465 65701846 +16 67590312 67590692 +16 69358523 69358990 +16 73161120 73161500 +16 82119745 82120125 +17 141682 142062 +17 14171308 14171688 +17 15568187 15568567 +17 19597515 19597985 +17 19598613 19599532 +17 19599799 19600210 +17 19600300 19602064 +17 19602160 19602545 +17 19602886 19603595 +17 19603847 19604047 +17 19604922 19605588 +17 20851029 20851409 +17 21851150 21992060 +17 22519042 22520149 +17 22520322 22521025 +17 22521116 22526407 +17 22526636 22530152 +17 22530381 22532156 +17 22532315 22532940 +17 22551066 22551446 +17 22813591 26716670 +17 26885752 26885795 +17 35654769 35655182 +17 43251640 43251763 +17 43309853 43310048 +17 43315021 43316491 +17 43997535 43997957 +17 53105552 53106565 +17 54902920 54903301 +17 59279406 59279787 +17 63076394 63076777 +17 63393238 63393438 +17 65555244 65555624 +17 72316258 72316638 +17 80617407 80617802 +18 2842087 2842534 +18 8103913 8104113 +18 8846332 8846713 +18 15457976 20865732 +18 34571460 34571840 +18 47853089 47853617 +18 52883627 52884007 +18 59288306 59288686 +18 61874562 61874960 +18 77455900 77456280 +19 246899 247452 +19 12105016 12105399 +19 13362989 13363369 +19 24182199 27257542 +19 27741787 27741868 +19 36271917 36272148 +19 37572465 37572846 +19 37576134 37576516 +19 46122944 46123324 +19 47941356 47941426 +19 54794749 54795129 +19 56691535 56691736 +19 56922158 56922601 +2 638427 638808 +2 1087103 1087484 +2 16271753 16272134 +2 22316878 22317258 +2 24644617 24644997 +2 32916201 32916632 +2 33767290 33767703 +2 33964664 33965045 +2 36276769 36277149 +2 40784787 40785278 +2 49229452 49230058 +2 50588765 50589566 +2 54451654 54452034 +2 57648677 57649057 +2 67953669 67954049 +2 75063567 75063994 +2 81666317 81666849 +2 82814941 82815321 +2 82815451 82816236 +2 82816261 82816647 +2 82818378 82818748 +2 82820800 82821005 +2 85068666 85069046 +2 87824709 87825530 +2 89272789 89273133 +2 89827607 
89827706 +2 89828636 89828710 +2 89828842 89828942 +2 89833685 89833793 +2 89839592 89839709 +2 89909317 89909789 +2 90379778 90402456 +2 92081223 92081398 +2 92188125 94293463 +2 94499181 94570956 +2 94898976 94899645 +2 94900639 94900840 +2 94901421 94901808 +2 97189431 97189813 +2 102482582 102482962 +2 102505606 102505987 +2 110072034 110072434 +2 110299106 110299346 +2 116751234 116751614 +2 116752004 116752448 +2 116752517 116752897 +2 117020171 117020552 +2 117021107 117022152 +2 117022438 117024038 +2 117024277 117025093 +2 117025205 117025670 +2 117026130 117026512 +2 120211535 120212064 +2 120212685 120213069 +2 120213761 120214143 +2 120214590 120215370 +2 121220135 121220515 +2 124680743 124681182 +2 125812046 125812548 +2 129090774 129091154 +2 130272174 130272615 +2 130273451 130273981 +2 130274326 130274992 +2 130275174 130275744 +2 130276119 130276500 +2 130277774 130278727 +2 130279995 130280729 +2 130280827 130281440 +2 130557359 130557607 +2 130563142 130563396 +2 131369643 131369925 +2 131370949 131371562 +2 131371916 131372361 +2 131372758 131373137 +2 131379317 131380344 +2 131381592 131381973 +2 131382344 131382728 +2 131382772 131382974 +2 131383079 131384016 +2 131384051 131384621 +2 131384898 131385281 +2 131385356 131385794 +2 140217229 140218044 +2 140220209 140220840 +2 140220940 140221140 +2 140221198 140222369 +2 140222545 140223623 +2 140223647 140224297 +2 143088644 143089042 +2 143089938 143090358 +2 143090898 143091662 +2 143092255 143092646 +2 143093556 143093941 +2 143094515 143094999 +2 143095614 143095994 +2 143096048 143096428 +2 143096470 143097336 +2 143097466 143097981 +2 143100621 143101005 +2 147048574 147048955 +2 147244849 147245229 +2 147265034 147265432 +2 148822913 148823295 +2 148881545 148882032 +2 155196092 155196473 +2 155263345 155264313 +2 155264362 155264562 +2 155264599 155264982 +2 155311420 155311995 +2 155313539 155313922 +2 156828628 156829008 +2 162517271 162517651 +2 164117001 164117382 +2 166414323 
166414779 +2 167378863 167379244 +2 168652433 168652813 +2 179739184 179739689 +2 190593881 190594262 +2 196204680 196205060 +2 201212170 201212612 +2 201212648 201212854 +2 201212903 201213386 +2 201214659 201215040 +2 201549404 201549784 +2 201550130 201550513 +2 201557568 201557948 +2 202614117 202614527 +2 202615371 202615757 +2 202617016 202617398 +2 202618435 202618819 +2 202619754 202620134 +2 211773627 211774158 +2 211774322 211775192 +2 211775641 211776712 +2 211777034 211777417 +2 211777802 211778269 +2 211778916 211779562 +2 215573163 215573544 +2 226722088 226722596 +2 237521663 237522775 +2 237522862 237523652 +20 5999469 5999849 +20 9168743 9169145 +20 10441916 10442296 +20 13167142 13167534 +20 18449173 18449556 +20 22078162 22078542 +20 24024376 24024757 +20 26438448 28554562 +20 28644084 29015573 +20 29125977 29294639 +20 30744370 30744939 +20 30746748 30747241 +20 31051540 31106909 +20 31157044 31159116 +20 31161652 31223331 +20 34688743 34689039 +20 47894699 47896109 +20 57063873 57064279 +20 57357555 57358134 +20 57358221 57359428 +20 57359451 57360972 +20 63644937 63645318 +21 6369257 6372342 +21 7201205 7327885 +21 7919585 7919691 +21 8211710 8211892 +21 8212412 8212570 +21 8213694 8213987 +21 8219372 8220330 +21 8234456 8234568 +21 8394767 8394902 +21 8395471 8395591 +21 8396751 8397011 +21 8445918 8446080 +21 8446629 8446729 +21 8446925 8447070 +21 8595669 8595768 +21 8844362 8844855 +21 8846669 8847382 +21 10014674 10015194 +21 10650900 12965800 +21 16645305 16645685 +21 32095835 32096215 +21 35890413 35890796 +21 44474913 44475301 +21 45376056 45376517 +22 10863370 10863448 +22 11210951 11215489 +22 11854150 11854643 +22 11856460 11857173 +22 11974159 11974336 +22 12135181 12135894 +22 12137711 12138204 +22 12691742 12694097 +22 12954427 15057495 +22 15153934 15211502 +22 15940533 16085728 +22 32894952 32895345 +22 33819338 33819538 +22 35885491 35885898 +22 36172705 36173085 +22 36177875 36178257 +22 46470112 46470493 +22 50086003 
50086529 +22 50806858 50808224 +3 3571912 3572292 +3 24705149 24705529 +3 25467328 25467722 +3 29797534 29797914 +3 33548103 33548483 +3 40252107 40253916 +3 41532177 41532556 +3 43229296 43229733 +3 68658875 68659467 +3 68670345 68670734 +3 73054640 73055020 +3 82655447 82655827 +3 89588895 89589538 +3 90269605 90722189 +3 90774880 91249595 +3 91519649 93657524 +3 93705477 93800019 +3 96475262 96475643 +3 96617014 96618680 +3 106894019 106894441 +3 106895181 106895568 +3 106896124 106896504 +3 106898661 106899022 +3 106899753 106900122 +3 106901799 106902741 +3 106903188 106903605 +3 119947198 119947578 +3 120721858 120722610 +3 122688557 122688938 +3 125982519 125982900 +3 127005357 127005745 +3 128988979 128989359 +3 137095968 137096348 +3 142662232 142662612 +3 152919604 152919995 +3 153658704 153659087 +3 160947473 160948127 +3 166159726 166160108 +3 166160260 166160644 +3 166161631 166162087 +3 166226563 166226945 +3 166232406 166232886 +3 166232970 166233355 +3 166474023 166474223 +3 171534313 171534700 +3 177010776 177011156 +3 192880587 192880967 +4 5404508 5404897 +4 12640142 12640815 +4 14506099 14506467 +4 17061824 17062213 +4 18949310 18949691 +4 22502173 22502553 +4 25717756 25718136 +4 25718275 25718655 +4 25719398 25719626 +4 27730251 27730747 +4 30884524 30884906 +4 32280109 32280489 +4 41023064 41023448 +4 47772100 47772544 +4 49136056 49136102 +4 49141052 49141147 +4 49246355 49246848 +4 49548607 49549100 +4 49631231 49658125 +4 49708086 51743949 +4 51793952 51817249 +4 55327979 55328462 +4 64606369 64606752 +4 64606841 64607360 +4 64607395 64607789 +4 64607976 64608801 +4 64608937 64609326 +4 64609811 64610876 +4 64611176 64611617 +4 66065193 66065631 +4 68050141 68050521 +4 68572333 68572774 +4 78008402 78008882 +4 83383282 83383662 +4 89731703 89732163 +4 92701787 92702300 +4 107501924 107502304 +4 112372589 112372969 +4 116296652 116297040 +4 116297165 116297545 +4 116297659 116298726 +4 116299003 116300416 +4 128081280 128081956 +4 140929567 
140929947 +4 143017907 143018107 +4 143347973 143348354 +4 144379497 144379877 +4 155076906 155077288 +4 155452733 155452935 +4 155453928 155454313 +4 155454407 155455447 +4 155455566 155455766 +4 155457624 155458008 +4 155459547 155459747 +4 155460171 155460553 +4 155461093 155461689 +4 155462078 155463456 +4 155463701 155464839 +4 155464895 155465305 +4 155465580 155466624 +4 157628391 157628774 +4 160044429 160044815 +4 161449477 161449857 +4 161788291 161788671 +4 162421207 162421721 +4 172036714 172037094 +4 179069259 179069639 +4 183489243 183489623 +4 189844495 189844576 +5 12284 12523 +5 12952 13361 +5 5395563 5395943 +5 5396182 5396616 +5 5396675 5397057 +5 8619083 8619464 +5 8619927 8620307 +5 8620707 8621192 +5 8621953 8622333 +5 8622354 8622753 +5 32927394 32927776 +5 37164286 37164673 +5 45913363 50265419 +5 60761358 60762176 +5 66253509 66253889 +5 73775720 73776112 +5 79089860 79090240 +5 80649841 80652548 +5 94567275 94571098 +5 97678633 97679016 +5 98409947 98410327 +5 98410700 98411257 +5 99813005 99813388 +5 100045805 100055225 +5 106553187 106553689 +5 111488864 111489244 +5 119127218 119127602 +5 121030820 121031445 +5 122338658 122339042 +5 123760111 123760622 +5 123760719 123761918 +5 134923133 134928692 +5 136533606 136533986 +5 137305006 137305387 +5 152198765 152199145 +5 160600365 160600745 +5 163146853 163147234 +5 163959711 163960091 +5 164673914 164674288 +5 166530241 166530641 +5 170635389 170635774 +6 1705930 1706304 +6 3943769 3944149 +6 29454054 29454435 +6 32706020 32706850 +6 43490986 43491370 +6 54899048 54899248 +6 58554346 59830578 +6 61278527 61521106 +6 61573960 61574809 +6 72747981 72748361 +6 72799169 72799549 +6 76708390 76708770 +6 88555202 88555591 +6 91726616 91727363 +6 94446937 94447370 +6 96941571 96941951 +6 104699855 104700055 +6 114377334 114377534 +6 122764824 122765204 +6 126478329 126478709 +6 127735330 127735710 +6 132799554 132799939 +6 133150492 133150881 +6 133930809 133931190 +6 138133082 138133462 +6 
143077647 143078031 +6 153666229 153666618 +6 153667363 153667744 +6 153668187 153668753 +6 153669025 153669419 +6 156547729 156548118 +6 163638068 163638448 +7 18021726 18022106 +7 22748471 22748854 +7 33749120 33749500 +7 36228567 36229008 +7 37387570 37387950 +7 45251808 45252289 +7 55369049 55369429 +7 57167688 57168071 +7 57168472 57168852 +7 57169046 57169430 +7 57169550 57169932 +7 57170307 57170523 +7 57170675 57171410 +7 57171502 57172122 +7 57173798 57174181 +7 57174854 57175239 +7 57185615 57185995 +7 57186105 57186589 +7 57187287 57188033 +7 57188305 57188872 +7 57189116 57189730 +7 57190949 57191332 +7 57191618 57191818 +7 57192132 57192860 +7 57193489 57193872 +7 57193974 57194701 +7 57194829 57195210 +7 57196302 57197490 +7 57198263 57198644 +7 57879605 58032504 +7 58166363 62995324 +7 63094673 63095057 +7 64104133 64104513 +7 64105294 64106415 +7 64106627 64107010 +7 64108329 64108798 +7 64110007 64110707 +7 64111376 64111804 +7 64111957 64112849 +7 67627830 67628213 +7 68097607 68097990 +7 68736347 68736811 +7 69331805 69332005 +7 69332037 69332438 +7 69333013 69333393 +7 69333597 69334167 +7 72088575 72088955 +7 83100026 83100406 +7 83469984 83470184 +7 83855080 83855464 +7 95851249 95851629 +7 104989516 104989896 +7 112372484 112372865 +7 112374724 112374950 +7 117263552 117264184 +7 117264231 117264614 +7 130116678 130117058 +7 141173000 141173384 +7 141801916 141802451 +7 141802901 141803366 +7 141804074 141804274 +7 141804814 141805507 +7 142665099 142667846 +7 143187483 143187863 +7 145997159 145997608 +7 150131843 150132229 +7 153968598 153968979 +7 159294463 159294846 +8 13353292 13353679 +8 16056863 16057063 +8 18849121 18849571 +8 20551162 20551554 +8 32805708 32806092 +8 33010514 33010894 +8 33011359 33014071 +8 33014510 33014895 +8 33015020 33015853 +8 36277446 36278060 +8 36278272 36278791 +8 36278835 36279634 +8 40070431 40070867 +8 43237631 43242390 +8 43937900 45969600 +8 46827305 46827914 +8 46828298 46829961 +8 46830195 46831222 
+8 46837581 46837961 +8 46838101 46838484 +8 50758259 50758639 +8 56736733 56736933 +8 61303079 61303460 +8 67580689 67581493 +8 67581588 67581972 +8 67582178 67582568 +8 67585216 67585693 +8 67585787 67586175 +8 67587282 67587922 +8 69102851 69103234 +8 72985528 72985923 +8 74828644 74829025 +8 76201592 76202319 +8 76645407 76645800 +8 97907908 97908279 +8 99495689 99496133 +8 102774315 102774695 +8 103082925 103083379 +8 103083704 103084399 +8 103084730 103085110 +8 103085323 103085806 +8 103086859 103087242 +8 108533901 108534281 +8 110933150 110933533 +8 110934510 110935010 +8 111248936 111249316 +8 120224204 120224584 +8 127053876 127054257 +8 127968653 127969034 +8 133615761 133616142 +8 133755390 133755856 +9 5091131 5091511 +9 5091962 5093013 +9 5093063 5094123 +9 5094192 5094697 +9 5094931 5095816 +9 5096206 5096816 +9 5097188 5097890 +9 5098134 5098516 +9 5099352 5099552 +9 5100044 5100427 +9 5108063 5108592 +9 5109193 5109986 +9 5110030 5110411 +9 9896970 9897350 +9 15866612 15866992 +9 18336471 18336854 +9 31498260 31498640 +9 33656533 33658316 +9 33658346 33659299 +9 34998988 34999474 +9 36466192 36466572 +9 43153721 45525161 +9 64045550 64046043 +9 64047855 64048422 +9 65048153 65079624 +9 68251002 68251071 +9 72788174 72788555 +9 78741395 78741775 +9 78742155 78742969 +9 78743199 78743630 +9 78744108 78744492 +9 78810721 78811113 +9 79804550 79804933 +9 80564643 80565085 +9 80565478 80565941 +9 81747641 81748021 +9 82427689 82428071 +9 92108965 92109347 +9 92539106 92539763 +9 95876956 95877338 +9 117109914 117110296 +9 122505687 122506067 +9 129878699 129879081 +9 134164478 134165354 +9 134170819 134171060 +X 4059512 4059712 +X 5168678 5169232 +X 5169733 5170646 +X 15727702 15728089 +X 17116414 17116794 +X 24056083 24056470 +X 24375345 24375545 +X 33762401 33762781 +X 55178596 55179289 +X 55179434 55180459 +X 55181196 55182790 +X 55183051 55184112 +X 58061543 62821716 +X 62841379 62841765 +X 62842257 62842639 +X 70119464 70119845 +X 70127233 
70127620 +X 77501934 77502314 +X 78561721 78561921 +X 84403779 84404168 +X 100027094 100027475 +X 102010329 102010712 +X 102011531 102011915 +X 102772405 102772791 +X 102785904 102786287 +X 102798001 102798386 +X 102802747 102803161 +X 102809395 102809788 +X 104409869 104410249 +X 106239694 106239894 +X 111416893 111417294 +X 126471558 126473451 +X 126728884 126729272 +X 126729326 126729709 +X 126729837 126730217 +X 126730716 126731106 +X 126731624 126732029 +X 129983338 129983538 +X 133041871 133042251 +X 135292293 135292493 +X 143430213 143430837 +X 143431144 143431537 +X 143431716 143432219 +X 143432410 143433212 +X 143433510 143434156 +X 143543636 143544023 +X 146995842 146996224 +Y 4344757 4344879 +Y 9141870 9141995 +Y 10203380 10266932 +Y 10316749 10544446 +Y 10594583 10626838 +Y 10663669 10663716 +Y 10744417 10921497 +Y 11290797 11334278 +Y 11493053 11592850 +Y 11671014 11671046 +Y 11721528 11749472 +Y 56694632 56889743 diff --git a/assets/blacklists/v3.0/hg38-blacklist.v3.bed b/assets/blacklists/v3.0/hg38-blacklist.v3.bed new file mode 100644 index 0000000000000000000000000000000000000000..4e386f455324bf003566f374f517d54803c77eeb --- /dev/null +++ b/assets/blacklists/v3.0/hg38-blacklist.v3.bed @@ -0,0 +1,910 @@ +chr1 628903 635104 +chr1 5850087 5850571 +chr1 8909610 8910014 +chr1 9574580 9574997 +chr1 32043823 32044203 +chr1 33818964 33819344 +chr1 38674335 38674715 +chr1 50017081 50017546 +chr1 52996949 52997329 +chr1 55372488 55372869 +chr1 67971776 67972156 +chr1 73258720 73259100 +chr1 76971068 76971595 +chr1 93936365 93936747 +chr1 93937447 93937827 +chr1 102160407 102160787 +chr1 103620975 103621378 +chr1 106803432 106803816 +chr1 106804021 106804224 +chr1 106804753 106805343 +chr1 121609948 125063427 +chr1 125166231 125184683 +chr1 143184599 143276861 +chr1 146992422 146992802 +chr1 158449073 158449453 +chr1 158872114 158872494 +chr1 159295111 159295493 +chr1 169473895 169474338 +chr1 170006204 170006584 +chr1 172710350 172710732 +chr1 181422611 
181423158 +chr1 191961694 191962163 +chr1 195288048 195288429 +chr1 199487949 199488149 +chr1 214709795 214710175 +chr1 215499615 215500014 +chr1 226652017 226652398 +chr1 227699752 227700133 +chr1 229019365 229019745 +chr1 233139985 233140365 +chr1 235520204 235520404 +chr1 235537405 235537785 +chr1 235538899 235540112 +chr1 235540243 235540623 +chr1 235540886 235541649 +chr1 235870625 235871005 +chr1 237940595 237940979 +chr1 237941045 237941514 +chr1 237941893 237942746 +chr1 237943028 237943416 +chr1 237943490 237945232 +chr1 237945285 237946507 +chr1 237948983 237949365 +chr1 237951294 237951802 +chr10 2235555 2235756 +chr10 19746628 19747247 +chr10 19747314 19748342 +chr10 25638376 25638756 +chr10 26873147 26873538 +chr10 30565118 30565501 +chr10 36432964 36433344 +chr10 36434047 36435188 +chr10 37600616 37601002 +chr10 37601246 37601787 +chr10 37601884 37602850 +chr10 38481300 38596500 +chr10 38782600 38967900 +chr10 39000365 41916630 +chr10 42066792 42104971 +chr10 45577925 45578305 +chr10 46706229 46706611 +chr10 47633790 47634172 +chr10 55597861 55600059 +chr10 55626794 55627174 +chr10 57668682 57669062 +chr10 59261793 59262173 +chr10 69590538 69590738 +chr10 69591475 69591858 +chr10 69592355 69592740 +chr10 69592776 69593482 +chr10 69594378 69594760 +chr10 69595141 69595573 +chr10 69595681 69596061 +chr10 77166388 77166768 +chr10 79411056 79411468 +chr10 89786504 89786889 +chr10 100057235 100058064 +chr10 112894488 112894870 +chr10 115056512 115056712 +chr10 123032371 123032751 +chr10 125819621 125820001 +chr10 133689373 133689523 +chr11 8023287 8023667 +chr11 10507706 10510499 +chr11 10768339 10768719 +chr11 10815184 10815384 +chr11 24839563 24839944 +chr11 27850562 27850942 +chr11 47323881 47324333 +chr11 50424039 50813393 +chr11 51081363 54424064 +chr11 64187168 64187556 +chr11 65069483 65069863 +chr11 73510500 73510992 +chr11 81551734 81551934 +chr11 81553835 81554282 +chr11 81556152 81556537 +chr11 81556717 81557101 +chr11 87813427 87814320 +chr11 
87815683 87816063 +chr11 103270627 103271007 +chr11 103403270 103403650 +chr11 103404014 103404527 +chr11 103404779 103405289 +chr11 103405809 103406376 +chr11 103406653 103407036 +chr11 103407110 103407310 +chr11 103408089 103409893 +chr11 103410074 103411211 +chr11 110876919 110877308 +chr11 114021166 114021546 +chr11 123003425 123003857 +chr11 123139919 123140301 +chr12 9923 10481 +chr12 2539174 2539982 +chr12 3887955 3888335 +chr12 19795477 19795864 +chr12 20769413 20769432 +chr12 21052950 21053330 +chr12 22005655 22006093 +chr12 27925108 27925488 +chr12 31247541 31247923 +chr12 31247963 31248343 +chr12 34665177 37429869 +chr12 40286245 40286625 +chr12 41363462 41363903 +chr12 41698591 41698971 +chr12 41699048 41699573 +chr12 49817252 49817634 +chr12 62773865 62774257 +chr12 80623983 80624183 +chr12 101486970 101487350 +chr12 123053921 123054301 +chr12 126583199 126583772 +chr12 126584137 126584530 +chr12 130315425 130315904 +chr13 16226300 18171400 +chr13 25984718 25984918 +chr13 31866923 31867303 +chr13 33516898 33517278 +chr13 36065385 36065836 +chr13 40768206 40768595 +chr13 53891451 53891831 +chr13 55971453 55971922 +chr13 56688341 56688749 +chr13 72344211 72344591 +chr13 75592084 75592468 +chr13 83688313 83688693 +chr13 84521524 84522274 +chr13 84522848 84523233 +chr13 88308157 88308357 +chr13 95692549 95692935 +chr13 95693013 95693215 +chr13 95694449 95695698 +chr13 95696145 95696512 +chr13 105488067 105488448 +chr13 107058662 107059042 +chr13 109423944 109424560 +chr14 16000600 18173660 +chr14 23426306 23426691 +chr14 32483953 32485298 +chr14 37490106 37490486 +chr14 40643840 40644220 +chr14 43116742 43117122 +chr14 45238635 45239016 +chr14 45430378 45430758 +chr14 46048457 46048837 +chr14 46847040 46847420 +chr14 51587295 51587847 +chr14 83587331 83587894 +chr14 83588229 83589060 +chr14 84171262 84171729 +chr14 84171838 84172846 +chr14 84173508 84173969 +chr14 84174279 84174691 +chr14 86498937 86499317 +chr15 17058500 19838644 +chr15 30477565 30477945 
+chr15 32529779 32530159 +chr15 34715310 34715692 +chr15 35396110 35396495 +chr15 40133887 40134759 +chr15 41157028 41157408 +chr15 52100391 52100771 +chr15 54583731 54584111 +chr15 58152409 58153114 +chr15 58153292 58153690 +chr15 58155859 58156155 +chr15 67040730 67041122 +chr15 91960163 91960543 +chr16 3367430 3368546 +chr16 3369658 3370039 +chr16 3370150 3370542 +chr16 3370932 3371445 +chr16 3371688 3372222 +chr16 10719290 10720105 +chr16 10720417 10720781 +chr16 10721235 10721874 +chr16 10721900 10722280 +chr16 10723423 10723623 +chr16 10723815 10724200 +chr16 10724415 10724654 +chr16 20720929 20721312 +chr16 20721365 20721746 +chr16 20722103 20722552 +chr16 34071571 34071629 +chr16 34131996 34289269 +chr16 34571482 34597852 +chr16 34661168 34661267 +chr16 34919141 34919184 +chr16 35966577 38269112 +chr16 38275767 38280684 +chr16 46380676 46381095 +chr16 46386376 46386491 +chr16 46388622 46389053 +chr16 46390180 46390788 +chr16 46394471 46395088 +chr16 46398828 46401647 +chr16 60470624 60471006 +chr16 65701465 65701846 +chr16 67590312 67590692 +chr16 69358523 69358990 +chr16 73161120 73161500 +chr16 82119745 82120125 +chr17 141682 142062 +chr17 14171308 14171688 +chr17 15568187 15568567 +chr17 19597515 19597985 +chr17 19598613 19599532 +chr17 19599799 19600210 +chr17 19600300 19602064 +chr17 19602160 19602545 +chr17 19602886 19603595 +chr17 19603847 19604047 +chr17 19604922 19605588 +chr17 20851029 20851409 +chr17 21851150 21992060 +chr17 22519042 22520149 +chr17 22520322 22521025 +chr17 22521116 22526407 +chr17 22526636 22530152 +chr17 22530381 22532156 +chr17 22532315 22532940 +chr17 22551066 22551446 +chr17 22813591 26716670 +chr17 26885752 26885795 +chr17 35654769 35655182 +chr17 43251640 43251763 +chr17 43309853 43310048 +chr17 43315021 43316491 +chr17 43997535 43997957 +chr17 53105552 53106565 +chr17 54902920 54903301 +chr17 59279406 59279787 +chr17 63076394 63076777 +chr17 63393238 63393438 +chr17 65555244 65555624 +chr17 72316258 72316638 +chr17 
80617407 80617802 +chr18 2842087 2842534 +chr18 8103913 8104113 +chr18 8846332 8846713 +chr18 15457976 20865732 +chr18 34571460 34571840 +chr18 47853089 47853617 +chr18 52883627 52884007 +chr18 59288306 59288686 +chr18 61874562 61874960 +chr18 77455900 77456280 +chr19 246899 247452 +chr19 12105016 12105399 +chr19 13362989 13363369 +chr19 24182199 27257542 +chr19 27741787 27741868 +chr19 36271917 36272148 +chr19 37572465 37572846 +chr19 37576134 37576516 +chr19 46122944 46123324 +chr19 47941356 47941426 +chr19 54794749 54795129 +chr19 56691535 56691736 +chr19 56922158 56922601 +chr2 638427 638808 +chr2 1087103 1087484 +chr2 16271753 16272134 +chr2 22316878 22317258 +chr2 24644617 24644997 +chr2 32916201 32916632 +chr2 33767290 33767703 +chr2 33964664 33965045 +chr2 36276769 36277149 +chr2 40784787 40785278 +chr2 49229452 49230058 +chr2 50588765 50589566 +chr2 54451654 54452034 +chr2 57648677 57649057 +chr2 67953669 67954049 +chr2 75063567 75063994 +chr2 81666317 81666849 +chr2 82814941 82815321 +chr2 82815451 82816236 +chr2 82816261 82816647 +chr2 82818378 82818748 +chr2 82820800 82821005 +chr2 85068666 85069046 +chr2 87824709 87825530 +chr2 89272789 89273133 +chr2 89827607 89827706 +chr2 89828636 89828710 +chr2 89828842 89828942 +chr2 89833685 89833793 +chr2 89839592 89839709 +chr2 89909317 89909789 +chr2 90379778 90402456 +chr2 92081223 92081398 +chr2 92188125 94293463 +chr2 94499181 94570956 +chr2 94898976 94899645 +chr2 94900639 94900840 +chr2 94901421 94901808 +chr2 97189431 97189813 +chr2 102482582 102482962 +chr2 102505606 102505987 +chr2 110072034 110072434 +chr2 110299106 110299346 +chr2 116751234 116751614 +chr2 116752004 116752448 +chr2 116752517 116752897 +chr2 117020171 117020552 +chr2 117021107 117022152 +chr2 117022438 117024038 +chr2 117024277 117025093 +chr2 117025205 117025670 +chr2 117026130 117026512 +chr2 120211535 120212064 +chr2 120212685 120213069 +chr2 120213761 120214143 +chr2 120214590 120215370 +chr2 121220135 121220515 +chr2 124680743 
124681182 +chr2 125812046 125812548 +chr2 129090774 129091154 +chr2 130272174 130272615 +chr2 130273451 130273981 +chr2 130274326 130274992 +chr2 130275174 130275744 +chr2 130276119 130276500 +chr2 130277774 130278727 +chr2 130279995 130280729 +chr2 130280827 130281440 +chr2 130557359 130557607 +chr2 130563142 130563396 +chr2 131369643 131369925 +chr2 131370949 131371562 +chr2 131371916 131372361 +chr2 131372758 131373137 +chr2 131379317 131380344 +chr2 131381592 131381973 +chr2 131382344 131382728 +chr2 131382772 131382974 +chr2 131383079 131384016 +chr2 131384051 131384621 +chr2 131384898 131385281 +chr2 131385356 131385794 +chr2 140217229 140218044 +chr2 140220209 140220840 +chr2 140220940 140221140 +chr2 140221198 140222369 +chr2 140222545 140223623 +chr2 140223647 140224297 +chr2 143088644 143089042 +chr2 143089938 143090358 +chr2 143090898 143091662 +chr2 143092255 143092646 +chr2 143093556 143093941 +chr2 143094515 143094999 +chr2 143095614 143095994 +chr2 143096048 143096428 +chr2 143096470 143097336 +chr2 143097466 143097981 +chr2 143100621 143101005 +chr2 147048574 147048955 +chr2 147244849 147245229 +chr2 147265034 147265432 +chr2 148822913 148823295 +chr2 148881545 148882032 +chr2 155196092 155196473 +chr2 155263345 155264313 +chr2 155264362 155264562 +chr2 155264599 155264982 +chr2 155311420 155311995 +chr2 155313539 155313922 +chr2 156828628 156829008 +chr2 162517271 162517651 +chr2 164117001 164117382 +chr2 166414323 166414779 +chr2 167378863 167379244 +chr2 168652433 168652813 +chr2 179739184 179739689 +chr2 190593881 190594262 +chr2 196204680 196205060 +chr2 201212170 201212612 +chr2 201212648 201212854 +chr2 201212903 201213386 +chr2 201214659 201215040 +chr2 201549404 201549784 +chr2 201550130 201550513 +chr2 201557568 201557948 +chr2 202614117 202614527 +chr2 202615371 202615757 +chr2 202617016 202617398 +chr2 202618435 202618819 +chr2 202619754 202620134 +chr2 211773627 211774158 +chr2 211774322 211775192 +chr2 211775641 211776712 +chr2 
211777034 211777417 +chr2 211777802 211778269 +chr2 211778916 211779562 +chr2 215573163 215573544 +chr2 226722088 226722596 +chr2 237521663 237522775 +chr2 237522862 237523652 +chr20 5999469 5999849 +chr20 9168743 9169145 +chr20 10441916 10442296 +chr20 13167142 13167534 +chr20 18449173 18449556 +chr20 22078162 22078542 +chr20 24024376 24024757 +chr20 26438448 28554562 +chr20 28644084 29015573 +chr20 29125977 29294639 +chr20 30744370 30744939 +chr20 30746748 30747241 +chr20 31051540 31106909 +chr20 31157044 31159116 +chr20 31161652 31223331 +chr20 34688743 34689039 +chr20 47894699 47896109 +chr20 57063873 57064279 +chr20 57357555 57358134 +chr20 57358221 57359428 +chr20 57359451 57360972 +chr20 63644937 63645318 +chr21 6369257 6372342 +chr21 7201205 7327885 +chr21 7919585 7919691 +chr21 8211710 8211892 +chr21 8212412 8212570 +chr21 8213694 8213987 +chr21 8219372 8220330 +chr21 8234456 8234568 +chr21 8394767 8394902 +chr21 8395471 8395591 +chr21 8396751 8397011 +chr21 8445918 8446080 +chr21 8446629 8446729 +chr21 8446925 8447070 +chr21 8595669 8595768 +chr21 8844362 8844855 +chr21 8846669 8847382 +chr21 10014674 10015194 +chr21 10650900 12965800 +chr21 16645305 16645685 +chr21 32095835 32096215 +chr21 35890413 35890796 +chr21 44474913 44475301 +chr21 45376056 45376517 +chr22 10863370 10863448 +chr22 11210951 11215489 +chr22 11854150 11854643 +chr22 11856460 11857173 +chr22 11974159 11974336 +chr22 12135181 12135894 +chr22 12137711 12138204 +chr22 12691742 12694097 +chr22 12954427 15057495 +chr22 15153934 15211502 +chr22 15940533 16085728 +chr22 32894952 32895345 +chr22 33819338 33819538 +chr22 35885491 35885898 +chr22 36172705 36173085 +chr22 36177875 36178257 +chr22 46470112 46470493 +chr22 50086003 50086529 +chr22 50806858 50808224 +chr3 3571912 3572292 +chr3 24705149 24705529 +chr3 25467328 25467722 +chr3 29797534 29797914 +chr3 33548103 33548483 +chr3 40252107 40253916 +chr3 41532177 41532556 +chr3 43229296 43229733 +chr3 68658875 68659467 +chr3 68670345 
68670734 +chr3 73054640 73055020 +chr3 82655447 82655827 +chr3 89588895 89589538 +chr3 90269605 90722189 +chr3 90774880 91249595 +chr3 91519649 93657524 +chr3 93705477 93800019 +chr3 96475262 96475643 +chr3 96617014 96618680 +chr3 106894019 106894441 +chr3 106895181 106895568 +chr3 106896124 106896504 +chr3 106898661 106899022 +chr3 106899753 106900122 +chr3 106901799 106902741 +chr3 106903188 106903605 +chr3 119947198 119947578 +chr3 120721858 120722610 +chr3 122688557 122688938 +chr3 125982519 125982900 +chr3 127005357 127005745 +chr3 128988979 128989359 +chr3 137095968 137096348 +chr3 142662232 142662612 +chr3 152919604 152919995 +chr3 153658704 153659087 +chr3 160947473 160948127 +chr3 166159726 166160108 +chr3 166160260 166160644 +chr3 166161631 166162087 +chr3 166226563 166226945 +chr3 166232406 166232886 +chr3 166232970 166233355 +chr3 166474023 166474223 +chr3 171534313 171534700 +chr3 177010776 177011156 +chr3 192880587 192880967 +chr4 5404508 5404897 +chr4 12640142 12640815 +chr4 14506099 14506467 +chr4 17061824 17062213 +chr4 18949310 18949691 +chr4 22502173 22502553 +chr4 25717756 25718136 +chr4 25718275 25718655 +chr4 25719398 25719626 +chr4 27730251 27730747 +chr4 30884524 30884906 +chr4 32280109 32280489 +chr4 41023064 41023448 +chr4 47772100 47772544 +chr4 49136056 49136102 +chr4 49141052 49141147 +chr4 49246355 49246848 +chr4 49548607 49549100 +chr4 49631231 49658125 +chr4 49708086 51743949 +chr4 51793952 51817249 +chr4 55327979 55328462 +chr4 64606369 64606752 +chr4 64606841 64607360 +chr4 64607395 64607789 +chr4 64607976 64608801 +chr4 64608937 64609326 +chr4 64609811 64610876 +chr4 64611176 64611617 +chr4 66065193 66065631 +chr4 68050141 68050521 +chr4 68572333 68572774 +chr4 78008402 78008882 +chr4 83383282 83383662 +chr4 89731703 89732163 +chr4 92701787 92702300 +chr4 107501924 107502304 +chr4 112372589 112372969 +chr4 116296652 116297040 +chr4 116297165 116297545 +chr4 116297659 116298726 +chr4 116299003 116300416 +chr4 128081280 128081956 
+chr4 140929567 140929947 +chr4 143017907 143018107 +chr4 143347973 143348354 +chr4 144379497 144379877 +chr4 155076906 155077288 +chr4 155452733 155452935 +chr4 155453928 155454313 +chr4 155454407 155455447 +chr4 155455566 155455766 +chr4 155457624 155458008 +chr4 155459547 155459747 +chr4 155460171 155460553 +chr4 155461093 155461689 +chr4 155462078 155463456 +chr4 155463701 155464839 +chr4 155464895 155465305 +chr4 155465580 155466624 +chr4 157628391 157628774 +chr4 160044429 160044815 +chr4 161449477 161449857 +chr4 161788291 161788671 +chr4 162421207 162421721 +chr4 172036714 172037094 +chr4 179069259 179069639 +chr4 183489243 183489623 +chr4 189844495 189844576 +chr5 12284 12523 +chr5 12952 13361 +chr5 5395563 5395943 +chr5 5396182 5396616 +chr5 5396675 5397057 +chr5 8619083 8619464 +chr5 8619927 8620307 +chr5 8620707 8621192 +chr5 8621953 8622333 +chr5 8622354 8622753 +chr5 32927394 32927776 +chr5 37164286 37164673 +chr5 45913363 50265419 +chr5 60761358 60762176 +chr5 66253509 66253889 +chr5 73775720 73776112 +chr5 79089860 79090240 +chr5 80649841 80652548 +chr5 94567275 94571098 +chr5 97678633 97679016 +chr5 98409947 98410327 +chr5 98410700 98411257 +chr5 99813005 99813388 +chr5 100045805 100055225 +chr5 106553187 106553689 +chr5 111488864 111489244 +chr5 119127218 119127602 +chr5 121030820 121031445 +chr5 122338658 122339042 +chr5 123760111 123760622 +chr5 123760719 123761918 +chr5 134923133 134928692 +chr5 136533606 136533986 +chr5 137305006 137305387 +chr5 152198765 152199145 +chr5 160600365 160600745 +chr5 163146853 163147234 +chr5 163959711 163960091 +chr5 164673914 164674288 +chr5 166530241 166530641 +chr5 170635389 170635774 +chr6 1705930 1706304 +chr6 3943769 3944149 +chr6 29454054 29454435 +chr6 32706020 32706850 +chr6 43490986 43491370 +chr6 54899048 54899248 +chr6 58554346 59830578 +chr6 61278527 61521106 +chr6 61573960 61574809 +chr6 72747981 72748361 +chr6 72799169 72799549 +chr6 76708390 76708770 +chr6 88555202 88555591 +chr6 91726616 91727363 
+chr6 94446937 94447370 +chr6 96941571 96941951 +chr6 104699855 104700055 +chr6 114377334 114377534 +chr6 122764824 122765204 +chr6 126478329 126478709 +chr6 127735330 127735710 +chr6 132799554 132799939 +chr6 133150492 133150881 +chr6 133930809 133931190 +chr6 138133082 138133462 +chr6 143077647 143078031 +chr6 153666229 153666618 +chr6 153667363 153667744 +chr6 153668187 153668753 +chr6 153669025 153669419 +chr6 156547729 156548118 +chr6 163638068 163638448 +chr7 18021726 18022106 +chr7 22748471 22748854 +chr7 33749120 33749500 +chr7 36228567 36229008 +chr7 37387570 37387950 +chr7 45251808 45252289 +chr7 55369049 55369429 +chr7 57167688 57168071 +chr7 57168472 57168852 +chr7 57169046 57169430 +chr7 57169550 57169932 +chr7 57170307 57170523 +chr7 57170675 57171410 +chr7 57171502 57172122 +chr7 57173798 57174181 +chr7 57174854 57175239 +chr7 57185615 57185995 +chr7 57186105 57186589 +chr7 57187287 57188033 +chr7 57188305 57188872 +chr7 57189116 57189730 +chr7 57190949 57191332 +chr7 57191618 57191818 +chr7 57192132 57192860 +chr7 57193489 57193872 +chr7 57193974 57194701 +chr7 57194829 57195210 +chr7 57196302 57197490 +chr7 57198263 57198644 +chr7 57879605 58032504 +chr7 58166363 62995324 +chr7 63094673 63095057 +chr7 64104133 64104513 +chr7 64105294 64106415 +chr7 64106627 64107010 +chr7 64108329 64108798 +chr7 64110007 64110707 +chr7 64111376 64111804 +chr7 64111957 64112849 +chr7 67627830 67628213 +chr7 68097607 68097990 +chr7 68736347 68736811 +chr7 69331805 69332005 +chr7 69332037 69332438 +chr7 69333013 69333393 +chr7 69333597 69334167 +chr7 72088575 72088955 +chr7 83100026 83100406 +chr7 83469984 83470184 +chr7 83855080 83855464 +chr7 95851249 95851629 +chr7 104989516 104989896 +chr7 112372484 112372865 +chr7 112374724 112374950 +chr7 117263552 117264184 +chr7 117264231 117264614 +chr7 130116678 130117058 +chr7 141173000 141173384 +chr7 141801916 141802451 +chr7 141802901 141803366 +chr7 141804074 141804274 +chr7 141804814 141805507 +chr7 142665099 142667846 
+chr7 143187483 143187863 +chr7 145997159 145997608 +chr7 150131843 150132229 +chr7 153968598 153968979 +chr7 159294463 159294846 +chr8 13353292 13353679 +chr8 16056863 16057063 +chr8 18849121 18849571 +chr8 20551162 20551554 +chr8 32805708 32806092 +chr8 33010514 33010894 +chr8 33011359 33014071 +chr8 33014510 33014895 +chr8 33015020 33015853 +chr8 36277446 36278060 +chr8 36278272 36278791 +chr8 36278835 36279634 +chr8 40070431 40070867 +chr8 43237631 43242390 +chr8 43937900 45969600 +chr8 46827305 46827914 +chr8 46828298 46829961 +chr8 46830195 46831222 +chr8 46837581 46837961 +chr8 46838101 46838484 +chr8 50758259 50758639 +chr8 56736733 56736933 +chr8 61303079 61303460 +chr8 67580689 67581493 +chr8 67581588 67581972 +chr8 67582178 67582568 +chr8 67585216 67585693 +chr8 67585787 67586175 +chr8 67587282 67587922 +chr8 69102851 69103234 +chr8 72985528 72985923 +chr8 74828644 74829025 +chr8 76201592 76202319 +chr8 76645407 76645800 +chr8 97907908 97908279 +chr8 99495689 99496133 +chr8 102774315 102774695 +chr8 103082925 103083379 +chr8 103083704 103084399 +chr8 103084730 103085110 +chr8 103085323 103085806 +chr8 103086859 103087242 +chr8 108533901 108534281 +chr8 110933150 110933533 +chr8 110934510 110935010 +chr8 111248936 111249316 +chr8 120224204 120224584 +chr8 127053876 127054257 +chr8 127968653 127969034 +chr8 133615761 133616142 +chr8 133755390 133755856 +chr9 5091131 5091511 +chr9 5091962 5093013 +chr9 5093063 5094123 +chr9 5094192 5094697 +chr9 5094931 5095816 +chr9 5096206 5096816 +chr9 5097188 5097890 +chr9 5098134 5098516 +chr9 5099352 5099552 +chr9 5100044 5100427 +chr9 5108063 5108592 +chr9 5109193 5109986 +chr9 5110030 5110411 +chr9 9896970 9897350 +chr9 15866612 15866992 +chr9 18336471 18336854 +chr9 31498260 31498640 +chr9 33656533 33658316 +chr9 33658346 33659299 +chr9 34998988 34999474 +chr9 36466192 36466572 +chr9 43153721 45525161 +chr9 64045550 64046043 +chr9 64047855 64048422 +chr9 65048153 65079624 +chr9 68251002 68251071 +chr9 72788174 
72788555 +chr9 78741395 78741775 +chr9 78742155 78742969 +chr9 78743199 78743630 +chr9 78744108 78744492 +chr9 78810721 78811113 +chr9 79804550 79804933 +chr9 80564643 80565085 +chr9 80565478 80565941 +chr9 81747641 81748021 +chr9 82427689 82428071 +chr9 92108965 92109347 +chr9 92539106 92539763 +chr9 95876956 95877338 +chr9 117109914 117110296 +chr9 122505687 122506067 +chr9 129878699 129879081 +chr9 134164478 134165354 +chr9 134170819 134171060 +chrX 4059512 4059712 +chrX 5168678 5169232 +chrX 5169733 5170646 +chrX 15727702 15728089 +chrX 17116414 17116794 +chrX 24056083 24056470 +chrX 24375345 24375545 +chrX 33762401 33762781 +chrX 55178596 55179289 +chrX 55179434 55180459 +chrX 55181196 55182790 +chrX 55183051 55184112 +chrX 58061543 62821716 +chrX 62841379 62841765 +chrX 62842257 62842639 +chrX 70119464 70119845 +chrX 70127233 70127620 +chrX 77501934 77502314 +chrX 78561721 78561921 +chrX 84403779 84404168 +chrX 100027094 100027475 +chrX 102010329 102010712 +chrX 102011531 102011915 +chrX 102772405 102772791 +chrX 102785904 102786287 +chrX 102798001 102798386 +chrX 102802747 102803161 +chrX 102809395 102809788 +chrX 104409869 104410249 +chrX 106239694 106239894 +chrX 111416893 111417294 +chrX 126471558 126473451 +chrX 126728884 126729272 +chrX 126729326 126729709 +chrX 126729837 126730217 +chrX 126730716 126731106 +chrX 126731624 126732029 +chrX 129983338 129983538 +chrX 133041871 133042251 +chrX 135292293 135292493 +chrX 143430213 143430837 +chrX 143431144 143431537 +chrX 143431716 143432219 +chrX 143432410 143433212 +chrX 143433510 143434156 +chrX 143543636 143544023 +chrX 146995842 146996224 +chrY 4344757 4344879 +chrY 9141870 9141995 +chrY 10203380 10266932 +chrY 10316749 10544446 +chrY 10594583 10626838 +chrY 10663669 10663716 +chrY 10744417 10921497 +chrY 11290797 11334278 +chrY 11493053 11592850 +chrY 11671014 11671046 +chrY 11721528 11749472 +chrY 56694632 56889743 diff --git a/assets/multiqc/deseq2_clustering_header.txt 
b/assets/multiqc/deseq2_clustering_header.txt new file mode 100644 index 0000000000000000000000000000000000000000..f7bb33d8f358ea5a8d8f4235e3db849cdbbe450e --- /dev/null +++ b/assets/multiqc/deseq2_clustering_header.txt @@ -0,0 +1,12 @@ +#id: 'deseq2_clustering' +#section_name: 'MERGED LIB: DESeq2 sample similarity' +#description: "Matrix is generated from clustering with Euclidean distances between +# <a href='https://bioconductor.org/packages/release/bioc/html/DESeq2.html' target='_blank'>DESeq2</a> +# rlog values for each sample +# in the <a href='https://github.com/nf-core/chipseq/blob/master/bin/deseq2_qc.r'><code>deseq2_qc.r</code></a> script." +#plot_type: 'heatmap' +#anchor: 'deseq2_clustering' +#pconfig: +# title: 'DESeq2: Heatmap of the sample-to-sample distances' +# xlab: True +# reverseColors: True diff --git a/assets/multiqc/deseq2_pca_header.txt b/assets/multiqc/deseq2_pca_header.txt new file mode 100644 index 0000000000000000000000000000000000000000..250c1cb774d62ed037336a0e81cda4e2d91635ca --- /dev/null +++ b/assets/multiqc/deseq2_pca_header.txt @@ -0,0 +1,11 @@ +#id: 'deseq2_pca' +#section_name: 'MERGED LIB: DESeq2 PCA plot' +#description: "PCA plot of the samples in the experiment. +# These values are calculated using <a href='https://bioconductor.org/packages/release/bioc/html/DESeq2.html'>DESeq2</a> +# in the <a href='https://github.com/nf-core/chipseq/blob/master/bin/deseq2_qc.r'><code>deseq2_qc.r</code></a> script." 
+#plot_type: 'scatter' +#anchor: 'deseq2_pca' +#pconfig: +# title: 'DESeq2: Principal component plot' +# xlab: PC1 +# ylab: PC2 diff --git a/assets/multiqc/frip_score_header.txt b/assets/multiqc/frip_score_header.txt new file mode 100644 index 0000000000000000000000000000000000000000..829021154cb3529949bdaa3c4d4e66f4173a495b --- /dev/null +++ b/assets/multiqc/frip_score_header.txt @@ -0,0 +1,13 @@ +#id: 'frip_score' +#section_name: 'MERGED LIB: MACS2 FRiP score' +#description: "is generated by calculating the fraction of all mapped reads that fall +# into the MACS2 called peak regions. A read must overlap a peak by at least 20% to be counted. +# See <a href='https://www.encodeproject.org/data-standards/terms/' target='_blank'>FRiP score</a>." +#plot_type: 'bargraph' +#anchor: 'frip_score' +#pconfig: +# title: 'FRiP score' +# ylab: 'FRiP score' +# ymax: 1 +# ymin: 0 +# tt_decimals: 2 diff --git a/assets/multiqc/peak_annotation_header.txt b/assets/multiqc/peak_annotation_header.txt new file mode 100644 index 0000000000000000000000000000000000000000..2b3ee938a3d82da378aeac70186b54e086eb2119 --- /dev/null +++ b/assets/multiqc/peak_annotation_header.txt @@ -0,0 +1,9 @@ +#id: 'peak_annotation' +#section_name: 'MERGED LIB: HOMER peak annotation' +#description: "is generated by calculating the proportion of peaks assigned to genomic features by +# <a href='http://homer.ucsd.edu/homer/ngs/annotation.html' target='_blank'>HOMER annotatePeaks.pl</a>." 
+#plot_type: 'bargraph' +#anchor: 'peak_annotation' +#pconfig: +# title: 'Peak to feature proportion' +# ylab: 'Peak count' diff --git a/assets/multiqc/peak_count_header.txt b/assets/multiqc/peak_count_header.txt new file mode 100644 index 0000000000000000000000000000000000000000..aa4dd3460f40baa283c08f8ad3b2b1229641e520 --- /dev/null +++ b/assets/multiqc/peak_count_header.txt @@ -0,0 +1,9 @@ +#id: 'peak_count' +#section_name: 'MERGED LIB: MACS2 peak count' +#description: "is calculated from total number of peaks called by +# <a href='https://github.com/taoliu/MACS' target='_blank'>MACS2</a>" +#plot_type: 'bargraph' +#anchor: 'peak_count' +#pconfig: +# title: 'Total peak count' +# ylab: 'Peak count' diff --git a/assets/multiqc/spp_correlation_header.txt b/assets/multiqc/spp_correlation_header.txt new file mode 100644 index 0000000000000000000000000000000000000000..ad57156347561119b3b244eff245891b10cc9269 --- /dev/null +++ b/assets/multiqc/spp_correlation_header.txt @@ -0,0 +1,12 @@ +#id: 'strand_shift_correlation' +#section_name: 'MERGED LIB: spp strand-shift correlation' +#description: "generated using run_spp.R script from +# <a href='https://github.com/kundajelab/phantompeakqualtools' target='_blank'>phantompeakqualtools</a>." 
+#plot_type: 'linegraph' +#anchor: 'strand_shift_correlation' +#pconfig: +# title: 'Strand-shift correlation plot' +# ylab: 'Cross-correlation' +# xlab: 'Strand-shift (bp)' +# xDecimals: False +# tt_label: 'Strand-shift (bp) {point.x}: {point.y:.2f} Cross-correlation' diff --git a/assets/multiqc/spp_nsc_header.txt b/assets/multiqc/spp_nsc_header.txt new file mode 100644 index 0000000000000000000000000000000000000000..43370f32f33f168a086d9d70c89b59f9daa21397 --- /dev/null +++ b/assets/multiqc/spp_nsc_header.txt @@ -0,0 +1,11 @@ +#id: 'nsc_coefficient' +#section_name: 'MERGED LIB: spp NSC coefficient' +#description: "generated using run_spp.R script from +# <a href='https://github.com/kundajelab/phantompeakqualtools' target='_blank'>phantompeakqualtools</a>." +#plot_type: 'bargraph' +#anchor: 'nsc_coefficient' +#pconfig: +# title: 'Normalized strand cross-correlation coefficient' +# ylab: 'NSC coefficient' +# ymin: 1 +# tt_decimals: 1 diff --git a/assets/multiqc/spp_rsc_header.txt b/assets/multiqc/spp_rsc_header.txt new file mode 100644 index 0000000000000000000000000000000000000000..bab5e09b22324f954552d970794ec7a12bd8e6e7 --- /dev/null +++ b/assets/multiqc/spp_rsc_header.txt @@ -0,0 +1,11 @@ +#id: 'rsc_coefficient' +#section_name: 'MERGED LIB: spp RSC coefficient' +#description: "generated using run_spp.R script from +# <a href='https://github.com/kundajelab/phantompeakqualtools' target='_blank'>phantompeakqualtools</a>." 
+#plot_type: 'bargraph' +#anchor: 'rsc_coefficient' +#pconfig: +# title: 'Relative strand cross-correlation coefficient' +# ylab: 'RSC coefficient' +# ymin: 0 +# tt_decimals: 1 diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 5b7d1652869a71da0cd52cf5f2685ec661c75a0a..1a55a16cb0b76f8e15db50e565fce8e0a83a1a40 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -2,10 +2,174 @@ report_comment: > This report has been generated by the <a href="https://github.com/nf-core/chipseq" target="_blank">nf-core/chipseq</a> analysis pipeline. For information about how to interpret these results, please see the <a href="https://nf-co.re/chipseq" target="_blank">documentation</a>. + +data_format: "yaml" + +export_plots: true + +run_modules: + - custom_content + - fastqc + - cutadapt + - samtools + - picard + - preseq + - featureCounts + - deeptools + - phantompeakqualtools + +exclude_modules: + - "general_stats" + +module_order: + - fastqc: + name: "LIB: FastQC (raw)" + info: "This section of the report shows FastQC results before adapter trimming for individual libraries." + path_filters: + - "./fastqc/*.zip" + - cutadapt: + name: "LIB: cutadapt (trimmed)" + info: "This section of the report shows the length of trimmed reads by cutadapt for individual libraries." + - fastqc: + name: "LIB: FastQC (trimmed)" + info: "This section of the report shows FastQC results after adapter trimming for individual libraries." + path_filters: + - "./trimgalore/fastqc/*.zip" + - samtools: + name: "LIB: SAMTools" + info: "This section of the report shows SAMTools results for individual libraries." + path_filters: + - "./alignment/library/*" + - samtools: + name: "MERGED LIB: SAMTools (unfiltered)" + info: "This section of the report shows SAMTools results after merging libraries and before filtering." 
+ path_filters: + - "./alignment/mergedLibrary/unfiltered/*.mLb.mkD.sorted.bam*" + - picard: + name: "MERGED LIB: Picard (unfiltered)" + info: "This section of the report shows picard results after merging libraries and before filtering." + path_filters: + - "./alignment/mergedLibrary/unfiltered/picard_metrics/*" + - preseq: + name: "MERGED LIB: Preseq (unfiltered)" + info: "This section of the report shows Preseq results after merging libraries and before filtering." + - samtools: + name: "MERGED LIB: SAMTools (filtered)" + info: "This section of the report shows SAMTools results after merging libraries and after filtering." + path_filters: + - "./alignment/mergedLibrary/filtered/*.mLb.clN.sorted.bam*" + - picard: + name: "MERGED LIB: Picard (filtered)" + info: "This section of the report shows picard results after merging libraries and after filtering." + path_filters: + - "./alignment/mergedLibrary/filtered/picard_metrics/*" + - deeptools: + name: "MERGED LIB: deepTools" + anchor: "mlib_deeptools" + info: "This section of the report shows ChIP-seq QC plots generated by deepTools." + - featureCounts: + name: "MERGED LIB: featureCounts" + anchor: "mlib_featurecounts" + info: "This section of the report shows featureCounts results for the number of reads assigned to merged library consensus peaks." 
+ path_filters: + - "./macs/consensus/*.summary" + report_section_order: + peak_count: + before: mlib_deeptools + frip_score: + before: peak_count + peak_annotation: + before: frip_score + strand_shift_correlation: + before: peak_annotation + nsc_coefficient: + before: strand_shift_correlation + rsc_coefficient: + before: nsc_coefficient + mlib_featurecounts: + before: rsc_coefficient + deseq2_pca_1: + order: -1600 + deseq2_pca_2: + order: -1700 + deseq2_pca_3: + order: -1800 + deseq2_pca_4: + order: -1900 + deseq2_pca_5: + order: -2000 + deseq2_pca_6: + order: -2100 + deseq2_pca_7: + order: -2200 + deseq2_pca_8: + order: -2300 + deseq2_pca_9: + order: -2400 + deseq2_pca_10: + order: -2500 + deseq2_clustering_1: + order: -2600 + deseq2_clustering_2: + order: -2700 + deseq2_clustering_3: + order: -2800 + deseq2_clustering_4: + order: -2900 + deseq2_clustering_5: + order: -3000 + deseq2_clustering_6: + order: -3100 + deseq2_clustering_7: + order: -3200 + deseq2_clustering_8: + order: -3300 + deseq2_clustering_9: + order: -3400 + deseq2_clustering_10: + order: -3500 software_versions: - order: -1000 - "nf-core-chipseq-summary": - order: -1001 + order: -3600 + nf-core-chipseq-summary: + order: -3700 -export_plots: true +custom_plot_config: + picard_insert_size: + cpswitch_c_active: False + smooth_points: 1000 + featurecounts: + cpswitch_c_active: False + +extra_fn_clean_exts: + - "fastq.gz" + - "_trimmed" + - "_val" + - "sorted.bam" + - ".Lb" + - "mkD" + - "clN" + - "mLb" + - "_peaks" + - ".FRiP" + - ".peak" + - "_spp" + - ".spp" + - "lc_extrap" + +# # Customise the module search patterns to speed up execution time +# # - Skip module sub-tools that we are not interested in +# # - Replace file-content searching with filename pattern searching +# # - Don't add anything that is the same as the MultiQC default +# # See https://multiqc.info/docs/#optimise-file-search-patterns for details +sp: + cutadapt: + fn: "*trimming_report.txt" + preseq: + fn: "*.lc_extrap.txt" + 
deeptools/plotFingerprintOutRawCounts: + fn: "*plotFingerprint*" + deeptools/plotProfile: + fn: "*plotProfile*" + phantompeakqualtools/out: + fn: "*.spp.out" diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv deleted file mode 100644 index 5f653ab7bfc86c905b720d2bb8708646bb66366e..0000000000000000000000000000000000000000 --- a/assets/samplesheet.csv +++ /dev/null @@ -1,3 +0,0 @@ -sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, diff --git a/assets/samplesheet_pe.csv b/assets/samplesheet_pe.csv new file mode 100644 index 0000000000000000000000000000000000000000..3a304f121b35b97e9a1c6cc441e7b92cc4e5e5c0 --- /dev/null +++ b/assets/samplesheet_pe.csv @@ -0,0 +1,21 @@ +sample,fastq_1,fastq_2,antibody,control +WT_BCATENIN_IP_REP1,BLA203A1_S27_L006_R1_001.fastq.gz,BLA203A1_S27_L006_R2_001.fastq.gz,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP2,BLA203A25_S16_L001_R1_001.fastq.gz,BLA203A25_S16_L001_R2_001.fastq.gz,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP2,BLA203A25_S16_L002_R1_001.fastq.gz,BLA203A25_S16_L002_R2_001.fastq.gz,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP3,BLA203A49_S40_L001_R1_001.fastq.gz,BLA203A49_S40_L001_R2_001.fastq.gz,BCATENIN,WT_INPUT +NAIVE_BCATENIN_IP_REP1,BLA203A7_S60_L001_R1_001.fastq.gz,BLA203A7_S60_L001_R2_001.fastq.gz,BCATENIN,NAIVE_INPUT +NAIVE_BCATENIN_IP_REP2,BLA203A43_S34_L001_R1_001.fastq.gz,BLA203A43_S34_L001_R2_001.fastq.gz,BCATENIN,NAIVE_INPUT +NAIVE_BCATENIN_IP_REP2,BLA203A43_S34_L002_R1_001.fastq.gz,BLA203A43_S34_L002_R2_001.fastq.gz,BCATENIN,NAIVE_INPUT +NAIVE_BCATENIN_IP_REP3,BLA203A64_S55_L001_R1_001.fastq.gz,BLA203A64_S55_L001_R2_001.fastq.gz,BCATENIN,NAIVE_INPUT +WT_TCF4_IP_REP1,BLA203A3_S29_L006_R1_001.fastq.gz,BLA203A3_S29_L006_R2_001.fastq.gz,TCF4,WT_INPUT +WT_TCF4_IP_REP2,BLA203A27_S18_L001_R1_001.fastq.gz,BLA203A27_S18_L001_R2_001.fastq.gz,TCF4,WT_INPUT 
+WT_TCF4_IP_REP3,BLA203A51_S42_L001_R1_001.fastq.gz,BLA203A51_S42_L001_R2_001.fastq.gz,TCF4,WT_INPUT +NAIVE_TCF4_IP_REP1,BLA203A9_S62_L001_R1_001.fastq.gz,BLA203A9_S62_L001_R2_001.fastq.gz,TCF4,NAIVE_INPUT +NAIVE_TCF4_IP_REP2,BLA203A45_S36_L001_R1_001.fastq.gz,BLA203A45_S36_L001_R2_001.fastq.gz,TCF4,NAIVE_INPUT +NAIVE_TCF4_IP_REP3,BLA203A66_S57_L001_R1_001.fastq.gz,BLA203A66_S57_L001_R2_001.fastq.gz,TCF4,NAIVE_INPUT +WT_INPUT_REP1,BLA203A6_S32_L006_R1_001.fastq.gz,BLA203A6_S32_L006_R2_001.fastq.gz,, +WT_INPUT_REP2,BLA203A30_S21_L001_R1_001.fastq.gz,BLA203A30_S21_L001_R2_001.fastq.gz,, +WT_INPUT_REP3,BLA203A31_S21_L003_R1_001.fastq.gz,BLA203A31_S21_L003_R2_001.fastq.gz,, +NAIVE_INPUT_REP1,BLA203A12_S3_L001_R1_001.fastq.gz,BLA203A12_S3_L001_R2_001.fastq.gz,, +NAIVE_INPUT_REP2,BLA203A48_S39_L001_R1_001.fastq.gz,BLA203A48_S39_L001_R2_001.fastq.gz,, +NAIVE_INPUT_REP3,BLA203A49_S1_L006_R1_001.fastq.gz,BLA203A49_S1_L006_R2_001.fastq.gz,, diff --git a/assets/samplesheet_se.csv b/assets/samplesheet_se.csv new file mode 100644 index 0000000000000000000000000000000000000000..a9581d6e07308e19ef6ca9299d1c54e9f1af497f --- /dev/null +++ b/assets/samplesheet_se.csv @@ -0,0 +1,21 @@ +sample,fastq_1,fastq_2,antibody,control +WT_BCATENIN_IP_REP1,BLA203A1_S27_L006_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP2,BLA203A25_S16_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP2,BLA203A25_S16_L002_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP3,BLA203A49_S40_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT +NAIVE_BCATENIN_IP_REP1,BLA203A7_S60_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT +NAIVE_BCATENIN_IP_REP2,BLA203A43_S34_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT +NAIVE_BCATENIN_IP_REP2,BLA203A43_S34_L002_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT +NAIVE_BCATENIN_IP_REP3,BLA203A64_S55_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT +WT_TCF4_IP_REP1,BLA203A3_S29_L006_R1_001.fastq.gz,,TCF4,WT_INPUT +WT_TCF4_IP_REP2,BLA203A27_S18_L001_R1_001.fastq.gz,,TCF4,WT_INPUT 
+WT_TCF4_IP_REP3,BLA203A51_S42_L001_R1_001.fastq.gz,,TCF4,WT_INPUT +NAIVE_TCF4_IP_REP1,BLA203A9_S62_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT +NAIVE_TCF4_IP_REP2,BLA203A45_S36_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT +NAIVE_TCF4_IP_REP3,BLA203A66_S57_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT +WT_INPUT_REP1,BLA203A6_S32_L006_R1_001.fastq.gz,,, +WT_INPUT_REP2,BLA203A30_S21_L001_R1_001.fastq.gz,,, +WT_INPUT_REP3,BLA203A31_S21_L003_R1_001.fastq.gz,,, +NAIVE_INPUT_REP1,BLA203A12_S3_L001_R1_001.fastq.gz,,, +NAIVE_INPUT_REP2,BLA203A48_S39_L001_R1_001.fastq.gz,,, +NAIVE_INPUT_REP3,BLA203A49_S1_L006_R1_001.fastq.gz,,, diff --git a/assets/schema_input.json b/assets/schema_input.json index 20b4e844bd6d8c4c76dcbbc7478bebc35a6e8a29..cda13e0b8036c979e049eb496222c055cfa84d8b 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -29,6 +29,16 @@ "maxLength": 0 } ] + }, + "antibody": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Antibody entry cannot contain spaces" + }, + "control": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Control entry cannot contain spaces" } }, "required": ["sample", "fastq_1"] diff --git a/bin/bampe_rm_orphan.py b/bin/bampe_rm_orphan.py new file mode 100755 index 0000000000000000000000000000000000000000..5de45ea65531877b849d07700c44b813db2fa51c --- /dev/null +++ b/bin/bampe_rm_orphan.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 + +############################################################################### +############################################################################### +## Created on February 1st 2017 to remove singletons from paired-end BAM file +############################################################################### +############################################################################### + +import os +import pysam +import errno +import argparse + +############################################ +############################################ +## PARSE ARGUMENTS 
+############################################ +############################################ + +Description = 'Remove singleton reads from paired-end BAM file i.e if read1 is present in BAM file without read 2 and vice versa.' +Epilog = """Example usage: bampe_rm_orphan.py <BAM_INPUT_FILE> <BAM_OUTPUT_FILE>""" + +argParser = argparse.ArgumentParser(description=Description, epilog=Epilog) + +## REQUIRED PARAMETERS +argParser.add_argument('BAM_INPUT_FILE', help="Input BAM file sorted by name.") +argParser.add_argument('BAM_OUTPUT_FILE', help="Output BAM file sorted by name.") + +## OPTIONAL PARAMETERS +argParser.add_argument('-fr', '--only_fr_pairs', dest="ONLY_FR_PAIRS", help="Only keeps pairs that are in FR orientation on same chromosome.",action='store_true') +args = argParser.parse_args() + +############################################ +############################################ +## HELPER FUNCTIONS +############################################ +############################################ + +def makedir(path): + + if not len(path) == 0: + try: + os.makedirs(path) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise + +############################################ +############################################ +## MAIN FUNCTION +############################################ +############################################ + +def bampe_rm_orphan(BAMIn,BAMOut,onlyFRPairs=False): + + ## SETUP DIRECTORY/FILE STRUCTURE + OutDir = os.path.dirname(BAMOut) + makedir(OutDir) + + ## COUNT VARIABLES + totalReads = 0; totalOutputPairs = 0; totalSingletons = 0; totalImproperPairs = 0 + + ## ITERATE THROUGH BAM FILE + EOF = 0 + SAMFin = pysam.AlignmentFile(BAMIn, "rb") + SAMFout = pysam.AlignmentFile(BAMOut, "wb", header=SAMFin.header) + iter = SAMFin.fetch(until_eof=True) + currRead = next(iter) + for read in iter: + totalReads += 1 + if currRead.qname == read.qname: + pair1 = currRead; pair2 = read + + ## FILTER FOR READS ON SAME CHROMOSOME IN FR 
ORIENTATION + if onlyFRPairs: + if pair1.tid == pair2.tid: + + ## READ1 FORWARD AND READ2 REVERSE STRAND + if not pair1.is_reverse and pair2.is_reverse: + if pair1.reference_start <= pair2.reference_start: + totalOutputPairs += 1 + SAMFout.write(pair1) + SAMFout.write(pair2) + else: + totalImproperPairs += 1 + + ## READ1 REVERSE AND READ2 FORWARD STRAND + elif pair1.is_reverse and not pair2.is_reverse: + if pair2.reference_start <= pair1.reference_start: + totalOutputPairs += 1 + SAMFout.write(pair1) + SAMFout.write(pair2) + else: + totalImproperPairs += 1 + + else: + totalImproperPairs += 1 + else: + totalImproperPairs += 1 + else: + totalOutputPairs += 1 + SAMFout.write(pair1) + SAMFout.write(pair2) + + ## RESET COUNTER + try: + totalReads += 1 + currRead = next(iter) + except: + StopIteration + EOF = 1 + + ## READS WHERE ONLY ONE OF A PAIR IS IN FILE + else: + totalSingletons += 1 + pair1 = currRead + currRead = read + + if not EOF: + totalReads += 1 + totalSingletons += 1 + pair1 = currRead + + ## CLOSE ALL FILE HANDLES + SAMFin.close() + SAMFout.close() + + LogFile = os.path.join(OutDir,'%s_bampe_rm_orphan.log' % (os.path.basename(BAMOut[:-4]))) + SamLogFile = open(LogFile,'w') + SamLogFile.write('\n##############################\n') + SamLogFile.write('FILES/DIRECTORIES') + SamLogFile.write('\n##############################\n\n') + SamLogFile.write('Input File: ' + BAMIn + '\n') + SamLogFile.write('Output File: ' + BAMOut + '\n') + SamLogFile.write('\n##############################\n') + SamLogFile.write('OVERALL COUNTS') + SamLogFile.write('\n##############################\n\n') + SamLogFile.write('Total Input Reads = ' + str(totalReads) + '\n') + SamLogFile.write('Total Output Pairs = ' + str(totalOutputPairs) + '\n') + SamLogFile.write('Total Singletons Excluded = ' + str(totalSingletons) + '\n') + SamLogFile.write('Total Improper Pairs Excluded = ' + str(totalImproperPairs) + '\n') + SamLogFile.write('\n##############################\n') + 
SamLogFile.close() + +############################################ +############################################ +## RUN FUNCTION +############################################ +############################################ + +bampe_rm_orphan(BAMIn=args.BAM_INPUT_FILE,BAMOut=args.BAM_OUTPUT_FILE,onlyFRPairs=args.ONLY_FR_PAIRS) + +############################################ +############################################ +############################################ +############################################ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 11b155723a63c779ccde1ef268de82a0553e5a84..1cc8a593917fffd3a153763bc9faa75d1c51b524 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -1,261 +1,198 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 - -"""Provide a command line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging +import os import sys -from collections import Counter -from pathlib import Path - -logger = logging.getLogger() - - -class RowChecker: - """ - Define a service that can validate and transform each given row. - - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. The order of rows is maintained. - - """ - - VALID_FORMATS = ( - ".fq.gz", - ".fastq.gz", - ) - - def __init__( - self, - sample_col="sample", - first_col="fastq_1", - second_col="fastq_2", - single_col="single_end", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - sample_col (str): The name of the column that contains the sample name - (default "sample"). - first_col (str): The name of the column that contains the first (or only) - FASTQ file path (default "fastq_1"). - second_col (str): The name of the column that contains the second (if any) - FASTQ file path (default "fastq_2"). 
- single_col (str): The name of the new column that will be inserted and - records whether the sample contains single- or paired-end sequencing - reads (default "single_end"). - - """ - super().__init__(**kwargs) - self._sample_col = sample_col - self._first_col = first_col - self._second_col = second_col - self._single_col = single_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row and insert the read pairing status. - - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). - - """ - self._validate_sample(row) - self._validate_first(row) - self._validate_second(row) - self._validate_pair(row) - self._seen.add((row[self._sample_col], row[self._first_col])) - self.modified.append(row) - - def _validate_sample(self, row): - """Assert that the sample name exists and convert spaces to underscores.""" - if len(row[self._sample_col]) <= 0: - raise AssertionError("Sample input is required.") - # Sanitize samples slightly. - row[self._sample_col] = row[self._sample_col].replace(" ", "_") - - def _validate_first(self, row): - """Assert that the first FASTQ entry is non-empty and has the right format.""" - if len(row[self._first_col]) <= 0: - raise AssertionError("At least the first FASTQ file is required.") - self._validate_fastq_format(row[self._first_col]) - - def _validate_second(self, row): - """Assert that the second FASTQ entry has the right format if it exists.""" - if len(row[self._second_col]) > 0: - self._validate_fastq_format(row[self._second_col]) - - def _validate_pair(self, row): - """Assert that read pairs have the same file extension. 
Report pair status.""" - if row[self._first_col] and row[self._second_col]: - row[self._single_col] = False - first_col_suffix = Path(row[self._first_col]).suffixes[-2:] - second_col_suffix = Path(row[self._second_col]).suffixes[-2:] - if first_col_suffix != second_col_suffix: - raise AssertionError("FASTQ pairs must have the same file extensions.") - else: - row[self._single_col] = True - - def _validate_fastq_format(self, filename): - """Assert that a given filename has one of the expected FASTQ extensions.""" - if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): - raise AssertionError( - f"The FASTQ file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - def validate_unique_samples(self): - """ - Assert that the combination of sample name and FASTQ filename is unique. - - In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the - number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. - - """ - if len(self._seen) != len(self.modified): - raise AssertionError("The pair of sample name and FASTQ must be unique.") - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - row[self._sample_col] = f"{sample}_T{seen[sample]}" +import errno +import argparse -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) +def parse_args(args=None): + Description = "Reformat nf-core/chipseq samplesheet file and check its contents." 
+ Epilog = "Example usage: python check_samplesheet.py <FILE_IN> <FILE_OUT>" + parser = argparse.ArgumentParser(description=Description, epilog=Epilog) + parser.add_argument("FILE_IN", help="Input samplesheet file.") + parser.add_argument("FILE_OUT", help="Output file.") + return parser.parse_args(args) -def sniff_format(handle): - """ - Detect the tabular format. - Args: - handle (text file): A handle to a `text file`_ object. The read position is - expected to be at the beginning (index 0). +def make_dir(path): + if len(path) > 0: + try: + os.makedirs(path) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise exception - Returns: - csv.Dialect: The detected tabular format. - .. _text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - if not sniffer.has_header(peek): - logger.critical("The given sample sheet does not appear to contain a header.") - sys.exit(1) - dialect = sniffer.sniff(peek) - return dialect +def print_error(error, context="Line", context_str=""): + error_str = "ERROR: Please check samplesheet -> {}".format(error) + if context != "" and context_str != "": + error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format( + error, context.strip(), context_str.strip() + ) + print(error_str) + sys.exit(1) def check_samplesheet(file_in, file_out): """ - Check that the tabular samplesheet has the structure expected by nf-core pipelines. - - Validate the general shape of the table, expected columns, and each row. Also add - an additional column which records whether one or two FASTQ reads were found. - - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. 
- - Example: - This function checks that the samplesheet follows the following structure, - see also the `viral recon samplesheet`_:: - - sample,fastq_1,fastq_2 - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, - - .. _viral recon samplesheet: - https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - + This function checks that the samplesheet follows the following structure: + sample,fastq_1,fastq_2,antibody,control + SPT5_T0_REP1,SRR1822153_1.fastq.gz,SRR1822153_2.fastq.gz,SPT5,SPT5_INPUT_REP1 + SPT5_T0_REP2,SRR1822154_1.fastq.gz,SRR1822154_2.fastq.gz,SPT5,SPT5_INPUT_REP2 + SPT5_INPUT_REP1,SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,, + SPT5_INPUT_REP2,SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,, + For an example see: + https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/samplesheet/v2.0/samplesheet_test.csv """ - required_columns = {"sample", "fastq_1", "fastq_2"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - req_cols = ", ".join(required_columns) - logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") - sys.exit(1) - # Validate each row. 
- checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_samples() - header = list(reader.fieldnames) - header.insert(1, "single_end") - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - return parser.parse_args(argv) + sample_mapping_dict = {} + with open(file_in, "r", encoding="utf-8-sig") as fin: + ## Check header + MIN_COLS = 2 + HEADER = ["sample", "fastq_1", "fastq_2", "antibody", "control"] + header = [x.strip('"') for x in fin.readline().strip().split(",")] + if header[: len(HEADER)] != HEADER: + print( + f"ERROR: Please check samplesheet header -> {','.join(header)} != {','.join(HEADER)}" + ) + sys.exit(1) -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - 
logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) + ## Check sample entries + for line in fin: + lspl = [x.strip().strip('"') for x in line.strip().split(",")] + + # Check valid number of columns per row + if len(lspl) < len(HEADER): + print_error( + "Invalid number of columns (minimum = {})!".format(len(HEADER)), + "Line", + line, + ) + num_cols = len([x for x in lspl if x]) + if num_cols < MIN_COLS: + print_error( + "Invalid number of populated columns (minimum = {})!".format(MIN_COLS), + "Line", + line, + ) + + ## Check sample name entries + sample, fastq_1, fastq_2, antibody, control = lspl[: len(HEADER)] + if sample.find(" ") != -1: + print( + f"WARNING: Spaces have been replaced by underscores for sample: {sample}" + ) + sample = sample.replace(" ", "_") + if not sample: + print_error("Sample entry has not been specified!", "Line", line) + + ## Check FastQ file extension + for fastq in [fastq_1, fastq_2]: + if fastq: + if fastq.find(" ") != -1: + print_error("FastQ file contains spaces!", "Line", line) + if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"): + print_error( + "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!", + "Line", + line, + ) + + ## Check antibody and control columns have valid values + if antibody: + if antibody.find(" ") != -1: + print( + f"WARNING: Spaces have been replaced by underscores for antibody: {antibody}" + ) + antibody = antibody.replace(" ", "_") + if not control: + print_error( + "Both antibody and control columns must be specified!", + "Line", + line, + ) + if control: + if control.find(" ") != -1: + print( + f"WARNING: Spaces have been replaced by underscores for control: {control}" + ) + control = control.replace(" ", "_") + if not antibody: + print_error( + "Both antibody and control columns must be specified!", + "Line", + line, + ) + + ## Auto-detect 
paired-end/single-end + sample_info = [] ## [single_end, fastq_1, fastq_2, antibody, control] + if sample and fastq_1 and fastq_2: ## Paired-end short reads + sample_info = ["0", fastq_1, fastq_2, antibody, control] + elif sample and fastq_1 and not fastq_2: ## Single-end short reads + sample_info = ["1", fastq_1, fastq_2, antibody, control] + else: + print_error("Invalid combination of columns provided!", "Line", line) + + ## Create sample mapping dictionary = {sample: [[ single_end, fastq_1, fastq_2, antibody, control ]]} + if sample not in sample_mapping_dict: + sample_mapping_dict[sample] = [sample_info] + else: + if sample_info in sample_mapping_dict[sample]: + print_error("Samplesheet contains duplicate rows!", "Line", line) + else: + sample_mapping_dict[sample].append(sample_info) + + ## Write validated samplesheet with appropriate columns + if len(sample_mapping_dict) > 0: + out_dir = os.path.dirname(file_out) + make_dir(out_dir) + with open(file_out, "w") as fout: + fout.write( + ",".join( + [ + "sample", + "single_end", + "fastq_1", + "fastq_2", + "antibody", + "control", + ] + ) + + "\n" + ) + for sample in sorted(sample_mapping_dict.keys()): + + ## Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + if not all( + x[0] == sample_mapping_dict[sample][0][0] + for x in sample_mapping_dict[sample] + ): + print_error( + f"Multiple runs of a sample must be of the same datatype i.e. 
single-end or paired-end!", + "Sample", + sample, + ) + + for idx, val in enumerate(sample_mapping_dict[sample]): + control = val[-1] + if control and control not in sample_mapping_dict.keys(): + print_error( + f"Control identifier has to match a provided sample identifier!", + "Control", + control, + ) + + fout.write(",".join([f"{sample}_T{idx+1}"] + val) + "\n") + else: + print_error("No entries to process!", "Samplesheet", file_in) + + +def main(args=None): + args = parse_args(args) + check_samplesheet(args.FILE_IN, args.FILE_OUT) if __name__ == "__main__": diff --git a/bin/deseq2_qc.r b/bin/deseq2_qc.r new file mode 100755 index 0000000000000000000000000000000000000000..e8c2617f52425899a59beb650417bbda0b0b71f6 --- /dev/null +++ b/bin/deseq2_qc.r @@ -0,0 +1,247 @@ +#!/usr/bin/env Rscript + +################################################ +################################################ +## REQUIREMENTS ## +################################################ +################################################ + +## PCA, HEATMAP AND SCATTERPLOTS FOR SAMPLES IN COUNTS FILE +## - SAMPLE NAMES HAVE TO END IN e.g. "_R1" REPRESENTING REPLICATE ID. LAST 3 CHARACTERS OF SAMPLE NAME WILL BE TRIMMED TO OBTAIN GROUP ID FOR DESEQ2 COMPARISONS. 
+## - PACKAGES BELOW NEED TO BE AVAILABLE TO LOAD WHEN RUNNING R + +################################################ +################################################ +## LOAD LIBRARIES ## +################################################ +################################################ + +library(optparse) +library(DESeq2) +library(ggplot2) +library(RColorBrewer) +library(pheatmap) + +################################################ +################################################ +## PARSE COMMAND-LINE PARAMETERS ## +################################################ +################################################ + +option_list <- list( + make_option(c("-i", "--count_file" ), type="character", default=NULL , metavar="path" , help="Count file matrix where rows are genes and columns are samples." ), + make_option(c("-f", "--count_col" ), type="integer" , default=2 , metavar="integer", help="First column containing sample count data." ), + make_option(c("-d", "--id_col" ), type="integer" , default=1 , metavar="integer", help="Column containing identifiers to be used." ), + make_option(c("-r", "--sample_suffix" ), type="character", default='' , metavar="string" , help="Suffix to remove after sample name in columns e.g. '.rmDup.bam' if 'DRUG_R1.rmDup.bam'."), + make_option(c("-o", "--outdir" ), type="character", default='./' , metavar="path" , help="Output directory." ), + make_option(c("-p", "--outprefix" ), type="character", default='deseq2', metavar="string" , help="Output prefix." ), + make_option(c("-v", "--vst" ), type="logical" , default=FALSE , metavar="boolean", help="Run vst transform instead of rlog." ), + make_option(c("-c", "--cores" ), type="integer" , default=1 , metavar="integer", help="Number of cores." 
) +) + +opt_parser <- OptionParser(option_list=option_list) +opt <- parse_args(opt_parser) + +if (is.null(opt$count_file)){ + print_help(opt_parser) + stop("Please provide a counts file.", call.=FALSE) +} + +################################################ +################################################ +## READ IN COUNTS FILE ## +################################################ +################################################ + +count.table <- read.delim(file=opt$count_file,header=TRUE, row.names=NULL, skip=1, check.names=FALSE) +rownames(count.table) <- count.table[,opt$id_col] +count.table <- count.table[,opt$count_col:ncol(count.table),drop=FALSE] +colnames(count.table) <- gsub(opt$sample_suffix,"",colnames(count.table)) +colnames(count.table) <- gsub(pattern='\\.$', replacement='', colnames(count.table)) + +################################################ +################################################ +## RUN DESEQ2 ## +################################################ +################################################ + +if (file.exists(opt$outdir) == FALSE) { + dir.create(opt$outdir, recursive=TRUE) +} +setwd(opt$outdir) + +samples.vec <- colnames(count.table) +name_components <- strsplit(samples.vec, "_") +n_components <- length(name_components[[1]]) +decompose <- n_components!=1 && all(sapply(name_components, length)==n_components) +coldata <- data.frame(samples.vec, sample=samples.vec, row.names=1) +if (decompose) { + groupings <- as.data.frame(lapply(1:n_components, function(i) sapply(name_components, "[[", i))) + names(groupings) <- paste0("Group", 1:n_components) + n_distinct <- sapply(groupings, function(grp) length(unique(grp))) + groupings <- groupings[n_distinct!=1 & n_distinct!=length(samples.vec)] + if (ncol(groupings)!=0) { + coldata <- cbind(coldata, groupings) + } else { + decompose <- FALSE + } +} + +DDSFile <- paste(opt$outprefix,".dds.RData",sep="") + +counts <- count.table[,samples.vec,drop=FALSE] +dds <- 
DESeqDataSetFromMatrix(countData=round(counts), colData=coldata, design=~ 1) +dds <- estimateSizeFactors(dds) +if (min(dim(count.table))<=1) { # No point if only one sample, or one gene + save(dds,file=DDSFile) + saveRDS(dds, file=sub("\\.dds\\.RData$", ".rds", DDSFile)) + warning("Not enough samples or genes in counts file for PCA.", call.=FALSE) + quit(save = "no", status = 0, runLast = FALSE) +} +if (!opt$vst) { + vst_name <- "rlog" + rld <- rlog(dds) +} else { + vst_name <- "vst" + rld <- varianceStabilizingTransformation(dds) +} + +assay(dds, vst_name) <- assay(rld) +save(dds,file=DDSFile) +saveRDS(dds, file=sub("\\.dds\\.RData$", ".rds", DDSFile)) + +################################################ +################################################ +## PLOT QC ## +################################################ +################################################ + +##' PCA pre-processor +##' +##' Generate all the necessary information to plot PCA from a DESeq2 object +##' in which an assay containing a variance-stabilised matrix of counts is +##' stored. Copied from DESeq2::plotPCA, but with additional ability to +##' say which assay to run the PCA on. +##' +##' @param object The DESeqDataSet object. +##' @param ntop number of top genes to use for principal components, selected by highest row variance. +##' @param assay the name or index of the assay that stores the variance-stabilised data. +##' @return A data.frame containing the projected data alongside the sample metadata (colData) columns. +##' A 'percentVar' attribute is set: a data.frame with one row per principal component, +##' holding the component index ('PC') and the percentage of variation it explains ('percentVar'). 
##' @author Gavin Kelly
## Run a PCA on the most variable rows of one assay of `object`.
##   object : container exposing assays(), assay() and colData()
##            (presumably a DESeqDataSet -- confirm against the caller).
##   ntop   : number of highest-variance rows to keep; Inf keeps every row.
##   assay  : name or index of the assay to use; defaults to the last assay.
## Returns a data.frame of colData(object) column-bound with the PC scores,
## carrying a "percentVar" attribute (columns PC, percentVar in percent).
plotPCA_vst <- function (object, ntop = 500, assay=length(assays(object))) {
    rv <- rowVars(assay(object, assay))
    # indices of the min(ntop, nrow) rows with the largest rowVars() value
    select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
    # samples become rows; centred but not scaled
    pca <- prcomp(t(assay(object, assay)[select, ]), center=TRUE, scale=FALSE)
    percentVar <- pca$sdev^2/sum(pca$sdev^2)
    df <- cbind( as.data.frame(colData(object)), pca$x)
    #Order points so extreme samples are more likely to get label
    # NOTE(review): this subtracts the median of the raw PC scores from the
    # ranks (not the median rank) -- confirm that this is intended.
    ord <- order(abs(rank(df$PC1)-median(df$PC1)), abs(rank(df$PC2)-median(df$PC2)))
    df <- df[ord,]
    attr(df, "percentVar") <- data.frame(PC=seq(along=percentVar), percentVar=100*percentVar)
    return(df)
}

PlotFile <- paste(opt$outprefix,".plots.pdf",sep="")

# All plots below go into one multi-page PDF; the device is closed by dev.off()
# after the distance heatmap.
pdf(file=PlotFile, onefile=TRUE, width=7, height=7)
## PCA
# Two passes: top 500 most variable rows, then all rows (Inf).
ntop <- c(500, Inf)
for (n_top_var in ntop) {
    pca.data <- plotPCA_vst(dds, assay=vst_name, ntop=n_top_var)
    percentVar <- round(attr(pca.data, "percentVar")$percentVar)
    plot_subtitle <- ifelse(n_top_var==Inf, "All genes", paste("Top", n_top_var, "genes"))
    # `sample` is expected to be a column of pca.data (coming from colData) -- TODO confirm
    pl <- ggplot(pca.data, aes(PC1, PC2, label=paste0(" ", sample, " "))) +
        geom_point() +
        geom_text(check_overlap=TRUE, vjust=0.5, hjust="inward") +
        xlab(paste0("PC1: ",percentVar[1],"% variance")) +
        ylab(paste0("PC2: ",percentVar[2],"% variance")) +
        labs(title = paste0("First PCs on ", vst_name, "-transformed data"), subtitle = plot_subtitle) +
        theme(legend.position="top",
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            panel.background = element_blank(),
            panel.border = element_rect(colour = "black", fill=NA, size=1))
    print(pl)

    # `decompose` and `groupings` are defined earlier in the script (outside this excerpt).
    if (decompose) {
        pc_names <- paste0("PC", attr(pca.data, "percentVar")$PC)
        # wide -> long over the PC columns: one row per (sample, component)
        long_pc <- reshape(pca.data, varying=pc_names, direction="long", sep="", timevar="component", idvar="pcrow")
        long_pc <- subset(long_pc, component<=5)
        # second wide -> long pass over the grouping columns named in `groupings`
        # (presumably Group1, Group2, ... so that sep="" yields a `Group` column -- verify)
        long_pc_grp <- reshape(long_pc, varying=names(groupings), direction="long", sep="", timevar="grouper")
        long_pc_grp <- subset(long_pc_grp, grouper<=5)
        long_pc_grp$component <- paste("PC", long_pc_grp$component)
        # ordinal suffix lookup only covers 1..5, which matches the subset above
        long_pc_grp$grouper <- paste0(long_pc_grp$grouper, c("st","nd","rd","th","th")[long_pc_grp$grouper], " prefix")
        pl <- ggplot(long_pc_grp, aes(x=Group, y=PC)) +
            geom_point() +
            stat_summary(fun=mean, geom="line", aes(group = 1)) +
            labs(x=NULL, y=NULL, subtitle = plot_subtitle, title="PCs split by sample-name prefixes") +
            facet_grid(component~grouper, scales="free_x") +
            scale_x_discrete(guide = guide_axis(n.dodge = 3))
        print(pl)
    }
} # at end of loop, we'll be using the user-defined ntop if any, else all genes

## WRITE PC1 vs PC2 VALUES TO FILE
# pca.data / percentVar hold the values of the LAST loop iteration (ntop = Inf).
pca.vals <- pca.data[,c("PC1","PC2")]
colnames(pca.vals) <- paste0(colnames(pca.vals), ": ", percentVar[1:2], '% variance')
pca.vals <- cbind(sample = rownames(pca.vals), pca.vals)
write.table(pca.vals, file = paste(opt$outprefix, ".pca.vals.txt", sep=""),
    row.names = FALSE, col.names = TRUE, sep = "\t", quote = TRUE)

## SAMPLE DISTANCE HEATMAP
# Euclidean distances between samples (dist() default) on the transformed assay.
sampleDists <- dist(t(assay(dds, vst_name)))
sampleDistMatrix <- as.matrix(sampleDists)
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(
    sampleDistMatrix,
    clustering_distance_rows=sampleDists,
    clustering_distance_cols=sampleDists,
    col=colors,
    main=paste("Euclidean distance between", vst_name, "of samples")
)

## WRITE SAMPLE DISTANCES TO FILE
write.table(cbind(sample = rownames(sampleDistMatrix), sampleDistMatrix),file=paste(opt$outprefix, ".sample.dists.txt", sep=""),
    row.names=FALSE, col.names=TRUE, sep="\t", quote=FALSE)
dev.off()

################################################
################################################
## SAVE SIZE FACTORS                          ##
################################################
################################################

# Relative to the current working directory; trailing slash is relied on by paste() below.
SizeFactorsDir <- "size_factors/"
if (file.exists(SizeFactorsDir) == FALSE) {
    dir.create(SizeFactorsDir, recursive=TRUE)
}

NormFactorsFile <- paste(SizeFactorsDir,opt$outprefix, ".size_factors.RData", sep="")

# The object is stored under the name `normFactors` inside the .RData file.
normFactors <- sizeFactors(dds)
save(normFactors, file=NormFactorsFile)

# One plain-text file per sample, named <sample>.txt, holding the single numeric size factor.
for (name in names(sizeFactors(dds))) {
    sizeFactorFile <- paste(SizeFactorsDir,name, ".txt", sep="")
    write(as.numeric(sizeFactors(dds)[name]), file=sizeFactorFile)
}

################################################
################################################
## R SESSION INFO                             ##
################################################
################################################

RLogFile <- "R_sessionInfo.log"

# Redirect console output to the log while printing the session info, then restore it.
sink(RLogFile)
a <- sessionInfo()
print(a)
sink()

################################################
################################################
################################################
################################################
# IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ALSO, IT WOULD BE NICE IF YOU LET ME KNOW YOU USED IT.

# gtf2bed: convert a GTF / GFF2 / GFF3 annotation into BED12 on STDOUT.
#
# Usage: gtf2bed [-x] <annotation[.gz|.zip]>
#   -x   append an extra column holding gene_name (falling back to gene_id)
#
# One BED line is printed per transcript id, built from its 'exon' (or
# 'miRNA') features; start_codon / stop_codon features, when present, set the
# thick start / end.

use Getopt::Long;

my $extended;
GetOptions("x"=>\$extended);

$in = shift @ARGV;
die "Usage: gtf2bed [-x] <annotation[.gz|.zip]>\n" unless defined $in && length $in;

# Open the input and CHECK the result. Previously the "|| die" was attached to
# the (always true) command string, so a failed open was silently ignored and
# the script produced an empty BED. Decompressors are started with the list
# form of open so the file name never passes through a shell.
my $from_pipe = 0;
my $opened;
if ($in =~ /\.gz$/) {
    $from_pipe = 1;
    $opened = open(IN, '-|', 'gunzip', '-c', $in);
} elsif ($in =~ /\.zip$/) {
    $from_pipe = 1;
    $opened = open(IN, '-|', 'unzip', '-p', $in);
} else {
    # two-argument form kept on purpose so that "-" still means STDIN
    $opened = open(IN, $in);
}
$opened or die "Can't open $in: $!\n";

while (<IN>) {
    # a "##gff-version" pragma switches id extraction to GFF rules
    $gff = 2 if /^##gff-version 2/;
    $gff = 3 if /^##gff-version 3/;
    next if /^#/ && $gff;

    s/\s+$//;
    # 0-chr 1-src 2-feat 3-beg 4-end 5-scor 6-dir 7-fram 8-attr
    my @f = split /\t/;
    if ($gff) {
        # most ver 2's stick gene names in the id field
        ($id) = $f[8]=~ /\bID="([^"]+)"/;
        # most ver 3's stick unquoted names in the name field
        ($id) = $f[8]=~ /\bName=([^";]+)/ if !$id && $gff == 3;
    } else {
        ($id) = $f[8]=~ /transcript_id "([^"]+)"/;
    }

    # lines without a usable id (including GTF comment lines) are skipped
    next unless $id && $f[0];

    if ($f[2] eq 'exon') {
        die "no position at exon on line $." if ! $f[3];
        # gff3 puts :\d in exons sometimes
        $id =~ s/:\d+$// if $gff == 3;
        push @{$exons{$id}}, \@f;
        # remember the first exon seen for this id; it supplies chr, strand,
        # attributes and the sort position of the transcript
        $trans{$id} = \@f if !$trans{$id};
    } elsif ($f[2] eq 'start_codon') {
        #optional, output codon start/stop as "thick" region in bed
        $sc{$id}->[0] = $f[3];
    } elsif ($f[2] eq 'stop_codon') {
        $sc{$id}->[1] = $f[4];
    } elsif ($f[2] eq 'miRNA' ) {
        $trans{$id} = \@f if !$trans{$id};
        push @{$exons{$id}}, \@f;
    }
}

# For a pipe, close() waits for the child and returns false when it exited
# non-zero, so a failing gunzip/unzip is reported instead of yielding a
# truncated BED.
if ($from_pipe) {
    close(IN) or die "Error reading $in: " . ($! ? $! : "decompressor exited with status " . ($? >> 8)) . "\n";
} else {
    close IN;
}

for $id (
    # sort by chr then pos
    sort {
        $trans{$a}->[0] eq $trans{$b}->[0] ?
        $trans{$a}->[3] <=> $trans{$b}->[3] :
        $trans{$a}->[0] cmp $trans{$b}->[0]
    } (keys(%trans)) ) {
    my ($chr, undef, undef, undef, undef, undef, $dir, undef, $attr) = @{$trans{$id}};
    # thick start / end come only from start_codon / stop_codon features
    my ($cds, $cde);
    ($cds, $cde) = @{$sc{$id}} if $sc{$id};

    # sort by pos
    my @ex = sort {
        $a->[3] <=> $b->[3]
    } @{$exons{$id}};

    my $beg = $ex[0][3];
    my $end = $ex[-1][4];

    if ($dir eq '-') {
        # swap
        $tmp=$cds;
        $cds=$cde;
        $cde=$tmp;
        $cds -= 2 if $cds;
        $cde += 2 if $cde;
    }

    # not specified, just use exons
    $cds = $beg if !$cds;
    $cde = $end if !$cde;

    # adjust start for bed
    --$beg; --$cds;

    my $exn = @ex; # exon count
    my $exst = join ",", map {$_->[3]-$beg-1} @ex; # exon start
    my $exsz = join ",", map {$_->[4]-$_->[3]+1} @ex; # exon size

    my $gene_id;
    my $extend = "";
    if ($extended) {
        ($gene_id) = $attr =~ /gene_name "([^"]+)"/;
        ($gene_id) = $attr =~ /gene_id "([^"]+)"/ unless $gene_id;
        $extend="\t$gene_id";
    }
    # added an extra comma to make it look exactly like ucsc's beds
    print "$chr\t$beg\t$end\t$id\t0\t$dir\t$cds\t$cde\t0\t$exn\t$exsz,\t$exst,$extend\n";
}
import os
import errno
import argparse

############################################
## COMMAND-LINE DESCRIPTION
############################################

Description = 'Create IGV session file from a list of files and associated colours - ".bed", ".bw", ".bigwig", ".tdf", ".gtf" files currently supported.'
Epilog = """Example usage: python igv_files_to_session.py <XML_OUT> <LIST_FILE> <GENOME>"""

## Colour used for a track when the list file gives none
DEFAULT_COLOUR = '0,0,178'


def _build_arg_parser():
    """Return the argparse parser describing the script's command line."""
    argParser = argparse.ArgumentParser(description=Description, epilog=Epilog)

    ## REQUIRED PARAMETERS
    argParser.add_argument('XML_OUT', help="XML output file.")
    argParser.add_argument('LIST_FILE', help="Tab-delimited file containing two columns i.e. file_name\tcolour. Header isnt required.")
    argParser.add_argument('GENOME', help="Full path to genome fasta file or shorthand for genome available in IGV e.g. hg19.")

    ## OPTIONAL PARAMETERS
    argParser.add_argument('-pp', '--path_prefix', type=str, dest="PATH_PREFIX", default='', help="Path prefix to be added at beginning of all files in input list file.")
    return argParser

############################################
## HELPER FUNCTIONS
############################################

def makedir(path):
    """Create directory `path` (with parents) unless it is empty or already exists.

    Any OSError other than "already exists" is re-raised.
    """
    if not len(path) == 0:
        try:
            os.makedirs(path)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                raise

############################################
## MAIN FUNCTION
############################################

def igv_files_to_session(XMLOut,ListFile,Genome,PathPrefix=''):
    """Write an IGV session XML listing every file named in `ListFile`.

    Parameters:
        XMLOut     -- path of the XML file to write; its directory is created
                      if needed.
        ListFile   -- tab-delimited text file, one track per line:
                      file_name<TAB>colour. Blank lines are skipped and a
                      missing/empty colour falls back to DEFAULT_COLOUR.
        Genome     -- value written to the `genome` attribute of <Session>.
        PathPrefix -- string prepended (after stripping whitespace) to every
                      file name.

    Every listed file becomes a <Resource>. A <Track> is emitted for each one
    except '.bam' files; the track layout depends on the lower-cased extension.
    """
    makedir(os.path.dirname(XMLOut))

    fileList = []
    ## The original closed an undefined `fout` here (NameError on every run);
    ## the context manager closes the list file, also on errors.
    with open(ListFile,'r') as fin:
        for line in fin:
            fields = line.strip().split('\t')
            if fields == ['']:
                continue  # blank line
            ifile = fields[0]
            ## strip() above removes a trailing tab, so an empty colour column
            ## shows up as a single field rather than as an empty second one
            colour = fields[1] if len(fields) > 1 else ''
            if len(colour.strip()) == 0:
                colour = DEFAULT_COLOUR
            fileList.append((PathPrefix.strip()+ifile,colour))

    ## ADD RESOURCES SECTION
    XMLStr = '<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n'
    XMLStr += '<Session genome="%s" hasGeneTrack="true" hasSequenceTrack="true" locus="All" version="8">\n' % (Genome)
    XMLStr += '\t<Resources>\n'
    for ifile,colour in fileList:
        XMLStr += '\t\t<Resource path="%s"/>\n' % (ifile)
    XMLStr += '\t</Resources>\n'

    ## ADD PANEL SECTION
    XMLStr += '\t<Panel height="1160" name="DataPanel" width="1897">\n'
    for ifile,colour in fileList:
        extension = os.path.splitext(ifile)[1].lower()
        if extension in ['.bed','.broadpeak','.narrowpeak']:
            XMLStr += '\t\t<Track altColor="0,0,178" autoScale="false" clazz="org.broad.igv.track.FeatureTrack" color="%s" ' % (colour)
            XMLStr += 'displayMode="SQUISHED" featureVisibilityWindow="-1" fontSize="10" height="20" '
            XMLStr += 'id="%s" name="%s" renderer="BASIC_FEATURE" sortable="false" visible="true" windowFunction="count"/>\n' % (ifile,os.path.basename(ifile))
        elif extension in ['.bw', '.bigwig', '.tdf']:
            XMLStr += '\t\t<Track altColor="0,0,178" autoScale="true" clazz="org.broad.igv.track.DataSourceTrack" color="%s" ' % (colour)
            XMLStr += 'displayMode="COLLAPSED" featureVisibilityWindow="-1" fontSize="10" height="30" '
            XMLStr += 'id="%s" name="%s" normalize="false" renderer="BAR_CHART" sortable="true" visible="true" windowFunction="mean">\n' % (ifile,os.path.basename(ifile))
            XMLStr += '\t\t\t<DataRange baseline="0.0" drawBaseline="true" flipAxis="false" maximum="10" minimum="0.0" type="LINEAR"/>\n'
            XMLStr += '\t\t</Track>\n'
        elif extension in ['.gtf']:
            XMLStr += '\t\t<Track altColor="0,0,178" autoScale="false" clazz="org.broad.igv.track.FeatureTrack" color="%s" ' % (colour)
            XMLStr += 'displayMode="COLLAPSED" featureVisibilityWindow="-1" fontSize="10" '
            XMLStr += 'id="%s" name="%s" renderer="BASIC_FEATURE" sortable="false" visible="true" windowFunction="count"/>\n' % (ifile,os.path.basename(ifile))
        elif extension in ['.bam']:
            ## listed as a resource only; no track element is written
            pass
        else:
            ## unknown extensions get the same feature track as BED-like files
            XMLStr += '\t\t<Track altColor="0,0,178" autoScale="false" clazz="org.broad.igv.track.FeatureTrack" color="%s" ' % (colour)
            XMLStr += 'displayMode="SQUISHED" featureVisibilityWindow="-1" fontSize="10" height="20" '
            XMLStr += 'id="%s" name="%s" renderer="BASIC_FEATURE" sortable="false" visible="true" windowFunction="count"/>\n' % (ifile,os.path.basename(ifile))

    XMLStr += '\t</Panel>\n'
    #XMLStr += '\t<HiddenAttributes>\n\t\t<Attribute name="DATA FILE"/>\n\t\t<Attribute name="DATA TYPE"/>\n\t\t<Attribute name="NAME"/>\n\t</HiddenAttributes>\n'
    XMLStr += '</Session>'
    with open(XMLOut,'w') as fout:
        fout.write(XMLStr)

############################################
## RUN FUNCTION
############################################

## Guarded so that importing this file does not parse sys.argv.
if __name__ == '__main__':
    args = _build_arg_parser().parse_args()
    igv_files_to_session(XMLOut=args.XML_OUT,ListFile=args.LIST_FILE,Genome=args.GENOME,PathPrefix=args.PATH_PREFIX)
import os
import errno
import argparse

############################################
## COMMAND-LINE DESCRIPTION
############################################

Description = 'Add sample boolean files and aggregate columns from merged MACS narrow or broad peak file.'
Epilog = """Example usage: python macs2_merged_expand.py <MERGED_INTERVAL_FILE> <SAMPLE_NAME_LIST> <OUTFILE> --is_narrow_peak --min_replicates 1"""


def _build_arg_parser():
    """Return the argparse parser describing the script's command line."""
    argParser = argparse.ArgumentParser(description=Description, epilog=Epilog)

    ## REQUIRED PARAMETERS
    argParser.add_argument('MERGED_INTERVAL_FILE', help="Merged MACS2 interval file created using linux sort and mergeBed.")
    argParser.add_argument('SAMPLE_NAME_LIST', help="Comma-separated list of sample names as named in individual MACS2 broadPeak/narrowPeak output file e.g. SAMPLE_R1 for SAMPLE_R1_peak_1.")
    ## OUTFILE is opened for writing as a file, so the help text says so
    argParser.add_argument('OUTFILE', help="Full path to output file.")

    ## OPTIONAL PARAMETERS
    argParser.add_argument('-in', '--is_narrow_peak', dest="IS_NARROW_PEAK", help="Whether merged interval file was generated from narrow or broad peak files (default: False).",action='store_true')
    argParser.add_argument('-mr', '--min_replicates', type=int, dest="MIN_REPLICATES", default=1, help="Minumum number of replicates per sample required to contribute to merged peak (default: 1).")
    return argParser

############################################
## HELPER FUNCTIONS
############################################

def makedir(path):
    """Create directory `path` (with parents) unless it is empty or already exists.

    Any OSError other than "already exists" is re-raised.
    """
    if not len(path) == 0:
        try:
            os.makedirs(path)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                raise

############################################
## MAIN FUNCTION
############################################

def macs2_merged_expand(MergedIntervalTxtFile,SampleNameList,OutFile,isNarrow=False,minReplicates=1):
    """Expand a merged MACS2 interval file into one row per interval with per-sample columns.

    MergedIntervalTxtFile is the file created using the commands below:
      1) broadPeak
         sort -k1,1 -k2,2n <MACS_BROADPEAK_FILES_LIST> | mergeBed -c 2,3,4,5,6,7,8,9 -o collapse,collapse,collapse,collapse,collapse,collapse,collapse,collapse > merged_peaks.txt
      2) narrowPeak
         sort -k1,1 -k2,2n <MACS_NARROWPEAK_FILE_LIST> | mergeBed -c 2,3,4,5,6,7,8,9,10 -o collapse,collapse,collapse,collapse,collapse,collapse,collapse,collapse,collapse > merged_peaks.txt

    Parameters:
        MergedIntervalTxtFile -- path of the merged interval file (see above).
        SampleNameList        -- sample names; output columns follow their
                                 sorted order.
        OutFile               -- path of the tab-delimited table to write; its
                                 directory is created if needed.
        isNarrow              -- when True an extra collapsed summit column
                                 (field 12) is read and '<sample>.summit'
                                 columns are written.
        minReplicates         -- an interval keeps a sample only if at least
                                 this many distinct samples of the same group
                                 have a peak in it.

    A peak name is split on '_': dropping the last two parts gives the sample
    id, dropping one more part gives the group id. Intervals in which no
    sample passes the replicate threshold are not written.

    A second file, OutFile[:-4] + '.intersect.txt', lists every combination of
    samples ('&'-joined) with the number of written intervals showing exactly
    that combination, most frequent first.

    Blank input lines are skipped. Both files are closed even if a malformed
    line raises.
    """
    makedir(os.path.dirname(OutFile))

    SampleNameList = sorted(SampleNameList)

    ## Per-sample value columns, in output order
    metrics = ['fc','qval','pval','start','end']
    if isNarrow:
        metrics.append('summit')

    oFields = ['chr','start','end','interval_id','num_peaks','num_samples']
    oFields += [x+'.bool' for x in SampleNameList]
    for metric in metrics:
        oFields += [x+'.'+metric for x in SampleNameList]

    combFreqDict = {}
    totalOutIntervals = 0
    with open(MergedIntervalTxtFile,'r') as fin, open(OutFile,'w') as fout:
        fout.write('\t'.join(oFields) + '\n')
        for line in fin:
            lspl = line.strip().split('\t')
            if lspl == ['']:
                continue  # blank line

            chromID = lspl[0]; mstart = int(lspl[1]); mend = int(lspl[2])
            names = lspl[5].split(',')
            ## collapsed per-peak columns, parsed up front so malformed numbers fail early
            columns = {
                'start': [int(x) for x in lspl[3].split(',')],
                'end': [int(x) for x in lspl[4].split(',')],
                'fc': [float(x) for x in lspl[8].split(',')],
                'pval': [float(x) for x in lspl[9].split(',')],
                'qval': [float(x) for x in lspl[10].split(',')],
            }
            if isNarrow:
                columns['summit'] = [int(x) for x in lspl[11].split(',')]

            ## SAMPLE ID PER PEAK: PEAK NAME WITHOUT ITS LAST TWO '_' PARTS
            sampleIDs = ['_'.join(x.split('_')[:-2]) for x in names]

            ## GROUP SAMPLES BY REMOVING TRAILING *_R*
            groupDict = {}
            for sID in sampleIDs:
                gID = '_'.join(sID.split('_')[:-1])
                members = groupDict.setdefault(gID, [])
                if sID not in members:
                    members.append(sID)

            ## GET SAMPLES THAT PASS REPLICATE THRESHOLD
            passRepThresh = set()
            for sIDs in groupDict.values():
                if len(sIDs) >= minReplicates:
                    passRepThresh.update(sIDs)

            ## GET VALUES FROM INDIVIDUAL PEAK SETS: sample -> metric -> [str, ...]
            valueDict = {}
            for idx, sample in enumerate(sampleIDs):
                if sample in passRepThresh:
                    perMetric = valueDict.setdefault(sample, dict((m, []) for m in metrics))
                    for m in metrics:
                        perMetric[m].append(str(columns[m][idx]))

            samples = sorted(valueDict.keys())
            if samples:
                oList = [chromID, str(mstart), str(mend), 'Interval_'+str(totalOutIntervals+1), str(len(names)), str(len(samples))]
                oList += ['TRUE' if x in valueDict else 'FALSE' for x in SampleNameList]
                for m in metrics:
                    oList += [';'.join(valueDict[x][m]) if x in valueDict else 'NA' for x in SampleNameList]
                fout.write('\t'.join(oList) + '\n')

                tsamples = tuple(samples)
                combFreqDict[tsamples] = combFreqDict.get(tsamples, 0) + 1
                totalOutIntervals += 1

    ## WRITE FILE FOR INTERVAL INTERSECT ACROSS SAMPLES.
    ## COMPATIBLE WITH UPSETR PACKAGE.
    with open(OutFile[:-4]+'.intersect.txt','w') as fout:
        combFreqItems = sorted([(combFreqDict[x],x) for x in combFreqDict.keys()],reverse=True)
        for k,v in combFreqItems:
            fout.write('%s\t%s\n' % ('&'.join(v),k))

############################################
## RUN FUNCTION
############################################

## Guarded so that importing this file does not parse sys.argv.
if __name__ == '__main__':
    args = _build_arg_parser().parse_args()
    macs2_merged_expand(MergedIntervalTxtFile=args.MERGED_INTERVAL_FILE,SampleNameList=args.SAMPLE_NAME_LIST.split(','),OutFile=args.OUTFILE,isNarrow=args.IS_NARROW_PEAK,minReplicates=args.MIN_REPLICATES)
## Summarise and plot HOMER annotatePeaks output for a set of samples:
## writes <outprefix>.summary.txt (feature counts per sample) and
## <outprefix>.plots.pdf (feature fractions, gene-distance fractions,
## distance-to-TSS violins) into the output directory.

library(optparse)
library(ggplot2)
library(reshape2)
library(scales)

## ---------------------------------------------------------------
## Command-line parameters
## ---------------------------------------------------------------

option_list <- list(
    make_option(c("-i", "--homer_files"), type="character", default=NULL, help="Comma-separated list of homer annotated text files.", metavar="path"),
    make_option(c("-s", "--sample_ids"), type="character", default=NULL, help="Comma-separated list of sample ids associated with homer annotated text files. Must be unique and in same order as homer files input.", metavar="string"),
    make_option(c("-o", "--outdir"), type="character", default='./', help="Output directory", metavar="path"),
    make_option(c("-p", "--outprefix"), type="character", default='homer_annotation', help="Output prefix", metavar="string")
)

opt_parser <- OptionParser(option_list=option_list)
opt <- parse_args(opt_parser)

## Print the usage text, then abort with `msg`
abort_with_help <- function(msg) {
    print_help(opt_parser)
    stop(msg, call.=FALSE)
}

if (is.null(opt$homer_files)) {
    abort_with_help("At least one homer annotated file must be supplied")
}
if (is.null(opt$sample_ids)) {
    abort_with_help("Please provide sample ids associated with homer files.")
}

if (!file.exists(opt$outdir)) {
    dir.create(opt$outdir,recursive=TRUE)
}

HomerFiles <- unlist(strsplit(opt$homer_files,","))
SampleIDs <- unlist(strsplit(opt$sample_ids,","))
if (length(HomerFiles) != length(SampleIDs)) {
    abort_with_help("Number of sample ids must equal number of homer annotated files.")
}

## ---------------------------------------------------------------
## Read in data
## ---------------------------------------------------------------

plot.dat <- data.frame()
plot.dist.dat <- data.frame()
plot.feature.dat <- data.frame()
for (idx in 1:length(HomerFiles)) {

    sampleid = SampleIDs[idx]
    anno.dat <- read.csv(HomerFiles[idx], sep="\t", header=TRUE)[,c("Annotation","Distance.to.TSS","Nearest.PromoterID")]

    ## Peaks without a TSS distance: large placeholder distance, "Unassigned" labels
    unassigned <- which(is.na(as.character(anno.dat$Distance.to.TSS)))
    anno.dat$Distance.to.TSS[unassigned] <- 1000000
    for (label.col in c("Annotation","Nearest.PromoterID")) {
        label.values <- as.character(anno.dat[[label.col]])
        label.values[unassigned] <- "Unassigned"
        anno.dat[[label.col]] <- as.factor(label.values)
    }

    anno.dat$name <- rep(sampleid,nrow(anno.dat))
    ## absolute distance, shifted by one so the log-scaled axis never sees zero
    anno.dat$Distance.to.TSS <- abs(anno.dat$Distance.to.TSS) + 1
    plot.dat <- rbind(plot.dat,anno.dat)

    ## Feature counts: first word of the annotation string
    feature.word <- as.character(lapply(strsplit(as.character(anno.dat$Annotation)," "), function(x) x[1]))
    feature.count <- as.data.frame(table(feature.word))
    colnames(feature.count) <- c("feature",sampleid)
    plot.feature.dat <- rbind(plot.feature.dat,melt(feature.count))

    ## One row per promoter id: the peak with the smallest distance
    by.distance <- anno.dat[order(anno.dat$Distance.to.TSS),]
    closest <- by.distance[!duplicated(by.distance$Nearest.PromoterID), ]
    dist.bin <- rep("> 10kb",nrow(closest))
    ## widest threshold first so that tighter bins overwrite it
    for (kb in c(10, 5, 2)) {
        dist.bin[which(closest$Distance.to.TSS < kb * 1000)] <- paste0("< ", kb, "kb")
    }
    dist.count <- as.data.frame(table(dist.bin))
    colnames(dist.count) <- c("distance",sampleid)
    plot.dist.dat <- rbind(plot.dist.dat,melt(dist.count))

}

## Factor whose levels are the alphabetically sorted unique values
sorted_factor <- function(x) {
    factor(x, levels=sort(unique(as.character(x))))
}
plot.dat$name <- sorted_factor(plot.dat$name)
plot.dist.dat$variable <- sorted_factor(plot.dist.dat$variable)
plot.feature.dat$variable <- sorted_factor(plot.feature.dat$variable)

summary.dat <- dcast(plot.feature.dat, variable ~ feature, value.var="value")
colnames(summary.dat)[1] <- "sample"
SummaryFile <- file.path(opt$outdir,paste(opt$outprefix,".summary.txt",sep=""))
write.table(summary.dat,file=SummaryFile,sep="\t",row.names=F,col.names=T,quote=F)

## ---------------------------------------------------------------
## Plots
## ---------------------------------------------------------------

## Theme elements shared by all three plots
axes_only_theme <- theme(panel.grid.major = element_blank(),
                         panel.grid.minor = element_blank(),
                         panel.background = element_blank(),
                         axis.text.y = element_text(colour="black"),
                         axis.text.x= element_text(colour="black",face="bold"),
                         axis.line.x = element_line(size = 1, colour = "black", linetype = "solid"),
                         axis.line.y = element_line(size = 1, colour = "black", linetype = "solid"))

PlotFile <- file.path(opt$outdir,paste(opt$outprefix,".plots.pdf",sep=""))
pdf(PlotFile,height=6,width=3*length(HomerFiles))

## Feature count stacked barplot
feature.plot <- ggplot(plot.feature.dat, aes(x=variable, y=value, group=feature)) +
    geom_bar(stat="identity", position = "fill", aes(colour=feature,fill=feature), alpha = 0.3) +
    xlab("") +
    ylab("% Feature") +
    ggtitle("Peak Location Relative to Annotation") +
    scale_y_continuous(labels = percent_format()) +
    axes_only_theme
print(feature.plot)

## Distance to closest gene across all peaks stacked barplot
distance.plot <- ggplot(plot.dist.dat, aes(x=variable, y=value, group=distance)) +
    geom_bar(stat="identity", position = "fill", aes(colour=distance,fill=distance), alpha = 0.3) +
    xlab("") +
    ylab("% Unique genes to closest peak") +
    ggtitle("Distance of Closest Peak to Gene") +
    scale_y_continuous(labels = percent_format()) +
    axes_only_theme
print(distance.plot)

## Violin plot of peak distance to TSS
tss.plot <- ggplot(plot.dat, aes(x=name, y=Distance.to.TSS)) +
    geom_violin(aes(colour=name,fill=name), alpha = 0.3) +
    geom_boxplot(width=0.1) +
    xlab("") +
    ylab(expression(log[10]*" distance to TSS")) +
    ggtitle("Peak Distribution Relative to TSS") +
    scale_y_continuous(trans='log10',breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) +
    theme(legend.position="none") +
    axes_only_theme
print(tss.plot)
dev.off()
## Summarise and plot MACS2 peak files for a set of samples:
## writes <outprefix>.summary.txt (summary statistics per sample and measure)
## and <outprefix>.plots.pdf (peak counts plus violin plots of length,
## fold-enrichment, q-value and p-value) into the output directory.

library(optparse)
library(ggplot2)
library(reshape2)
library(scales)

## ---------------------------------------------------------------
## Command-line parameters
## ---------------------------------------------------------------

option_list <- list(
    make_option(c("-i", "--peak_files"), type="character", default=NULL, help="Comma-separated list of peak files.", metavar="path"),
    make_option(c("-s", "--sample_ids"), type="character", default=NULL, help="Comma-separated list of sample ids associated with peak files. Must be unique and in same order as peaks files input.", metavar="string"),
    make_option(c("-o", "--outdir"), type="character", default='./', help="Output directory", metavar="path"),
    make_option(c("-p", "--outprefix"), type="character", default='macs2_peakqc', help="Output prefix", metavar="string")
)

opt_parser <- OptionParser(option_list=option_list)
opt <- parse_args(opt_parser)

## Print the usage text, then abort with `msg`
abort_with_help <- function(msg) {
    print_help(opt_parser)
    stop(msg, call.=FALSE)
}

if (is.null(opt$peak_files)) {
    abort_with_help("At least one peak file must be supplied")
}
if (is.null(opt$sample_ids)) {
    abort_with_help("Please provide sample ids associated with peak files.")
}

if (!file.exists(opt$outdir)) {
    dir.create(opt$outdir,recursive=TRUE)
}

PeakFiles <- unlist(strsplit(opt$peak_files,","))
SampleIDs <- unlist(strsplit(opt$sample_ids,","))
if (length(PeakFiles) != length(SampleIDs)) {
    abort_with_help("Number of sample ids must equal number of homer annotated files.")
}

## ---------------------------------------------------------------
## Read in data
## ---------------------------------------------------------------

## Column names of a broadPeak file; narrowPeak files carry one more column
broad.header <- c("chrom","start","end","name","pileup", "strand", "fold", "-log10(pvalue)","-log10(qvalue)")

plot.dat <- data.frame()
summary.dat <- data.frame()
for (idx in 1:length(PeakFiles)) {

    sampleid = SampleIDs[idx]

    ## the text after the last '.' of the file name decides the layout
    fsplit <- unlist(strsplit(basename(PeakFiles[idx]), split='.',fixed=TRUE))
    isNarrow <- FALSE
    header <- broad.header
    if (fsplit[length(fsplit)] == 'narrowPeak') {
        isNarrow <- TRUE
        header <- c(broad.header,"summit")
    }
    peaks <- read.table(PeakFiles[idx], sep="\t", header=FALSE)
    colnames(peaks) <- header

    ## Summary statistics per measure
    peaks.dat <- peaks[,c('fold','-log10(qvalue)','-log10(pvalue)')]
    peaks.dat$length <- (peaks$end - peaks$start)
    for (cname in colnames(peaks.dat)) {
        sdat <- summary(peaks.dat[,cname])
        sdat["num_peaks"] <- nrow(peaks.dat)
        sdat["measure"] <- cname
        sdat["sample"] <- sampleid
        ## turn the named vector into a one-row table before appending
        sdat <- t(data.frame(x=matrix(sdat),row.names=names(sdat)))
        summary.dat <- rbind(summary.dat,sdat)
    }
    colnames(peaks.dat) <- c('fold','fdr','pvalue','length')
    peaks.dat$name <- rep(sampleid,nrow(peaks.dat))
    plot.dat <- rbind(plot.dat,peaks.dat)
}
plot.dat$name <- factor(plot.dat$name, levels=sort(unique(as.character(plot.dat$name))))

SummaryFile <- file.path(opt$outdir,paste(opt$outprefix,".summary.txt",sep=""))
write.table(summary.dat,file=SummaryFile,quote=FALSE,sep="\t",row.names=FALSE,col.names=TRUE)

## ---------------------------------------------------------------
## Plots
## ---------------------------------------------------------------

## Build a violin + boxplot of column `y` grouped and coloured by column `x`.
##   plot.dat : data frame holding both columns
##   x, y     : column names given as strings
##   ylab     : y-axis label, title : plot title
##   log      : 10 or 2 selects a log10 / log2 y scale; any other value
##              keeps the default linear scale
## Returns the ggplot object (nothing is drawn here).
violin.plot <- function(plot.dat,x,y,ylab,title,log) {

    violin <- ggplot(plot.dat, aes_string(x=x, y=y)) +
        geom_violin(aes_string(colour=x,fill=x), alpha = 0.3) +
        geom_boxplot(width=0.1) +
        xlab("") +
        ylab(ylab) +
        ggtitle(title) +
        theme(legend.position="none",
              panel.grid.major = element_blank(),
              panel.grid.minor = element_blank(),
              panel.background = element_blank(),
              axis.text.y = element_text(colour="black"),
              axis.text.x= element_text(colour="black",face="bold"),
              axis.line.x = element_line(size = 1, colour = "black", linetype = "solid"),
              axis.line.y = element_line(size = 1, colour = "black", linetype = "solid"))
    if (log == 10) {
        violin <- violin + scale_y_continuous(trans='log10',breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x)))
    } else if (log == 2) {
        violin <- violin + scale_y_continuous(trans='log2',breaks = trans_breaks("log2", function(x) 2^x), labels = trans_format("log2", math_format(2^.x)))
    }
    return(violin)
}

PlotFile <- file.path(opt$outdir,paste(opt$outprefix,".plots.pdf",sep=""))
pdf(PlotFile,height=6,width=3*length(unique(plot.dat$name)))

## Peak count plot
peak.count.dat <- as.data.frame(table(plot.dat$name))
colnames(peak.count.dat) <- c("name","count")
count.plot <- ggplot(peak.count.dat, aes(x=name, y=count)) +
    geom_bar(stat="identity",aes(colour=name,fill=name), position = "dodge", width = 0.8, alpha = 0.3) +
    xlab("") +
    ylab("Number of peaks") +
    ggtitle("Peak count") +
    theme(legend.position="none",
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.text.y = element_text(colour="black"),
          axis.text.x= element_text(colour="black",face="bold"),
          axis.line.x = element_line(size = 1, colour = "black", linetype = "solid"),
          axis.line.y = element_line(size = 1, colour = "black", linetype = "solid")) +
    geom_text(aes(label = count, x = name, y = count), position = position_dodge(width = 0.8), vjust = -0.6)
print(count.plot)

## Violin plots
print(violin.plot(plot.dat=plot.dat,x="name",y="length",ylab=expression(log[10]*" peak length"),title="Peak length distribution",log=10))
print(violin.plot(plot.dat=plot.dat,x="name",y="fold",ylab=expression(log[2]*" fold-enrichment"),title="Fold-change distribution",log=2))
print(violin.plot(plot.dat=plot.dat,x="name",y="fdr",ylab=expression(-log[10]*" qvalue"),title="FDR distribution",log=-1))
print(violin.plot(plot.dat=plot.dat,x="name",y="pvalue",ylab=expression(-log[10]*" pvalue"),title="Pvalue distribution",log=-1))
dev.off()
+################################################ +################################################ +## LOAD LIBRARIES ## +################################################ +################################################ + +library(optparse) +library(UpSetR) + +################################################ +################################################ +## PARSE COMMAND-LINE PARAMETERS ## +################################################ +################################################ + +option_list <- list(make_option(c("-i", "--input_file"), type="character", default=NULL, help="Path to tab-delimited file containing two columns i.e sample1&sample2&sample3 indicating intersect between samples <TAB> set size.", metavar="path"), + make_option(c("-o", "--output_file"), type="character", default=NULL, help="Path to output file with '.pdf' extension.", metavar="path")) + +opt_parser <- OptionParser(option_list=option_list) +opt <- parse_args(opt_parser) + +if (is.null(opt$input_file)){ + print_help(opt_parser) + stop("Input file must be supplied.", call.=FALSE) +} +if (is.null(opt$output_file)){ + print_help(opt_parser) + stop("Output pdf file must be supplied.", call.=FALSE) +} + +OutDir <- dirname(opt$output_file) +if (file.exists(OutDir) == FALSE) { + dir.create(OutDir,recursive=TRUE) +} + +################################################ +################################################ +## PLOT DATA ## +################################################ +################################################ + +comb.dat <- read.table(opt$input_file,sep="\t",header=FALSE) +comb.vec <- comb.dat[,2] +comb.vec <- setNames(comb.vec,comb.dat[,1]) +sets <- sort(unique(unlist(strsplit(names(comb.vec),split='&'))), decreasing = TRUE) + +nintersects = length(names(comb.vec)) +if (nintersects > 70) { + nintersects <- 70 + comb.vec <- sort(comb.vec, decreasing = TRUE)[1:70] + sets <- sort(unique(unlist(strsplit(names(comb.vec),split='&'))), decreasing = TRUE) +} + 
+pdf(opt$output_file,onefile=F,height=10,width=20) + +upset( + fromExpression(comb.vec), + nsets = length(sets), + nintersects = nintersects, + sets = sets, + keep.order = TRUE, + sets.bar.color = "#56B4E9", + point.size = 3, + line.size = 1, + mb.ratio = c(0.55, 0.45), + order.by = "freq", + number.angles = 30, + text.scale = c(1.5, 1.5, 1.5, 1.5, 1.5, 1.2) +) + +dev.off() + +################################################ +################################################ +################################################ +################################################ diff --git a/conf/base.config b/conf/base.config index daf12c7c7c95012784ff4de442e00c95c9358936..b38d1fe3b3da8b531cd18d6de31ddd1ddc17ef32 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,7 +10,6 @@ process { - // TODO nf-core: Check the defaults for all processes cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } @@ -24,7 +23,6 @@ process { // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. // If possible, it would be nice to keep the same label naming convention when // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. 
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { check_max( 1 , 'cpus' ) } diff --git a/conf/igenomes.config b/conf/igenomes.config index 7a1b3ac6d3a3d6c4ec0af72f7879f4958cf35621..7f282cee1097f4eaf2801b07f6d1b1b1cdbe18e9 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -21,8 +21,14 @@ params { bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" mito_name = "MT" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" + blacklist = "${projectDir}/assets/blacklists/v1.0/GRCh37-blacklist.v1.bed" + macs_gsize = [ + "50" : 2684219875, + "75" : 2733035409, + "100" : 2774803719, + "150" : 2824648687, + "200" : 2848794782 + ] } 'GRCh38' { fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" @@ -33,8 +39,14 @@ params { gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + blacklist = "${projectDir}/assets/blacklists/v3.0/hg38-blacklist.v3.bed" + macs_gsize = [ + "50" : 2701262066, + "75" : 2749859687, + "100" : 2805665311, + "150" : 2862089864, + "200" : 2892537351 + ] } 'GRCm38' { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" @@ -46,8 +58,14 @@ params { bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" mito_name = "MT" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" + blacklist = "${projectDir}/assets/blacklists/v2.0/GRCm38-blacklist.v2.bed" + macs_gsize = [ 
+ "50" : 2307679482, + "75" : 2406655830, + "100" : 2466184610, + "150" : 2492306232, + "200" : 2519386924 + ] } 'TAIR10' { fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" @@ -59,6 +77,13 @@ params { bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" mito_name = "Mt" + macs_gsize = [ + "50" : 114339094, + "75" : 115317469, + "100" : 118459858, + "150" : 118504138, + "200" : 117723393 + ] } 'EB2' { fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" @@ -69,6 +94,13 @@ params { gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" + macs_gsize = [ + "50" : 4150072, + "75" : 4191132, + "100" : 4198752, + "150" : 4176800, + "200" : 4197072 + ] } 'UMD3.1' { fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" @@ -80,6 +112,13 @@ params { bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" mito_name = "MT" + macs_gsize = [ + "50" : 2370644326, + "75" : 2480511357, + "100" : 2567220492, + "150" : 2594494201, + "200" : 2648740387 + ] } 'WBcel235' { fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" @@ -90,7 +129,13 @@ params { gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" mito_name = "MtDNA" - macs_gsize = "9e7" + macs_gsize = [ + "50" : 
95159402, + "75" : 96945370, + "100" : 98259898, + "150" : 98721103, + "200" : 98672558 + ] } 'CanFam3.1' { fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" @@ -102,6 +147,13 @@ params { bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" mito_name = "MT" + macs_gsize = [ + "50" : 2237684358, + "75" : 2279860111, + "100" : 2293979635, + "150" : 2300527794, + "200" : 2313332891 + ] } 'GRCz10' { fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" @@ -112,6 +164,13 @@ params { gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" mito_name = "MT" + macs_gsize = [ + "50" : 1172895610, + "75" : 1229400206, + "100" : 1253908756, + "150" : 1285330773, + "200" : 1292538906 + ] } 'BDGP6' { fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" @@ -122,7 +181,13 @@ params { gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" mito_name = "M" - macs_gsize = "1.2e8" + macs_gsize = [ + "50" : 123519388, + "75" : 124886264, + "100" : 126807034, + "150" : 126903604, + "200" : 128575605 + ] } 'EquCab2' { fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" @@ -134,6 +199,13 @@ params { bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" mito_name = "MT" + macs_gsize = [ + "50" : 2294980416, + "75" : 2289244826, + "100" : 2334155865, + "150" : 2343297042, + "200" : 
2350515523 + ] } 'EB1' { fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" @@ -144,6 +216,13 @@ params { gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" + macs_gsize = [ + "50" : 4481912, + "75" : 4485018, + "100" : 4468952, + "150" : 4489684, + "200" : 4527891 + ] } 'Galgal4' { fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" @@ -154,6 +233,13 @@ params { gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" mito_name = "MT" + macs_gsize = [ + "50" : 974987959, + "75" : 978772437, + "100" : 984935167, + "150" : 979442039, + "200" : 991678648 + ] } 'Gm01' { fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" @@ -164,6 +250,13 @@ params { gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" + macs_gsize = [ + "50" : 748112428, + "75" : 826455017, + "100" : 857283568, + "150" : 895077451, + "200" : 911783687 + ] } 'Mmul_1' { fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" @@ -175,6 +268,13 @@ params { bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" mito_name = "MT" + macs_gsize = [ + "50" : 2498932238, + "75" : 2598624693, + "100" : 2642166663, + "150" : 2661433343, + "200" 
: 2674888870 + ] } 'IRGSP-1.0' { fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" @@ -185,6 +285,13 @@ params { gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" mito_name = "Mt" + macs_gsize = [ + "50" : 322594956, + "75" : 337043804, + "100" : 345775274, + "150" : 355020671, + "200" : 363478234 + ] } 'CHIMP2.1.4' { fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" @@ -196,6 +303,13 @@ params { bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" mito_name = "MT" + macs_gsize = [ + "50" : 2576111695, + "75" : 2702821987, + "100" : 2733435831, + "150" : 2735167196, + "200" : 2738912507 + ] } 'Rnor_5.0' { fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" @@ -206,6 +320,13 @@ params { gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" mito_name = "MT" + macs_gsize = [ + "50" : 2303951475, + "75" : 2367071843, + "100" : 2402745922, + "150" : 2405692811, + "200" : 2407324495 + ] } 'Rnor_6.0' { fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" @@ -216,6 +337,13 @@ params { gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" mito_name = "MT" + macs_gsize = [ + "50" : 2375372135, + "75" : 2440746491, + "100" : 2480029900, + "150" : 2477334634, + "200" : 2478552171 + ] } 'R64-1-1' { fasta = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" @@ -226,7 +354,13 @@ params { gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" mito_name = "MT" - macs_gsize = "1.2e7" + macs_gsize = [ + "50" : 11624332, + "75" : 11693438, + "100" : 11777680, + "150" : 11783749, + "200" : 11825681 + ] } 'EF2' { fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" @@ -238,7 +372,13 @@ params { bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" mito_name = "MT" - macs_gsize = "1.21e7" + macs_gsize = [ + "50" : 12190646, + "75" : 12291456, + "100" : 12346649, + "150" : 12403911, + "200" : 12442064 + ] } 'Sbi1' { fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" @@ -249,6 +389,13 @@ params { gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" + macs_gsize = [ + "50" : 444102512, + "75" : 506986021, + "100" : 540037446, + "150" : 575130820, + "200" : 595857042 + ] } 'Sscrofa10.2' { fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" @@ -260,6 +407,13 @@ params { bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" mito_name = "MT" + macs_gsize = [ + "50" : 2105185708, + "75" : 2131615607, + "100" : 2149244400, + "150" : 2189757848, + "200" : 2203893315 + ] } 
'AGPv3' { fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" @@ -270,6 +424,13 @@ params { gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" mito_name = "Mt" + macs_gsize = [ + "50" : 1113453752, + "75" : 1392458449, + "100" : 1579923466, + "150" : 1729475311, + "200" : 1841419596 + ] } 'hg38' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" @@ -280,8 +441,14 @@ params { gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + blacklist = "${projectDir}/assets/blacklists/v3.0/hg38-blacklist.v3.bed" + macs_gsize = [ + "50" : 2701262066, + "75" : 2749859687, + "100" : 2805665311, + "150" : 2862089864, + "200" : 2892537351 + ] } 'hg19' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" @@ -293,8 +460,14 @@ params { bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" + blacklist = "${projectDir}/assets/blacklists/v1.0/hg19-blacklist.v1.bed" + macs_gsize = [ + "50" : 2684219875, + "75" : 2733035409, + "100" : 2774803719, + "150" : 2824648687, + "200" : 2848794782 + ] } 'mm10' { fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" @@ -306,8 +479,14 @@ params { bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" mito_name = "chrM" - macs_gsize = 
"1.87e9" - blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" + blacklist = "${projectDir}/assets/blacklists/v2.0/mm10-blacklist.v2.bed" + macs_gsize = [ + "50" : 2307679482, + "75" : 2406655830, + "100" : 2466184610, + "150" : 2492306232, + "200" : 2519386924 + ] } 'bosTau8' { fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" @@ -318,6 +497,13 @@ params { gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" mito_name = "chrM" + macs_gsize = [ + "50" : 2370644326, + "75" : 2480511357, + "100" : 2567220492, + "150" : 2594494201, + "200" : 2648740387 + ] } 'ce10' { fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" @@ -329,7 +515,13 @@ params { bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" mito_name = "chrM" - macs_gsize = "9e7" + macs_gsize = [ + "50" : 95156190, + "75" : 96995949, + "100" : 98287299, + "150" : 98879728, + "200" : 98769409 + ] } 'canFam3' { fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" @@ -341,6 +533,13 @@ params { bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" mito_name = "chrM" + macs_gsize = [ + "50" : 2237684358, + "75" : 2279860111, + "100" : 2293979635, + "150" : 2300527794, + "200" : 2313332891 + ] } 'danRer10' { fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" @@ -351,7 +550,13 @@ params { gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" 
mito_name = "chrM" - macs_gsize = "1.37e9" + macs_gsize = [ + "50" : 1172895610, + "75" : 1229400206, + "100" : 1253908756, + "150" : 1285330773, + "200" : 1292538906 + ] } 'dm6' { fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" @@ -362,7 +567,13 @@ params { gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" mito_name = "chrM" - macs_gsize = "1.2e8" + macs_gsize = [ + "50" : 123548253, + "75" : 124886264, + "100" : 126807034, + "150" : 126908682, + "200" : 128599061 + ] } 'equCab2' { fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" @@ -374,6 +585,13 @@ params { bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" mito_name = "chrM" + macs_gsize = [ + "50" : 2294980416, + "75" : 2289244826, + "100" : 2334155865, + "150" : 2343297042, + "200" : 2350515523 + ] } 'galGal4' { fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" @@ -385,6 +603,13 @@ params { bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" mito_name = "chrM" + macs_gsize = [ + "50" : 974987959, + "75" : 978772437, + "100" : 984935167, + "150" : 979442039, + "200" : 991678648 + ] } 'panTro4' { fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" @@ -396,6 +621,13 @@ params { bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" mito_name = "chrM" + macs_gsize = [ + "50" : 2576111695, + "75" : 2702821987, + "100" : 
2733435831, + "150" : 2735167196, + "200" : 2738912507 + ] } 'rn6' { fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" @@ -406,6 +638,13 @@ params { gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" mito_name = "chrM" + macs_gsize = [ + "50" : 2375372135, + "75" : 2440746491, + "100" : 2480029900, + "150" : 2477334634, + "200" : 2478552171 + ] } 'sacCer3' { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" @@ -415,7 +654,13 @@ params { bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" mito_name = "chrM" - macs_gsize = "1.2e7" + macs_gsize = [ + "50" : 11624332, + "75" : 11693438, + "100" : 11777680, + "150" : 11783749, + "200" : 11825681 + ] } 'susScr3' { fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" @@ -427,6 +672,13 @@ params { bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" mito_name = "chrM" + macs_gsize = [ + "50" : 2105185708, + "75" : 2131615607, + "100" : 2149244400, + "150" : 2189757848, + "200" : 2203893315 + ] } } } diff --git a/conf/modules.config b/conf/modules.config index da58a5d8817bdf25ac855f588c4fc67d840ad835..47784e03dc2b26a4fb706e243b0f4b432ecc8eb3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -10,15 +10,18 @@ ---------------------------------------------------------------------------------------- */ -process { +// +// General configuration options +// + +process { publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: 
params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: SAMPLESHEET_CHECK { + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:INPUT_CHECK:SAMPLESHEET_CHECK' { publishDir = [ path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, @@ -26,10 +29,6 @@ process { ] } - withName: FASTQC { - ext.args = '--quiet' - } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, @@ -37,5 +36,696 @@ process { pattern: '*_versions.yml' ] } +} + +// +// Genome preparation options +// + +process { + withName: 'GUNZIP_.*' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: 'UNTAR_.*' { + ext.args2 = '--no-same-owner' + publishDir = [ + path: { "${params.outdir}/genome/index" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: 'BWA_INDEX|BOWTIE2_BUILD|STAR_GENOMEGENERATE' { + publishDir = [ + path: { "${params.outdir}/genome/index" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: 'UNTAR_CHROMAP_INDEX|CHROMAP_INDEX' { + publishDir = [ + path: { "${params.outdir}/genome/index/chromap" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: 'GFFREAD' { + ext.args = '--keep-exon-attrs -F -T' + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference + ] + } + + withName: 'GTF2BED' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: 'CUSTOM_GETCHROMSIZES' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'GENOME_BLACKLIST_REGIONS' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} + +// +// Read QC and trimming options +// +if (!(params.skip_fastqc || params.skip_qc)) { + process { + withName: '.*:FASTQC_TRIMGALORE:FASTQC' { + ext.args = '--quiet' + publishDir = [ + [ + path: { "${params.outdir}/fastqc" }, + mode: params.publish_dir_mode, + pattern: "*.{html}" + ], + [ + path: { "${params.outdir}/fastqc/zips" }, + mode: params.publish_dir_mode, + pattern: "*.{zip}" + ] + ] + } + } +} + +if (!params.skip_trimming) { + process { + withName: '.*:FASTQC_TRIMGALORE:TRIMGALORE' { + ext.args = [ + '--fastqc', + params.trim_nextseq > 0 ? 
"--nextseq ${params.trim_nextseq}" : '' + ].join(' ').trim() + publishDir = [ + [ + path: { "${params.outdir}/trimgalore/fastqc" }, + mode: params.publish_dir_mode, + pattern: "*.{html}" + ], + [ + path: { "${params.outdir}/trimgalore/fastqc/zips" }, + mode: params.publish_dir_mode, + pattern: "*.{zip}" + ], + [ + path: { "${params.outdir}/trimgalore/logs" }, + mode: params.publish_dir_mode, + pattern: "*.txt" + ], + [ + path: { "${params.outdir}/trimgalore" }, + mode: params.publish_dir_mode, + pattern: "*.fq.gz", + enabled: params.save_trimmed + ] + ] + } + } +} + +process { + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:ALIGN_.*:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.Lb.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/library" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_align_intermeds + ] + } + + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:ALIGN_.*:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}/library" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_align_intermeds + ] + } + + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:ALIGN_.*:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:SAMTOOLS_.*' { + ext.prefix = { "${meta.id}.Lb.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/library/samtools_stats/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_align_intermeds + ] + } +} + +if (params.aligner == 'bwa') { + process { + withName: 'BWA_MEM' { + ext.args = { [ + '-M', + params.bwa_min_score ? " -T ${params.bwa_min_score}" : '', + meta.read_group ? 
"-R ${meta.read_group}": '' + ].join(' ').trim() } + ext.args2 = '-bhS -F 0x0100 -O BAM' + ext.prefix = { "${meta.id}.Lb" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/library" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: false + ] + } + } +} + +if (params.aligner == 'bowtie2') { + process { + withName: 'BOWTIE2_ALIGN' { + ext.args = '' + ext.prefix = { "${meta.id}.Lb" } + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/library" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: false + ], + [ + path: { "${params.outdir}/${params.aligner}/library/unmapped" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_unaligned + ] + ] + } + } +} + +if (params.aligner == 'chromap') { + process { + withName: CHROMAP_INDEX { + ext.args = '' + publishDir = [ + path: { "${params.outdir}/genome/${params.aligner}/index" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: CHROMAP_CHROMAP { + ext.args = '-l 2000 --low-mem --SAM' + ext.prefix = { "${meta.id}.Lb" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/library" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: false + ] + } + } +} + +if (params.aligner == 'star') { + process { + withName: '.*:ALIGN_STAR:STAR_ALIGN' { + ext.args = [ + '--runMode alignReads', + '--alignIntronMax 1', + '--alignEndsType EndToEnd', + '--outSAMtype BAM Unsorted', + '--readFilesCommand zcat', + '--runRNGseed 0', + '--outSAMattributes NH HI AS NM MD', + params.save_unaligned ? 
'--outReadsUnmapped Fastx' : '' + ].join(' ').trim() + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/library/log" }, + mode: params.publish_dir_mode, + pattern: '*.{out,tab}' + ], + [ + path: { "${params.outdir}/${params.aligner}/library" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: false + ], + [ + path: { "${params.outdir}/${params.aligner}/library/unmapped" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_unaligned + ] + ] + } + } +} + +process { + withName: 'PICARD_MERGESAMFILES' { + ext.args = '--SORT_ORDER coordinate --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' + ext.prefix = { "${meta.id}.mLb.sorted" } + publishDir = [ enabled: false ] + } + + withName: '.*:MARK_DUPLICATES_PICARD:PICARD_MARKDUPLICATES' { + ext.args = '--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' + ext.prefix = { "${meta.id}.mLb.mkD.sorted" } + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/picard_metrics" }, + mode: params.publish_dir_mode, + pattern: '*.metrics.txt' + ], + [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.save_align_intermeds + ] + ] + } + + withName: '.*:MARK_DUPLICATES_PICARD:SAMTOOLS_INDEX' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}', + enabled: params.save_align_intermeds + ] + } + + withName: '.*:MARK_DUPLICATES_PICARD:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.mLb.mkD.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: '*.{stats,flagstat,idxstats}' + ] + } + + // Should only be published when paired end data is used and save_align_intermeds is true + withName: 'BAM_FILTER' { + ext.prefix = { meta.single_end ? 
"${meta.id}.mLb.noPublish" : "${meta.id}.mLb.flT.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, + mode: params.publish_dir_mode, + pattern: '*.mLb.flT.sorted.bam', + enabled: params.save_align_intermeds + ] + } + + withName: 'BAM_REMOVE_ORPHANS' { + ext.args = '--only_fr_pairs' + ext.prefix = { "${meta.id}.mLb.clN" } + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:FILTER_BAM_BAMTOOLS:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.mLb.clN.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, + mode: params.publish_dir_mode, + pattern: '*.bam' + ] + } + + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:FILTER_BAM_BAMTOOLS:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { + ext.prefix = { "${meta.id}.mLb.clN.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}' + ] + } + + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:FILTER_BAM_BAMTOOLS:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.mLb.clN.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: "*.{stats,flagstat,idxstats}" + ] + } + + withName: 'PHANTOMPEAKQUALTOOLS' { + ext.args2 = { "-p=$task.cpus" } + ext.prefix = { "${meta.id}.mLb.clN" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/phantompeakqualtools" }, + mode: params.publish_dir_mode, + pattern: "*.{out,pdf}" + ] + } + + withName: 'MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/phantompeakqualtools" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'BEDTOOLS_GENOMECOV' { + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/bigwig/scale" }, + mode: params.publish_dir_mode, + pattern: "*.txt" + ] + } + + withName: 'UCSC_BEDGRAPHTOBIGWIG' { + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/bigwig" }, + mode: params.publish_dir_mode, + pattern: "*.bigWig" + ] + } +} + +if (!params.skip_picard_metrics) { + process { + withName: 'PICARD_COLLECTMULTIPLEMETRICS' { + ext.args = '--VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' + ext.prefix = { "${meta.id}.mLb.clN.sorted" } + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/picard_metrics" }, + mode: params.publish_dir_mode, + pattern: "*_metrics" + ], + [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/picard_metrics/pdf" }, + mode: params.publish_dir_mode, + pattern: "*.pdf" + ] + ] + } + } +} + +if (!params.skip_preseq) { + process { + withName: 'PRESEQ_LCEXTRAP' { + ext.args = '-verbose -bam -seed 1' + ext.prefix = { "${meta.id}.mLb.clN" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/preseq" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } +} + +if (!params.skip_plot_profile) { + process { + withName: 'DEEPTOOLS_COMPUTEMATRIX' { + ext.args = 'scale-regions --regionBodyLength 1000 --beforeRegionStartLength 3000 --afterRegionStartLength 3000 --skipZeros --smartLabels' + ext.prefix = { "${meta.id}.mLb.clN" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/deepTools/plotProfile" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'DEEPTOOLS_PLOTPROFILE' { + ext.prefix = { "${meta.id}.mLb.clN" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/deepTools/plotProfile" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'DEEPTOOLS_PLOTHEATMAP' { + ext.prefix = { "${meta.id}.mLb.clN" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/deepTools/plotProfile" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } +} + +process { + withName: 'KHMER_UNIQUEKMERS' { + publishDir = [ enabled: false ] + } +} + +if (!params.skip_plot_fingerprint) { + process { + withName: 'DEEPTOOLS_PLOTFINGERPRINT' { + ext.args = { [ + '--skipZeros', + "--numberOfSamples $params.fingerprint_bins", + "--labels $meta.id $meta.control" + ].join(' ').trim() } + ext.prefix = { "${meta.id}.mLb.clN" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/deepTools/plotFingerprint" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } +} + +process { + withName: 'MACS2_CALLPEAK' { + ext.args = [ + '--keep-dup all', + params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}", + params.save_macs_pileup ? '--bdg --SPMR' : '', + params.macs_fdr ? "--qvalue ${params.macs_fdr}" : '', + params.macs_pvalue ? "--pvalue ${params.macs_pvalue}" : '', + params.aligner == "chromap" ? "--format BAM" : '' + ].join(' ').trim() + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'FRIP_SCORE' { + ext.args = '-bed -c -f 0.20' + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/qc' + ].join('') }, + enabled: false + ] + } + + withName: 'MULTIQC_CUSTOM_PEAKS' { + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/qc' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} + +if (!params.skip_peak_annotation) { + process { + withName: 'HOMER_ANNOTATEPEAKS_MACS2' { + ext.args = '-gid' + ext.prefix = { "${meta.id}_peaks" } + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + + if (!params.skip_peak_qc) { + process { + withName: 'PLOT_MACS2_QC' { + ext.args = '-o ./ -p macs2_peak' + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/qc' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'PLOT_HOMER_ANNOTATEPEAKS' { + ext.args = '-o ./ -p macs2_annotatePeaks' + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/qc' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } + } +} + +if (!params.skip_consensus_peaks) { + process { + withName: 'MACS2_CONSENSUS' { + ext.when = { meta.multiple_groups || meta.replicates_exist } + ext.prefix = { "${meta.id}.consensus_peaks" } + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/consensus' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'SUBREAD_FEATURECOUNTS' { + ext.args = '-F SAF -O --fracOverlap 0.2' + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/consensus' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + + if (!params.skip_peak_annotation) { + process { + withName: 'HOMER_ANNOTATEPEAKS_CONSENSUS' { + ext.args = '-gid' + ext.prefix = 'consensus_peaks' + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/consensus' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'ANNOTATE_BOOLEAN_PEAKS' { + ext.prefix = { "${meta.id}_peaks" } + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/consensus' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + + if (!params.skip_deseq2_qc) { + process { + withName: DESEQ2_QC { + ext.when = { meta.multiple_groups && meta.replicates_exist } + ext.args = [ + '--id_col 1', + '--sample_suffix \'.mLb.clN.sorted.bam\'', + '--count_col 7', + params.deseq2_vst ? 
'--vst TRUE' : '' + ].join(' ').trim() + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/consensus', + '/deseq2' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } +} + +if (!params.skip_igv) { + process { + withName: 'IGV' { + publishDir = [ + path: { [ + "${params.outdir}/igv", + params.narrow_peak? '/narrowPeak' : '/broadPeak' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } +} + +if (!params.skip_multiqc) { + process { + withName: 'MULTIQC' { + ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + publishDir = [ + path: { [ + "${params.outdir}/multiqc", + params.narrow_peak? '/narrowPeak' : '/broadPeak' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } } diff --git a/conf/test.config b/conf/test.config index 2bf8a1faa676cfae731d5266b0c153daf81d0848..2fad3bc877415923e85ee31d82504386c4bd7365 100644 --- a/conf/test.config +++ b/conf/test.config @@ -11,19 +11,25 @@ */ params { - config_profile_name = 'Test profile' + config_profile_name = 'Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' + max_cpus = 2 + max_memory = 6.GB + max_time = 6.h // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/samplesheet/v2.0/samplesheet_test.csv' + read_length = 50 // Genome references - genome = 'R64-1-1' + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/reference/genome.fa' + gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/reference/genes.gtf' + + // For speed to avoid CI time-out + fingerprint_bins = 100 + + // Avoid preseq errors with test data + skip_preseq = true } diff --git a/conf/test_full.config b/conf/test_full.config index 5c3bc0808ea857a3c92399e68a061ee88577c776..f47445a54f09d39aee9dc129c990517e6dab49c8 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -15,10 +15,11 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/samplesheet/v2.0/samplesheet_full.csv' + + // Used to get macs_gsize + read_length = 50 // Genome references - genome = 'R64-1-1' + genome = 'hg19' } diff --git a/docs/images/igv_screenshot.png b/docs/images/igv_screenshot.png new file mode 100755 index 0000000000000000000000000000000000000000..95b19c0b8b1a3e4f15d2204310896aa4ca8ccc8e Binary files /dev/null and b/docs/images/igv_screenshot.png differ diff --git a/docs/images/mqc_annotatePeaks_feature_percentage_plot.png b/docs/images/mqc_annotatePeaks_feature_percentage_plot.png new file mode 100755 index 0000000000000000000000000000000000000000..76de481387036d396aceeca1c62e2eb204adc8cb Binary files /dev/null and b/docs/images/mqc_annotatePeaks_feature_percentage_plot.png differ diff --git a/docs/images/mqc_cutadapt_plot.png b/docs/images/mqc_cutadapt_plot.png new file mode 100755 index 0000000000000000000000000000000000000000..7fcc4348699e76fab9c796d1596240580d0194ab Binary files /dev/null and b/docs/images/mqc_cutadapt_plot.png differ diff --git a/docs/images/mqc_deeptools_plotFingerprint_plot.png b/docs/images/mqc_deeptools_plotFingerprint_plot.png new file mode 100755 index 0000000000000000000000000000000000000000..6bc3a31bbde9c327f8cb17e5f7ba1208c0dadc57 Binary files /dev/null and b/docs/images/mqc_deeptools_plotFingerprint_plot.png differ diff --git a/docs/images/mqc_deeptools_plotProfile_plot.png b/docs/images/mqc_deeptools_plotProfile_plot.png new file mode 100755 index 0000000000000000000000000000000000000000..812dffc55b98afa2ff1da4fb47659c882775eee0 Binary files /dev/null and b/docs/images/mqc_deeptools_plotProfile_plot.png differ diff --git a/docs/images/mqc_deseq2_pca_plot.png 
b/docs/images/mqc_deseq2_pca_plot.png new file mode 100755 index 0000000000000000000000000000000000000000..874001a6fce059230c5ff6544f0fb721725fab4f Binary files /dev/null and b/docs/images/mqc_deseq2_pca_plot.png differ diff --git a/docs/images/mqc_deseq2_sample_similarity_plot.png b/docs/images/mqc_deseq2_sample_similarity_plot.png new file mode 100755 index 0000000000000000000000000000000000000000..94377233179e8ffce67b996a18c92e71ee468f28 Binary files /dev/null and b/docs/images/mqc_deseq2_sample_similarity_plot.png differ diff --git a/docs/images/mqc_featureCounts_assignment_plot.png b/docs/images/mqc_featureCounts_assignment_plot.png new file mode 100755 index 0000000000000000000000000000000000000000..62546e88f7cd1f6ea0e01126e4b2c2829577e3af Binary files /dev/null and b/docs/images/mqc_featureCounts_assignment_plot.png differ diff --git a/docs/images/mqc_frip_score_plot.png b/docs/images/mqc_frip_score_plot.png new file mode 100755 index 0000000000000000000000000000000000000000..e0d901296475ba645328351882eb6710b816dfa1 Binary files /dev/null and b/docs/images/mqc_frip_score_plot.png differ diff --git a/docs/images/mqc_macs2_peak_count_plot.png b/docs/images/mqc_macs2_peak_count_plot.png new file mode 100755 index 0000000000000000000000000000000000000000..54e46f3098feca42e9f18158c7fb8079de6ba8e5 Binary files /dev/null and b/docs/images/mqc_macs2_peak_count_plot.png differ diff --git a/docs/images/mqc_picard_deduplication_plot.png b/docs/images/mqc_picard_deduplication_plot.png new file mode 100755 index 0000000000000000000000000000000000000000..9b0c08f967f0121642a827139eba8b8464e244cb Binary files /dev/null and b/docs/images/mqc_picard_deduplication_plot.png differ diff --git a/docs/images/mqc_picard_insert_size_plot.png b/docs/images/mqc_picard_insert_size_plot.png new file mode 100755 index 0000000000000000000000000000000000000000..3c3d19eb7a2b1bc5e0768e7bf51e39281d0bacbe Binary files /dev/null and b/docs/images/mqc_picard_insert_size_plot.png differ diff 
--git a/docs/images/mqc_preseq_plot.png b/docs/images/mqc_preseq_plot.png new file mode 100755 index 0000000000000000000000000000000000000000..c4c98f1703adadda6da42cdec01651feef48db18 Binary files /dev/null and b/docs/images/mqc_preseq_plot.png differ diff --git a/docs/images/mqc_samtools_stats_plot.png b/docs/images/mqc_samtools_stats_plot.png new file mode 100755 index 0000000000000000000000000000000000000000..cd7a50fdec85d66b1e13c790df4eac32219a5ddf Binary files /dev/null and b/docs/images/mqc_samtools_stats_plot.png differ diff --git a/docs/images/mqc_spp_nsc_plot.png b/docs/images/mqc_spp_nsc_plot.png new file mode 100755 index 0000000000000000000000000000000000000000..c98aa1f9fc006a9823c1cfba4a7bc92c13743358 Binary files /dev/null and b/docs/images/mqc_spp_nsc_plot.png differ diff --git a/docs/images/mqc_spp_rsc_plot.png b/docs/images/mqc_spp_rsc_plot.png new file mode 100755 index 0000000000000000000000000000000000000000..a6f0365c39f9403cf1fc9a63552277bedfd27bc5 Binary files /dev/null and b/docs/images/mqc_spp_rsc_plot.png differ diff --git a/docs/images/mqc_spp_strand_correlation_plot.png b/docs/images/mqc_spp_strand_correlation_plot.png new file mode 100755 index 0000000000000000000000000000000000000000..151ca672f611677e490e716cf07a72dc07a4bb1f Binary files /dev/null and b/docs/images/mqc_spp_strand_correlation_plot.png differ diff --git a/docs/images/r_upsetr_intersect_plot.png b/docs/images/r_upsetr_intersect_plot.png new file mode 100755 index 0000000000000000000000000000000000000000..be82ea4d3f9ee9509186be8e46b878f63fed2d03 Binary files /dev/null and b/docs/images/r_upsetr_intersect_plot.png differ diff --git a/docs/output.md b/docs/output.md index d0fe3e904bdc8489cdeaa89ee95cf905c280a81c..6fde66dec9a9106b097a4460134901fe72ddc8f0 100644 --- a/docs/output.md +++ b/docs/output.md @@ -2,61 +2,340 @@ ## Introduction -This document describes the output produced by the pipeline. 
Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. +This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report generated from the [full-sized test dataset](https://github.com/nf-core/test-datasets/tree/chipseq#full-test-dataset-origin) for the pipeline using a command similar to the one below: -The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. +```console +nextflow run nf-core/chipseq -profile test_full,<docker/singularity/institute> +``` -<!-- TODO nf-core: Write this documentation describing your workflow's output --> +The directories listed below will be created in the output directory after the pipeline has finished. All paths are relative to the top-level results directory. ## Pipeline overview -The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: +The pipeline is built using [Nextflow](https://www.nextflow.io/). See [`main README.md`](../README.md) for a condensed overview of the steps in the pipeline, and the bioinformatics tools used at each step. -- [FastQC](#fastqc) - Raw read QC -- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline -- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +See [Illumina website](https://emea.illumina.com/techniques/sequencing/dna-sequencing/chip-seq.html) for more information regarding the ChIP-seq protocol, and for an extensive list of publications. -### FastQC +## Library-level analysis + +The initial QC and alignments are performed at the library-level e.g. if the same library has been sequenced more than once to increase sequencing depth. 
This has the advantage of being able to assess each library individually, and the ability to process multiple libraries from the same sample in parallel. + + ### Raw read QC <details markdown="1"> -<summary>Output files</summary> + <summary>Output files</summary> - `fastqc/` - - `*_fastqc.html`: FastQC report containing quality metrics. + - `*_fastqc.html`: FastQC report containing quality metrics for read 1 (_and read 2 if paired-end_) **before** adapter trimming. +- `fastqc/zips/` - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. </details> [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). - +### Adapter trimming + +<details markdown="1"> + <summary>Output files</summary> + +- `trimgalore/` + - `*.fq.gz`: If `--save_trimmed` is specified, FastQ files **after** adapter trimming will be placed in this directory. +- `trimgalore/logs/` + - `*.txt`: Trimming report generated by Trim Galore!. +- `trimgalore/fastqc/` + - `*_fastqc.html`: FastQC report containing quality metrics for read 1 (_and read 2 if paired-end_) **after** adapter trimming. +- `trimgalore/fastqc/zips/` + - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. + +</details> + +[Trim Galore!](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/) is a wrapper tool around Cutadapt and FastQC to consistently apply quality and adapter trimming to FastQ files. By default, Trim Galore! will automatically detect and trim the appropriate adapter sequence. See [`usage.md`](usage.md) for more details about the trimming options. 
+ + + +### Alignment + +The pipeline has been written in a way where all the files generated downstream of the alignment are placed in the same directory as specified by `--aligner` e.g. if `--aligner bwa` is specified then all the downstream results will be placed in the `bwa/` directory. This helps with organising the directory structure and more importantly, allows the end-user to get the results from multiple aligners by simply re-running the pipeline with a different `--aligner` option along with the `-resume` parameter. It also means that results won't be overwritten when resuming the pipeline and can be used for benchmarking between alignment algorithms if required. Thus, `<ALIGNER>` in the directory structure below corresponds to the aligner set when running the pipeline. + +<details markdown="1"> + <summary>Output files</summary> + +- `<ALIGNER>/library/` + - `*.bam`: The files resulting from the alignment of individual libraries are not saved by default so this directory will not be present in your results. You can override this behaviour with the use of the `--save_align_intermeds` flag in which case it will contain the coordinate sorted alignment files in [`*.bam`](https://samtools.github.io/hts-specs/SAMv1.pdf) format. +- `<ALIGNER>/library/samtools_stats/` + - SAMtools `<SAMPLE>.sorted.bam.flagstat`, `<SAMPLE>.sorted.bam.idxstats` and `<SAMPLE>.sorted.bam.stats` files generated from the alignment files. + +> **NB:** File names in the resulting directory (i.e. `<ALIGNER>/library/`) will have the '`.Lb.`' suffix. + +</details> + +Adapter-trimmed reads are mapped to the reference assembly using the aligner set by the `--aligner` parameter. Available aligners are [BWA](http://bio-bwa.sourceforge.net/bwa.shtml) (default), [Bowtie 2](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml), [Chromap](https://github.com/haowenz/chromap) and [STAR](https://github.com/alexdobin/STAR). 
A genome index is required to run any of these aligners so if this is not provided explicitly using the corresponding parameter (e.g. `--bwa_index`), then it will be created automatically from the genome fasta input. The index creation process can take a while for larger genomes so it is possible to use the `--save_reference` parameter to save the indices for future pipeline runs, reducing processing times. + + + +> **NB:** Currently, paired-end files produced by `Chromap` are excluded from downstream analysis due to [this](https://github.com/nf-core/chipseq/issues/291) issue. Single-end files are processed normally. + +#### Unmapped reads + +The `--save_unaligned` parameter makes it possible to obtain FastQ files containing unmapped reads (only available for STAR and Bowtie2). + +<details markdown="1"> + <summary>Output files</summary> +- `<ALIGNER>/library/unmapped/` + - `*.fastq.gz`: If `--save_unaligned` is specified, FastQ files containing unmapped reads will be placed in this directory. + +</details> + +#### STAR logs + +<details markdown="1"> + <summary>Output files</summary> + +- `star/library/log/` + - `*.SJ.out.tab`: File containing filtered splice junctions detected after mapping the reads. + - `*.Log.final.out`: STAR alignment report containing the mapping results summary. + - `*.Log.out` and `*.Log.progress.out`: STAR log files containing detailed information about the run. Typically only useful for debugging purposes. + +</details> + +## Merged library-level analysis + +The library-level alignments associated with the same sample are merged and subsequently used for the downstream analyses. + +### Alignment merging, duplicate marking, filtering and QC + +<details markdown="1"> + <summary>Output files</summary> + +- `<ALIGNER>/mergedLibrary/` + - `*.bam`: Merged library-level, coordinate sorted `*.bam` files after the marking of duplicates, and filtering based on various criteria. The file suffix for the final filtered files will be `*.mLb.clN.*`. 
If you specify the `--save_align_intermeds` parameter then two additional sets of files will be present. These represent the unfiltered alignments with duplicates marked (`*.mLb.mkD.*`), and in the case of paired-end datasets the filtered alignments before the removal of orphan read pairs (`*.mLb.flT.*`). +- `<ALIGNER>/mergedLibrary/samtools_stats/` + - SAMtools `*.flagstat`, `*.idxstats` and `*.stats` files generated from the alignment files. +- `<ALIGNER>/mergedLibrary/picard_metrics/` + - `*_metrics`: Alignment QC files from picard CollectMultipleMetrics. + - `*.metrics.txt`: Metrics file from MarkDuplicates. +- `<ALIGNER>/mergedLibrary/picard_metrics/pdf/` + - `*.pdf`: Alignment QC plot files from picard CollectMultipleMetrics. +- `<ALIGNER>/mergedLibrary/preseq/` + - `*.lc_extrap.txt`: Preseq expected future yield file. + +> **NB:** File names in the resulting directory (i.e. `<ALIGNER>/mergedLibrary/`) will have the '`.mLb.`' suffix. + +</details> + +[Picard MergeSamFiles and MarkDuplicates](https://broadinstitute.github.io/picard/command-line-overview.html) are used in combination to merge the alignments, and for the marking of duplicates, respectively. If you only have one library for any given replicate then the merging step is not carried out because the library-level and merged library-level BAM files will be exactly the same. + + + +Read duplicate marking is carried out using the Picard MarkDuplicates command. Duplicate reads are generally removed from the aligned reads to mitigate for fragments in the library that may have been sequenced more than once due to PCR biases. There is an option to keep duplicate reads with the `--keep_dups` parameter but it's generally recommended to remove them to avoid the wrong interpretation of the results. A similar option has been provided to keep reads that are multi-mapped - `--keep_multi_map`. 
Other steps have been incorporated into the pipeline to filter the resulting alignments - see [`main README.md`](../README.md) for a more comprehensive listing, and the tools used at each step. A selection of alignment-based QC metrics generated by Picard CollectMultipleMetrics and MarkDuplicates will be included in the MultiQC report. + + + +The [Preseq](http://smithlabresearch.org/software/preseq/) package is aimed at predicting and estimating the complexity of a genomic sequencing library, equivalent to predicting and estimating the number of redundant reads from a given sequencing depth and how many will be expected from additional sequencing using an initial sequencing experiment. The estimates can then be used to examine the utility of further sequencing, optimize the sequencing depth, or to screen multiple libraries to avoid low complexity samples. The dashed line shows a perfectly complex library where total reads = unique reads. Note that these are predictive numbers only, not absolute. The MultiQC plot can sometimes give extreme sequencing depth on the X axis - click and drag from the left side of the plot to zoom in on more realistic numbers. + + + +### Normalised bigWig files + +<details markdown="1"> + <summary>Output files</summary> + +- `<ALIGNER>/mergedLibrary/bigwig/` + - `*.bigWig`: Normalised bigWig files scaled to 1 million mapped reads. + +</details> + +The [bigWig](https://genome.ucsc.edu/goldenpath/help/bigWig.html) format is an indexed binary format useful for displaying dense, continuous data in Genome Browsers such as the [UCSC](https://genome.ucsc.edu/cgi-bin/hgTracks) and [IGV](http://software.broadinstitute.org/software/igv/). This mitigates the need to load the much larger BAM files for data visualisation purposes which will be slower and result in memory issues. 
The coverage values represented in the bigWig file can also be normalised in order to be able to compare the coverage across multiple samples - this is not possible with BAM files. The bigWig format is also supported by various bioinformatics software for downstream processing such as meta-profile plotting. + +### ChIP-seq QC metrics + +<details markdown="1"> + <summary>Output files</summary> + +- `<ALIGNER>/mergedLibrary/phantompeakqualtools/` + - `*.spp.out`, `*.spp.pdf`: phantompeakqualtools output files. + - `*_mqc.tsv`: MultiQC custom content files. +- `<ALIGNER>/mergedLibrary/deepTools/plotFingerprint/` + - `*.plotFingerprint.pdf`, `*.plotFingerprint.qcmetrics.txt`, `*.plotFingerprint.raw.txt`: plotFingerprint output files. +- `<ALIGNER>/mergedLibrary/deepTools/plotProfile/` + - `*.computeMatrix.mat.gz`, `*.computeMatrix.vals.mat.tab`, `*.plotProfile.pdf`, `*.plotProfile.tab`, `*.plotHeatmap.pdf`, `*.plotHeatmap.mat.tab`: plotProfile output files. + +</details> + +[phantompeakqualtools](https://github.com/kundajelab/phantompeakqualtools) plots the strand cross-correlation of aligned reads for each sample. In a strand cross-correlation plot, reads are shifted in the direction of the strand they map to by an increasing number of base pairs and the Pearson correlation between the per-position read count for each strand is calculated. Two cross-correlation peaks are usually observed in a ChIP experiment, one corresponding to the read length ("phantom" peak) and one to the average fragment length of the library. The absolute and relative height of the two peaks are useful determinants of the success of a ChIP-seq experiment. A high-quality IP is characterized by a ChIP peak that is much higher than the "phantom" peak, while often very small or no such peak is seen in failed experiments. + + + +Normalized strand coefficient (NSC) is the normalized ratio between the fragment-length cross-correlation peak and the background cross-correlation. 
NSC values range from a minimum of 1 to larger positive numbers. 1.1 is the critical threshold. Datasets with NSC values much less than 1.1 (< 1.05) tend to have low signal to noise or few peaks (this could be biological e.g. a factor that truly binds only a few sites in a particular tissue type OR it could be due to poor quality). ENCODE cut-off: **NSC > 1.05**. + + - +Relative strand correlation (RSC) is the ratio between the fragment-length peak and the read-length peak. RSC values range from 0 to larger positive values. 1 is the critical threshold. RSC values significantly lower than 1 (< 0.8) tend to have low signal to noise. The low scores can be due to failed and poor quality ChIP, low read sequence quality and hence lots of mis-mappings, shallow sequencing depth (significantly below saturation) or a combination of these. Like the NSC, datasets with few binding sites (< 200), which is biologically justifiable, also show low RSC scores. ENCODE cut-off: **RSC > 0.8**. - + -> **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. +[deepTools](https://deeptools.readthedocs.io/en/develop/content/list_of_tools.html) plotFingerprint is a useful QC for ChIP-seq data in order to see the relative enrichment of the IP samples with respect to the controls on a genome-wide basis. The results, however, are expected to look different for example when comparing narrow marks such as transcription factors and broader marks such as histone modifications (see [plotFingerprint docs](https://deeptools.readthedocs.io/en/develop/content/tools/plotFingerprint.html)). -### MultiQC + + +The results from deepTools plotProfile gives you a quick visualisation for the genome-wide enrichment of your samples at the TSS, and across the gene body. 
During the downstream analysis, you may want to refine the features/genes used to generate these plots in order to see a more specific condition-related effect. + + + +### Call peaks + +<details markdown="1"> + <summary>Output files</summary> + +- `<ALIGNER>/mergedLibrary/macs2/<PEAK_TYPE>/` + - `*.xls`, `*.broadPeak` or `*.narrowPeak`, `*.gappedPeak`, `*summits.bed`: MACS2 output files - the files generated will depend on whether MACS2 has been run in _narrowPeak_ or _broadPeak_ mode. + - `*.annotatePeaks.txt`: HOMER peak-to-gene annotation file. +- `<ALIGNER>/mergedLibrary/macs2/<PEAK_TYPE>/qc/` + - `macs2_peak.plots.pdf`: QC plots for MACS2 peaks. + - `macs2_annotatePeaks.plots.pdf`: QC plots for peak-to-gene feature annotation. + - `*.FRiP_mqc.tsv`, `*.peak_count_mqc.tsv`, `annotatepeaks.summary_mqc.tsv`: MultiQC custom-content files for FRiP score, peak count and peak-to-gene ratios. + +> **NB:** `<PEAK_TYPE>` in the directory structure above corresponds to the type of peak that you have specified to call with MACS2 i.e. `broadPeak` or `narrowPeak`. If you so wish, you can call both narrow and broad peaks without redoing the preceding steps in the pipeline such as the alignment and filtering. For example, if you already have broad peaks then just add `--narrow_peak -resume` to the command you used to run the pipeline, and these will be called too! However, resuming the pipeline will only be possible if you have not deleted the `work/` directory generated by the pipeline. + +</details> + +[MACS2](https://github.com/macs3-project/MACS) is one of the most popular peak-calling algorithms for ChIP-seq data. By default, the peaks are called with the MACS2 `--broad` parameter. If, however, you would like to call narrow peaks then please provide the `--narrow_peak` parameter when running the pipeline. See [MACS2 outputs](https://github.com/macs3-project/MACS/blob/master/docs/callpeak.md#output-files) for a description of the output files generated by MACS2. 
+ + + +[HOMER annotatePeaks.pl](http://homer.ucsd.edu/homer/ngs/annotation.html) is used to annotate the peaks relative to known genomic features. HOMER is able to use the `--gtf` annotation file which is provided to the pipeline. Please note that some of the output columns will be blank because the annotation is not provided using HOMER's in-built database format. However, the more important fields required for downstream analysis will be populated i.e. _Annotation_, _Distance to TSS_ and _Nearest Promoter ID_. + + + +Various QC plots per sample including number of peaks, fold-change distribution, [FRiP score](https://genome.cshlp.org/content/22/9/1813.full.pdf+html) and peak-to-gene feature annotation are also generated by the pipeline. Where possible these have been integrated into the MultiQC report. + + + +### Create and quantify consensus set of peaks <details markdown="1"> -<summary>Output files</summary> + <summary>Output files</summary> -- `multiqc/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. +- `<ALIGNER>/mergedLibrary/macs2/<PEAK_TYPE>/consensus` + - `*.bed`: Consensus peak-set across all samples in BED format. + - `*.saf`: Consensus peak-set across all samples in SAF format. Required by featureCounts for read quantification. + - `*.featureCounts.txt`: Read counts across all samples relative to consensus peak-set. + - `*.annotatePeaks.txt`: HOMER peak-to-gene annotation file for consensus peaks. + - `*.boolean.annotatePeaks.txt`: Spreadsheet representation of consensus peak-set across samples **with** gene annotation columns. The columns from individual peak files are included in this file along with the ability to filter peaks based on their presence or absence in multiple replicates/conditions. 
+ - `*.boolean.txt`: Spreadsheet representation of consensus peak-set across samples **without** gene annotation columns. Same as file above but without annotation columns. + - `*.boolean.intersect.plot.pdf`, `*.boolean.intersect.txt`: [UpSetR](https://cran.r-project.org/web/packages/UpSetR/README.html) files to illustrate peak intersection. </details> -[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. +In order to perform the differential binding analysis we need to be able to carry out the read quantification for the same intervals across **all** of the samples in the experiment. To this end, the individual peak-sets called per sample have to be merged together in order to create a consensus set of peaks. + +Using the consensus peaks it is possible to assess the degree of overlap between the peaks from a set of samples e.g. _Which consensus peaks contain peaks that are common/unique to a given set of samples?_. This may be useful for downstream filtering of peaks based on whether they are called in multiple replicates/conditions. Please note that it is possible for a consensus peak to contain multiple peaks from the same sample. Unfortunately, this is sample-dependent but the files generated by the pipeline do have columns that report such instances and allow you to factor them into any further analysis. + + + +By default, the peak-sets are not filtered, therefore, the consensus peaks will be generated across the union set of peaks from all samples. However, you can increment the `--min_reps_consensus` parameter appropriately if you are confident you have good reproducibility amongst your replicates to create a "reproducible" set of consensus peaks. 
In future iterations of the pipeline more formal analyses such as [IDR](https://projecteuclid.org/euclid.aoas/1318514284) may be implemented to obtain reproducible and high confidence peak-sets with which to perform this sort of analysis. + +The [featureCounts](http://bioinf.wehi.edu.au/featureCounts/) tool is used to count the number of reads relative to the consensus peak-set across all of the samples. This essentially generates a file containing a matrix where the rows represent the consensus intervals, the columns represent all of the samples in the experiment, and the values represent the raw read counts. + + + +### Read counting and differential binding analysis + +<details markdown="1"> + <summary>Output files</summary> + +- `<ALIGNER>/mergedLibrary/macs2/<PEAK_TYPE>/consensus/deseq2/` + - `*.sample.dists.txt`: Spreadsheet containing sample-to-sample distance across each consensus peak. + - `*.plots.pdf`: File containing PCA and hierarchical clustering plots. + - `*.dds.RData`: File containing R `DESeqDataSet` object generated by DESeq2, with either + an rlog or vst `assay` storing the variance-stabilised data. + - `*.rds`: Alternative version of the RData file suitable for + `readRDS` to give user control of the eventual object name. + - `*pca.vals.txt`: Matrix of values for the first 2 principal components. + - `R_sessionInfo.log`: File containing information about R, the OS and attached or loaded packages. + - `<ALIGNER>/mergedLibrary/macs2/<PEAK_TYPE>/consensus/deseq2/sizeFactors/` + - `*.txt`, `*.RData`: Files containing DESeq2 sizeFactors per sample. + +</details> + +[DESeq2](https://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html) is more commonly used to perform differential expression analysis for RNA-seq datasets. However, it can also be used for ChIP-seq differential binding analysis, in which case you can imagine that instead of counts per gene for RNA-seq data we now have counts per bound region. 
+ +**This pipeline uses a standardised DESeq2 analysis script to get an idea of the reproducibility within the experiment, and to assess the overall differential binding. Please note that this will not suit every experimental design, and if there are other problems with the experiment then it may not work as well as expected.** + +For larger experiments, it may be recommended to use the `vst` transformation instead of the default `rlog` option. You can do this by providing the `--deseq2_vst` parameter to the pipeline. See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization) for a more detailed explanation. + + + + + +## Aggregate analysis + +### Present QC for the raw read, alignment, peak and differential binding results + +<details markdown="1"> + <summary>Output files</summary> + +- `multiqc/<PEAK_TYPE>/` + - `multiqc_report.html`: A standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: Directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: Directory containing static images from the report in various formats. + +</details> + +[MultiQC](https://multiqc.info/docs/) is a visualisation tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available within the report data directory. + +Results generated by MultiQC collate pipeline QC from FastQC, TrimGalore, samtools flagstat, samtools idxstats, samtools stats, picard CollectMultipleMetrics, picard MarkDuplicates, Preseq, deepTools plotProfile, deepTools plotFingerprint, phantompeakqualtools and featureCounts. 
The default [`multiqc config file`](../assets/multiqc_config.yaml) also contains the provision for loading custom-content to report peak counts, FRiP scores, peak-to-gene annotation proportions, spp NSC coefficient, spp RSC coefficient, PCA plots and sample-similarity heatmaps. + +The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see <http://multiqc.info>. + +### Create IGV session file + +<details markdown="1"> + <summary>Output files</summary> + +- `igv/<PEAK_TYPE>/` + - `igv_session.xml`: Session file that can be directly loaded into IGV. + - `igv_files.txt`: File containing a listing of the files used to create the IGV session. + +</details> + +An [IGV](https://software.broadinstitute.org/software/igv/UserGuide) session file will be created at the end of the pipeline containing the normalised bigWig tracks, per-sample peaks, consensus peaks and differential sites. This avoids having to load all of the data individually into IGV for visualisation. + +The genome fasta file required for the IGV session will be the same as the one that was provided to the pipeline. This will be copied into `genome/` to overcome any loading issues. If you prefer to use another path or an in-built genome provided by IGV just change the `genome` entry in the second-line of the session file. + +The file paths in the IGV session file will only work if the results are kept in the same place on your storage. If the results are moved or, for example, if you prefer to load the data over the web then just replace the file paths with others that are more appropriate. + +Once installed, open IGV, go to `File > Open Session` and select the `igv_session.xml` file for loading. + + + +> **NB:** If you are not using an in-built genome provided by IGV you will need to load the annotation yourself e.g. in .gtf and/or .bed format. 
+ +## Other results + +### Reference genome files + +<details markdown="1"> + <summary>Output files</summary> + +- `genome/` + - A number of genome-specific files are generated by the pipeline in order to aid in the filtering of the data, and because they are required by standard tools such as BEDTools. These can be found in this directory along with the genome fasta file which is required by IGV. If using a genome from AWS iGenomes and if it exists a `README.txt` file containing information about the annotation version will also be saved in this directory. +- `genome/index/` + + - `bwa/`: Directory containing BWA indices. + - `bowtie2/`: Directory containing BOWTIE2 indices. + - `chromap/`: Directory containing Chromap indices. + - `star/`: Directory containing STAR indices. + + - If the `--save_reference` parameter is provided then the alignment indices generated by the pipeline will be saved in this directory. This can be quite a time-consuming process so it permits their reuse for future runs of the pipeline or for other purposes. + +</details> -Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see <http://multiqc.info>. +Reference genome-specific files can be useful to keep for the downstream processing of the results. ### Pipeline information <details markdown="1"> -<summary>Output files</summary> + <summary>Output files</summary> - `pipeline_info/` - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. @@ -65,4 +344,4 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. 
FastQ </details> -[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. +[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to trouble-shoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/docs/usage.md b/docs/usage.md index 997dfd2ce7916156d50d66e6764607cfbc37a88a..4a65603223b52633b4b3966faa3e4c255f67c7e2 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -4,53 +4,113 @@ > _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ -## Introduction - -<!-- TODO nf-core: Add documentation about anything specific to running your pipeline. For general topics, please point to (and add to) the main nf-core website. --> - ## Samplesheet input -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 5 columns, and a header row as shown in the examples below. 
```bash --input '[path to samplesheet file]' ``` -### Multiple runs of the same sample +### Multiple runs of the same library -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: +The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will perform the alignments in parallel, and subsequently merge them before further analysis. Below is an example where the samples called `WT_BCATENIN_IP_REP2` and `WT_INPUT_REP2` have been re-sequenced multiple times: ```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz +sample,fastq_1,fastq_2,antibody,control +WT_BCATENIN_IP_REP1,BLA203A1_S27_L006_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP2,BLA203A25_S16_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP2,BLA203A25_S16_L002_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP2,BLA203A25_S16_L003_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP3,BLA203A49_S40_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_INPUT_REP1,BLA203A6_S32_L006_R1_001.fastq.gz,,, +WT_INPUT_REP2,BLA203A30_S21_L001_R1_001.fastq.gz,,, +WT_INPUT_REP2,BLA203A30_S21_L002_R1_001.fastq.gz,,, +WT_INPUT_REP3,BLA203A31_S21_L003_R1_001.fastq.gz,,, ``` -### Full samplesheet +### Full design + +The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. 
The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 5 columns to match those defined in the table below. + +The `antibody` column is required to separate the downstream consensus peak merging for different antibodies. It's not advisable to generate a consensus peak set across different antibodies especially if their binding patterns are inherently different e.g. narrow transcription factors and broad histone marks. -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. +The `control` column should be the `sample` identifier for the controls for any given IP. -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. +A final design file may look something like the one below. 
This is for two antibodies and associated controls, where the `WT_BCATENIN_IP_REP2` and `NAIVE_BCATENIN_IP_REP2` samples have been sequenced twice: ```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +sample,fastq_1,fastq_2,antibody,control +WT_BCATENIN_IP_REP1,BLA203A1_S27_L006_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP2,BLA203A25_S16_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP2,BLA203A25_S16_L002_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP3,BLA203A49_S40_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT +NAIVE_BCATENIN_IP_REP1,BLA203A7_S60_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT +NAIVE_BCATENIN_IP_REP2,BLA203A43_S34_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT +NAIVE_BCATENIN_IP_REP2,BLA203A43_S34_L002_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT +NAIVE_BCATENIN_IP_REP3,BLA203A64_S55_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT +WT_TCF4_IP_REP1,BLA203A3_S29_L006_R1_001.fastq.gz,,TCF4,WT_INPUT +WT_TCF4_IP_REP2,BLA203A27_S18_L001_R1_001.fastq.gz,,TCF4,WT_INPUT +WT_TCF4_IP_REP3,BLA203A51_S42_L001_R1_001.fastq.gz,,TCF4,WT_INPUT +NAIVE_TCF4_IP_REP1,BLA203A9_S62_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT +NAIVE_TCF4_IP_REP2,BLA203A45_S36_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT +NAIVE_TCF4_IP_REP3,BLA203A66_S57_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT +WT_INPUT_REP1,BLA203A6_S32_L006_R1_001.fastq.gz,,, +WT_INPUT_REP2,BLA203A30_S21_L001_R1_001.fastq.gz,,, +WT_INPUT_REP3,BLA203A31_S21_L003_R1_001.fastq.gz,,, +NAIVE_INPUT_REP1,BLA203A12_S3_L001_R1_001.fastq.gz,,, +NAIVE_INPUT_REP2,BLA203A48_S39_L001_R1_001.fastq.gz,,, 
+NAIVE_INPUT_REP3,BLA203A49_S1_L006_R1_001.fastq.gz,,, ``` -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| Column | Description | +| ---------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `antibody` | Antibody name. This is required to segregate downstream analysis for different antibodies. Required when `control` is specified. | +| `control` | Sample name for control sample. | + +Example design files have been provided with the pipeline for [paired-end](../assets/samplesheet_pe.csv) and [single-end](../assets/samplesheet_se.csv) data. + +> **NB:** The `group` and `replicate` columns were replaced with a single `sample` column as of v2.0 of the pipeline. 
The `sample` column is essentially a concatenation of the `group` and `replicate` columns. If all values of `sample` have the same number of underscores, fields defined by these underscore-separated names may be used in the PCA plots produced by the pipeline, to regain the ability to represent different groupings. + +## Reference genome files + +The minimum reference genome requirements are a FASTA and GTF file, all other files required to run the pipeline can be generated from these files. However, it is more storage and compute friendly if you are able to re-use reference genome files as efficiently as possible. It is recommended to use the `--save_reference` parameter if you are using the pipeline to build new indices (e.g. those unavailable on [AWS iGenomes](https://nf-co.re/usage/reference_genomes)) so that you can save them somewhere locally. The index building step can be quite a time-consuming process and it permits their reuse for future runs of the pipeline to save disk space. You can then either provide the appropriate reference genome files on the command-line via the appropriate parameters (e.g. `--bwa_index '/path/to/bwa/index/'`) or via a custom config file. + +- If `--genome` is provided then the FASTA and GTF files (and existing indices) will be automatically obtained from AWS-iGenomes unless these have already been downloaded locally in the path specified by `--igenomes_base`. +- If `--gene_bed` is not provided then it will be generated from the GTF file. + +> **NB:** Compressed reference files are also supported by the pipeline i.e. standard files with the `.gz` extension and indices folders with the `tar.gz` extension. + +## Blacklist bed files + +The blacklist bed files were obtained using the commands below: + +```console +cd .. 
+mkdir -p v1.0 +cd v1.0 +wget -L https://www.encodeproject.org/files/ENCFF001TDO/@@download/ENCFF001TDO.bed.gz && gunzip ENCFF001TDO.bed.gz && mv ENCFF001TDO.bed hg19-blacklist.v1.bed + +mkdir -p assets/blacklists/v2.0/ +cd assets/blacklists/v2.0/ +wget -L https://raw.githubusercontent.com/Boyle-Lab/Blacklist/master/lists/ce10-blacklist.v2.bed.gz && gunzip ce10-blacklist.v2.bed.gz +wget -L https://raw.githubusercontent.com/Boyle-Lab/Blacklist/master/lists/ce11-blacklist.v2.bed.gz && gunzip ce11-blacklist.v2.bed.gz +wget -L https://raw.githubusercontent.com/Boyle-Lab/Blacklist/master/lists/dm3-blacklist.v2.bed.gz && gunzip dm3-blacklist.v2.bed.gz +wget -L https://raw.githubusercontent.com/Boyle-Lab/Blacklist/master/lists/dm6-blacklist.v2.bed.gz && gunzip dm6-blacklist.v2.bed.gz +wget -L https://raw.githubusercontent.com/Boyle-Lab/Blacklist/master/lists/hg19-blacklist.v2.bed.gz && gunzip hg19-blacklist.v2.bed.gz +wget -L https://raw.githubusercontent.com/Boyle-Lab/Blacklist/master/lists/hg38-blacklist.v2.bed.gz && gunzip hg38-blacklist.v2.bed.gz +wget -L https://raw.githubusercontent.com/Boyle-Lab/Blacklist/master/lists/mm10-blacklist.v2.bed.gz && gunzip mm10-blacklist.v2.bed.gz + +cd .. +mkdir -p v3.0 +cd v3.0 +wget -L https://www.encodeproject.org/files/ENCFF356LFX/@@download/ENCFF356LFX.bed.gz && gunzip ENCFF356LFX.bed.gz && mv ENCFF356LFX.bed hg38-blacklist.v3.bed +``` -An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +> **NB:** A detailed description of the different versions of the files can be found [here](https://sites.google.com/site/anshulkundaje/projects/blacklists). Also, to see which blacklist bed files are assigned by default to the respective reference genome check the [igenomes.config](https://github.com/nf-core/chipseq/blob/master/conf/igenomes.config). 
## Running the pipeline @@ -83,7 +143,7 @@ nextflow pull nf-core/chipseq It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -First, go to the [nf-core/chipseq releases page](https://github.com/nf-core/chipseq/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. +First, go to the [nf-core/chipseq releases page](https://github.com/nf-core/chipseq/releases) and find the latest version number - numeric only (eg. `1.2.2`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.2.2`. This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. @@ -186,7 +246,6 @@ process { ``` > **NB:** We specify the full process name i.e. `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden. -> > If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. 
### Updating containers diff --git a/lib/WorkflowChipseq.groovy b/lib/WorkflowChipseq.groovy index a5952af0cfd23882875cd8d47e5dd06b0b2841a4..b46f9362b6da73f3934834c070c6082f63d2c4d0 100755 --- a/lib/WorkflowChipseq.groovy +++ b/lib/WorkflowChipseq.groovy @@ -7,7 +7,7 @@ class WorkflowChipseq { // // Check and validate parameters // - public static void initialise(params, log) { + public static void initialise(params, log, valid_params) { genomeExistsError(params, log) @@ -15,6 +15,31 @@ class WorkflowChipseq { log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." System.exit(1) } + + if (!params.gtf && !params.gff) { + log.error "No GTF or GFF3 annotation specified! The pipeline requires at least one of these files." + System.exit(1) + } + + if (params.gtf && params.gff) { + gtfGffWarn(log) + } + + if (!params.macs_gsize) { + macsGsizeWarn(log) + } + + if (!params.read_length && !params.macs_gsize) { + log.error "Both '--read_length' and '--macs_gsize' not specified! Please specify either to infer MACS2 genome size for peak calling." + System.exit(1) + } + + if (params.aligner) { + if (!valid_params['aligners'].contains(params.aligner)) { + log.error "Invalid option: '${params.aligner}'. Valid options for '--aligner': ${valid_params['aligners'].join(', ')}." 
+ System.exit(1) + } + } } // @@ -55,4 +80,26 @@ class WorkflowChipseq { System.exit(1) } } + + // + // Print a warning if both GTF and GFF have been provided + // + private static void gtfGffWarn(log) { + log.warn "=============================================================================\n" + + " Both '--gtf' and '--gff' parameters have been provided.\n" + + " Using GTF file as priority.\n" + + "===================================================================================" + } + + // + // Print a warning if macs_gsize parameter has not been provided + // + private static void macsGsizeWarn(log) { + log.warn "=============================================================================\n" + + " --macs_gsize parameter has not been provided.\n" + + " It will be auto-calculated by 'khmer unique-kmers.py' using the '--read_length' parameter.\n" + + " Explicitly provide '--macs_gsize macs2_genome_size' to change this behaviour.\n" + + "===================================================================================" + } + } diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 904824ae27dc92a85f8c3af16f44b417fc724eda..9eaea561cb4ffe578ae0dbe32a420dff296aab15 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -9,9 +9,8 @@ class WorkflowMain { // public static String citation(workflow) { return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - // TODO nf-core: Add Zenodo DOI for pipeline after first release - //"* The pipeline\n" + - //" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + + "* The pipeline\n" + + " https://doi.org/10.5281/zenodo.3240506\n\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + @@ -22,7 +21,7 @@ class WorkflowMain { // Print help to screen if required // public static String help(workflow, params, log) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile 
docker" + def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile docker" def help_string = '' help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) help_string += NfcoreSchema.paramsHelp(workflow, params, command) @@ -90,4 +89,19 @@ class WorkflowMain { } return null } + + // + // Get macs genome size (macs_gsize) + // + public static Long getMacsGsize(params) { + def val = null + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey('macs_gsize')) { + if (params.genomes[ params.genome ][ 'macs_gsize' ].containsKey(params.read_length.toString())) { + val = params.genomes[ params.genome ][ 'macs_gsize' ][ params.read_length.toString() ] + } + } + } + return val + } } diff --git a/main.nf b/main.nf old mode 100644 new mode 100755 index d6adc85138b6af1b09347940870bee4edd7b03bd..969542d9e79730527594f99e837705150f8b19cd --- a/main.nf +++ b/main.nf @@ -17,7 +17,16 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.bwa_index = WorkflowMain.getGenomeAttribute(params, 'bwa') +params.bowtie2_index = WorkflowMain.getGenomeAttribute(params, 'bowtie2') +params.chromap_index = WorkflowMain.getGenomeAttribute(params, 'chromap') +params.star_index = WorkflowMain.getGenomeAttribute(params, 'star') +params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') +params.gff = WorkflowMain.getGenomeAttribute(params, 'gff') +params.gene_bed = WorkflowMain.getGenomeAttribute(params, 'gene_bed') +params.blacklist = WorkflowMain.getGenomeAttribute(params, 'blacklist') +params.macs_gsize = WorkflowMain.getMacsGsize(params) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git 
a/modules.json b/modules.json index db8387141a39cf7f90ca60d148273a1c6ecb2f8d..ad30c80a2fcefa8ec0063233abb612b4d7dccf7b 100644 --- a/modules.json +++ b/modules.json @@ -3,20 +3,101 @@ "homePage": "https://github.com/nf-core/chipseq", "repos": { "nf-core/modules": { - "git_url": "https://github.com/nf-core/modules.git", - "modules": { - "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", - "branch": "master" - }, - "fastqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", - "branch": "master" - }, - "multiqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", - "branch": "master" - } + "bowtie2/align": { + "git_sha": "848ee9a215d02d80be033bfa60881700f2bd914c" + }, + "bowtie2/build": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "bwa/index": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "bwa/mem": { + "git_sha": "4f5274c3de0c9521f5033893ff61057a74c45ba9" + }, + "chromap/chromap": { + "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" + }, + "chromap/index": { + "git_sha": "dbb46c9b635080b132bab4b8d5b9a14f0d1c22e7" + }, + "custom/dumpsoftwareversions": { + "git_sha": "e5b44499efcf6f7fb24874886bac60591c5d94dd" + }, + "custom/getchromsizes": { + "git_sha": "213403187932dbbdd936a04474cc8cd8abae7a08" + }, + "deeptools/computematrix": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "deeptools/plotfingerprint": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "deeptools/plotheatmap": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "deeptools/plotprofile": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "fastqc": { + "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" + }, + "gffread": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "gunzip": { + "git_sha": "fa37e0662690c4ec4260dae282fbce08777503e6" + }, + "homer/annotatepeaks": { + "git_sha": "233fa70811a03a4cecb2ece483b5c8396e2cee1d" + }, + 
"khmer/uniquekmers": { + "git_sha": "82fdff4fb4ce6cafcc028a7503da835427f35352" + }, + "macs2/callpeak": { + "git_sha": "f0800157544a82ae222931764483331a81812012" + }, + "phantompeakqualtools": { + "git_sha": "233fa70811a03a4cecb2ece483b5c8396e2cee1d" + }, + "picard/collectmultiplemetrics": { + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" + }, + "picard/markduplicates": { + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" + }, + "picard/mergesamfiles": { + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" + }, + "preseq/lcextrap": { + "git_sha": "7111e571cc5b6069de4673cd6165af680f17b4d7" + }, + "samtools/flagstat": { + "git_sha": "ecece498f10b47b7c9d06f53a310cea5811b4c5f" + }, + "samtools/idxstats": { + "git_sha": "ecece498f10b47b7c9d06f53a310cea5811b4c5f" + }, + "samtools/index": { + "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" + }, + "samtools/sort": { + "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" + }, + "samtools/stats": { + "git_sha": "f48a24770e24358e58de66e9b805a70d77cd154b" + }, + "subread/featurecounts": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "trimgalore": { + "git_sha": "85ec13ff1fc2196c5a507ea497de468101baabed" + }, + "ucsc/bedgraphtobigwig": { + "git_sha": "233fa70811a03a4cecb2ece483b5c8396e2cee1d" + }, + "untar": { + "git_sha": "51be617b1ca9bff973655eb899d591ed6ab253b5" } } } diff --git a/modules/local/annotate_boolean_peaks.nf b/modules/local/annotate_boolean_peaks.nf new file mode 100644 index 0000000000000000000000000000000000000000..8d45fe92f8619837f765b06cb4e44f6342a08abc --- /dev/null +++ b/modules/local/annotate_boolean_peaks.nf @@ -0,0 +1,28 @@ +process ANNOTATE_BOOLEAN_PEAKS { + + label 'process_low' + + conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + + input: + tuple val(meta), path(boolean_txt), path(homer_peaks) + + output: + path '*.boolean.annotatePeaks.txt', emit: annotate_peaks_txt + path "versions.yml" , emit: versions + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cut -f2- ${homer_peaks} | awk 'NR==1; NR > 1 {print \$0 | "sort -T '.' -k1,1 -k2,2n"}' | cut -f6- > tmp.txt + paste $boolean_txt tmp.txt > ${prefix}.boolean.annotatePeaks.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/bam_filter.nf b/modules/local/bam_filter.nf new file mode 100644 index 0000000000000000000000000000000000000000..ff07c084b58cfb5a2fa906d3e802113c3573b7b3 --- /dev/null +++ b/modules/local/bam_filter.nf @@ -0,0 +1,47 @@ +/* + * Filter BAM file + */ +process BAM_FILTER { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::bamtools=2.5.2 bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-0560a8046fc82aa4338588eca29ff18edab2c5aa:5687a7da26983502d0a8a9a6b05ed727c740ddc4-0' : + 'quay.io/biocontainers/mulled-v2-0560a8046fc82aa4338588eca29ff18edab2c5aa:5687a7da26983502d0a8a9a6b05ed727c740ddc4-0' }" + + input: + tuple val(meta), path(bam), path(bai) + path bed + path bamtools_filter_se_config + path bamtools_filter_pe_config + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def filter_params = meta.single_end ? '-F 0x004' : '-F 0x004 -F 0x0008 -f 0x001' + def dup_params = params.keep_dups ? '' : '-F 0x0400' + def multimap_params = params.keep_multi_map ? '' : '-q 1' + def blacklist_params = params.blacklist ? 
"-L $bed" : '' + def config = meta.single_end ? bamtools_filter_se_config : bamtools_filter_pe_config + """ + samtools view \\ + $filter_params \\ + $dup_params \\ + $multimap_params \\ + $blacklist_params \\ + -b $bam \\ + | bamtools filter \\ + -out ${prefix}.bam \\ + -script $config + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + bamtools: \$(echo \$(bamtools --version 2>&1) | sed 's/^.*bamtools //; s/Part .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/bam_remove_orphans.nf b/modules/local/bam_remove_orphans.nf new file mode 100644 index 0000000000000000000000000000000000000000..21ffc73a9fb6837c18e0aad58eb648b87e2230cc --- /dev/null +++ b/modules/local/bam_remove_orphans.nf @@ -0,0 +1,43 @@ +/* + * Remove orphan reads from paired-end BAM file + */ +process BAM_REMOVE_ORPHANS { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::pysam=0.19.0 bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-57736af1eb98c01010848572c9fec9fff6ffaafd:402e865b8f6af2f3e58c6fc8d57127ff0144b2c7-0' : + 'quay.io/biocontainers/mulled-v2-57736af1eb98c01010848572c9fec9fff6ffaafd:402e865b8f6af2f3e58c6fc8d57127ff0144b2c7-0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam + path "versions.yml" , emit: versions + + script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + if (!meta.single_end) { + """ + samtools sort -n -@ $task.cpus -o ${prefix}.name.sorted.bam -T ${prefix}.name.sorted $bam + bampe_rm_orphan.py ${prefix}.name.sorted.bam ${prefix}.bam $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } else { + """ + ln -s $bam ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } +} diff --git a/modules/local/bedtools_genomecov.nf b/modules/local/bedtools_genomecov.nf new file mode 100644 index 0000000000000000000000000000000000000000..e8cbb3f7420b661935839aedd18202b78b3705d5 --- /dev/null +++ b/modules/local/bedtools_genomecov.nf @@ -0,0 +1,41 @@ +process BEDTOOLS_GENOMECOV { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0': + 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + + input: + tuple val(meta), path(bam), path(flagstat) + + output: + tuple val(meta), path("*.bedGraph"), emit: bedgraph + tuple val(meta), path("*.txt") , emit: scale_factor + path "versions.yml" , emit: versions + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + + def pe = meta.single_end ? '' : '-pc' + def extend = (meta.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' + """ + SCALE_FACTOR=\$(grep '[0-9] mapped (' $flagstat | awk '{print 1000000/\$1}') + echo \$SCALE_FACTOR > ${prefix}.scale_factor.txt + + bedtools \\ + genomecov \\ + -ibam $bam \\ + -bg \\ + -scale \$SCALE_FACTOR \\ + $pe \\ + $extend \\ + | sort -T '.' -k1,1 -k2,2n > ${prefix}.bedGraph + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/local/deseq2_qc.nf b/modules/local/deseq2_qc.nf new file mode 100644 index 0000000000000000000000000000000000000000..bdf5535af5e9ef49d27b4b8469d3895d009261d4 --- /dev/null +++ b/modules/local/deseq2_qc.nf @@ -0,0 +1,54 @@ +process DESEQ2_QC { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "conda-forge::r-base=4.0 bioconda::bioconductor-deseq2=1.28.0 bioconda::bioconductor-biocparallel bioconda::bioconductor-tximport bioconda::bioconductor-complexheatmap conda-forge::r-optparse conda-forge::r-ggplot2 conda-forge::r-rcolorbrewer conda-forge::r-pheatmap" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-8849acf39a43cdd6c839a369a74c0adc823e2f91:ab110436faf952a33575c64dd74615a84011450b-0' : + 'quay.io/biocontainers/mulled-v2-8849acf39a43cdd6c839a369a74c0adc823e2f91:ab110436faf952a33575c64dd74615a84011450b-0' }" + + input: + tuple val(meta), path(counts) + path deseq2_pca_header + path deseq2_clustering_header + + output: + path "*.pdf" , optional:true, emit: pdf + path "*.RData" , optional:true, emit: rdata + path "*.rds" , optional:true, emit: rds + path "*pca.vals.txt" , optional:true, emit: pca_txt + path "*pca.vals_mqc.tsv" , optional:true, emit: pca_multiqc + path "*sample.dists.txt" , optional:true, emit: dists_txt + path "*sample.dists_mqc.tsv", optional:true, emit: dists_multiqc + path "*.log" , optional:true, emit: log + path "size_factors" , optional:true, emit: size_factors + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def peak_type = params.narrow_peak ? 'narrowPeak' : 'broadPeak' + def antibody = meta.antibody + def prefix = "${antibody}.consensus_peaks" + """ + deseq2_qc.r \\ + --count_file $counts \\ + --outdir ./ \\ + --outprefix $prefix \\ + --cores $task.cpus \\ + $args + + sed 's/deseq2_pca/deseq2_pca_${task.index}/g' <$deseq2_pca_header >tmp.txt + sed -i -e 's/DESeq2 /${antibody} DESeq2 /g' tmp.txt + cat tmp.txt ${prefix}.pca.vals.txt > ${prefix}.pca.vals_mqc.tsv + + sed 's/deseq2_clustering/deseq2_clustering_${task.index}/g' <$deseq2_clustering_header >tmp.txt + sed -i -e 's/DESeq2 /${antibody} DESeq2 /g' tmp.txt + cat tmp.txt ${prefix}.sample.dists.txt > ${prefix}.sample.dists_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + bioconductor-deseq2: \$(Rscript -e "library(DESeq2); cat(as.character(packageVersion('DESeq2')))") + END_VERSIONS + """ +} diff --git a/modules/local/frip_score.nf b/modules/local/frip_score.nf new file mode 100644 index 
0000000000000000000000000000000000000000..e8fdbcc919886c50bde1175255b5a059b739f13e --- /dev/null +++ b/modules/local/frip_score.nf @@ -0,0 +1,31 @@ +process FRIP_SCORE { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::bedtools=2.30.0 bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-8186960447c5cb2faa697666dc1e6d919ad23f3e:3127fcae6b6bdaf8181e21a26ae61231030a9fcb-0': + 'quay.io/biocontainers/mulled-v2-8186960447c5cb2faa697666dc1e6d919ad23f3e:3127fcae6b6bdaf8181e21a26ae61231030a9fcb-0' }" + + input: + tuple val(meta), path(bam), path(peak) + + output: + tuple val(meta), path("*.txt"), emit: txt + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + READS_IN_PEAKS=\$(intersectBed -a $bam -b $peak $args | awk -F '\t' '{sum += \$NF} END {print sum}') + samtools flagstat $bam > ${bam}.flagstat + grep '[0-9] mapped (' ${bam}.flagstat | awk -v a="\$READS_IN_PEAKS" -v OFS='\t' '{print "${prefix}", a/\$1}' > ${prefix}.FRiP.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/genome_blacklist_regions.nf b/modules/local/genome_blacklist_regions.nf new file mode 100644 index 0000000000000000000000000000000000000000..1a28af0f6221e157c957f3332acfebaf23b88ffc --- /dev/null +++ b/modules/local/genome_blacklist_regions.nf @@ -0,0 +1,41 @@ +/* + * Prepare genome intervals for filtering by removing regions in blacklist file + */ +process GENOME_BLACKLIST_REGIONS { + tag "$sizes" + + conda (params.enable_conda ? 
"bioconda::bedtools=2.30.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0': + 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + + input: + path sizes + path blacklist + + output: + path '*.bed' , emit: bed + path "versions.yml", emit: versions + + script: + def file_out = "${sizes.simpleName}.include_regions.bed" + if (blacklist) { + """ + sortBed -i $blacklist -g $sizes | complementBed -i stdin -g $sizes > $file_out + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + } else { + """ + awk '{print \$1, '0' , \$2}' OFS='\t' $sizes > $file_out + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + } +} diff --git a/modules/local/gtf2bed.nf b/modules/local/gtf2bed.nf new file mode 100644 index 0000000000000000000000000000000000000000..1d306cee5520b5eed66fa5137003ae970f8bdb78 --- /dev/null +++ b/modules/local/gtf2bed.nf @@ -0,0 +1,28 @@ +process GTF2BED { + tag "$gtf" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::perl=5.26.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/perl:5.26.2': + 'quay.io/biocontainers/perl:5.26.2' }" + + input: + path gtf + + output: + path '*.bed' , emit: bed + path "versions.yml", emit: versions + + script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ + """ + gtf2bed \\ + $gtf \\ + > ${gtf.baseName}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + perl: \$(echo \$(perl --version 2>&1) | sed 's/.*v\\(.*\\)) built.*/\\1/') + END_VERSIONS + """ +} diff --git a/modules/local/igv.nf b/modules/local/igv.nf new file mode 100644 index 0000000000000000000000000000000000000000..8578092b4a4a1d16cb8b68c5856ba362a8fec1c9 --- /dev/null +++ b/modules/local/igv.nf @@ -0,0 +1,40 @@ +/* + * Create IGV session file + */ +process IGV { + + conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/python:3.8.3': + 'quay.io/biocontainers/python:3.8.3' }" + + input: + path fasta + path ("${bigwig_publish_dir}/*") + path ("${peak_publish_dir}/*") + path ("${consensus_publish_dir}/*") + val bigwig_publish_dir + val peak_publish_dir + val consensus_publish_dir + + output: + path "*files.txt" , emit: txt + path "*.xml" , emit: xml + path "versions.yml", emit: versions + + script: // scripts are bundled with the pipeline in nf-core/chipseq/bin/ + """ + find * -type l -name "*.bigWig" -exec echo -e ""{}"\\t0,0,178" \\; > bigwig.igv.txt + find * -type l -name "*Peak" -exec echo -e ""{}"\\t0,0,178" \\; > peaks.igv.txt + # Avoid error when consensus not produced + find * -type l -name "*.bed" -exec echo -e ""{}"\\t0,0,178" \\; | { grep "^$consensus_publish_dir" || test \$? 
= 1; } > bed.igv.txt + + cat *.txt > igv_files.txt + igv_files_to_session.py igv_session.xml igv_files.txt ../../genome/${fasta.getName()} --path_prefix '../../' + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/macs2_consensus.nf b/modules/local/macs2_consensus.nf new file mode 100644 index 0000000000000000000000000000000000000000..f28dc732eaa1b6e7df7dc099dbf5fe0d6cf39265 --- /dev/null +++ b/modules/local/macs2_consensus.nf @@ -0,0 +1,59 @@ +/* + * Consensus peaks across samples, create boolean filtering file, SAF file for featureCounts + */ +process MACS2_CONSENSUS { + tag "$meta.id" + label 'process_long' + + conda (params.enable_conda ? "conda-forge::biopython conda-forge::r-optparse=1.7.1 conda-forge::r-upsetr=1.4.0 bioconda::bedtools=2.30.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-2f48cc59b03027e31ead6d383fe1b8057785dd24:5d182f583f4696f4c4d9f3be93052811b383341f-0': + 'quay.io/biocontainers/mulled-v2-2f48cc59b03027e31ead6d383fe1b8057785dd24:5d182f583f4696f4c4d9f3be93052811b383341f-0' }" + + input: + tuple val(meta), path(peaks) + + output: + tuple val(meta), path("*.bed") , emit: bed + tuple val(meta), path("*.saf") , emit: saf + tuple val(meta), path("*.pdf") , emit: pdf + tuple val(meta), path("*.boolean.txt") , emit: boolean_txt + tuple val(meta), path("*.intersect.txt"), emit: intersect_txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ + + def prefix = task.ext.prefix ?: "${meta.id}" + def peak_type = params.narrow_peak ? 'narrowPeak' : 'broadPeak' + def mergecols = params.narrow_peak ? (2..10).join(',') : (2..9).join(',') + def collapsecols = params.narrow_peak ? 
(['collapse']*9).join(',') : (['collapse']*8).join(',') + def expandparam = params.narrow_peak ? '--is_narrow_peak' : '' + """ + sort -T '.' -k1,1 -k2,2n ${peaks.collect{it.toString()}.sort().join(' ')} \\ + | mergeBed -c $mergecols -o $collapsecols > ${prefix}.txt + + macs2_merged_expand.py \\ + ${prefix}.txt \\ + ${peaks.collect{it.toString()}.sort().join(',').replaceAll("_peaks.${peak_type}","")} \\ + ${prefix}.boolean.txt \\ + --min_replicates $params.min_reps_consensus \\ + $expandparam + + awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2, \$3, \$4, "0", "+" }' ${prefix}.boolean.txt > ${prefix}.bed + + echo -e "GeneID\tChr\tStart\tEnd\tStrand" > ${prefix}.saf + awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$4, \$1, \$2, \$3, "+" }' ${prefix}.boolean.txt >> ${prefix}.saf + + plot_peak_intersect.r -i ${prefix}.boolean.intersect.txt -o ${prefix}.boolean.intersect.plot.pdf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + END_VERSIONS + """ + +} diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf new file mode 100644 index 0000000000000000000000000000000000000000..92824f01d84c3d0c3b834f6edd6788efd8eb8fcb --- /dev/null +++ b/modules/local/multiqc.nf @@ -0,0 +1,71 @@ +process MULTIQC { + label 'process_medium' + + conda (params.enable_conda ? "bioconda::multiqc=1.13a" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1': + 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" + + input: + path multiqc_config + path mqc_custom_config + path software_versions + path workflow_summary + + path ('fastqc/*') + path ('trimgalore/fastqc/*') + path ('trimgalore/*') + + path ('alignment/library/*') + path ('alignment/library/*') + path ('alignment/library/*') + + path ('alignment/mergedLibrary/unfiltered/*') + path ('alignment/mergedLibrary/unfiltered/*') + path ('alignment/mergedLibrary/unfiltered/*') + path ('alignment/mergedLibrary/unfiltered/picard_metrics/*') + + path ('alignment/mergedLibrary/filtered/*') + path ('alignment/mergedLibrary/filtered/*') + path ('alignment/mergedLibrary/filtered/*') + path ('alignment/mergedLibrary/filtered/picard_metrics/*') + + path ('preseq/*') + path ('deeptools/*') + path ('deeptools/*') + path ('phantompeakqualtools/*') + path ('phantompeakqualtools/*') + path ('phantompeakqualtools/*') + path ('phantompeakqualtools/*') + + path ('macs2/peaks/*') + path ('macs2/peaks/*') + path ('macs2/annotation/*') + + path ('featurecounts/*') + + path ('deseq2/*') + path ('deseq2/*') + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def custom_config = params.multiqc_config ? "--config $mqc_custom_config" : '' + """ + multiqc \\ + -f \\ + $args \\ + $custom_config \\ + . 
+ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/local/multiqc_custom_peaks.nf b/modules/local/multiqc_custom_peaks.nf new file mode 100644 index 0000000000000000000000000000000000000000..1ca41d4ce15a5f5cfd473e6c21bdabe04dd52904 --- /dev/null +++ b/modules/local/multiqc_custom_peaks.nf @@ -0,0 +1,28 @@ +process MULTIQC_CUSTOM_PEAKS { + + conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + + input: + tuple val(meta), path(peak), path(frip) + path peak_count_header + path frip_score_header + + output: + tuple val(meta), path("*.peak_count_mqc.tsv"), emit: count + tuple val(meta), path("*.FRiP_mqc.tsv") , emit: frip + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cat $peak | wc -l | awk -v OFS='\t' '{ print "${prefix}", \$1 }' | cat $peak_count_header - > ${prefix}.peak_count_mqc.tsv + cat $frip_score_header $frip > ${prefix}.FRiP_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/multiqc_custom_phantompeakqualtools.nf b/modules/local/multiqc_custom_phantompeakqualtools.nf new file mode 100644 index 0000000000000000000000000000000000000000..706af4fc092357ca14db2b691956d57f7c459f29 --- /dev/null +++ b/modules/local/multiqc_custom_phantompeakqualtools.nf @@ -0,0 +1,32 @@ +process MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS { + conda (params.enable_conda ? "conda-forge::r-base=3.5.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/r-base:3.5.1': + 'quay.io/biocontainers/r-base:3.5.1' }" + + input: + tuple val(meta), path(spp), path(rdata) + path nsc_header + path rsc_header + path correlation_header + + output: + tuple val(meta), path("*.spp_nsc_mqc.tsv") , emit: nsc + tuple val(meta), path("*.spp_rsc_mqc.tsv") , emit: rsc + tuple val(meta), path("*.spp_correlation_mqc.tsv"), emit: correlation + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cp $correlation_header ${prefix}.spp_correlation_mqc.tsv + Rscript --max-ppsize=500000 -e "load('$rdata'); write.table(crosscorr\\\$cross.correlation, file=\\"${prefix}.spp_correlation_mqc.tsv\\", sep=",", quote=FALSE, row.names=FALSE, col.names=FALSE,append=TRUE)" + + awk -v OFS='\t' '{print "${meta.id}", \$9}' $spp | cat $nsc_header - > ${prefix}.spp_nsc_mqc.tsv + awk -v OFS='\t' '{print "${meta.id}", \$10}' $spp | cat $rsc_header - > ${prefix}.spp_rsc_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/plot_homer_annotatepeaks.nf b/modules/local/plot_homer_annotatepeaks.nf new file mode 100644 index 0000000000000000000000000000000000000000..2cb38a3732f1991fd92c96df00d1cd1765650ac4 --- /dev/null +++ b/modules/local/plot_homer_annotatepeaks.nf @@ -0,0 +1,35 @@ +process PLOT_HOMER_ANNOTATEPEAKS { + label 'process_medium' + + conda (params.enable_conda ? "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0': + 'quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" + + input: + path annos + path mqc_header + val suffix + + output: + path '*.txt' , emit: txt + path '*.pdf' , emit: pdf + path '*.tsv' , emit: tsv + path "versions.yml", emit: versions + + script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ + def args = task.ext.args ?: '' + """ + plot_homer_annotatepeaks.r \\ + -i ${annos.join(',')} \\ + -s ${annos.join(',').replaceAll("${suffix}","")} \\ + $args + + find ./ -type f -name "*.txt" -exec cat {} \\; | cat $mqc_header - > annotatepeaks.summary_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/plot_macs2_qc.nf b/modules/local/plot_macs2_qc.nf new file mode 100644 index 0000000000000000000000000000000000000000..bbbf1ce81a8569c3e8d181dbf16d68bbcb04c234 --- /dev/null +++ b/modules/local/plot_macs2_qc.nf @@ -0,0 +1,31 @@ +process PLOT_MACS2_QC { + label 'process_medium' + + conda (params.enable_conda ? "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0': + 'quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" + + input: + path peaks + + output: + path '*.txt' , emit: txt + path '*.pdf' , emit: pdf + path "versions.yml", emit: versions + + script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ + def args = task.ext.args ?: '' + def peak_type = params.narrow_peak ? 'narrowPeak' : 'broadPeak' + """ + plot_macs2_qc.r \\ + -i ${peaks.join(',')} \\ + -s ${peaks.join(',').replaceAll("_peaks.${peak_type}","")} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf new file mode 100644 index 0000000000000000000000000000000000000000..f960045f3cb422798ef92fe3b59262dfcda1c830 --- /dev/null +++ b/modules/local/star_align.nf @@ -0,0 +1,57 @@ +process STAR_ALIGN { + tag "$meta.id" + label 'process_high' + + // Note: 2.7X indices incompatible with AWS iGenomes. + conda (params.enable_conda ? "bioconda::star=2.6.1d" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/star:2.6.1d--0' : + 'quay.io/biocontainers/star:2.6.1d--0' }" + + input: + tuple val(meta), path(reads) + path index + + output: + tuple val(meta), path('*d.out.bam') , emit: bam + tuple val(meta), path('*Log.final.out') , emit: log_final + tuple val(meta), path('*Log.out') , emit: log_out + tuple val(meta), path('*Log.progress.out'), emit: log_progress + path "versions.yml" , emit: versions + + tuple val(meta), path('*sortedByCoord.out.bam') , optional:true, emit: bam_sorted + tuple val(meta), path('*toTranscriptome.out.bam'), optional:true, emit: bam_transcript + tuple val(meta), path('*Aligned.unsort.out.bam') , optional:true, emit: bam_unsorted + tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq + tuple val(meta), path('*.tab') , optional:true, emit: tab + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def seq_center = params.seq_center ? "--outSAMattrRGline ID:$prefix 'CN:$params.seq_center' 'SM:$prefix'" : "--outSAMattrRGline ID:$prefix 'SM:$prefix'" + def out_sam_type = (args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted' + def mv_unsorted_bam = (args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : '' + """ + STAR \\ + --genomeDir $index \\ + --readFilesIn $reads \\ + --runThreadN $task.cpus \\ + --outFileNamePrefix $prefix. 
\\ + $out_sam_type \\ + $seq_center \\ + $args + $mv_unsorted_bam + if [ -f ${prefix}.Unmapped.out.mate1 ]; then + mv ${prefix}.Unmapped.out.mate1 ${prefix}.unmapped_1.fastq + gzip ${prefix}.unmapped_1.fastq + fi + if [ -f ${prefix}.Unmapped.out.mate2 ]; then + mv ${prefix}.Unmapped.out.mate2 ${prefix}.unmapped_2.fastq + gzip ${prefix}.unmapped_2.fastq + fi + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + END_VERSIONS + """ +} diff --git a/modules/local/star_genomegenerate.nf b/modules/local/star_genomegenerate.nf new file mode 100644 index 0000000000000000000000000000000000000000..3cd4ff20cb5275c9436a179478df2eeb2dcb163d --- /dev/null +++ b/modules/local/star_genomegenerate.nf @@ -0,0 +1,58 @@ +process STAR_GENOMEGENERATE { + tag "$fasta" + label 'process_high' + + // Note: 2.7X indices incompatible with AWS iGenomes. + conda (params.enable_conda ? "bioconda::star=2.6.1d bioconda::samtools=1.10 conda-forge::gawk=5.1.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0' : + 'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0' }" + + input: + path fasta + path gtf + + output: + path "star" , emit: index + path "versions.yml", emit: versions + + script: + def args = (task.ext.args ?: '').tokenize() + def memory = task.memory ? 
"--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + if (args.contains('--genomeSAindexNbases')) { + """ + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + --sjdbGTFfile $gtf \\ + --runThreadN $task.cpus \\ + $memory \\ + ${args.join(' ')} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + END_VERSIONS + """ + } else { + """ + samtools faidx $fasta + NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fasta}.fai` + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + --sjdbGTFfile $gtf \\ + --runThreadN $task.cpus \\ + --genomeSAindexNbases \$NUM_BASES \\ + $memory \\ + ${args.join(' ')} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/modules/bowtie2/align/main.nf b/modules/nf-core/modules/bowtie2/align/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..c74e376f7f040d65e1695120d1e1d5073981989b --- /dev/null +++ b/modules/nf-core/modules/bowtie2/align/main.nf @@ -0,0 +1,71 @@ +process BOWTIE2_ALIGN { + tag "$meta.id" + label "process_high" + + conda (params.enable_conda ? "bioconda::bowtie2=2.4.4 bioconda::samtools=1.15.1 conda-forge::pigz=2.6" : null) + container "${ workflow.containerEngine == "singularity" && !task.ext.singularity_pull_docker_container ? 
+ "https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" : + "quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" }" + + input: + tuple val(meta), path(reads) + path index + val save_unaligned + val sort_bam + + output: + tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*fastq.gz"), emit: fastq, optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: "" + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + + def unaligned = "" + def reads_args = "" + if (meta.single_end) { + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-U ${reads}" + } else { + unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-1 ${reads[0]} -2 ${reads[1]}" + } + + def samtools_command = sort_bam ? 
'sort' : 'view' + + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/.rev.1.bt2//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/.rev.1.bt2l//"` + [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 + + bowtie2 \\ + -x \$INDEX \\ + $reads_args \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> ${prefix}.bowtie2.log \\ + | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + + if [ -f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/bowtie2/align/meta.yml b/modules/nf-core/modules/bowtie2/align/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..42ba0f9642590187f5d701bb0176cc0a7165ec6d --- /dev/null +++ b/modules/nf-core/modules/bowtie2/align/meta.yml @@ -0,0 +1,62 @@ +name: bowtie2_align +description: Align reads to a reference genome using bowtie2 +keywords: + - align + - map + - fasta + - fastq + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.ebwt" + - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - log: + type: file + description: Aligment log + pattern: "*.log" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/modules/bowtie2/build/main.nf b/modules/nf-core/modules/bowtie2/build/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..a4da62d073b5353cfdd6bff727cc12a86692706b --- /dev/null +++ b/modules/nf-core/modules/bowtie2/build/main.nf @@ -0,0 +1,30 @@ +process BOWTIE2_BUILD { + tag "$fasta" + label 'process_high' + + conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.4--py39hbb4e92a_0' : + 'quay.io/biocontainers/bowtie2:2.4.4--py39hbb4e92a_0' }" + + input: + path fasta + + output: + path 'bowtie2' , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir bowtie2 + bowtie2-build $args --threads $task.cpus $fasta bowtie2/${fasta.baseName} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/bowtie2/build/meta.yml b/modules/nf-core/modules/bowtie2/build/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..2da9a217163fae0e1b392af5ce6473c62e074c25 --- /dev/null +++ b/modules/nf-core/modules/bowtie2/build/meta.yml @@ -0,0 +1,33 @@ +name: bowtie2_build +description: Builds bowtie index for reference genome +keywords: + - build + - index + - fasta + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. 
+ homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] +input: + - fasta: + type: file + description: Input genome fasta file +output: + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.bt2" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/modules/bwa/index/main.nf b/modules/nf-core/modules/bwa/index/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..3affbf16bc36ff27d86427c1ca64f362eae57ece --- /dev/null +++ b/modules/nf-core/modules/bwa/index/main.nf @@ -0,0 +1,35 @@ +process BWA_INDEX { + tag "$fasta" + label 'process_high' + + conda (params.enable_conda ? "bioconda::bwa=0.7.17" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : + 'quay.io/biocontainers/bwa:0.7.17--hed695b0_7' }" + + input: + path fasta + + output: + path "bwa" , emit: index + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir bwa + bwa \\ + index \\ + $args \\ + -p bwa/${fasta.baseName} \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/bwa/index/meta.yml b/modules/nf-core/modules/bwa/index/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..2bbd81d9cf1714b0803b8d9906f83c1112a3aefc --- /dev/null +++ b/modules/nf-core/modules/bwa/index/meta.yml @@ -0,0 +1,32 @@ +name: bwa_index +description: Create BWA index for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 + licence: ["GPL-3.0-or-later"] +input: + - fasta: + type: file + description: Input genome fasta file +output: + - index: + type: file + description: BWA genome index files + pattern: "*.{amb,ann,bwt,pac,sa}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@maxulysse" diff --git a/modules/nf-core/modules/bwa/mem/main.nf b/modules/nf-core/modules/bwa/mem/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..f55af944377b5347b159c3eeacfb672b90a599d3 --- /dev/null +++ b/modules/nf-core/modules/bwa/mem/main.nf @@ -0,0 +1,43 @@ +process BWA_MEM { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? 
"bioconda::bwa=0.7.17 bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:8110a70be2bfe7f75a2ea7f2a89cda4cc7732095-0' : + 'quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:8110a70be2bfe7f75a2ea7f2a89cda4cc7732095-0' }" + + input: + tuple val(meta), path(reads) + path index + val sort_bam + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 'sort' : 'view' + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` + + bwa mem \\ + $args \\ + -t $task.cpus \\ + \$INDEX \\ + $reads \\ + | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/bwa/mem/meta.yml b/modules/nf-core/modules/bwa/mem/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..f84c5227b6de9c1fc26805464d4ee2961727e10b --- /dev/null +++ b/modules/nf-core/modules/bwa/mem/meta.yml @@ -0,0 +1,50 @@ +name: bwa_mem +description: Performs fastq alignment to a fasta reference using BWA +keywords: + - mem + - bwa + - alignment + - map + - fastq + - bam + - sam +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. 
+ homepage: http://bio-bwa.sourceforge.net/ + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - index: + type: file + description: BWA genome index files + pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@jeremy1805" diff --git a/modules/nf-core/modules/chromap/chromap/main.nf b/modules/nf-core/modules/chromap/chromap/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..137f0340c71bea8c50bdca1902e435281509502a --- /dev/null +++ b/modules/nf-core/modules/chromap/chromap/main.nf @@ -0,0 +1,95 @@ +process CHROMAP_CHROMAP { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::chromap=0.2.1 bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-1f09f39f20b1c4ee36581dc81cc323c70e661633:963e4fe6a85c548a4018585660aed79780a175d3-0' : + 'quay.io/biocontainers/mulled-v2-1f09f39f20b1c4ee36581dc81cc323c70e661633:963e4fe6a85c548a4018585660aed79780a175d3-0' }" + + input: + tuple val(meta), path(reads) + path fasta + path index + path barcodes + path whitelist + path chr_order + path pairs_chr_order + + output: + tuple val(meta), path("*.bed.gz") , optional:true, emit: bed + tuple val(meta), path("*.bam") , optional:true, emit: bam + tuple val(meta), path("*.tagAlign.gz"), optional:true, emit: tagAlign + tuple val(meta), path("*.pairs.gz") , optional:true, emit: pairs + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def args_list = args.tokenize() + + def file_extension = args.contains("--SAM") ? 'sam' : args.contains("--TagAlign")? 'tagAlign' : args.contains("--pairs")? 
'pairs' : 'bed' + if (barcodes) { + args_list << "-b ${barcodes.join(',')}" + if (whitelist) { + args_list << "--barcode-whitelist $whitelist" + } + } + if (chr_order) { + args_list << "--chr-order $chr_order" + } + if (pairs_chr_order){ + args_list << "--pairs-natural-chr-order $pairs_chr_order" + } + def final_args = args_list.join(' ') + def compression_cmds = "gzip -n ${prefix}.${file_extension}" + if (args.contains("--SAM")) { + compression_cmds = """ + samtools view $args2 -@ $task.cpus -bh \\ + -o ${prefix}.bam ${prefix}.${file_extension} + rm ${prefix}.${file_extension} + """ + } + if (meta.single_end) { + """ + chromap \\ + $final_args \\ + -t $task.cpus \\ + -x $index \\ + -r $fasta \\ + -1 ${reads.join(',')} \\ + -o ${prefix}.${file_extension} + + $compression_cmds + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + chromap: \$(echo \$(chromap --version 2>&1)) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } else { + """ + chromap \\ + $final_args \\ + -t $task.cpus \\ + -x $index \\ + -r $fasta \\ + -1 ${reads[0]} \\ + -2 ${reads[1]} \\ + -o ${prefix}.${file_extension} + + $compression_cmds + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + chromap: \$(echo \$(chromap --version 2>&1)) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/modules/chromap/chromap/meta.yml b/modules/nf-core/modules/chromap/chromap/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..a86fddc99c7ac6c442b2eaa519baf4cd0159166c --- /dev/null +++ b/modules/nf-core/modules/chromap/chromap/meta.yml @@ -0,0 +1,88 @@ +name: chromap_chromap +description: | + Performs preprocessing and alignment of chromatin fastq files to + fasta reference files using chromap. 
+keywords: + - chromap + - alignment + - map + - fastq + - bam + - sam + - hi-c + - atac-seq + - chip-seq + - trimming + - duplicate removal +tools: + - chromap: + description: Fast alignment and preprocessing of chromatin profiles + homepage: https://github.com/haowenz/chromap + documentation: https://github.com/haowenz/chromap + tool_dev_url: https://github.com/haowenz/chromap + doi: "" + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - fasta: + type: file + description: | + The fasta reference file. + - index: + type: file + description: | + Chromap genome index files (*.index) + - barcodes: + type: file + description: | + Cell barcode files + - whitelist: + type: file + description: | + Cell barcode whitelist file + - chr_order: + type: file + description: | + Custom chromosome order + - pairs_chr_order: + type: file + description: | + Natural chromosome order for pairs flipping +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bed: + type: file + description: BED file + pattern: "*.bed.gz" + - bam: + type: file + description: BAM file + pattern: "*.bam" + - tagAlign: + type: file + description: tagAlign file + pattern: "*.tagAlign.gz" + - pairs: + type: file + description: pairs file + pattern: "*.pairs.gz" + +authors: + - "@mahesh-panchal" diff --git a/modules/nf-core/modules/chromap/index/main.nf b/modules/nf-core/modules/chromap/index/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..ee3706959ea05ac463c75aa5998b58196ad9c180 --- /dev/null +++ b/modules/nf-core/modules/chromap/index/main.nf @@ -0,0 +1,36 @@ +process CHROMAP_INDEX { + tag "$fasta" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::chromap=0.2.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/chromap:0.2.1--hd03093a_0' : + 'quay.io/biocontainers/chromap:0.2.1--hd03093a_0' }" + + input: + path fasta + + output: + path "*.index" , emit: index + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = fasta.baseName + """ + chromap \\ + -i \\ + $args \\ + -t $task.cpus \\ + -r $fasta \\ + -o ${prefix}.index + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + chromap: \$(echo \$(chromap --version 2>&1)) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/chromap/index/meta.yml b/modules/nf-core/modules/chromap/index/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..6659221f6e95945b1e1fdbb2fdd0a1843d037a92 --- /dev/null +++ b/modules/nf-core/modules/chromap/index/meta.yml @@ -0,0 +1,33 @@ +name: chromap_index +description: Indexes a fasta reference genome ready for chromatin profiling. 
+keywords: + - index + - fasta + - genome + - reference +tools: + - chromap: + description: Fast alignment and preprocessing of chromatin profiles + homepage: https://github.com/haowenz/chromap + documentation: https://github.com/haowenz/chromap + tool_dev_url: https://github.com/haowenz/chromap + doi: "" + licence: ["GPL v3"] + +input: + - fasta: + type: file + description: Fasta reference file. + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - index: + type: file + description: Index file of the reference genome + pattern: "*.{index}" + +authors: + - "@mahesh-panchal" diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf index 327d5100560d84eb0020b60acf0db2922497991b..203e485aec396d608f19de9c7cfae29ba2aae703 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_low' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? "bioconda::multiqc=1.11" : null) + conda (params.enable_conda ? 'bioconda::multiqc=1.13a' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' : + 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" input: path versions diff --git a/modules/nf-core/modules/custom/getchromsizes/main.nf b/modules/nf-core/modules/custom/getchromsizes/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..0eabf3a4c3cdd9a862154859e1241990052dc2d4 --- /dev/null +++ b/modules/nf-core/modules/custom/getchromsizes/main.nf @@ -0,0 +1,32 @@ +process CUSTOM_GETCHROMSIZES { + tag "$fasta" + label 'process_low' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + path fasta + + output: + path '*.sizes' , emit: sizes + path '*.fai' , emit: fai + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools faidx $fasta + cut -f 1,2 ${fasta}.fai > ${fasta}.sizes + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + custom: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/custom/getchromsizes/meta.yml b/modules/nf-core/modules/custom/getchromsizes/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..ee6c257185a41746801ae1cccaaef1d31379ddd7 --- /dev/null +++ b/modules/nf-core/modules/custom/getchromsizes/meta.yml @@ -0,0 +1,38 @@ +name: custom_getchromsizes +description: Generates a FASTA file of chromosome sizes and a fasta index file +keywords: + - fasta + - chromosome + - indexing +tools: + - samtools: + description: Tools for dealing 
with SAM, BAM and CRAM files + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + tool_dev_url: https://github.com/samtools/samtools + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + +input: + - fasta: + type: file + description: FASTA file + pattern: "*.{fasta}" + +output: + - sizes: + type: file + description: File containing chromosome lengths + pattern: "*.{sizes}" + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + - versions: + type: file + description: File containing software version + pattern: "versions.yml" + +authors: + - "@tamara-hodgetts" + - "@chris-cheshire" diff --git a/modules/nf-core/modules/deeptools/computematrix/main.nf b/modules/nf-core/modules/deeptools/computematrix/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..96dfef3c602992f4b4e9a51f67c963852350450d --- /dev/null +++ b/modules/nf-core/modules/deeptools/computematrix/main.nf @@ -0,0 +1,39 @@ +process DEEPTOOLS_COMPUTEMATRIX { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : + 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + + input: + tuple val(meta), path(bigwig) + path bed + + output: + tuple val(meta), path("*.mat.gz") , emit: matrix + tuple val(meta), path("*.mat.tab"), emit: table + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + computeMatrix \\ + $args \\ + --regionsFileName $bed \\ + --scoreFileName $bigwig \\ + --outFileName ${prefix}.computeMatrix.mat.gz \\ + --outFileNameMatrix ${prefix}.computeMatrix.vals.mat.tab \\ + --numberOfProcessors $task.cpus + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeptools: \$(computeMatrix --version | sed -e "s/computeMatrix //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/deeptools/computematrix/meta.yml b/modules/nf-core/modules/deeptools/computematrix/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..eaa990dd9a60b31c6fc34ebe8f5149258d241b2f --- /dev/null +++ b/modules/nf-core/modules/deeptools/computematrix/meta.yml @@ -0,0 +1,58 @@ +name: deeptools_computematrix +description: calculates scores per genome regions for other deeptools plotting utilities +keywords: + - genome + - regions + - scores + - matrix +tools: + - deeptools: + description: A set of user-friendly tools for normalization and visualization of deep-sequencing data + homepage: + documentation: https://deeptools.readthedocs.io/en/develop/index.html + tool_dev_url: https://github.com/deeptools/deepTools + doi: "10.1093/nar/gku365" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - bigwig: + type: file + description: bigwig file containing genomic scores + pattern: "*.{bw,bigwig}" + - bed: + type: file + description: bed file containing genomic regions + pattern: "*.{bed}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - matrix: + type: file + description: | + gzipped matrix file needed by the plotHeatmap and plotProfile + deeptools utilities + pattern: "*.{computeMatrix.mat.gz}" + - table: + type: file + description: | + tabular file containing the scores of the generated matrix + pattern: "*.{computeMatrix.vals.mat.tab}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@jeremy1805" + - "@emiller88" + - "@drpatelh" + - "@joseespinosa" diff --git a/modules/nf-core/modules/deeptools/plotfingerprint/main.nf b/modules/nf-core/modules/deeptools/plotfingerprint/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..83613be7b3d73981c849311bc5a090e80fe9414a --- /dev/null +++ b/modules/nf-core/modules/deeptools/plotfingerprint/main.nf @@ -0,0 +1,41 @@ +process DEEPTOOLS_PLOTFINGERPRINT { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : + 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + + input: + tuple val(meta), path(bams), path(bais) + + output: + tuple val(meta), path("*.pdf") , emit: pdf + tuple val(meta), path("*.raw.txt") , emit: matrix + tuple val(meta), path("*.qcmetrics.txt"), emit: metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extend = (meta.single_end && params.fragment_size > 0) ? "--extendReads ${params.fragment_size}" : '' + """ + plotFingerprint \\ + $args \\ + $extend \\ + --bamfiles ${bams.join(' ')} \\ + --plotFile ${prefix}.plotFingerprint.pdf \\ + --outRawCounts ${prefix}.plotFingerprint.raw.txt \\ + --outQualityMetrics ${prefix}.plotFingerprint.qcmetrics.txt \\ + --numberOfProcessors $task.cpus + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeptools: \$(plotFingerprint --version | sed -e "s/plotFingerprint //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/deeptools/plotfingerprint/meta.yml b/modules/nf-core/modules/deeptools/plotfingerprint/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..07c25748082a338117c8720408a2fe76911371fc --- /dev/null +++ b/modules/nf-core/modules/deeptools/plotfingerprint/meta.yml @@ -0,0 +1,61 @@ +name: deeptools_plotfingerprint +description: plots cumulative reads coverages by BAM file +keywords: + - plot + - fingerprint + - cumulative coverage + - bam +tools: + - deeptools: + description: A set of user-friendly tools for normalization and visualization of deep-sequencing data + homepage: + documentation: https://deeptools.readthedocs.io/en/develop/index.html + tool_dev_url: https://github.com/deeptools/deepTools + doi: "10.1093/nar/gku365" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - bam: + type: file + description: One or more BAM files + pattern: "*.{bam}" + - bais: + type: file + description: Corresponding BAM file indexes + pattern: "*.bam.bai" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - pdf: + type: file + description: | + Output figure containing resulting plot + pattern: "*.{plotFingerprint.pdf}" + - matrix: + type: file + description: | + Output file summarizing the read counts per bin + pattern: "*.{plotFingerprint.raw.txt}" + - metrics: + type: file + description: | + file containing BAM file quality metrics + pattern: "*.{qcmetrics.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@emiller88" + - "@drpatelh" + - "@joseespinosa" diff --git a/modules/nf-core/modules/deeptools/plotheatmap/main.nf b/modules/nf-core/modules/deeptools/plotheatmap/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..1e402e3919cdce7e97064ce46ec06bc1ef4d5d85 --- /dev/null +++ b/modules/nf-core/modules/deeptools/plotheatmap/main.nf @@ -0,0 +1,36 @@ +process DEEPTOOLS_PLOTHEATMAP { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : + 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + + input: + tuple val(meta), path(matrix) + + output: + tuple val(meta), path("*.pdf"), emit: pdf + tuple val(meta), path("*.tab"), emit: table + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + plotHeatmap \\ + $args \\ + --matrixFile $matrix \\ + --outFileName ${prefix}.plotHeatmap.pdf \\ + --outFileNameMatrix ${prefix}.plotHeatmap.mat.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeptools: \$(plotHeatmap --version | sed -e "s/plotHeatmap //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/deeptools/plotheatmap/meta.yml b/modules/nf-core/modules/deeptools/plotheatmap/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..ea206fb672630db193f95c992b9d3e7a5e791d32 --- /dev/null +++ b/modules/nf-core/modules/deeptools/plotheatmap/meta.yml @@ -0,0 +1,55 @@ +name: deeptools_plotheatmap +description: plots values produced by deeptools_computematrix as a heatmap +keywords: + - plot + - heatmap + - scores + - matrix +tools: + - deeptools: + description: A set of user-friendly tools for normalization and visualization of deep-sequencing data + homepage: + documentation: https://deeptools.readthedocs.io/en/develop/index.html + tool_dev_url: https://github.com/deeptools/deepTools + doi: "10.1093/nar/gku365" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - matrix: + type: file + description: | + gzipped matrix file produced by deeptools_ + computematrix deeptools utility + pattern: "*.{mat.gz}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - pdf: + type: file + description: | + Output figure containing resulting plot + pattern: "*.{plotHeatmap.pdf}" + - matrix: + type: file + description: | + File containing the matrix of values + used to generate the heatmap + pattern: "*.{plotHeatmap.mat.tab}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@emiller88" + - "@drpatelh" + - "@joseespinosa" diff --git a/modules/nf-core/modules/deeptools/plotprofile/main.nf b/modules/nf-core/modules/deeptools/plotprofile/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..d83a9493876a9ac6ed3ada4e54e05a18d1514cfc --- /dev/null +++ b/modules/nf-core/modules/deeptools/plotprofile/main.nf @@ -0,0 +1,36 @@ +process DEEPTOOLS_PLOTPROFILE { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : + 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + + input: + tuple val(meta), path(matrix) + + output: + tuple val(meta), path("*.pdf"), emit: pdf + tuple val(meta), path("*.tab"), emit: table + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + plotProfile \\ + $args \\ + --matrixFile $matrix \\ + --outFileName ${prefix}.plotProfile.pdf \\ + --outFileNameData ${prefix}.plotProfile.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeptools: \$(plotProfile --version | sed -e "s/plotProfile //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/deeptools/plotprofile/meta.yml b/modules/nf-core/modules/deeptools/plotprofile/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..795fda4444db5b0247de1af208adbb7b8a6de8ca --- /dev/null +++ b/modules/nf-core/modules/deeptools/plotprofile/meta.yml @@ -0,0 +1,55 @@ +name: deeptools_plotprofile +description: plots values produced by deeptools_computematrix as a profile plot +keywords: + - plot + - profile + - scores + - matrix +tools: + - deeptools: + description: A set of user-friendly tools for normalization and visualization of deep-sequencing data + homepage: + documentation: https://deeptools.readthedocs.io/en/develop/index.html + tool_dev_url: https://github.com/deeptools/deepTools + doi: "10.1093/nar/gku365" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - matrix: + type: file + description: | + gzipped matrix file produced by deeptools_ + computematrix deeptools utility + pattern: "*.{mat.gz}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - pdf: + type: file + description: | + Output figure containing resulting plot + pattern: "*.{plotProfile.pdf}" + - matrix: + type: file + description: | + File containing the matrix of values + used to generate the profile + pattern: "*.{plotProfile.tab}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@emiller88" + - "@drpatelh" + - "@joseespinosa" diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/modules/fastqc/main.nf index ed6b8c50b1fb6bfa64e5acb72a536328bd8ca88a..05730368b2d43e0eaac6b13a69f07ed54d1ed2cb 100644 --- a/modules/nf-core/modules/fastqc/main.nf +++ b/modules/nf-core/modules/fastqc/main.nf @@ -44,4 +44,16 @@ process FASTQC { END_VERSIONS """ } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/gffread/main.nf b/modules/nf-core/modules/gffread/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..7c575c97ff20f5d0a2f35cad1149eff725bc9112 --- /dev/null +++ b/modules/nf-core/modules/gffread/main.nf @@ -0,0 +1,33 @@ +process GFFREAD { + tag "$gff" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gffread=0.12.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' : + 'quay.io/biocontainers/gffread:0.12.1--h8b12597_0' }" + + input: + path gff + + output: + path "*.gtf" , emit: gtf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${gff.baseName}" + """ + gffread \\ + $gff \\ + $args \\ + -o ${prefix}.gtf + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gffread: \$(gffread --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/gffread/meta.yml b/modules/nf-core/modules/gffread/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..20335747745aec8285683cf203ed8e6c7282d961 --- /dev/null +++ b/modules/nf-core/modules/gffread/meta.yml @@ -0,0 +1,33 @@ +name: gffread +description: Validate, filter, convert and perform various other operations on GFF files +keywords: + - gff + - conversion + - validation +tools: + - gffread: + description: GFF/GTF utility providing format conversions, region filtering, FASTA sequence extraction and more. + homepage: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread + documentation: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread + tool_dev_url: https://github.com/gpertea/gffread + doi: 10.12688/f1000research.23297.1 + licence: ["MIT"] + +input: + - gff: + type: file + description: A reference file in either the GFF3, GFF2 or GTF format. 
+ pattern: "*.{gff,gtf}" + +output: + - gtf: + type: file + description: GTF file resulting from the conversion of the GFF input file + pattern: "*.{gtf}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@emiller88" diff --git a/modules/nf-core/modules/gunzip/main.nf b/modules/nf-core/modules/gunzip/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..703670495b7ae557baf0cb3138bf63c38c506ab8 --- /dev/null +++ b/modules/nf-core/modules/gunzip/main.nf @@ -0,0 +1,44 @@ +process GUNZIP { + tag "$archive" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$gunzip"), emit: gunzip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + gunzip = archive.toString() - '.gz' + """ + gunzip \\ + -f \\ + $args \\ + $archive + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + gunzip = archive.toString() - '.gz' + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/gunzip/meta.yml b/modules/nf-core/modules/gunzip/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..4d2ebc84e8f0ce18306b3ac93534d654e5f2020b --- /dev/null +++ b/modules/nf-core/modules/gunzip/meta.yml @@ -0,0 +1,34 @@ +name: gunzip +description: Compresses and decompresses files. 
+keywords: + - gunzip + - compression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. + documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/modules/homer/annotatepeaks/main.nf b/modules/nf-core/modules/homer/annotatepeaks/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..9056a5ab50ef539e7685dee809af29ebaa0e23ef --- /dev/null +++ b/modules/nf-core/modules/homer/annotatepeaks/main.nf @@ -0,0 +1,41 @@ +process HOMER_ANNOTATEPEAKS { + tag "$meta.id" + label 'process_medium' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda (params.enable_conda ? "bioconda::homer=4.11" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/homer:4.11--pl526hc9558a2_3' : + 'quay.io/biocontainers/homer:4.11--pl526hc9558a2_3' }" + + input: + tuple val(meta), path(peak) + path fasta + path gtf + + output: + tuple val(meta), path("*annotatePeaks.txt"), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '4.11' // WARN: Version information not provided by tool on CLI. 
Please update this string when bumping container versions. + """ + annotatePeaks.pl \\ + $peak \\ + $fasta \\ + $args \\ + -gtf $gtf \\ + -cpu $task.cpus \\ + > ${prefix}.annotatePeaks.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + homer: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/homer/annotatepeaks/meta.yml b/modules/nf-core/modules/homer/annotatepeaks/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..b815e975ddb93c8f8adeacad50abc5b4df25153d --- /dev/null +++ b/modules/nf-core/modules/homer/annotatepeaks/meta.yml @@ -0,0 +1,48 @@ +name: homer_annotatepeaks +description: Annotate peaks with HOMER suite +keywords: + - annotations + - peaks + - bed +tools: + - homer: + description: | + HOMER (Hypergeometric Optimization of Motif EnRichment) is a suite of tools for Motif Discovery and next-gen sequencing analysis. + documentation: http://homer.ucsd.edu/homer/ + doi: 10.1016/j.molcel.2010.05.004 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - peaks: + type: file + description: The peak files in bed format + pattern: "*.bed" + - fasta: + type: file + description: Fasta file of reference genome + pattern: "*.fasta" + - gtf: + type: file + description: GTF file of reference genome + pattern: "*.gtf" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - annotated_peaks: + type: file + description: The annotated peaks + pattern: "*annotatePeaks.txt" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/modules/khmer/uniquekmers/main.nf b/modules/nf-core/modules/khmer/uniquekmers/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..8ad89a6287b7e8e83e06aa20a948cf9c3c0ec3c7 --- /dev/null +++ b/modules/nf-core/modules/khmer/uniquekmers/main.nf @@ -0,0 +1,38 @@ +process KHMER_UNIQUEKMERS { + tag "$fasta" + label 'process_low' + + conda (params.enable_conda ? "bioconda::khmer=3.0.0a3" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/khmer:3.0.0a3--py37haa7609a_2' : + 'quay.io/biocontainers/khmer:3.0.0a3--py37haa7609a_2' }" + + input: + path fasta + val kmer_size + + output: + path "report.txt" , emit: report + path "kmers.txt" , emit: kmers + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + unique-kmers.py \\ + -k $kmer_size \\ + -R report.txt \\ + $args \\ + $fasta + + grep ^number report.txt | sed 's/^.*:.[[:blank:]]//g' > kmers.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + khmer: \$( unique-kmers.py --version 2>&1 | grep ^khmer | sed 's/^khmer //;s/ .*\$//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/khmer/uniquekmers/meta.yml b/modules/nf-core/modules/khmer/uniquekmers/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..31405cc16cade306d878a6f5ebc090eb7325c0f4 --- /dev/null +++ b/modules/nf-core/modules/khmer/uniquekmers/meta.yml @@ -0,0 +1,42 @@ +name: "khmer_uniquekmers" +description: +keywords: + - khmer + - k-mer + - effective genome size + +tools: + - "khmer": + 
description: khmer k-mer counting library + homepage: https://github.com/dib-lab/khmer + documentation: https://khmer.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/dib-lab/khmer + doi: "10.12688/f1000research.6924.1" + licence: ["BSD License"] + +input: + - fasta: + type: file + description: fasta file + pattern: "*.{fa,fasta}" + - kmer_size: + type: value + description: k-mer size to use + pattern: "[0-9]+" + +output: + - report: + type: file + description: Text file containing unique-kmers.py execution report + pattern: "report.txt" + - kmers: + type: file + description: Text file containing number of kmers + pattern: "kmers.txt" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@JoseEspinosa" diff --git a/modules/nf-core/modules/macs2/callpeak/main.nf b/modules/nf-core/modules/macs2/callpeak/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..9aaf97a93be5f3f0361dd0ec915b0db1a2980b91 --- /dev/null +++ b/modules/nf-core/modules/macs2/callpeak/main.nf @@ -0,0 +1,53 @@ +process MACS2_CALLPEAK { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::macs2=2.2.7.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/macs2:2.2.7.1--py38h4a8c8d9_3' : + 'quay.io/biocontainers/macs2:2.2.7.1--py38h4a8c8d9_3' }" + + input: + tuple val(meta), path(ipbam), path(controlbam) + val macs2_gsize + + output: + tuple val(meta), path("*.{narrowPeak,broadPeak}"), emit: peak + tuple val(meta), path("*.xls") , emit: xls + path "versions.yml" , emit: versions + + tuple val(meta), path("*.gappedPeak"), optional:true, emit: gapped + tuple val(meta), path("*.bed") , optional:true, emit: bed + tuple val(meta), path("*.bdg") , optional:true, emit: bdg + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def args_list = args.tokenize() + def format = meta.single_end ? 'BAM' : 'BAMPE' + def control = controlbam ? "--control $controlbam" : '' + if(args_list.contains('--format')){ + def id = args_list.findIndexOf{it=='--format'} + format = args_list[id+1] + args_list.remove(id+1) + args_list.remove(id) + } + """ + macs2 \\ + callpeak \\ + ${args_list.join(' ')} \\ + --gsize $macs2_gsize \\ + --format $format \\ + --name $prefix \\ + --treatment $ipbam \\ + $control + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + macs2: \$(macs2 --version | sed -e "s/macs2 //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/macs2/callpeak/meta.yml b/modules/nf-core/modules/macs2/callpeak/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..982bc5b241626d7e1fe6b7b4bd0de69649996ce7 --- /dev/null +++ b/modules/nf-core/modules/macs2/callpeak/meta.yml @@ -0,0 +1,63 @@ +name: macs2_callpeak +description: Peak calling of enriched genomic regions of ChIP-seq and ATAC-seq experiments +keywords: + - alignment + - atac-seq + - chip-seq + - peak-calling +tools: + - macs2: + description: Model Based Analysis for ChIP-Seq data + homepage: None + documentation: https://docs.csc.fi/apps/macs2/ + tool_dev_url: https://github.com/macs3-project/MACS + 
+ doi: "https://doi.org/10.1101/496521" + licence: ["BSD"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ipbam: + type: file + description: The ChIP-seq treatment file + - controlbam: + type: file + description: The control file + - macs2_gsize: + type: string + description: Effective genome size. It can be 1.0e+9 or 1000000000, or shortcuts:'hs' for human (2.7e9), + 'mm' for mouse (1.87e9), 'ce' for C. elegans (9e7) and 'dm' for fruitfly (1.2e8) + +output: + - versions: + type: file + description: File containing software version + pattern: "versions.yml" + - peak: + type: file + description: BED file containing annotated peaks + pattern: "*.{narrowPeak,broadPeak}" + - xls: + type: file + description: xls file containing annotated peaks + pattern: "*.xls" + - gapped: + type: file + description: Optional BED file containing gapped peak + pattern: "*.gappedPeak" + - bed: + type: file + description: Optional BED file containing peak summits locations for every peak + pattern: "*.bed" + - bdg: + type: file + description: Optional bedGraph files for input and treatment input samples + pattern: "*.bdg" + +authors: + - "@ntoda03" + - "@JoseEspinosa" + - "@jianhong" diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf deleted file mode 100644 index 1264aac1ebfc902ae6633862472b412cd929656a..0000000000000000000000000000000000000000 --- a/modules/nf-core/modules/multiqc/main.nf +++ /dev/null @@ -1,31 +0,0 @@ -process MULTIQC { - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::multiqc=1.12' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" - - input: - path multiqc_files - - output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - multiqc -f $args . - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/modules/multiqc/meta.yml deleted file mode 100644 index 6fa891efc2c607fa6e1d081171b1bf2a710443ab..0000000000000000000000000000000000000000 --- a/modules/nf-core/modules/multiqc/meta.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: MultiQC -description: Aggregate results from bioinformatics analyses across many samples into a single report -keywords: - - QC - - bioinformatics tools - - Beautiful stand-alone HTML report -tools: - - multiqc: - description: | - MultiQC searches a given directory for analysis logs and compiles a HTML report. - It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. 
- homepage: https://multiqc.info/ - documentation: https://multiqc.info/docs/ - licence: ["GPL-3.0-or-later"] -input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC -output: - - report: - type: file - description: MultiQC report file - pattern: "multiqc_report.html" - - data: - type: dir - description: MultiQC data dir - pattern: "multiqc_data" - - plots: - type: file - description: Plots created by MultiQC - pattern: "*_data" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@abhi18av" - - "@bunop" - - "@drpatelh" diff --git a/modules/nf-core/modules/phantompeakqualtools/main.nf b/modules/nf-core/modules/phantompeakqualtools/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..43fca68362702a9c8994254613be82896cfcc21b --- /dev/null +++ b/modules/nf-core/modules/phantompeakqualtools/main.nf @@ -0,0 +1,37 @@ +process PHANTOMPEAKQUALTOOLS { + tag "$meta.id" + label 'process_medium' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda (params.enable_conda ? "bioconda::phantompeakqualtools=1.2.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/phantompeakqualtools:1.2.2--0' : + 'quay.io/biocontainers/phantompeakqualtools:1.2.2--0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.out") , emit: spp + tuple val(meta), path("*.pdf") , emit: pdf + tuple val(meta), path("*.Rdata"), emit: rdata + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.2.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + RUN_SPP=`which run_spp.R` + Rscript $args -e "library(caTools); source(\\"\$RUN_SPP\\")" -c="$bam" -savp="${prefix}.spp.pdf" -savd="${prefix}.spp.Rdata" -out="${prefix}.spp.out" $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + phantompeakqualtools: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/phantompeakqualtools/meta.yml b/modules/nf-core/modules/phantompeakqualtools/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..6488500d45f42c21ea23dba450c39c8e8aaadd3f --- /dev/null +++ b/modules/nf-core/modules/phantompeakqualtools/meta.yml @@ -0,0 +1,60 @@ +name: "phantompeakqualtools" + +description: +keywords: + - "ChIP-Seq" + - "QC" + - "phantom peaks" +tools: + - "phantompeakqualtools": + description: | + "This package computes informative enrichment and quality measures + for ChIP-seq/DNase-seq/FAIRE-seq/MNase-seq data. It can also be used + to obtain robust estimates of the predominant fragment length or + characteristic tag shift values in these assays." 
+ homepage: "None" + documentation: "https://github.com/kundajelab/phantompeakqualtools" + tool_dev_url: "https://github.com/kundajelab/phantompeakqualtools" + doi: "https://doi.org/10.1101/gr.136184.111" + licence: ["BSD-3-clause"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - spp: + type: file + description: | + A ChIP-Seq Processing Pipeline file containing + peakshift/phantomPeak results + pattern: "*.{out}" + - pdf: + type: file + description: A pdf containing the saved cross-correlation plots + pattern: "*.{pdf}" + - rdata: + type: file + description: Rdata file containing the R session + pattern: "*.{Rdata}" + +authors: + - "@drpatelh" + - "@Emiller88" + - "@JoseEspinosa" diff --git a/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf b/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..63f4e872951a1bbfde4e486a2d08c1f6dec7ab6a --- /dev/null +++ b/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf @@ -0,0 +1,67 @@ +process PICARD_COLLECTMULTIPLEMETRICS { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" + + input: + tuple val(meta), path(bam) + path fasta + path fai + + output: + tuple val(meta), path("*_metrics"), emit: metrics + tuple val(meta), path("*.pdf") , emit: pdf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + def avail_mem = 3 + if (!task.memory) { + log.info '[Picard CollectMultipleMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + picard \\ + -Xmx${avail_mem}g \\ + CollectMultipleMetrics \\ + $args \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.CollectMultipleMetrics \\ + $reference + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard CollectMultipleMetrics --version 2>&1 | grep -o 'Version.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.CollectMultipleMetrics.alignment_summary_metrics + touch ${prefix}.CollectMultipleMetrics.insert_size_metrics + touch ${prefix}.CollectMultipleMetrics.quality_distribution.pdf + touch ${prefix}.CollectMultipleMetrics.base_distribution_by_cycle_metrics + touch ${prefix}.CollectMultipleMetrics.quality_by_cycle_metrics + touch ${prefix}.CollectMultipleMetrics.read_length_histogram.pdf + touch ${prefix}.CollectMultipleMetrics.base_distribution_by_cycle.pdf + touch ${prefix}.CollectMultipleMetrics.quality_by_cycle.pdf + touch ${prefix}.CollectMultipleMetrics.insert_size_histogram.pdf + touch ${prefix}.CollectMultipleMetrics.quality_distribution_metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CollectMultipleMetrics --version 2>&1) | grep -o 
'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml b/modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..c11b02cfa4ab26028cf0859d9f758c1423ef481c --- /dev/null +++ b/modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml @@ -0,0 +1,54 @@ +name: picard_collectmultiplemetrics +description: Collect multiple metrics from a BAM file +keywords: + - alignment + - metrics + - statistics + - insert + - quality + - bam +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://broadinstitute.github.io/picard/ + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - fasta: + type: file + description: Genome fasta file + - fai: + type: file + description: Index of FASTA file. Only needed when fasta is supplied. + pattern: "*.fai" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - metrics: + type: file + description: Alignment metrics files generated by picard + pattern: "*_{metrics}" + - pdf: + type: file + description: PDF plots of metrics + pattern: "*.{pdf}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" diff --git a/modules/nf-core/modules/picard/markduplicates/main.nf b/modules/nf-core/modules/picard/markduplicates/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..4e559fea0dfb0e2598f9ee027f2a9ed488159b02 --- /dev/null +++ b/modules/nf-core/modules/picard/markduplicates/main.nf @@ -0,0 +1,58 @@ +process PICARD_MARKDUPLICATES { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.metrics.txt"), emit: metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 + if (!task.memory) { + log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } + """ + picard \\ + -Xmx${avail_mem}g \\ + MarkDuplicates \\ + $args \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.bam \\ + --METRICS_FILE ${prefix}.MarkDuplicates.metrics.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + touch ${prefix}.bam.bai + touch ${prefix}.MarkDuplicates.metrics.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/picard/markduplicates/meta.yml b/modules/nf-core/modules/picard/markduplicates/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..842817bcd8f5ab4459a675b1acbf82d45566de62 --- /dev/null +++ b/modules/nf-core/modules/picard/markduplicates/meta.yml @@ -0,0 +1,52 @@ +name: picard_markduplicates +description: Locate and tag duplicate reads in a BAM file +keywords: + - markduplicates + - pcr + - duplicates + - bam + - sam + - cram +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://broadinstitute.github.io/picard/ + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
// PICARD_MERGESAMFILES
//
// Produces one `${prefix}.bam` per sample from one or more input BAM files.
//   - more than one input : the files are merged with `picard MergeSamFiles`
//   - a single input      : no merge is run; `${prefix}.bam` is a symlink to it
//
// Inputs
//   meta : sample metadata map; `meta.id` is the task tag and default prefix
//   bams : BAM file(s) to merge
//
// Outputs
//   bam      : `${prefix}.bam`
//   versions : versions.yml holding the picard version string
//
// Configuration read from the task: `task.ext.args`, `task.ext.prefix`, `task.ext.when`.
process PICARD_MERGESAMFILES {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::picard=2.27.4" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' :
        'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }"

    input:
    tuple val(meta), path(bams)

    output:
    tuple val(meta), path("*.bam"), emit: bam
    path  "versions.yml"          , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Sorted so the --INPUT order on the command line does not depend on staging order.
    def bam_files = bams.sort()
    // JVM heap size in megabytes. The default of 3072 MB matches the "3GB" announced
    // in the log message below.
    def avail_mem = 3072
    if (!task.memory) {
        log.info '[Picard MergeSamFiles] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        // Give the heap 80% of the task allocation, expressed in MB: the JVM needs
        // memory outside the heap, and whole-gigabyte truncation (task.memory.giga)
        // turned any allocation below 1 GB into the invalid `-Xmx0g`.
        avail_mem = (task.memory.mega*0.8).intValue()
    }
    if (bam_files.size() > 1) {
        """
        picard \\
            -Xmx${avail_mem}M \\
            MergeSamFiles \\
            $args \\
            ${'--INPUT '+bam_files.join(' --INPUT ')} \\
            --OUTPUT ${prefix}.bam
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            picard: \$( echo \$(picard MergeSamFiles --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:)
        END_VERSIONS
        """
    } else {
        // Nothing to merge: expose the single input under the output name.
        """
        ln -s ${bam_files[0]} ${prefix}.bam
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            picard: \$( echo \$(picard MergeSamFiles --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:)
        END_VERSIONS
        """
    }
}
// PRESEQ_LCEXTRAP
//
// Runs `preseq lc_extrap` on the BAM of one sample and keeps the tool's stderr.
//
// Inputs
//   meta : sample metadata map; `meta.id` is the task tag and default prefix,
//          `meta.single_end` decides whether `-pe` is passed
//   bam  : alignment file given to preseq
//
// Outputs
//   lc_extrap : `${prefix}.lc_extrap.txt` written through `-output`
//   log       : `${prefix}.command.log`, a copy of the task's `.command.err`
//   versions  : versions.yml holding the preseq version string
//
// NOTE(review): the `error_ignore` label is resolved in pipeline configuration that
// is not part of this module - presumably it keeps a preseq failure from stopping
// the run; confirm against the config.
process PRESEQ_LCEXTRAP {
    tag "$meta.id"
    label 'process_medium'
    label 'error_ignore'

    conda (params.enable_conda ? "bioconda::preseq=3.1.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/preseq:3.1.2--h445547b_2' :
        'quay.io/biocontainers/preseq:3.1.2--h445547b_2' }"

    input:
    tuple val(meta), path(bam)

    output:
    tuple val(meta), path("*.lc_extrap.txt"), emit: lc_extrap
    tuple val(meta), path("*.log")          , emit: log
    path  "versions.yml"                    , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // `-pe` is added for everything that is not flagged single-end.
    def paired_end = ''
    if (!meta.single_end) {
        paired_end = '-pe'
    }
    """
    preseq \\
        lc_extrap \\
        $args \\
        $paired_end \\
        -output ${prefix}.lc_extrap.txt \\
        $bam
    cp .command.err ${prefix}.command.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        preseq: \$(echo \$(preseq 2>&1) | sed 's/^.*Version: //; s/Usage:.*\$//')
    END_VERSIONS
    """
}
// SAMTOOLS_FLAGSTAT
//
// Runs `samtools flagstat` on one alignment file and stores the report.
//
// Inputs
//   meta : sample metadata map; `meta.id` is the task tag and default prefix
//   bam  : alignment file to summarise
//   bai  : its index, staged next to it (not referenced on the command line)
//
// Outputs
//   flagstat : `${prefix}.flagstat` (stdout of samtools flagstat)
//   versions : versions.yml holding the samtools version string
//
// Configuration read from the task: `task.ext.args` (extra flagstat options),
// `task.ext.prefix`, `task.ext.when`.
process SAMTOOLS_FLAGSTAT {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
        'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"

    input:
    tuple val(meta), path(bam), path(bai)

    output:
    tuple val(meta), path("*.flagstat"), emit: flagstat
    path  "versions.yml"               , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Previously `args` was read but never placed on the command line, so options
    // configured through task.ext.args were silently dropped. It is now forwarded;
    // with the default empty string the executed command is unchanged.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    samtools \\
        flagstat \\
        --threads ${task.cpus-1} \\
        $args \\
        $bam \\
        > ${prefix}.flagstat

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """
}
// SAMTOOLS_IDXSTATS
//
// Runs `samtools idxstats` on one alignment file and stores the report.
//
// Inputs
//   meta : sample metadata map; `meta.id` is the task tag and default prefix
//   bam  : alignment file to summarise
//   bai  : its index, staged next to it (not referenced on the command line)
//
// Outputs
//   idxstats : `${prefix}.idxstats` (stdout of samtools idxstats)
//   versions : versions.yml holding the samtools version string
//
// Configuration read from the task: `task.ext.args` (extra idxstats options),
// `task.ext.prefix`, `task.ext.when`.
process SAMTOOLS_IDXSTATS {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
        'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"

    input:
    tuple val(meta), path(bam), path(bai)

    output:
    tuple val(meta), path("*.idxstats"), emit: idxstats
    path  "versions.yml"               , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Previously `args` was read but never placed on the command line, so options
    // configured through task.ext.args were silently dropped. It is now forwarded;
    // with the default empty string the executed command is unchanged.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    """
    samtools \\
        idxstats \\
        $args \\
        $bam \\
        > ${prefix}.idxstats

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """
}
// SAMTOOLS_INDEX
//
// Runs `samtools index` on one alignment file.
//
// Inputs
//   meta  : sample metadata map; `meta.id` is the task tag
//   input : alignment file to index
//
// Outputs (all three index channels are optional; whichever of the patterns the
// command leaves in the work dir is emitted)
//   bai      : `*.bai`
//   csi      : `*.csi`
//   crai     : `*.crai`
//   versions : versions.yml holding the samtools version string
//
// Configuration read from the task: `task.ext.args` (extra index options),
// `task.ext.when`.
process SAMTOOLS_INDEX {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
        'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"

    input:
    tuple val(meta), path(input)

    output:
    tuple val(meta), path("*.bai") , optional:true, emit: bai
    tuple val(meta), path("*.csi") , optional:true, emit: csi
    tuple val(meta), path("*.crai"), optional:true, emit: crai
    path  "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Value handed to `-@`: one less than the CPUs assigned to the task.
    def extra_threads = task.cpus - 1
    """
    samtools \\
        index \\
        -@ ${extra_threads} \\
        $args \\
        $input

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """

    stub:
    // Creates an empty placeholder for each of the three index flavours.
    """
    touch ${input}.bai
    touch ${input}.crai
    touch ${input}.csi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """
}
// SAMTOOLS_SORT
//
// Sorts one alignment file with `samtools sort` and writes `${prefix}.bam`.
//
// Inputs
//   meta : sample metadata map; `meta.id` is the task tag and default prefix
//   bam  : alignment file to sort
//
// Outputs
//   bam      : `${prefix}.bam`
//   versions : versions.yml holding the samtools version string
//
// Configuration read from the task: `task.ext.args` (extra sort options),
// `task.ext.prefix`, `task.ext.when`.
//
// The task aborts before running anything when the staged input is already named
// `${prefix}.bam`, because the output would then be written onto the input.
process SAMTOOLS_SORT {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
        'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"

    input:
    tuple val(meta), path(bam)

    output:
    tuple val(meta), path("*.bam"), emit: bam
    path  "versions.yml"          , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Guard against input/output name collision.
    if ("$bam" == "${prefix}.bam") {
        error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
    }
    """
    samtools sort $args -@ $task.cpus -o ${prefix}.bam -T $prefix $bam
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """

    stub:
    // Creates an empty `${prefix}.bam` without sorting anything.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.bam

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """
}
// SAMTOOLS_STATS
//
// Runs `samtools stats` on one alignment file and stores the report.
//
// Inputs
//   meta        : sample metadata map; `meta.id` is the task tag and default prefix
//   input       : alignment file to analyse
//   input_index : its index, staged next to it (not referenced on the command line)
//   fasta       : reference; when it evaluates to true it is passed as `--reference`
//
// Outputs
//   stats    : `${prefix}.stats` (stdout of samtools stats)
//   versions : versions.yml holding the samtools version string
//
// Configuration read from the task: `task.ext.args` (extra stats options),
// `task.ext.prefix`, `task.ext.when`.
process SAMTOOLS_STATS {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
        'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"

    input:
    tuple val(meta), path(input), path(input_index)
    path fasta

    output:
    tuple val(meta), path("*.stats"), emit: stats
    path  "versions.yml"            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Previously `args` was read but never placed on the command line, so options
    // configured through task.ext.args were silently dropped. It is now forwarded;
    // with the default empty string the executed command is unchanged.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def reference = fasta ? "--reference ${fasta}" : ""
    """
    samtools \\
        stats \\
        --threads ${task.cpus-1} \\
        ${reference} \\
        $args \\
        ${input} \\
        > ${prefix}.stats

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """

    stub:
    // Creates an empty `${prefix}.stats` without running samtools stats.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.stats

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """
}
// SUBREAD_FEATURECOUNTS
//
// Runs `featureCounts` over the alignment file(s) of one sample against an
// annotation file.
//
// Inputs
//   meta       : sample metadata map; `meta.id` is the task tag and default prefix,
//                `meta.single_end` and `meta.strandedness` select `-p` and `-s`
//   bams       : alignment file(s), joined with spaces on the command line
//   annotation : annotation file passed to `-a`
//
// Outputs
//   counts   : `*featureCounts.txt` (written as `${prefix}.featureCounts.txt`)
//   summary  : `*featureCounts.txt.summary`
//   versions : versions.yml holding the featureCounts version string
//
// Configuration read from the task: `task.ext.args`, `task.ext.prefix`, `task.ext.when`.
process SUBREAD_FEATURECOUNTS {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::subread=2.0.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/subread:2.0.1--hed695b0_0' :
        'quay.io/biocontainers/subread:2.0.1--hed695b0_0' }"

    input:
    tuple val(meta), path(bams), path(annotation)

    output:
    tuple val(meta), path("*featureCounts.txt")        , emit: counts
    tuple val(meta), path("*featureCounts.txt.summary"), emit: summary
    path  "versions.yml"                               , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // `-p` is added for everything that is not flagged single-end.
    def paired_end = meta.single_end ? '' : '-p'

    // Value for `-s`: 1 for 'forward', 2 for 'reverse', 0 for anything else
    // (including a missing `meta.strandedness`).
    def strandedness = meta.strandedness == 'forward' ? 1 : (meta.strandedness == 'reverse' ? 2 : 0)
    """
    featureCounts \\
        $args \\
        $paired_end \\
        -T $task.cpus \\
        -a $annotation \\
        -s $strandedness \\
        -o ${prefix}.featureCounts.txt \\
        ${bams.join(' ')}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        subread: \$( echo \$(featureCounts -v 2>&1) | sed -e "s/featureCounts v//g")
    END_VERSIONS
    """
}
// TRIMGALORE
//
// Runs `trim_galore` on the FastQ file(s) of one sample.
//
// Inputs
//   meta  : sample metadata map; `meta.id` is the task tag and default prefix,
//           `meta.single_end` selects the single-end or paired-end command
//   reads : one FastQ (single-end) or two FastQs (paired-end; reads[0] and reads[1])
//
// Outputs
//   reads    : `*{trimmed,val}*.fq.gz`
//   log      : `*report.txt`
//   unpaired : `*unpaired*.fq.gz` (optional)
//   html     : `*.html` (optional)
//   zip      : `*.zip` (optional)
//   versions : versions.yml holding the trim_galore and cutadapt version strings
//
// Configuration: `task.ext.args`, `task.ext.prefix`, `task.ext.when`, plus the
// pipeline parameters `params.clip_r1`, `params.clip_r2`,
// `params.three_prime_clip_r1` and `params.three_prime_clip_r2`.
process TRIMGALORE {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? 'bioconda::trim-galore=0.6.7' : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/trim-galore:0.6.7--hdfd78af_0' :
        'quay.io/biocontainers/trim-galore:0.6.7--hdfd78af_0' }"

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*{trimmed,val}*.fq.gz"), emit: reads
    tuple val(meta), path("*report.txt")          , emit: log
    path  "versions.yml"                          , emit: versions

    tuple val(meta), path("*unpaired*.fq.gz")     , emit: unpaired, optional: true
    tuple val(meta), path("*.html")               , emit: html    , optional: true
    tuple val(meta), path("*.zip")                , emit: zip     , optional: true

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Derive --cores from task.cpus: hold back 3 CPUs for single-end and 4 for
    // paired-end data, then clamp the remainder to the range 1..4.
    // See: https://github.com/FelixKrueger/TrimGalore/blob/master/Changelog.md#version-060-release-on-1-mar-2019
    // See: https://github.com/nf-core/atacseq/pull/65
    def cores = 1
    if (task.cpus) {
        def held_back = meta.single_end ? 3 : 4
        cores = Math.min(4, Math.max(1, (task.cpus as int) - held_back))
    }

    // Clipping presets: an option is emitted only when its parameter is greater
    // than 0; which of them are used depends on SE/PE below.
    def clip_opt = { name, value -> value > 0 ? "--${name} ${value}" : '' }
    def c_r1   = clip_opt('clip_r1', params.clip_r1)
    def c_r2   = clip_opt('clip_r2', params.clip_r2)
    def tpc_r1 = clip_opt('three_prime_clip_r1', params.three_prime_clip_r1)
    def tpc_r2 = clip_opt('three_prime_clip_r2', params.three_prime_clip_r2)

    // The inputs are soft-linked to prefix-based names so that naming is
    // consistent in MultiQC.
    def prefix = task.ext.prefix ?: "${meta.id}"
    if (meta.single_end) {
        """
        [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz
        trim_galore \\
            $args \\
            --cores $cores \\
            --gzip \\
            $c_r1 \\
            $tpc_r1 \\
            ${prefix}.fastq.gz

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//')
            cutadapt: \$(cutadapt --version)
        END_VERSIONS
        """
    } else {
        """
        [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
        [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
        trim_galore \\
            $args \\
            --cores $cores \\
            --paired \\
            --gzip \\
            $c_r1 \\
            $c_r2 \\
            $tpc_r1 \\
            $tpc_r2 \\
            ${prefix}_1.fastq.gz \\
            ${prefix}_2.fastq.gz

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//')
            cutadapt: \$(cutadapt --version)
        END_VERSIONS
        """
    }
}
+keywords: + - trimming + - adapters + - sequencing adapters + - fastq +tools: + - trimgalore: + description: | + A wrapper tool around Cutadapt and FastQC to consistently apply quality + and adapter trimming to FastQ files, with some extra functionality for + MspI-digested RRBS-type (Reduced Representation Bisulfite-Seq) libraries. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/ + documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input adapter trimmed FastQ files of size 1 and 2 for + single-end and paired-end data, respectively. + pattern: "*.{fq.gz}" + - unpaired: + type: file + description: | + FastQ files containing unpaired reads from read 1 or read 2 + pattern: "*unpaired*.fq.gz" + - html: + type: file + description: FastQC report (optional) + pattern: "*_{fastqc.html}" + - zip: + type: file + description: FastQC report archive (optional) + pattern: "*_{fastqc.zip}" + - log: + type: file + description: Trim Galore!
// UCSC_BEDGRAPHTOBIGWIG
//
// Converts one bedGraph file to bigWig with `bedGraphToBigWig`.
//
// Inputs
//   meta     : sample metadata map; `meta.id` is the task tag and default prefix
//   bedgraph : bedGraph file to convert
//   sizes    : chromosome sizes file, passed as the second positional argument
//
// Outputs
//   bigwig   : `${prefix}.bigWig`
//   versions : versions.yml holding the hard-coded VERSION string
//
// WARN: Version information not provided by tool on CLI. Please update the VERSION
// string below when bumping container versions.
process UCSC_BEDGRAPHTOBIGWIG {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::ucsc-bedgraphtobigwig=377" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ucsc-bedgraphtobigwig:377--h446ed27_1' :
        'quay.io/biocontainers/ucsc-bedgraphtobigwig:377--h446ed27_1' }"

    input:
    tuple val(meta), path(bedgraph)
    path  sizes

    output:
    tuple val(meta), path("*.bigWig"), emit: bigwig
    path "versions.yml"              , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // NOTE(review): `args` is read here but is not placed on the bedGraphToBigWig
    // command line below, so task.ext.args currently has no effect for this process.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def VERSION = '377' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    """
    bedGraphToBigWig \\
        $bedgraph \\
        $sizes \\
        ${prefix}.bigWig

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        ucsc: $VERSION
    END_VERSIONS
    """
}

// UNTAR
//
// Extracts a gzip-compressed tar archive into a directory named after the archive
// with its `.tar.gz` suffix removed.
//
// Inputs
//   meta    : metadata map, passed through unchanged to the output tuple
//   archive : archive to extract; also used as the task tag
//
// Outputs
//   untar    : the extracted directory (`$untar`)
//   versions : versions.yml holding the tar version string
//
// Configuration read from the task:
//   task.ext.args  - extra tar options, placed before the `-xzvf` bundle
//   task.ext.args2 - extra words placed after the archive name
//   task.ext.when  - execution guard
//
// `untar` is deliberately assigned without `def`: the output declaration
// `path("$untar")` needs to see it.
process UNTAR {
    tag "$archive"
    label 'process_low'

    conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
        'ubuntu:20.04' }"

    input:
    tuple val(meta), path(archive)

    output:
    tuple val(meta), path("$untar"), emit: untar
    path "versions.yml"            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args  = task.ext.args ?: ''
    def args2 = task.ext.args2 ?: ''
    untar     = archive.toString() - '.tar.gz'

    // The archive name must directly follow `-xzvf`, because the bundled `f` takes
    // the next command-line word as the archive. `$args` used to sit between the
    // two, so any non-empty task.ext.args had its first word interpreted as the
    // archive file. Placing `$args` ahead of the bundle fixes that; with the
    // default empty string the executed command is unchanged.
    """
    mkdir output

    tar \\
        -C output --strip-components 1 \\
        $args \\
        -xzvf \\
        $archive \\
        $args2

    mv output ${untar}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//')
    END_VERSIONS
    """

    stub:
    // Creates an empty placeholder named like the extraction target.
    untar = archive.toString() - '.tar.gz'
    """
    touch $untar

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//')
    END_VERSIONS
    """
}
[ id:'test', single_end:false ] + - untar: + type: file + description: + pattern: "*.*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/nextflow.config b/nextflow.config index 08da317e869833d52775f45e006face465f06711..58cea5bfa6a7336b3f37bf80599889cefcd7a483 100644 --- a/nextflow.config +++ b/nextflow.config @@ -8,16 +8,67 @@ // Global default params, used in configs params { - - // TODO nf-core: Specify your pipeline's command line flags // Input options input = null + seq_center = null + fragment_size = 200 + fingerprint_bins = 500000 + read_length = null // References genome = null igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false + save_reference = false + + // Options: Trimming + clip_r1 = null + clip_r2 = null + three_prime_clip_r1 = null + three_prime_clip_r2 = null + trim_nextseq = null + skip_trimming = false + save_trimmed = false + + // Options: Alignment + aligner = 'bwa' + bwa_min_score = null + keep_dups = false + keep_multi_map = false + save_align_intermeds = false + save_unaligned = false + + // Options: Peaks + narrow_peak = false + broad_cutoff = 0.1 + macs_fdr = null + macs_pvalue = null + min_reps_consensus = 1 + save_macs_pileup = false + skip_peak_qc = false + skip_peak_annotation = false + skip_consensus_peaks = false + + // Options: DESeq2 QC + deseq2_vst = false + skip_deseq2_qc = false + + // Options: QC + skip_qc = false + skip_fastqc = false + skip_picard_metrics = false + skip_preseq = false + skip_plot_profile = false + skip_plot_fingerprint = false + skip_spp = false + skip_igv = false + skip_multiqc = false + + // Options: Config + bamtools_filter_pe_config = "$projectDir/assets/bamtools_filter_pe.json" + bamtools_filter_se_config = "$projectDir/assets/bamtools_filter_se.json" + // MultiQC options multiqc_config = null multiqc_title = null @@ -73,8 +124,6 @@ try { // System.err.println("WARNING: Could 
not load nf-core/config/chipseq profiles: ${params.custom_config_base}/pipeline/chipseq.config") // } - - profiles { debug { process.beforeScript = 'echo $HOSTNAME' } conda { @@ -140,7 +189,6 @@ profiles { test_full { includeConfig 'conf/test_full.config' } } - // Load igenomes.config if required if (!params.igenomes_ignore) { includeConfig 'conf/igenomes.config' @@ -148,7 +196,6 @@ if (!params.igenomes_ignore) { params.genomes = [:] } - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -183,7 +230,7 @@ dag { manifest { name = 'nf-core/chipseq' - author = 'Philip Ewels, Jose Espinosa-Carrasco, Harshil Patel' + author = 'Jose Espinosa-Carrasco, Harshil Patel, Philip Ewels' homePage = 'https://github.com/nf-core/chipseq' description = 'ChIP-seq peak-calling and differential analysis pipeline.' mainScript = 'main.nf' diff --git a/nextflow_schema.json b/nextflow_schema.json index 8080b723ada20486d4c653535114afa48e818e99..ea0721e1b359255be0aa97d1e9ea0b43ad310689 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": ["outdir"], "properties": { "input": { "type": "string", @@ -19,9 +19,26 @@ "pattern": "^\\S+\\.csv$", "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. 
Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/chipseq/usage#samplesheet-input).", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 5 columns, and a header row. See [usage docs](https://nf-co.re/chipseq/docs/usage#introduction).", "fa_icon": "fas fa-file-csv" }, + "fragment_size": { + "type": "integer", + "description": "Estimated fragment size used to extend single-end reads.", + "fa_icon": "fas fa-chart-area", + "default": 200 + }, + "seq_center": { + "type": "string", + "description": "Sequencing center information to be added to read group of BAM files.", + "fa_icon": "fas fa-synagogue" + }, + "read_length": { + "type": "integer", + "description": "Read length used to calculate MACS2 genome size for peak calling if `--macs_gsize` isn't provided.", + "fa_icon": "fas fa-chart-area", + "enum": [50, 75, 100, 150, 200] + }, "outdir": { "type": "string", "format": "directory-path", @@ -60,9 +77,76 @@ "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", + "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have the appropriate alignment index available this will be generated for you automatically. 
Combine with `--save_reference` to save alignment index for future runs.", "fa_icon": "far fa-file-code" }, + "gtf": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.gtf(\\.gz)?$", + "description": "Path to GTF annotation file.", + "fa_icon": "fas fa-code-branch", + "help_text": "This parameter is *mandatory* if `--genome` is not specified." + }, + "gff": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.gff(\\.gz)?$", + "fa_icon": "fas fa-code-branch", + "description": "Path to GFF3 annotation file.", + "help_text": "This parameter must be specified if `--genome` or `--gtf` are not specified." + }, + "bwa_index": { + "type": "string", + "description": "Path to directory or tar.gz archive for pre-built BWA index.", + "fa_icon": "fas fa-bezier-curve" + }, + "bowtie2_index": { + "type": "string", + "format": "path", + "fa_icon": "fas fa-bezier-curve", + "description": "Path to directory or tar.gz archive for pre-built Bowtie2 index." + }, + "chromap_index": { + "type": "string", + "format": "path", + "fa_icon": "fas fa-bezier-curve", + "description": "Path to directory or tar.gz archive for pre-built Chromap index." + }, + "star_index": { + "type": "string", + "format": "path", + "fa_icon": "fas fa-bezier-curve", + "description": "Path to directory or tar.gz archive for pre-built STAR index." + }, + "gene_bed": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.bed(\\.gz)?$", + "fa_icon": "fas fa-procedures", + "description": "Path to BED file containing gene intervals. This will be created from the GTF file if not specified." + }, + "macs_gsize": { + "type": "number", + "description": "Effective genome size parameter required by MACS2.", + "help_text": "[Effective genome size](https://github.com/taoliu/MACS#-g--gsize) parameter required by MACS2. 
If using an iGenomes reference these have been provided when `--genome` is set as *GRCh37*, *GRCh38*, *GRCm38*, *WBcel235*, *BDGP6*, *R64-1-1*, *EF2*, *hg38*, *hg19* and *mm10*. For other genomes, if this parameter is not specified then the MACS2 peak-calling and differential analysis will be skipped.", + "fa_icon": "fas fa-arrows-alt-h" + }, + "blacklist": { + "type": "string", + "description": "Path to blacklist regions in BED format, used for filtering alignments.", + "help_text": "If provided, alignments that overlap with the regions in this file will be filtered out (see [ENCODE blacklists](https://sites.google.com/site/anshulkundaje/projects/blacklists)). The file should be in BED format. Blacklisted regions for *GRCh37*, *GRCh38*, *GRCm38*, *hg19*, *hg38*, *mm10* are bundled with the pipeline in the [`blacklists`](../assets/blacklists/) directory, and as such will be automatically used if any of those genomes are specified with the `--genome` parameter.", + "fa_icon": "fas fa-book-dead" + }, + "save_reference": { + "type": "boolean", + "description": "If generated by the pipeline save the alignment index in the results directory.", + "help_text": "If the alignment index is generated by the pipeline use this parameter to save it to your results folder. 
These can then be used for future pipeline runs, reducing processing times.", + "fa_icon": "fas fa-save" + }, "igenomes_base": { "type": "string", "format": "directory-path", @@ -80,6 +164,229 @@ } } }, + "adapter_trimming_options": { + "title": "Adapter trimming options", + "type": "object", + "fa_icon": "fas fa-cut", + "description": "Options to adjust adapter trimming criteria.", + "properties": { + "clip_r1": { + "type": "integer", + "description": "Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end reads).", + "fa_icon": "fas fa-cut" + }, + "clip_r2": { + "type": "integer", + "description": "Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only).", + "fa_icon": "fas fa-cut" + }, + "three_prime_clip_r1": { + "type": "integer", + "description": "Instructs Trim Galore to remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed.", + "fa_icon": "fas fa-cut" + }, + "three_prime_clip_r2": { + "type": "integer", + "description": "Instructs Trim Galore to remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed.", + "fa_icon": "fas fa-cut" + }, + "trim_nextseq": { + "type": "integer", + "description": "Instructs Trim Galore to apply the --nextseq=X option, to trim based on quality after removing poly-G tails.", + "help_text": "This enables the option Cutadapt `--nextseq-trim=3'CUTOFF` option via Trim Galore, which will set a quality cutoff (that is normally given with -q instead), but qualities of G bases are ignored. 
This trimming is in common for the NextSeq- and NovaSeq-platforms, where basecalls without any signal are called as high-quality G bases.", + "fa_icon": "fas fa-cut" + }, + "skip_trimming": { + "type": "boolean", + "description": "Skip the adapter trimming step.", + "help_text": "Use this if your input FastQ files have already been trimmed outside of the workflow or if you're very confident that there is no adapter contamination in your data.", + "fa_icon": "fas fa-fast-forward" + }, + "save_trimmed": { + "type": "boolean", + "description": "Save the trimmed FastQ files in the results directory.", + "help_text": "By default, trimmed FastQ files will not be saved to the results directory. Specify this flag (or set to true in your config file) to copy these files to the results directory when complete.", + "fa_icon": "fas fa-save" + } + } + }, + "alignment_options": { + "title": "Alignment options", + "type": "object", + "fa_icon": "fas fa-map-signs", + "description": "Options to adjust parameters and filtering criteria for read alignments.", + "properties": { + "aligner": { + "type": "string", + "default": "bwa", + "description": "Specifies the alignment algorithm to use - available options are 'bwa', 'bowtie2', 'chromap' and 'star'.", + "fa_icon": "fas fa-map-signs", + "enum": ["bwa", "bowtie2", "chromap", "star"] + }, + "keep_dups": { + "type": "boolean", + "description": "Duplicate reads are not filtered from alignments.", + "fa_icon": "fas fa-cart-arrow-down" + }, + "keep_multi_map": { + "type": "boolean", + "description": "Reads mapping to multiple locations are not filtered from alignments.", + "fa_icon": "fas fa-cart-arrow-down" + }, + "bwa_min_score": { + "type": "integer", + "description": "Don\u2019t output BWA MEM alignments with score lower than this parameter.", + "fa_icon": "fas fa-hand-paper" + }, + "save_align_intermeds": { + "type": "boolean", + "description": "Save the intermediate BAM files from the alignment step.", + "help_text": "By default, intermediate 
BAM files will not be saved. The final BAM files created after the appropriate filtering step are always saved to limit storage usage. Set this parameter to also save other intermediate BAM files.", + "fa_icon": "fas fa-save" + }, + "save_unaligned": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Where possible, save unaligned reads from either STAR, HISAT2 or Salmon to the results directory.", + "help_text": "This may either be in the form of FastQ or BAM files depending on the options available for that particular tool." + }, + "bamtools_filter_pe_config": { + "type": "string", + "default": "$projectDir/assets/bamtools_filter_pe.json", + "hidden": true, + "description": "BAMTools JSON file with custom filters for paired-end data.", + "fa_icon": "fas fa-cog" + }, + "bamtools_filter_se_config": { + "type": "string", + "default": "$projectDir/assets/bamtools_filter_se.json", + "hidden": true, + "description": "BAMTools JSON file with custom filters for single-end data.", + "fa_icon": "fas fa-cog" + } + } + }, + "peak_calling_options": { + "title": "Peak calling options", + "type": "object", + "fa_icon": "fas fa-chart-area", + "description": "Options to adjust peak calling criteria.", + "properties": { + "narrow_peak": { + "type": "boolean", + "description": "Run MACS2 in narrowPeak mode.", + "help_text": "MACS2 is run by default with the [`--broad`](https://github.com/taoliu/MACS#--broad) flag. Specify this flag to call peaks in narrowPeak mode.", + "fa_icon": "fas fa-arrows-alt-h" + }, + "broad_cutoff": { + "type": "number", + "default": 0.1, + "description": "Specifies broad cutoff value for MACS2. 
Only used when --narrow_peak isn't specified.", + "fa_icon": "fas fa-hand-scissors" + }, + "macs_fdr": { + "type": "number", + "description": "Minimum FDR (q-value) cutoff for peak detection, --macs_fdr and --macs_pvalue are mutually exclusive.", + "fa_icon": "fas fa-sort-amount-down" + }, + "macs_pvalue": { + "type": "number", + "description": "p-value cutoff for peak detection, --macs_fdr and --macs_pvalue are mutually exclusive. If --macs_pvalue cutoff is set, q-value will not be calculated and reported as -1 in the final .xls file.", + "fa_icon": "fas fa-sort-amount-down" + }, + "min_reps_consensus": { + "type": "integer", + "default": 1, + "description": "Number of biological replicates required from a given condition for a peak to contribute to a consensus peak.", + "help_text": "If you are confident you have good reproducibility amongst your replicates then you can increase the value of this parameter to create a 'reproducible' set of consensus peaks. For example, a value of 2 will mean peaks that have been called in at least 2 replicates will contribute to the consensus set of peaks, and as such peaks that are unique to a given replicate will be discarded.", + "fa_icon": "fas fa-sort-numeric-down" + }, + "save_macs_pileup": { + "type": "boolean", + "description": "Instruct MACS2 to create bedGraph files normalised to signal per million reads.", + "fa_icon": "fas fa-save" + }, + "skip_peak_qc": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip MACS2 peak QC plot generation." + }, + "skip_peak_annotation": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip annotation of MACS2 and consensus peaks with HOMER." 
+ }, + "skip_consensus_peaks": { + "type": "boolean", + "description": "Skip consensus peak generation, annotation and counting.", + "fa_icon": "fas fa-fast-forward" + } + } + }, + "process_skipping_options": { + "title": "Process skipping options", + "type": "object", + "fa_icon": "fas fa-fast-forward", + "description": "Options to skip various steps within the workflow.", + "properties": { + "skip_fastqc": { + "type": "boolean", + "description": "Skip FastQC.", + "fa_icon": "fas fa-fast-forward" + }, + "skip_picard_metrics": { + "type": "boolean", + "description": "Skip Picard CollectMultipleMetrics.", + "fa_icon": "fas fa-fast-forward" + }, + "skip_preseq": { + "type": "boolean", + "description": "Skip Preseq.", + "fa_icon": "fas fa-fast-forward" + }, + "deseq2_vst": { + "type": "boolean", + "description": "Use vst transformation instead of rlog with DESeq2.", + "help_text": "See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization).", + "fa_icon": "fas fa-dolly" + }, + "skip_plot_profile": { + "type": "boolean", + "description": "Skip deepTools plotProfile.", + "fa_icon": "fas fa-fast-forward" + }, + "skip_plot_fingerprint": { + "type": "boolean", + "description": "Skip deepTools plotFingerprint.", + "fa_icon": "fas fa-fast-forward" + }, + "skip_spp": { + "type": "boolean", + "description": "Skip Phantompeakqualtools.", + "fa_icon": "fas fa-fast-forward" + }, + "skip_deseq2_qc": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip DESeq2 PCA and heatmap plotting." + }, + "skip_igv": { + "type": "boolean", + "description": "Skip IGV.", + "fa_icon": "fas fa-fast-forward" + }, + "skip_multiqc": { + "type": "boolean", + "description": "Skip MultiQC.", + "fa_icon": "fas fa-fast-forward" + }, + "skip_qc": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip all QC steps except for MultiQC." 
+ } + } + }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -185,6 +492,13 @@ "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, + "fingerprint_bins": { + "type": "integer", + "default": 500000, + "description": "Number of genomic bins to use when calculating deepTools fingerprint plot.", + "fa_icon": "fas fa-dumpster", + "hidden": true + }, "email_on_fail": { "type": "string", "description": "Email address for completion summary, only when pipeline fails.", @@ -256,6 +570,18 @@ { "$ref": "#/definitions/reference_genome_options" }, + { + "$ref": "#/definitions/adapter_trimming_options" + }, + { + "$ref": "#/definitions/alignment_options" + }, + { + "$ref": "#/definitions/peak_calling_options" + }, + { + "$ref": "#/definitions/process_skipping_options" + }, { "$ref": "#/definitions/institutional_config_options" }, diff --git a/subworkflows/local/filter_bam_bamtools.nf b/subworkflows/local/filter_bam_bamtools.nf new file mode 100644 index 0000000000000000000000000000000000000000..40e9b1be621a865e98f2bbbff623bbcbed5a24d7 --- /dev/null +++ b/subworkflows/local/filter_bam_bamtools.nf @@ -0,0 +1,36 @@ +/* + * Filter BAM file + */ + +include { BAM_FILTER } from '../../modules/local/bam_filter' +include { BAM_REMOVE_ORPHANS } from '../../modules/local/bam_remove_orphans' +include { BAM_SORT_SAMTOOLS } from '../nf-core/bam_sort_samtools' + +workflow FILTER_BAM_BAMTOOLS { + take: + ch_bam_bai // channel: [ val(meta), [ bam ], [bai] ] + ch_bed // channel: [ bed ] + bamtools_filter_se_config // file: BAMtools filter JSON config file for SE data + bamtools_filter_pe_config // file: BAMtools filter JSON config file for PE data + + main: + ch_versions = Channel.empty() + + BAM_FILTER(ch_bam_bai, ch_bed, bamtools_filter_se_config, bamtools_filter_pe_config) + BAM_REMOVE_ORPHANS(BAM_FILTER.out.bam) + BAM_SORT_SAMTOOLS(BAM_REMOVE_ORPHANS.out.bam) + + ch_versions = 
ch_versions.mix(BAM_FILTER.out.versions, + BAM_REMOVE_ORPHANS.out.versions, + BAM_SORT_SAMTOOLS.out.versions) + + emit: + name_bam = BAM_REMOVE_ORPHANS.out.bam // channel: [ val(meta), [ bam ] ] + bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 0aecf87fb7813dbcfdd8623e20439e7e1e71b252..648a2971a3034e4facfc934f77799d6be0dcc86d 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -7,12 +7,13 @@ include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' workflow INPUT_CHECK { take: samplesheet // file: /path/to/samplesheet.csv + seq_center // string: sequencing center for read group main: SAMPLESHEET_CHECK ( samplesheet ) .csv .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } + .map { create_fastq_channel(it, seq_center) } .set { reads } emit: @@ -21,11 +22,18 @@ workflow INPUT_CHECK { } // Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channel(LinkedHashMap row) { - // create meta map +def create_fastq_channel(LinkedHashMap row, String seq_center) { def meta = [:] meta.id = row.sample meta.single_end = row.single_end.toBoolean() + meta.antibody = row.antibody + meta.control = row.control + + def read_group = "\'@RG\\tID:${meta.id}\\tSM:${meta.id.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${meta.id}\\tPU:1\'" + if (seq_center) { + read_group = "\'@RG\\tID:${meta.id}\\tSM:${meta.id.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${meta.id}\\tPU:1\\tCN:${seq_center}\'" + } + meta.read_group = 
read_group // add path(s) of the fastq file(s) to the meta map def fastq_meta = [] diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf new file mode 100644 index 0000000000000000000000000000000000000000..9c8615993261a6b5281cd1afb0940dd3c874512f --- /dev/null +++ b/subworkflows/local/prepare_genome.nf @@ -0,0 +1,217 @@ +// +// Uncompress and prepare reference genome files +// + +include { + GUNZIP as GUNZIP_FASTA + GUNZIP as GUNZIP_GTF + GUNZIP as GUNZIP_GFF + GUNZIP as GUNZIP_GENE_BED + GUNZIP as GUNZIP_BLACKLIST } from '../../modules/nf-core/modules/gunzip/main' + +include { + UNTAR as UNTAR_BWA_INDEX + UNTAR as UNTAR_BOWTIE2_INDEX + UNTAR as UNTAR_CHROMAP_INDEX + UNTAR as UNTAR_STAR_INDEX } from '../../modules/nf-core/modules/untar/main' + +include { GFFREAD } from '../../modules/nf-core/modules/gffread/main' +include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/modules/custom/getchromsizes/main' +include { BWA_INDEX } from '../../modules/nf-core/modules/bwa/index/main' +include { BOWTIE2_BUILD } from '../../modules/nf-core/modules/bowtie2/build/main' +include { CHROMAP_INDEX } from '../../modules/nf-core/modules/chromap/index/main' + +include { GTF2BED } from '../../modules/local/gtf2bed' +include { GENOME_BLACKLIST_REGIONS } from '../../modules/local/genome_blacklist_regions' +include { STAR_GENOMEGENERATE } from '../../modules/local/star_genomegenerate' + +workflow PREPARE_GENOME { + take: + prepare_tool_index // string : tool to prepare index for + + main: + + ch_versions = Channel.empty() + + // + // Uncompress genome fasta file if required + // + ch_fasta = Channel.empty() + if (params.fasta.endsWith('.gz')) { + ch_fasta = GUNZIP_FASTA ( [:], params.fasta ).gunzip.map{ it[1] } + ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) + } else { + ch_fasta = file(params.fasta) + } + + // Make fasta file available if reference saved or IGV is run + if (params.save_reference || !params.skip_igv) { + 
file("${params.outdir}/genome/").mkdirs() + ch_fasta.copyTo("${params.outdir}/genome/") + } + + // + // Uncompress GTF annotation file or create from GFF3 if required + // + if (params.gtf) { + if (params.gtf.endsWith('.gz')) { + ch_gtf = GUNZIP_GTF ( [:], params.gtf ).gunzip.map{ it[1] } + ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions) + } else { + ch_gtf = file(params.gtf) + } + } else if (params.gff) { + if (params.gff.endsWith('.gz')) { + ch_gff = GUNZIP_GFF ( [:], params.gff ).gunzip.map{ it[1] } + ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions) + } else { + ch_gff = file(params.gff) + } + ch_gtf = GFFREAD ( ch_gff ).gtf + ch_versions = ch_versions.mix(GFFREAD.out.versions) + } + + // + // Uncompress blacklist file if required + // + ch_blacklist = Channel.empty() + if (params.blacklist) { + if (params.blacklist.endsWith('.gz')) { + ch_blacklist = GUNZIP_BLACKLIST ( [:], params.blacklist ).gunzip.map{ it[1] } + ch_versions = ch_versions.mix(GUNZIP_BLACKLIST.out.versions) + } else { + ch_blacklist = Channel.fromPath(file(params.blacklist)) + } + } + + // + // Uncompress gene BED annotation file or create from GTF if required + // + + // If --gtf is supplied along with --genome + // Make gene bed from supplied --gtf instead of using iGenomes one automatically + def make_bed = false + if (!params.gene_bed) { + make_bed = true + } else if (params.genome && params.gtf) { + if (params.genomes[ params.genome ].gtf != params.gtf) { + make_bed = true + } + } + + if (make_bed) { + ch_gene_bed = GTF2BED ( ch_gtf ).bed + ch_versions = ch_versions.mix(GTF2BED.out.versions) + } else { + if (params.gene_bed.endsWith('.gz')) { + ch_gene_bed = GUNZIP_GENE_BED ( [:], params.gene_bed ).gunzip.map{ it[1] } + ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions) + } else { + ch_gene_bed = file(params.gene_bed) + } + } + + // + // Create chromosome sizes file + // + ch_chrom_sizes = CUSTOM_GETCHROMSIZES ( ch_fasta ).sizes + ch_versions = 
ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) + + // + // Prepare genome intervals for filtering by removing regions in blacklist file + // + ch_genome_filtered_bed = Channel.empty() + + GENOME_BLACKLIST_REGIONS ( + CUSTOM_GETCHROMSIZES.out.sizes, + ch_blacklist.ifEmpty([]) + ) + ch_genome_filtered_bed = GENOME_BLACKLIST_REGIONS.out.bed + ch_versions = ch_versions.mix(GENOME_BLACKLIST_REGIONS.out.versions) + + + // + // Uncompress BWA index or generate from scratch if required + // + ch_bwa_index = Channel.empty() + if (prepare_tool_index == 'bwa') { + if (params.bwa_index) { + if (params.bwa_index.endsWith('.tar.gz')) { + ch_bwa_index = UNTAR_BWA_INDEX ( [ [:], params.bwa_index ] ).untar.map{ it[1] } + ch_versions = ch_versions.mix(UNTAR_BWA_INDEX.out.versions) + } else { + ch_bwa_index = file(params.bwa_index) + } + } else { + ch_bwa_index = BWA_INDEX ( ch_fasta ).index + ch_versions = ch_versions.mix(BWA_INDEX.out.versions) + } + } + + // + // Uncompress Bowtie2 index or generate from scratch if required + // + ch_bowtie2_index = Channel.empty() + if (prepare_tool_index == 'bowtie2') { + if (params.bowtie2_index) { + if (params.bowtie2_index.endsWith('.tar.gz')) { + ch_bowtie2_index = UNTAR_BOWTIE2_INDEX ( [ [:], params.bowtie2_index ] ).untar.map{ it[1] } + ch_versions = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions) + } else { + ch_bowtie2_index = file(params.bowtie2_index) + } + } else { + ch_bowtie2_index = BOWTIE2_BUILD ( ch_fasta ).index + ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) + } + } + + // + // Uncompress CHROMAP index or generate from scratch if required + // + ch_chromap_index = Channel.empty() + if (prepare_tool_index == 'chromap') { + if (params.chromap_index) { + if (params.chromap_index.endsWith('.tar.gz')) { + ch_chromap_index = UNTAR_CHROMAP_INDEX ( [ [:], params.chromap_index ] ).untar.map{ it[1] } + ch_versions = ch_versions.mix(UNTAR_CHROMAP_INDEX.out.versions) + } else { + ch_chromap_index = file(params.chromap_index) + } + } else { + 
ch_chromap_index = CHROMAP_INDEX ( ch_fasta ).index + ch_versions = ch_versions.mix(CHROMAP_INDEX.out.versions) + } + } + + // + // Uncompress STAR index or generate from scratch if required + // + ch_star_index = Channel.empty() + if (prepare_tool_index == 'star') { + if (params.star_index) { + if (params.star_index.endsWith('.tar.gz')) { + ch_star_index = UNTAR_STAR_INDEX ( [ [:], params.star_index ] ).untar.map{ it[1] } + ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions) + } else { + ch_star_index = file(params.star_index) + } + } else { + ch_star_index = STAR_GENOMEGENERATE ( ch_fasta, ch_gtf ).index + ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) + } + } + + emit: + fasta = ch_fasta // path: genome.fasta + gtf = ch_gtf // path: genome.gtf + gene_bed = ch_gene_bed // path: gene.bed + chrom_sizes = ch_chrom_sizes // path: genome.sizes + filtered_bed = ch_genome_filtered_bed // path: *.include_regions.bed + bwa_index = ch_bwa_index // path: bwa/index/ + bowtie2_index = ch_bowtie2_index // path: bowtie2/index/ + chromap_index = ch_chromap_index // path: genome.index + star_index = ch_star_index // path: star/index/ + + versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/align_bowtie2.nf b/subworkflows/nf-core/align_bowtie2.nf new file mode 100644 index 0000000000000000000000000000000000000000..3521913138d4a6ca06d241d86f1fc384ada7b2e0 --- /dev/null +++ b/subworkflows/nf-core/align_bowtie2.nf @@ -0,0 +1,38 @@ +/* + * Map reads, sort, index BAM file and run samtools stats, flagstat and idxstats + */ + +include { BOWTIE2_ALIGN } from '../../modules/nf-core/modules/bowtie2/align/main' +include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' + +workflow ALIGN_BOWTIE2 { + take: + reads // channel: [ val(meta), [ reads ] ] + index // path: /path/to/index + save_unaligned // boolean: true/false + + main: + + ch_versions = Channel.empty() + + // + // Map reads with Bowtie2 + // + BOWTIE2_ALIGN(reads, 
index, save_unaligned, false) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_SAMTOOLS(BOWTIE2_ALIGN.out.bam) + ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions.first()) + + emit: + bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // path: versions.yml +} diff --git a/subworkflows/nf-core/align_bwa_mem.nf b/subworkflows/nf-core/align_bwa_mem.nf new file mode 100644 index 0000000000000000000000000000000000000000..0c5dff084e4d75ed9738edcac2e5a2bf0dfb2489 --- /dev/null +++ b/subworkflows/nf-core/align_bwa_mem.nf @@ -0,0 +1,37 @@ +/* + * Map reads, sort, index BAM file and run samtools stats, flagstat and idxstats + */ + +include { BWA_MEM } from '../../modules/nf-core/modules/bwa/mem/main' +include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' + +workflow ALIGN_BWA_MEM { + take: + reads // channel: [ val(meta), [ reads ] ] + index // path: /path/to/index + + main: + + ch_versions = Channel.empty() + + // + // Map reads with BWA + // + BWA_MEM(reads, index, false) + ch_versions = ch_versions.mix(BWA_MEM.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_SAMTOOLS(BWA_MEM.out.bam) + ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions.first()) + + emit: + bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ 
val(meta), [ flagstat ] ] + idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // path: versions.yml +} diff --git a/subworkflows/nf-core/align_chromap.nf b/subworkflows/nf-core/align_chromap.nf new file mode 100644 index 0000000000000000000000000000000000000000..7eb73977938543c557d079a2f20166aed3614721 --- /dev/null +++ b/subworkflows/nf-core/align_chromap.nf @@ -0,0 +1,38 @@ +/* + * Map reads, sort, index BAM file and run samtools stats, flagstat and idxstats + */ + +include { CHROMAP_CHROMAP } from '../../modules/nf-core/modules/chromap/chromap/main' +include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' + +workflow ALIGN_CHROMAP { + take: + reads // channel: [ val(meta), [ reads ] ] + index // path: /path/to/index + fasta // path: /path/to/fasta + + main: + + ch_versions = Channel.empty() + + // + // Map reads with CHROMAP + // + CHROMAP_CHROMAP(reads, fasta, index, [], [], [], []) + ch_versions = ch_versions.mix(CHROMAP_CHROMAP.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_SAMTOOLS(CHROMAP_CHROMAP.out.bam) + ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions.first()) + + emit: + bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // path: versions.yml +} diff --git a/subworkflows/nf-core/align_star.nf b/subworkflows/nf-core/align_star.nf new file mode 100644 index 0000000000000000000000000000000000000000..474621824e5a8d31830708de282ca4eedd76ae3a --- /dev/null +++ b/subworkflows/nf-core/align_star.nf @@ -0,0 +1,46 @@ +/* + * Map reads, sort, index BAM file and run samtools 
stats, flagstat and idxstats + */ + +include { STAR_ALIGN } from '../../modules/local/star_align' +include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' + +workflow ALIGN_STAR { + take: + reads // channel: [ val(meta), [ reads ] ] + index // channel: /path/to/star/index/ + + main: + + ch_versions = Channel.empty() + + // + // Map reads with STAR + // + STAR_ALIGN ( reads, index ) + ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_SAMTOOLS ( STAR_ALIGN.out.bam ) + ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions) + + emit: + orig_bam = STAR_ALIGN.out.bam // channel: [ val(meta), bam ] + log_final = STAR_ALIGN.out.log_final // channel: [ val(meta), log_final ] + log_out = STAR_ALIGN.out.log_out // channel: [ val(meta), log_out ] + log_progress = STAR_ALIGN.out.log_progress // channel: [ val(meta), log_progress ] + bam_sorted = STAR_ALIGN.out.bam_sorted // channel: [ val(meta), bam_sorted ] + bam_transcript = STAR_ALIGN.out.bam_transcript // channel: [ val(meta), bam_transcript ] + fastq = STAR_ALIGN.out.fastq // channel: [ val(meta), fastq ] + tab = STAR_ALIGN.out.tab // channel: [ val(meta), tab ] + + bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_sort_samtools.nf b/subworkflows/nf-core/bam_sort_samtools.nf new file mode 100644 index 0000000000000000000000000000000000000000..418e14cc04ea978fc4211cb11f361c9a4df617a7 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_samtools.nf @@ -0,0 +1,34 @@ +/* + * Sort, index BAM file 
and run samtools stats, flagstat and idxstats + */ + +include { SAMTOOLS_SORT } from '../../modules/nf-core/modules/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' + +workflow BAM_SORT_SAMTOOLS { + take: + ch_bam // channel: [ val(meta), [ bam ] ] + + main: + + ch_versions = Channel.empty() + + SAMTOOLS_SORT(ch_bam) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + + SAMTOOLS_INDEX(SAMTOOLS_SORT.out.bam) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + BAM_STATS_SAMTOOLS(SAMTOOLS_SORT.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0])) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_stats_samtools.nf b/subworkflows/nf-core/bam_stats_samtools.nf new file mode 100644 index 0000000000000000000000000000000000000000..89a7338f64a80b7c2fe22b66f13e5cc5b56341ff --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools.nf @@ -0,0 +1,32 @@ +/* + * Run SAMtools stats, flagstat and idxstats + */ + +include { SAMTOOLS_STATS } from '../../modules/nf-core/modules/samtools/stats/main' +include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/modules/samtools/idxstats/main' +include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/modules/samtools/flagstat/main' + +workflow BAM_STATS_SAMTOOLS { + take: + ch_bam_bai // channel: [ val(meta), [ bam ], [bai] ] + + main: + + ch_versions = Channel.empty() + + SAMTOOLS_STATS ( ch_bam_bai, [] 
) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) + + SAMTOOLS_FLAGSTAT ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions.first()) + + SAMTOOLS_IDXSTATS ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions.first()) + + emit: + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastqc_trimgalore.nf b/subworkflows/nf-core/fastqc_trimgalore.nf new file mode 100644 index 0000000000000000000000000000000000000000..966541cadfd3ee0b8c1d84e15eea5bce8e513501 --- /dev/null +++ b/subworkflows/nf-core/fastqc_trimgalore.nf @@ -0,0 +1,48 @@ +// +// Read QC and trimming +// + +include { FASTQC } from '../../modules/nf-core/modules/fastqc/main' +include { TRIMGALORE } from '../../modules/nf-core/modules/trimgalore/main' + +workflow FASTQC_TRIMGALORE { + take: + reads // channel: [ val(meta), [ reads ] ] + skip_fastqc // boolean: true/false + skip_trimming // boolean: true/false + + main: + + ch_versions = Channel.empty() + fastqc_html = Channel.empty() + fastqc_zip = Channel.empty() + if (!skip_fastqc) { + FASTQC ( reads ).html.set { fastqc_html } + fastqc_zip = FASTQC.out.zip + ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + } + + trim_reads = reads + trim_html = Channel.empty() + trim_zip = Channel.empty() + trim_log = Channel.empty() + if (!skip_trimming) { + TRIMGALORE ( reads ).reads.set { trim_reads } + trim_html = TRIMGALORE.out.html + trim_zip = TRIMGALORE.out.zip + trim_log = TRIMGALORE.out.log + ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first()) + } + + emit: + reads = trim_reads // channel: [ val(meta), [ reads ] ] + + fastqc_html // channel: [ val(meta), [ html ] ] + fastqc_zip // channel: [ 
val(meta), [ zip ] ] + + trim_html // channel: [ val(meta), [ html ] ] + trim_zip // channel: [ val(meta), [ zip ] ] + trim_log // channel: [ val(meta), [ txt ] ] + + versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/mark_duplicates_picard.nf b/subworkflows/nf-core/mark_duplicates_picard.nf new file mode 100644 index 0000000000000000000000000000000000000000..33e88bf5827fa2ca7667e39b1ae208768bf5ea87 --- /dev/null +++ b/subworkflows/nf-core/mark_duplicates_picard.nf @@ -0,0 +1,42 @@ +/* + * Picard MarkDuplicates, sort, index BAM file and run samtools stats, flagstat and idxstats + */ + +include { PICARD_MARKDUPLICATES } from '../../modules/nf-core/modules/picard/markduplicates/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' + +workflow MARK_DUPLICATES_PICARD { + take: + bam // channel: [ val(meta), [ bam ] ] + + main: + + ch_versions = Channel.empty() + + // + // Picard MarkDuplicates + // + PICARD_MARKDUPLICATES(bam) + ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions.first()) + + // + // Index BAM file and run samtools stats, flagstat and idxstats + // + SAMTOOLS_INDEX(PICARD_MARKDUPLICATES.out.bam) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + BAM_STATS_SAMTOOLS(PICARD_MARKDUPLICATES.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0])) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), [ bam ] ] + metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), [ metrics ] ] + + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + 
versions = ch_versions // channel: [ versions.yml ] +} diff --git a/workflows/chipseq.nf b/workflows/chipseq.nf index ea403618fa67ca0f6b758a93b801af6eb8eb4f01..eedccc5d50cbfd04adb4e3d12ef8f3af3f9e615a 100644 --- a/workflows/chipseq.nf +++ b/workflows/chipseq.nf @@ -4,19 +4,36 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +def valid_params = [ + aligners : [ 'bwa', 'bowtie2', 'chromap', 'star' ] +] + def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) // Validate input parameters -WorkflowChipseq.initialise(params, log) +WorkflowChipseq.initialise(params, log, valid_params) -// TODO nf-core: Add all file path parameters for the pipeline to the list below // Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.multiqc_config, params.fasta ] +def checkPathParamList = [ + params.input, params.multiqc_config, + params.fasta, + params.gtf, params.gff, params.gene_bed, + params.bwa_index, params.bowtie2_index, params.chromap_index, params.star_index, + params.blacklist, + params.bamtools_filter_pe_config, params.bamtools_filter_se_config +] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } // Check mandatory parameters if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } +// Save AWS IGenomes file containing annotation version +def anno_readme = params.genomes[ params.genome ]?.readme +if (anno_readme && file(anno_readme).exists()) { + file("${params.outdir}/genome/").mkdirs() + file(anno_readme).copyTo("${params.outdir}/genome/") +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES @@ -26,16 +43,44 @@ if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input sample ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) ch_multiqc_custom_config = params.multiqc_config ? 
Channel.fromPath(params.multiqc_config) : Channel.empty() +// JSON files required by BAMTools for alignment filtering +ch_bamtools_filter_se_config = file(params.bamtools_filter_se_config, checkIfExists: true) +ch_bamtools_filter_pe_config = file(params.bamtools_filter_pe_config, checkIfExists: true) + +// Header files for MultiQC +ch_spp_nsc_header = file("$projectDir/assets/multiqc/spp_nsc_header.txt", checkIfExists: true) +ch_spp_rsc_header = file("$projectDir/assets/multiqc/spp_rsc_header.txt", checkIfExists: true) +ch_spp_correlation_header = file("$projectDir/assets/multiqc/spp_correlation_header.txt", checkIfExists: true) +ch_peak_count_header = file("$projectDir/assets/multiqc/peak_count_header.txt", checkIfExists: true) +ch_frip_score_header = file("$projectDir/assets/multiqc/frip_score_header.txt", checkIfExists: true) +ch_peak_annotation_header = file("$projectDir/assets/multiqc/peak_annotation_header.txt", checkIfExists: true) +ch_deseq2_pca_header = file("$projectDir/assets/multiqc/deseq2_pca_header.txt", checkIfExists: true) +ch_deseq2_clustering_header = file("$projectDir/assets/multiqc/deseq2_clustering_header.txt", checkIfExists: true) + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT LOCAL MODULES/SUBWORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +include { BEDTOOLS_GENOMECOV } from '../modules/local/bedtools_genomecov' +include { FRIP_SCORE } from '../modules/local/frip_score' +include { PLOT_MACS2_QC } from '../modules/local/plot_macs2_qc' +include { PLOT_HOMER_ANNOTATEPEAKS } from '../modules/local/plot_homer_annotatepeaks' +include { MACS2_CONSENSUS } from '../modules/local/macs2_consensus' +include { ANNOTATE_BOOLEAN_PEAKS } from '../modules/local/annotate_boolean_peaks' +include { DESEQ2_QC } from '../modules/local/deseq2_qc' +include { IGV } from '../modules/local/igv' +include { MULTIQC } from '../modules/local/multiqc' +include { 
MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS } from '../modules/local/multiqc_custom_phantompeakqualtools' +include { MULTIQC_CUSTOM_PEAKS } from '../modules/local/multiqc_custom_peaks' + // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' +include { FILTER_BAM_BAMTOOLS } from '../subworkflows/local/filter_bam_bamtools' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -46,9 +91,34 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' // // MODULE: Installed directly from nf-core/modules // -include { FASTQC } from '../modules/nf-core/modules/fastqc/main' -include { MULTIQC } from '../modules/nf-core/modules/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' + +include { PICARD_MERGESAMFILES } from '../modules/nf-core/modules/picard/mergesamfiles/main' +include { PICARD_COLLECTMULTIPLEMETRICS } from '../modules/nf-core/modules/picard/collectmultiplemetrics/main' +include { PRESEQ_LCEXTRAP } from '../modules/nf-core/modules/preseq/lcextrap/main' +include { PHANTOMPEAKQUALTOOLS } from '../modules/nf-core/modules/phantompeakqualtools/main' +include { UCSC_BEDGRAPHTOBIGWIG } from '../modules/nf-core/modules/ucsc/bedgraphtobigwig/main' +include { DEEPTOOLS_COMPUTEMATRIX } from '../modules/nf-core/modules/deeptools/computematrix/main' +include { DEEPTOOLS_PLOTPROFILE } from '../modules/nf-core/modules/deeptools/plotprofile/main' +include { DEEPTOOLS_PLOTHEATMAP } from '../modules/nf-core/modules/deeptools/plotheatmap/main' +include { DEEPTOOLS_PLOTFINGERPRINT } from '../modules/nf-core/modules/deeptools/plotfingerprint/main' +include { KHMER_UNIQUEKMERS } from '../modules/nf-core/modules/khmer/uniquekmers/main' +include { 
MACS2_CALLPEAK } from '../modules/nf-core/modules/macs2/callpeak/main' +include { SUBREAD_FEATURECOUNTS } from '../modules/nf-core/modules/subread/featurecounts/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' + +include { HOMER_ANNOTATEPEAKS as HOMER_ANNOTATEPEAKS_MACS2 } from '../modules/nf-core/modules/homer/annotatepeaks/main' +include { HOMER_ANNOTATEPEAKS as HOMER_ANNOTATEPEAKS_CONSENSUS } from '../modules/nf-core/modules/homer/annotatepeaks/main' + +// +// SUBWORKFLOW: Consisting entirely of nf-core/modules +// + +include { FASTQC_TRIMGALORE } from '../subworkflows/nf-core/fastqc_trimgalore' +include { ALIGN_BWA_MEM } from '../subworkflows/nf-core/align_bwa_mem' +include { ALIGN_BOWTIE2 } from '../subworkflows/nf-core/align_bowtie2' +include { ALIGN_CHROMAP } from '../subworkflows/nf-core/align_chromap' +include { ALIGN_STAR } from '../subworkflows/nf-core/align_star' +include { MARK_DUPLICATES_PICARD } from '../subworkflows/nf-core/mark_duplicates_picard' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -63,22 +133,486 @@ workflow CHIPSEQ { ch_versions = Channel.empty() + // + // SUBWORKFLOW: Uncompress and prepare reference genome files + // + PREPARE_GENOME ( + params.aligner + ) + ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) + // // SUBWORKFLOW: Read in samplesheet, validate and stage input files // INPUT_CHECK ( - ch_input + file(params.input), + params.seq_center ) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) // - // MODULE: Run FastQC + // SUBWORKFLOW: Read QC and trim adapters + // + FASTQC_TRIMGALORE ( + INPUT_CHECK.out.reads, + params.skip_fastqc || params.skip_qc, + params.skip_trimming + ) + ch_versions = ch_versions.mix(FASTQC_TRIMGALORE.out.versions) + + // + // SUBWORKFLOW: Alignment with BWA & BAM QC + // + ch_genome_bam = Channel.empty() + ch_genome_bam_index = Channel.empty() + ch_samtools_stats = 
Channel.empty() + ch_samtools_flagstat = Channel.empty() + ch_samtools_idxstats = Channel.empty() + if (params.aligner == 'bwa') { + ALIGN_BWA_MEM ( + FASTQC_TRIMGALORE.out.reads, + PREPARE_GENOME.out.bwa_index + ) + ch_genome_bam = ALIGN_BWA_MEM.out.bam + ch_genome_bam_index = ALIGN_BWA_MEM.out.bai + ch_samtools_stats = ALIGN_BWA_MEM.out.stats + ch_samtools_flagstat = ALIGN_BWA_MEM.out.flagstat + ch_samtools_idxstats = ALIGN_BWA_MEM.out.idxstats + ch_versions = ch_versions.mix(ALIGN_BWA_MEM.out.versions.first()) + } + + // + // SUBWORKFLOW: Alignment with BOWTIE2 & BAM QC + // + if (params.aligner == 'bowtie2') { + ALIGN_BOWTIE2 ( + FASTQC_TRIMGALORE.out.reads, + PREPARE_GENOME.out.bowtie2_index, + params.save_unaligned + ) + ch_genome_bam = ALIGN_BOWTIE2.out.bam + ch_genome_bam_index = ALIGN_BOWTIE2.out.bai + ch_samtools_stats = ALIGN_BOWTIE2.out.stats + ch_samtools_flagstat = ALIGN_BOWTIE2.out.flagstat + ch_samtools_idxstats = ALIGN_BOWTIE2.out.idxstats + ch_versions = ch_versions.mix(ALIGN_BOWTIE2.out.versions.first()) + } + + // + // SUBWORKFLOW: Alignment with CHROMAP & BAM QC + // + if (params.aligner == 'chromap') { + ALIGN_CHROMAP ( + FASTQC_TRIMGALORE.out.reads, + PREPARE_GENOME.out.chromap_index, + PREPARE_GENOME.out.fasta + ) + + // Filter out paired-end reads until the issue below is fixed + // https://github.com/nf-core/chipseq/issues/291 + // ch_genome_bam = ALIGN_CHROMAP.out.bam + ALIGN_CHROMAP + .out + .bam + .branch { + meta, bam -> + single_end: meta.single_end + return [ meta, bam ] + paired_end: !meta.single_end + return [ meta, bam ] + }.set { ch_genome_bam_chromap } + + ch_genome_bam_chromap.paired_end + .collect() + .map { it -> + def count = it.size() + if (count > 0) { + log.warn "=============================================================================\n" + + " Paired-end files produced by chromap can not be used by some downstream tools due the issue below:\n" + + " https://github.com/nf-core/chipseq/issues/291\n" + + " They will be 
excluded from the analysis. Consider to use a different aligner\n" + + "===================================================================================" + } + } + + ch_genome_bam = ch_genome_bam_chromap.single_end + ch_genome_bam_index = ALIGN_CHROMAP.out.bai + ch_samtools_stats = ALIGN_CHROMAP.out.stats + ch_samtools_flagstat = ALIGN_CHROMAP.out.flagstat + ch_samtools_idxstats = ALIGN_CHROMAP.out.idxstats + ch_versions = ch_versions.mix(ALIGN_CHROMAP.out.versions.first()) + } + + // + // SUBWORKFLOW: Alignment with STAR & BAM QC // - FASTQC ( - INPUT_CHECK.out.reads + if (params.aligner == 'star') { + ALIGN_STAR ( + FASTQC_TRIMGALORE.out.reads, + PREPARE_GENOME.out.star_index + ) + ch_genome_bam = ALIGN_STAR.out.bam + ch_genome_bam_index = ALIGN_STAR.out.bai + ch_transcriptome_bam = ALIGN_STAR.out.bam_transcript + ch_samtools_stats = ALIGN_STAR.out.stats + ch_samtools_flagstat = ALIGN_STAR.out.flagstat + ch_samtools_idxstats = ALIGN_STAR.out.idxstats + ch_star_multiqc = ALIGN_STAR.out.log_final + + ch_versions = ch_versions.mix(ALIGN_STAR.out.versions) + } + + // + // SUBWORKFLOW: Merge resequenced BAM files + // + ch_genome_bam + .map { + meta, bam -> + def fmeta = meta.findAll { it.key != 'read_group' } + fmeta.id = fmeta.id.split('_')[0..-2].join('_') + [ fmeta, bam ] } + .groupTuple(by: [0]) + .map { it -> [ it[0], it[1].flatten() ] } + .set { ch_sort_bam } + + PICARD_MERGESAMFILES ( + ch_sort_bam + ) + ch_versions = ch_versions.mix(PICARD_MERGESAMFILES.out.versions.first().ifEmpty(null)) + + // + // SUBWORKFLOW: Mark duplicates & filter BAM files after merging + // + MARK_DUPLICATES_PICARD ( + PICARD_MERGESAMFILES.out.bam + ) + ch_versions = ch_versions.mix(MARK_DUPLICATES_PICARD.out.versions) + + // + // SUBWORKFLOW: Fix getting name sorted BAM here for PE/SE + // + FILTER_BAM_BAMTOOLS ( + MARK_DUPLICATES_PICARD.out.bam.join(MARK_DUPLICATES_PICARD.out.bai, by: [0]), + PREPARE_GENOME.out.filtered_bed.first(), + + ch_bamtools_filter_se_config, + 
ch_bamtools_filter_pe_config ) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + ch_versions = ch_versions.mix(FILTER_BAM_BAMTOOLS.out.versions.first().ifEmpty(null)) + // + // MODULE: Library coverage + // + ch_preseq_multiqc = Channel.empty() + if (!params.skip_preseq) { + PRESEQ_LCEXTRAP ( + MARK_DUPLICATES_PICARD.out.bam + ) + ch_preseq_multiqc = PRESEQ_LCEXTRAP.out.lc_extrap + ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions.first()) + } + + // + // MODULE: Post alignment QC + // + ch_picardcollectmultiplemetrics_multiqc = Channel.empty() + if (!params.skip_picard_metrics) { + PICARD_COLLECTMULTIPLEMETRICS ( + FILTER_BAM_BAMTOOLS.out.bam, + PREPARE_GENOME.out.fasta, + [] + ) + ch_picardcollectmultiplemetrics_multiqc = PICARD_COLLECTMULTIPLEMETRICS.out.metrics + ch_versions = ch_versions.mix(PICARD_COLLECTMULTIPLEMETRICS.out.versions.first()) + } + + // + // MODULE: Strand cross-correlation + // + PHANTOMPEAKQUALTOOLS ( + FILTER_BAM_BAMTOOLS.out.bam + ) + ch_versions = ch_versions.mix(PHANTOMPEAKQUALTOOLS.out.versions.first()) + + MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS ( + PHANTOMPEAKQUALTOOLS.out.spp.join(PHANTOMPEAKQUALTOOLS.out.rdata, by: [0]), + ch_spp_nsc_header, + ch_spp_rsc_header, + ch_spp_correlation_header + ) + + // + // MODULE: Coverage tracks + // + BEDTOOLS_GENOMECOV ( + FILTER_BAM_BAMTOOLS.out.bam.join(FILTER_BAM_BAMTOOLS.out.flagstat, by: [0]) + ) + ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions.first()) + + // + // MODULE: Coverage tracks + // + UCSC_BEDGRAPHTOBIGWIG ( + BEDTOOLS_GENOMECOV.out.bedgraph, + PREPARE_GENOME.out.chrom_sizes + ) + ch_versions = ch_versions.mix(UCSC_BEDGRAPHTOBIGWIG.out.versions.first()) + + // + // MODULE: Coverage plots + // + ch_deeptoolsplotprofile_multiqc = Channel.empty() + if (!params.skip_plot_profile) { + DEEPTOOLS_COMPUTEMATRIX ( + UCSC_BEDGRAPHTOBIGWIG.out.bigwig, + PREPARE_GENOME.out.gene_bed + ) + ch_versions = 
ch_versions.mix(DEEPTOOLS_COMPUTEMATRIX.out.versions.first()) + + DEEPTOOLS_PLOTPROFILE ( + DEEPTOOLS_COMPUTEMATRIX.out.matrix + ) + ch_deeptoolsplotprofile_multiqc = DEEPTOOLS_PLOTPROFILE.out.table + ch_versions = ch_versions.mix(DEEPTOOLS_PLOTPROFILE.out.versions.first()) + + DEEPTOOLS_PLOTHEATMAP ( + DEEPTOOLS_COMPUTEMATRIX.out.matrix + ) + ch_versions = ch_versions.mix(DEEPTOOLS_PLOTHEATMAP.out.versions.first()) + } + + // + // Refactor channels: [ val(meta), [ ip_bam, control_bam ], [ ip_bai, control_bai ] ] + // + FILTER_BAM_BAMTOOLS + .out + .bam + .join (FILTER_BAM_BAMTOOLS.out.bai, by: [0]) + .map { + meta, bam, bai -> + meta.control ? null : [ meta.id, [ bam ] , [ bai ] ] + } + .set { ch_control_bam_bai } + + FILTER_BAM_BAMTOOLS + .out + .bam + .join (FILTER_BAM_BAMTOOLS.out.bai, by: [0]) + .map { + meta, bam, bai -> + meta.control ? [ meta.control, meta, [ bam ], [ bai ] ] : null + } + .combine(ch_control_bam_bai, by: 0) + .map { it -> [ it[1] , it[2] + it[4], it[3] + it[5] ] } + .set { ch_ip_control_bam_bai } + + // + // MODULE: deepTools plotFingerprint for IP and control together (matrix is passed to MultiQC via ch_deeptoolsplotfingerprint_multiqc) + // + ch_deeptoolsplotfingerprint_multiqc = Channel.empty() + if (!params.skip_plot_fingerprint) { + DEEPTOOLS_PLOTFINGERPRINT ( + ch_ip_control_bam_bai + ) + ch_deeptoolsplotfingerprint_multiqc = DEEPTOOLS_PLOTFINGERPRINT.out.matrix + ch_versions = ch_versions.mix(DEEPTOOLS_PLOTFINGERPRINT.out.versions.first()) + } + + // + // Call peaks + // + ch_macs_gsize = Channel.empty() + ch_custompeaks_frip_multiqc = Channel.empty() + ch_custompeaks_count_multiqc = Channel.empty() + ch_plothomerannotatepeaks_multiqc = Channel.empty() + ch_subreadfeaturecounts_multiqc = Channel.empty() + + ch_macs_gsize = params.macs_gsize + if (!params.macs_gsize) { + KHMER_UNIQUEKMERS ( + PREPARE_GENOME.out.fasta, + params.read_length + ) + ch_macs_gsize = KHMER_UNIQUEKMERS.out.kmers.map { it.text.trim() } + } + + // Create channel: [ val(meta), ip_bam, control_bam ] + ch_ip_control_bam_bai + .map { meta, bams, bais 
-> [ meta , bams[0], bams[1] ] } + .set { ch_ip_control_bam } + + MACS2_CALLPEAK ( + ch_ip_control_bam, + ch_macs_gsize + ) + ch_versions = ch_versions.mix(MACS2_CALLPEAK.out.versions.first()) + + // + // Filter for MACS2 files without peaks + // + MACS2_CALLPEAK + .out + .peak + .filter { meta, peaks -> peaks.size() > 0 } + .set { ch_macs2_peaks } + + ch_ip_control_bam + .join(ch_macs2_peaks, by: [0]) + .map { it -> [ it[0], it[1], it[3] ] } + .set { ch_ip_peak } + + FRIP_SCORE ( + ch_ip_peak + ) + ch_versions = ch_versions.mix(FRIP_SCORE.out.versions.first()) + + ch_ip_peak + .join(FRIP_SCORE.out.txt, by: [0]) + .map { it -> [ it[0], it[2], it[3] ] } + .set { ch_ip_peak_frip } + + MULTIQC_CUSTOM_PEAKS ( + ch_ip_peak_frip, + ch_peak_count_header, + ch_frip_score_header + ) + ch_custompeaks_frip_multiqc = MULTIQC_CUSTOM_PEAKS.out.frip + ch_custompeaks_count_multiqc = MULTIQC_CUSTOM_PEAKS.out.count + + if (!params.skip_peak_annotation) { + HOMER_ANNOTATEPEAKS_MACS2 ( + ch_macs2_peaks, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.gtf + ) + ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS_MACS2.out.versions.first()) + + if (!params.skip_peak_qc) { + PLOT_MACS2_QC ( + ch_macs2_peaks.collect{it[1]} + ) + ch_versions = ch_versions.mix(PLOT_MACS2_QC.out.versions) + + PLOT_HOMER_ANNOTATEPEAKS ( + HOMER_ANNOTATEPEAKS_MACS2.out.txt.collect{it[1]}, + ch_peak_annotation_header, + "_peaks.annotatePeaks.txt" + ) + ch_plothomerannotatepeaks_multiqc = PLOT_HOMER_ANNOTATEPEAKS.out.tsv + ch_versions = ch_versions.mix(PLOT_HOMER_ANNOTATEPEAKS.out.versions) + } + } + + // + // Consensus peaks analysis + // + ch_macs2_consensus_bed_lib = Channel.empty() + ch_deseq2_pca_multiqc = Channel.empty() + ch_deseq2_clustering_multiqc = Channel.empty() + if (!params.skip_consensus_peaks) { + // Create channel: [ meta , [ peaks ] ] + // Where meta = [ id:antibody, multiple_groups:true/false, replicates_exist:true/false ] + ch_macs2_peaks + .map { meta, peak -> [ meta.antibody, 
meta.id.split('_')[0..-2].join('_'), peak ] } + .groupTuple() + .map { + antibody, groups, peaks -> + [ + antibody, + groups.groupBy().collectEntries { [(it.key) : it.value.size()] }, + peaks + ] } + .map { + antibody, groups, peaks -> + def meta = [:] + meta.id = antibody + meta.multiple_groups = groups.size() > 1 + meta.replicates_exist = groups.max { it.value }.value > 1 + [ meta, peaks ] } + .set { ch_antibody_peaks } + + MACS2_CONSENSUS ( + ch_antibody_peaks + ) + ch_macs2_consensus_bed_lib = MACS2_CONSENSUS.out.bed + ch_versions = ch_versions.mix(MACS2_CONSENSUS.out.versions) + + if (!params.skip_peak_annotation) { + HOMER_ANNOTATEPEAKS_CONSENSUS ( + MACS2_CONSENSUS.out.bed, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.gtf + ) + ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS_CONSENSUS.out.versions) + + ANNOTATE_BOOLEAN_PEAKS ( + MACS2_CONSENSUS.out.boolean_txt.join(HOMER_ANNOTATEPEAKS_CONSENSUS.out.txt, by: [0]), + ) + ch_versions = ch_versions.mix(ANNOTATE_BOOLEAN_PEAKS.out.versions) + } + + // Create channel: [ val(meta), [ ip_bams ], saf ] (meta is copied before editing so other channels keep their original meta) + MACS2_CONSENSUS + .out + .saf + .map { meta, saf -> [ meta.id, meta, saf ] } + .set { ch_ip_saf } + + ch_ip_control_bam + .map { meta, ip_bam, control_bam -> [ meta.antibody, meta, ip_bam ] } + .groupTuple() + .map { it -> [ it[0], it[1][0], it[2].flatten().sort() ] } + .join(ch_ip_saf) + .map { + it -> + def fmeta = it[1].clone() + fmeta['id'] = it[3]['id'] + fmeta['replicates_exist'] = it[3]['replicates_exist'] + fmeta['multiple_groups'] = it[3]['multiple_groups'] + [ fmeta, it[2], it[4] ] } + .set { ch_ip_bam } + + SUBREAD_FEATURECOUNTS ( + ch_ip_bam + ) + ch_subreadfeaturecounts_multiqc = SUBREAD_FEATURECOUNTS.out.summary + ch_versions = ch_versions.mix(SUBREAD_FEATURECOUNTS.out.versions.first()) + + if (!params.skip_deseq2_qc) { + DESEQ2_QC ( + SUBREAD_FEATURECOUNTS.out.counts, + ch_deseq2_pca_header, + ch_deseq2_clustering_header + ) + ch_deseq2_pca_multiqc = DESEQ2_QC.out.pca_multiqc + ch_deseq2_clustering_multiqc = 
DESEQ2_QC.out.dists_multiqc + } + } + + // + // Create IGV session + // + if (!params.skip_igv) { + IGV ( + PREPARE_GENOME.out.fasta, + UCSC_BEDGRAPHTOBIGWIG.out.bigwig.collect{it[1]}.ifEmpty([]), + ch_macs2_peaks.collect{it[1]}.ifEmpty([]), + ch_macs2_consensus_bed_lib.collect{it[1]}.ifEmpty([]), + { "${params.aligner}/mergedLibrary/bigwig" }, + { ["${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak' + ].join('') }, + { ["${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/consensus' + ].join('') } + ) + ch_versions = ch_versions.mix(IGV.out.versions) + } + + // + // MODULE: Pipeline reporting + // CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) @@ -86,21 +620,51 @@ workflow CHIPSEQ { // // MODULE: MultiQC // - workflow_summary = WorkflowChipseq.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) + if (!params.skip_multiqc) { + workflow_summary = WorkflowChipseq.paramsSummaryMultiqc(workflow, summary_params) + ch_workflow_summary = Channel.value(workflow_summary) - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + MULTIQC ( + ch_multiqc_config, + ch_multiqc_custom_config.collect().ifEmpty([]), + CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect(), + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'), - MULTIQC ( - ch_multiqc_files.collect() - ) - multiqc_report = MULTIQC.out.report.toList() - ch_versions = 
ch_versions.mix(MULTIQC.out.versions) + FASTQC_TRIMGALORE.out.fastqc_zip.collect{it[1]}.ifEmpty([]), + FASTQC_TRIMGALORE.out.trim_zip.collect{it[1]}.ifEmpty([]), + FASTQC_TRIMGALORE.out.trim_log.collect{it[1]}.ifEmpty([]), + + ch_samtools_stats.collect{it[1]}.ifEmpty([]), + ch_samtools_flagstat.collect{it[1]}.ifEmpty([]), + ch_samtools_idxstats.collect{it[1]}.ifEmpty([]), + + MARK_DUPLICATES_PICARD.out.stats.collect{it[1]}.ifEmpty([]), + MARK_DUPLICATES_PICARD.out.flagstat.collect{it[1]}.ifEmpty([]), + MARK_DUPLICATES_PICARD.out.idxstats.collect{it[1]}.ifEmpty([]), + MARK_DUPLICATES_PICARD.out.metrics.collect{it[1]}.ifEmpty([]), + + FILTER_BAM_BAMTOOLS.out.stats.collect{it[1]}.ifEmpty([]), + FILTER_BAM_BAMTOOLS.out.flagstat.collect{it[1]}.ifEmpty([]), + FILTER_BAM_BAMTOOLS.out.idxstats.collect{it[1]}.ifEmpty([]), + ch_picardcollectmultiplemetrics_multiqc.collect{it[1]}.ifEmpty([]), + + ch_preseq_multiqc.collect{it[1]}.ifEmpty([]), + ch_deeptoolsplotprofile_multiqc.collect{it[1]}.ifEmpty([]), + ch_deeptoolsplotfingerprint_multiqc.collect{it[1]}.ifEmpty([]), + PHANTOMPEAKQUALTOOLS.out.spp.collect{it[1]}.ifEmpty([]), + MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.nsc.collect{it[1]}.ifEmpty([]), + MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.rsc.collect{it[1]}.ifEmpty([]), + MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.correlation.collect{it[1]}.ifEmpty([]), + + ch_custompeaks_frip_multiqc.collect{it[1]}.ifEmpty([]), + ch_custompeaks_count_multiqc.collect{it[1]}.ifEmpty([]), + ch_plothomerannotatepeaks_multiqc.collect().ifEmpty([]), + ch_subreadfeaturecounts_multiqc.collect{it[1]}.ifEmpty([]), + ch_deseq2_pca_multiqc.collect().ifEmpty([]), + ch_deseq2_clustering_multiqc.collect().ifEmpty([]) + ) + multiqc_report = MULTIQC.out.report.toList() + } } /*