diff --git a/bibliography.bib b/bibliography.bib deleted file mode 120000 index 310c23f1e43b85f858ffa3ef2a958d496751b4c5..0000000000000000000000000000000000000000 --- a/bibliography.bib +++ /dev/null @@ -1 +0,0 @@ -../../bibliography/bibliography.bib \ No newline at end of file diff --git a/bibliography.bib b/bibliography.bib new file mode 100644 index 0000000000000000000000000000000000000000..fd7dd479ba0b62545b64df0e19b18eb75c9b8d80 --- /dev/null +++ b/bibliography.bib @@ -0,0 +1,11050 @@ + +@article{abidContrastivePrincipalComponent, + title = {Contrastive {{Principal Component Analysis}}}, + author = {Abid, Abubakar and Bagaria, Vivek K and Zhang, Martin J and Zou, James}, + pages = {19}, + abstract = {We present a new technique called contrastive principal component analysis (cPCA) that is designed to discover low-dimensional structure that is unique to a dataset, or enriched in one dataset relative to other data. The technique is a generalization of standard PCA, for the setting where multiple datasets are available \textendash{} e.g. a treatment and a control group, or a mixed versus a homogeneous population \textendash{} and the goal is to explore patterns that are specific to one of the datasets. We conduct a wide variety of experiments in which cPCA identifies important dataset-specific patterns that are missed by PCA, demonstrating that it is useful for many applications: subgroup discovery, visualizing trends, feature selection, denoising, and data-dependent standardization. We provide geometrical interpretations of cPCA and show that it satisfies desirable theoretical guarantees. We also extend cPCA to nonlinear settings in the form of kernel cPCA. We have released our code as a python package\textdagger{} and documentation is on Github\textdaggerdbl.}, + file = {/Users/laurent/Documents/bibliography/stats/Abid et al. 
- Contrastive Principal Component Analysis.pdf}, + language = {en} +} + +@article{abuinPASTASparkMultipleSequence2017, + title = {{{PASTASpark}}: Multiple Sequence Alignment Meets {{Big Data}}}, + shorttitle = {{{PASTASpark}}}, + author = {Abu{\'i}n, Jos{\'e} M. and Pena, Tom{\'a}s F. and Pichel, Juan C.}, + year = {2017}, + month = sep, + volume = {33}, + pages = {2948--2950}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx354}, + abstract = {Motivation: One basic step in many bioinformatics analyses is the Multiple Sequence Alignment (MSA). One of the state of the art tools to perform MSA is PASTA (Practical Alignments using SAT{\'e} and TrAnsitivity). PASTA supports multithreading but it is limited to process datasets on shared memory systems. In this work we introduce PASTASpark, a tool that uses the Big Data engine Apache Spark to boost the performance of the alignment phase of PASTA, which is the most expensive task in terms of time consumption.}, + file = {/Users/laurent/Documents/bibliography/to_read/Abuín et al. - 2017 - PASTASpark multiple sequence alignment meets Big .pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {18} +} + +@article{adeyIntegrationSingleCellGenomics2019, + title = {Integration of {{Single}}-{{Cell Genomics Datasets}}}, + author = {Adey, Andrew C.}, + year = {2019}, + month = jun, + volume = {177}, + pages = {1677--1679}, + issn = {0092-8674}, + doi = {10.1016/j.cell.2019.05.034}, + abstract = {Welch et~al. and Stuart et~al. present novel techniques for the integration of single-cell RNA-seq datasets across multiple platforms, individuals, and species. They both extend these strategies to map cell types between RNA-seq datasets with epigenetic properties and in situ transcript profiling. 
The ability to transfer information between datasets and spatial methods will enable more comprehensive profiling and comparisons of cell populations in complex biological systems.}, + file = {/Users/laurent/Zotero/storage/9U2IWLMJ/Adey - 2019 - Integration of Single-Cell Genomics Datasets.pdf;/Users/laurent/Zotero/storage/SP3VRRC3/S0092867419305628.html}, + journal = {Cell}, + language = {en}, + number = {7} +} + +@article{agnielVarianceComponentScore2017, + title = {Variance Component Score Test for Time-Course Gene Set Analysis of Longitudinal {{RNA}}-Seq Data}, + author = {Agniel, Denis and Hejblum, Boris P.}, + year = {2017}, + month = oct, + volume = {18}, + pages = {589--604}, + issn = {1465-4644, 1468-4357}, + doi = {10.1093/biostatistics/kxx005}, + abstract = {As gene expression measurement technology is shifting from microarrays to sequencing, the statistical tools available for their analysis must be adapted since RNA-seq data are measured as counts. It has been proposed to model RNA-seq counts as continuous variables using nonparametric regression to account for their inherent heteroscedasticity. In this vein, we propose tcgsaseq, a principled, model-free, and efficient method for detecting longitudinal changes in RNA-seq gene sets defined a priori. The method identifies those gene sets whose expression varies over time, based on an original variance component score test accounting for both covariates and heteroscedasticity without assuming any specific parametric distribution for the (transformed) counts. We demonstrate that despite the presence of a nonparametric component, our test statistic has a simple form and limiting distribution, and both may be computed quickly. A permutation version of the test is additionally proposed for very small sample sizes. 
Applied to both simulated data and two real datasets, tcgsaseq is shown to exhibit very good statistical properties, with an increase in stability and power when compared to state-of-the-art methods ROAST (rotation gene set testing), edgeR, and DESeq2, which can fail to control the type I error under certain realistic settings. We have made the method available for the community in the R package tcgsaseq.}, + file = {/Users/laurent/Documents/bibliography/DEA/Agniel and Hejblum - 2017 - Variance component score test for time-course gene.pdf}, + journal = {Biostatistics}, + language = {en}, + number = {4} +} + +@article{aiProfilingChromatinStates2019, + title = {Profiling Chromatin States Using Single-Cell {{itChIP}}-Seq}, + author = {Ai, Shanshan and Xiong, Haiqing and Li, Chen C. and Luo, Yingjie and Shi, Qiang and Liu, Yaxi and Yu, Xianhong and Li, Cheng and He, Aibin}, + year = {2019}, + month = sep, + volume = {21}, + pages = {1164--1172}, + issn = {1476-4679}, + doi = {10.1038/s41556-019-0383-5}, + abstract = {He and colleagues develop itChIP-seq based on simultaneous cellular indexing and chromatin tagmentation. itChIP-seq is applicable to both low-input and single-cell analyses of chromatin states.}, + copyright = {2019 The Author(s), under exclusive licence to Springer Nature Limited}, + file = {/Users/laurent/Zotero/storage/9SDKPHVJ/Ai et al. - 2019 - Profiling chromatin states using single-cell itChI.pdf;/Users/laurent/Zotero/storage/E3WREJ67/s41556-019-0383-5.html}, + journal = {Nature Cell Biology}, + language = {en}, + number = {9} +} + +@article{akersSTARChimericPost2018, + title = {{{STAR Chimeric Post}} for Rapid Detection of Circular {{RNA}} and Fusion Transcripts}, + author = {Akers, Nicholas K. and Schadt, Eric E. and Losic, Bojan}, + year = {2018}, + month = jul, + volume = {34}, + pages = {2364--2370}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty091}, + abstract = {AbstractMotivation. 
The biological relevance of chimeric RNA alignments is now well established. Chimera arising as chromosomal fusions are often drivers of ca}, + file = {/Users/laurent/Zotero/storage/6DEJZIPG/Akers et al. - 2018 - STAR Chimeric Post for rapid detection of circular.pdf;/Users/laurent/Zotero/storage/8EENK2ZT/4883488.html}, + journal = {Bioinformatics}, + language = {en}, + number = {14} +} + +@article{albrechtSinglecellChIPseqImputation2020, + title = {Single-Cell {{ChIP}}-Seq Imputation with {{SIMPA}} by Leveraging Bulk {{ENCODE}} Data}, + author = {Albrecht, Steffen and Andreani, Tommaso and {Andrade-Navarro}, Miguel A. and Fontaine, Jean-Fred}, + year = {2020}, + month = jan, + pages = {2019.12.20.883983}, + doi = {10.1101/2019.12.20.883983}, + abstract = {Single-cell ChIP-seq analysis is challenging due to data sparsity. We present SIMPA (https://github.com/salbrec/SIMPA), a single-cell ChIP-seq data imputation method leveraging predictive information within bulk ENCODE data to impute missing protein-DNA interacting regions of target histone marks or transcription factors. Machine learning models trained for each single cell, each target, and each genomic region enable drastic improvement in cell types clustering and genes identification.}, + copyright = {\textcopyright{} 2020, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution-NonCommercial-NoDerivs 4.0 International), CC BY-NC-ND 4.0, as described at http://creativecommons.org/licenses/by-nc-nd/4.0/}, + file = {/Users/laurent/Zotero/storage/7RAVMLPH/Albrecht et al. 
- 2020 - Single-cell ChIP-seq imputation with SIMPA by leve.pdf;/Users/laurent/Zotero/storage/344NUYM4/2019.12.20.html}, + journal = {bioRxiv}, + language = {en} +} + +@book{ALEGenericAssembly, + title = {{{ALE}}: A Generic Assembly Likelihood Evaluation Framework for Assessing the Accuracy of Genome and Metagenome Assemblies \textbar{} {{Bioinformatics}} \textbar{} {{Oxford Academic}}}, + keywords = {assembly evaluation} +} + +@incollection{alessandriDifferentialExpressionAnalysis2019, + title = {Differential {{Expression Analysis}} in {{Single}}-{{Cell Transcriptomics}}}, + booktitle = {Single {{Cell Methods}}}, + author = {Alessandr{\`i}, Luca and Arigoni, Maddalena and Calogero, Raffaele}, + editor = {Proserpio, Valentina}, + year = {2019}, + volume = {1979}, + pages = {425--432}, + publisher = {{Springer New York}}, + address = {{New York, NY}}, + doi = {10.1007/978-1-4939-9240-9_25}, + abstract = {Differential expression analysis is an important aspect of bulk RNA sequencing (RNAseq). A lot of tools are available, and among them DESeq2 and edgeR are widely used. Since single-cell RNA sequencing (scRNAseq) expression data are zero inflated, single-cell data are quite different from those generated by conventional bulk RNA sequencing. Comparative analysis of tools used to detect differentially expressed genes between two groups of single cells showed that edgeR with quasi-likelihood F-test (QLF) outperforms other methods.}, + file = {/Users/laurent/Zotero/storage/MKND4MS6/Alessandrì et al. - 2019 - Differential Expression Analysis in Single-Cell Tr.pdf;/Users/laurent/Zotero/storage/XWRVMC5C/Alessandrì et al. 
- 2019 - Differential Expression Analysis in Single-Cell Tr.pdf}, + isbn = {978-1-4939-9239-3 978-1-4939-9240-9}, + language = {en} +} + +@article{allesCellFixationPreservation2017, + title = {Cell Fixation and Preservation for Droplet-Based Single-Cell Transcriptomics}, + author = {Alles, Jonathan and Karaiskos, Nikos and Praktiknjo, Samantha D. and Grosswendt, Stefanie and Wahle, Philipp and Ruffault, Pierre-Louis and Ayoub, Salah and Schreyer, Luisa and Boltengagen, Anastasiya and Birchmeier, Carmen and Zinzen, Robert and Kocks, Christine and Rajewsky, Nikolaus}, + year = {2017}, + month = may, + volume = {15}, + pages = {44}, + issn = {1741-7007}, + doi = {10.1186/s12915-017-0383-5}, + abstract = {Recent developments in droplet-based microfluidics allow the transcriptional profiling of thousands of individual cells in a quantitative, highly parallel and cost-effective way. A critical, often limiting step is the preparation of cells in an unperturbed state, not altered by stress or ageing. Other challenges are rare cells that need to be collected over several days or samples prepared at different times or locations.}, + file = {/Users/laurent/Zotero/storage/HVSFFLZB/Alles et al. - 2017 - Cell fixation and preservation for droplet-based s.pdf;/Users/laurent/Zotero/storage/9ZLZA8RC/s12915-017-0383-5.html}, + journal = {BMC Biology}, + number = {1} +} + +@article{allhoffDifferentialPeakCalling2016, + title = {Differential Peak Calling of {{ChIP}}-Seq Signals with Replicates with {{THOR}}}, + author = {Allhoff, Manuel and Ser{\'e}, Kristin and F. Pires, Juliana and Zenke, Martin and G. Costa, Ivan}, + year = {2016}, + month = aug, + pages = {gkw680}, + issn = {0305-1048, 1362-4962}, + doi = {10.1093/nar/gkw680}, + abstract = {The study of changes in protein\textendash{}DNA interactions measured by ChIP-seq on dynamic systems, such as cell differentiation, response to treatments or the comparison of healthy and diseased individuals, is still an open challenge. 
There are few computational methods comparing changes in ChIP-seq signals with replicates. Moreover, none of these previous approaches addresses ChIP-seq specific experimental artefacts arising from studies with biological replicates. We propose THOR, a Hidden Markov Model based approach, to detect differential peaks between pairs of biological conditions with replicates. THOR provides all pre- and post-processing steps required in ChIP-seq analyses. Moreover, we propose a novel normalization approach based on housekeeping genes to deal with cases where replicates have distinct signal-to-noise ratios. To evaluate differential peak calling methods, we delineate a methodology using both biological and simulated data. This includes an evaluation procedure that associates differential peaks with changes in gene expression as well as histone modifications close to these peaks. We evaluate THOR and seven competing methods on data sets with distinct characteristics from in vitro studies with technical replicates to clinical studies of cancer patients. Our evaluation analysis comprises of 13 comparisons between pairs of biological conditions. We show that THOR performs best in all scenarios.}, + file = {/Users/laurent/Documents/bibliography/ChipSeq/Allhoff et al. - 2016 - Differential peak calling of ChIP-seq signals with.pdf}, + journal = {Nucleic Acids Research}, + language = {en} +} + +@article{alquicira-hernandezScPredAccurateSupervised2019, + title = {{{scPred}} : Accurate Supervised Method for Cell-Type Classification from Single-Cell {{RNA}}-Seq Data}, + shorttitle = {{{scPred}}}, + author = {{Alquicira-Hernandez}, Jose and Sathe, Anuja and Ji, Hanlee P. 
and Nguyen, Quan and Powell, Joseph E.}, + year = {2019}, + month = dec, + volume = {20}, + pages = {1--17}, + issn = {1474-760X}, + doi = {10.1186/s13059-019-1862-5}, + abstract = {Single-cell RNA sequencing has enabled the characterization of highly specific cell types in many tissues, as well as both primary and stem cell-derived cell lines. An important facet of these studies is the ability to identify the transcriptional signatures that define a cell type or state. In theory, this information can be used to classify an individual cell based on its transcriptional profile. Here, we present scPred, a new generalizable method that is able to provide highly accurate classification of single cells, using a combination of unbiased feature selection from a reduced-dimension space, and machine-learning probability-based prediction method. We apply scPred to scRNA-seq data from pancreatic tissue, mononuclear cells, colorectal tumor biopsies, and circulating dendritic cells and show that scPred is able to classify individual cells with high accuracy. The generalized method is available at https://github.com/powellgenomicslab/scPred/.}, + copyright = {2019 The Author(s).}, + file = {/Users/laurent/Zotero/storage/MQ2CVSDI/Alquicira-Hernandez et al. 
- 2019 - scPred accurate supervised method for cell-type .pdf;/Users/laurent/Zotero/storage/5L8YBZJN/s13059-019-1862-5.html}, + journal = {Genome Biology}, + language = {en}, + number = {1} +} + +@article{altmanPointsSignificanceClustering2017, + title = {Points of {{Significance}}: {{Clustering}}}, + shorttitle = {Points of {{Significance}}}, + author = {Altman, Naomi and Krzywinski, Martin}, + year = {2017}, + month = may, + volume = {14}, + pages = {545--546}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/nmeth.4299}, + file = {/Users/laurent/Documents/bibliography/stats/Altman and Krzywinski - 2017 - Points of Significance Clustering.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {6} +} + +@article{altschulerApproximatingQuadraticTransportation2018, + title = {Approximating the {{Quadratic Transportation Metric}} in {{Near}}-{{Linear Time}}}, + author = {Altschuler, Jason and Bach, Francis and Rudi, Alessandro and Weed, Jonathan}, + year = {2018}, + month = oct, + abstract = {Computing the quadratic transportation metric (also called the 2-Wasserstein distance or root mean square distance) between two point clouds, or, more generally, two discrete distributions, is a fundamental problem in machine learning, statistics, computer graphics, and theoretical computer science. A long line of work has culminated in a sophisticated geometric algorithm due to Agarwal and Sharathkumar [2], which runs in time O\texttildelow{}(n3/2), where n is the number of points. However, obtaining faster algorithms has proven difficult since the 2-Wasserstein distance is known to have poor sketching and embedding properties, which limits the effectiveness of geometric approaches. In this paper, we give an extremely simple deterministic algorithm with O\texttildelow{}(n) runtime by using a completely different approach based on entropic regularization, approximate Sinkhorn scaling, and low-rank approximations of Gaussian kernel matrices. 
We give explicit dependence of our algorithm on the dimension and precision of the approximation.}, + archivePrefix = {arXiv}, + eprint = {1810.10046}, + eprinttype = {arxiv}, + file = {/Users/laurent/Zotero/storage/XWB4SHV5/Altschuler et al. - 2018 - Approximating the Quadratic Transportation Metric .pdf}, + journal = {arXiv:1810.10046 [cs, math]}, + keywords = {Computer Science - Data Structures and Algorithms,Mathematics - Optimization and Control}, + language = {en}, + primaryClass = {cs, math} +} + +@article{amandDynaVennWebbasedComputation2019, + title = {{{DynaVenn}}: Web-Based Computation of the Most Significant Overlap between Ordered Sets}, + shorttitle = {{{DynaVenn}}}, + author = {Amand, J{\'e}r{\'e}my and Fehlmann, Tobias and Backes, Christina and Keller, Andreas}, + year = {2019}, + month = dec, + volume = {20}, + pages = {743}, + issn = {1471-2105}, + doi = {10.1186/s12859-019-3320-5}, + abstract = {In many research disciplines, ordered lists are compared. One example is to compare a subset of all significant genes or proteins in a primary study to those in a replication study. Often, the top of the lists are compared using Venn diagrams, ore more precisely Euler diagrams (set diagrams showing logical relations between a finite collection of different sets). If different cohort sizes, different techniques or algorithms for evaluation were applied, a direct comparison of significant genes with a fixed threshold can however be misleading and approaches comparing lists would be more appropriate.}, + file = {/Users/laurent/Zotero/storage/EQ6AZ47H/Amand et al. 
- 2019 - DynaVenn web-based computation of the most signif.pdf}, + journal = {BMC Bioinformatics}, + language = {en}, + number = {1} +} + +@article{ambrosiniPWMScanFastTool2018, + title = {{{PWMScan}}: A Fast Tool for Scanning Entire Genomes with a Position-Specific Weight Matrix}, + shorttitle = {{{PWMScan}}}, + author = {Ambrosini, Giovanna and Groux, Romain and Bucher, Philipp}, + year = {2018}, + month = jul, + volume = {34}, + pages = {2483--2484}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty127}, + abstract = {AbstractSummary. Transcription factors regulate gene expression by binding to specific short DNA sequences of 5\textendash{}20 bp to regulate the rate of transcription of}, + file = {/Users/laurent/Zotero/storage/KUTRYNVQ/Ambrosini et al. - 2018 - PWMScan a fast tool for scanning entire genomes w.pdf;/Users/laurent/Zotero/storage/IC9T5AQV/4921176.html}, + journal = {Bioinformatics}, + language = {en}, + number = {14} +} + +@article{andersDifferentialExpressionAnalysis2010, + title = {Differential Expression Analysis for Sequence Count Data}, + author = {Anders, Simon and Huber, Wolfgang}, + year = {2010}, + pages = {12}, + abstract = {High-throughput sequencing assays such as RNA-Seq, ChIP-Seq or barcode counting provide quantitative readouts in the form of count data. To infer differential signal in such data correctly and with good statistical power, estimation of data variability throughout the dynamic range and a suitable error model are required. 
We propose a method based on the negative binomial distribution, with variance and mean linked by local regression and present an implementation, DESeq, as an R/Bioconductor package.}, + file = {/Users/laurent/Documents/bibliography/DEA/Anders and Huber - 2010 - Differential expression analysis for sequence coun.pdf}, + language = {en} +} + +@article{andersDifferentialExpressionRNASeq, + title = {Differential Expression of {{RNA}}-{{Seq}} Data at the Gene Level \textendash{} the {{DESeq}} Package}, + author = {Anders, Simon and Huber, Wolfgang}, + pages = {24}, + abstract = {A basic task in the analysis of count data from RNA-Seq is the detection of differentially expressed genes. The count data are presented as a table which reports, for each sample, the number of reads that have been assigned to a gene. Analogous analyses also arise for other assay types, such as comparative ChIP-Seq. The package DESeq provides methods to test for differential expression by use of the negative binonial distribution and a shrinkage estimator for the distribution's variance1. This vignette explains the use of the package. For an exposition of the statistical method, please see our paper [1] and the additional information in Section 9.}, + file = {/Users/laurent/Documents/bibliography/DEA/Anders and Huber - Differential expression of RNA-Seq data at the gene.pdf}, + language = {en} +} + +@article{andrewsModellingDropoutsFeature2017, + title = {Modelling Dropouts for Feature Selection in {{scRNASeq}} Experiments}, + author = {Andrews, Tallulah S. and Hemberg, Martin}, + year = {2017}, + month = may, + doi = {10.1101/065094}, + abstract = {A key challenge of single-cell RNASeq (scRNASeq) is the many genes with zero reads in some cells, but high expression in others. 
In full-transcript datasets modelling zeros using the Michaelis-Menten equation provides an equal or superior fit to existing scRNASeq datasets compared to other approaches and enables fast and accurate identification of features corresponding to differentially expressed genes without prior identification of cell subpopulations. For datasets tagged with unique molecular identifiers we introduce a depth adjusted negative binomial (DANB) to perform dropout-rate based feature selection. Applying our method to mouse preimplantation embryos revealed clusters corresponding to the inner cell mass and trophectoderm of the blastocyst. Our feature selection method overcomes batch effects to cluster cells from five different datasets by developmental stage rather than experimental origin.}, + file = {/Users/laurent/Documents/bibliography/to_read/Andrews and Hemberg - 2017 - Modelling dropouts for feature selection in scRNAS.pdf}, + language = {en} +} + +@article{antipovHybridSPAdesAlgorithmHybrid2016, + title = {{{hybridSPAdes}}: An Algorithm for Hybrid Assembly of Short and Long Reads}, + shorttitle = {{{hybridSPAdes}}}, + author = {Antipov, Dmitry and Korobeynikov, Anton and McLean, Jeffrey S. and Pevzner, Pavel A.}, + year = {2016}, + month = apr, + volume = {32}, + pages = {1009--1015}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btv688}, + abstract = {Motivation: Recent advances in single molecule real-time (SMRT) and nanopore sequencing technologies have enabled high-quality assemblies from long and inaccurate reads. However, these approaches require high coverage by long reads and remain expensive. On the other hand, the inexpensive short reads technologies produce accurate but fragmented assemblies. 
Thus, a hybrid approach that assembles long reads (with low coverage) and short reads has a potential to generate high-quality assemblies at reduced cost., Results: We describe hybridSPAdes algorithm for assembling short and long reads and benchmark it on a variety of bacterial assembly projects. Our results demonstrate that hybridSPAdes generates accurate assemblies (even in projects with relatively low coverage by long reads) thus reducing the overall cost of genome sequencing. We further present the first complete assembly of a genome from single cells using SMRT reads., Availability and implementation: hybridSPAdes is implemented in C++ as a part of SPAdes genome assembler and is publicly available at http://bioinf.spbau.ru/en/spades, Contact: d.antipov@spbu.ru, Supplementary information: supplementary data are available at Bioinformatics online.}, + journal = {Bioinformatics}, + number = {7}, + pmcid = {PMC4907386}, + pmid = {26589280} +} + +@misc{AppearanceAdminArea, + title = {Appearance {$\cdot$} {{Admin Area}}}, + abstract = {LBMC}, + howpublished = {https://gitlab.biologie.ens-lyon.fr/admin/appearance}, + journal = {GitLab}, + language = {en} +} + +@article{ArchimedeanCopulasHigh, + title = {{Archimedean Copulas in High Dimensions: Estimators and Numerical Challenges Motivated by Financial Applications | Journal de la Soci{\'e}t{\'e} Fran{\c c}aise de Statistique}}, + shorttitle = {{Archimedean Copulas in High Dimensions}}, + abstract = {R{\'e}sum{\'e} + The study of Archimedean dependence models in high dimensions is motivated by current practice in +quantitative risk management. The performance of known and new parametric estimators for the parameters of +Archimedean copulas is investigated and related numerical difficulties are addressed. 
In particular, method-of-momentslike +estimators based on pairwise Kendall's tau, a multivariate extension of Blomqvist's beta, minimum distance +estimators, the maximum-likelihood estimator, a simulated maximum-likelihood estimator, and a maximum-likelihood +estimator based on the copula diagonal are studied. Their performance is compared in a large-scale simulation study +both under known and unknown margins (pseudo-observations), in small and high dimensions, under small and large +dependencies, and various different Archimedean families. High dimensions up to one hundred are considered and +computational problems arising from such large dimensions are addressed in detail. All methods are implemented in +the open source R package copula and can thus be easily accessed and studied. The numerical solutions developed +in this work extend to various asymmetric generalizations of Archimedean copulas and important quantities such as +distributions of radial parts or the Kendall distribution function.}, + file = {/Users/laurent/Zotero/storage/AILGNQIH/Archimedean Copulas in High Dimensions Estimators.pdf;/Users/laurent/Zotero/storage/98RD798S/154.html}, + language = {fr-CA} +} + +@article{arefeenTAPASToolAlternative2018, + title = {{{TAPAS}}: Tool for Alternative Polyadenylation Site Analysis}, + shorttitle = {{{TAPAS}}}, + author = {Arefeen, Ashraful and Liu, Juntao and Xiao, Xinshu and Jiang, Tao}, + year = {2018}, + month = aug, + volume = {34}, + pages = {2521--2529}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty110}, + abstract = {AbstractMotivation. The length of the 3{${'}$} untranslated region (3{${'}$} UTR) of an mRNA is essential for many biological activities such as mRNA stability, sub-cellul}, + file = {/Users/laurent/Zotero/storage/HKCR4LRH/Arefeen et al. 
- 2018 - TAPAS tool for alternative polyadenylation site a.pdf;/Users/laurent/Zotero/storage/69LKHSCJ/4904269.html}, + journal = {Bioinformatics}, + language = {en}, + number = {15} +} + +@article{arlothDeepWASMultivariateGenotypephenotype2020, + title = {{{DeepWAS}}: {{Multivariate}} Genotype-Phenotype Associations by Directly Integrating Regulatory Information Using Deep Learning}, + shorttitle = {{{DeepWAS}}}, + author = {Arloth, Janine and Eraslan, G{\"o}kcen and Andlauer, Till F. M. and Martins, Jade and Iurato, Stella and K{\"u}hnel, Brigitte and Waldenberger, Melanie and Frank, Josef and Gold, Ralf and Hemmer, Bernhard and Luessi, Felix and Nischwitz, Sandra and Paul, Friedemann and Wiendl, Heinz and Gieger, Christian and {Heilmann-Heimbach}, Stefanie and Kacprowski, Tim and Laudes, Matthias and Meitinger, Thomas and Peters, Annette and Rawal, Rajesh and Strauch, Konstantin and Lucae, Susanne and {M{\"u}ller-Myhsok}, Bertram and Rietschel, Marcella and Theis, Fabian J. and Binder, Elisabeth B. and Mueller, Nikola S.}, + year = {2020}, + month = feb, + volume = {16}, + pages = {e1007616}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1007616}, + abstract = {Genome-wide association studies (GWAS) identify genetic variants associated with traits or diseases. GWAS never directly link variants to regulatory mechanisms. Instead, the functional annotation of variants is typically inferred by post hoc analyses. A specific class of deep learning-based methods allows for the prediction of regulatory effects per variant on several cell type-specific chromatin features. We here describe ``DeepWAS'', a new approach that integrates these regulatory effect predictions of single variants into a multivariate GWAS setting. Thereby, single variants associated with a trait or disease are directly coupled to their impact on a chromatin feature in a cell type. 
Up to 61 regulatory SNPs, called dSNPs, were associated with multiple sclerosis (MS, 4,888 cases and 10,395 controls), major depressive disorder (MDD, 1,475 cases and 2,144 controls), and height (5,974 individuals). These variants were mainly non-coding and reached at least nominal significance in classical GWAS. The prediction accuracy was higher for DeepWAS than for classical GWAS models for 91\% of the genome-wide significant, MS-specific dSNPs. DSNPs were enriched in public or cohort-matched expression and methylation quantitative trait loci and we demonstrated the potential of DeepWAS to generate testable functional hypotheses based on genotype data alone. DeepWAS is available at https://github.com/cellmapslab/DeepWAS.}, + file = {/Users/laurent/Zotero/storage/3ID3DEYL/Arloth et al. - 2020 - DeepWAS Multivariate genotype-phenotype associati.pdf;/Users/laurent/Zotero/storage/J2A2LX5I/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Chromatin,DNA methylation,Gene expression,Genetic loci,Genome-wide association studies,Molecular genetics,Multiple sclerosis,Quantitative trait loci}, + language = {en}, + number = {2} +} + +@article{asafu-adjeiCovariateAdjustedClassification2018, + title = {Covariate Adjusted Classification Trees}, + author = {{Asafu-Adjei}, Josephine K and Sampson, Allan R}, + year = {2018}, + month = jan, + volume = {19}, + pages = {42--53}, + issn = {1465-4644, 1468-4357}, + doi = {10.1093/biostatistics/kxx015}, + abstract = {In studies that compare several diagnostic groups, subjects can be measured on certain features and classification trees can be used to identify which of them best characterize the differences among groups. However, subjects may also be measured on additional covariates whose ability to characterize group differences is not meaningful or of interest, but may still have an impact on the examined features. Therefore, it is important to adjust for the effects of covariates on these features. 
We present a new semiparametric approach to adjust for covariate effects when constructing classification trees based on the features of interest that is readily implementable. An application is given for postmortem brain tissue data to compare the neurobiological characteristics of subjects with schizophrenia to those of normal controls. We also evaluate the performance of our approach using a simulation study.}, + file = {/Users/laurent/Documents/bibliography/to_read/Asafu-Adjei and Sampson - 2018 - Covariate adjusted classification trees.pdf}, + journal = {Biostatistics}, + language = {en}, + number = {1} +} + +@article{ashoorHMCandiffMethodDetect2017, + title = {{{HMCan}}-Diff: A Method to Detect Changes in Histone Modifications in Cells with Different Genetic Characteristics}, + shorttitle = {{{HMCan}}-Diff}, + author = {Ashoor, Haitham and {Louis-Brennetot}, Caroline and {Janoueix-Lerosey}, Isabelle and Bajic, Vladimir B. and Boeva, Valentina}, + year = {2017}, + month = jan, + pages = {gkw1319}, + issn = {0305-1048, 1362-4962}, + doi = {10.1093/nar/gkw1319}, + abstract = {Comparing histone modification profiles between cancer and normal states, or across different tumor samples, can provide insights into understanding cancer initiation, progression and response to therapy. ChIP-seq histone modification data of cancer samples are distorted by copy number variation innate to any cancer cell. We present HMCan-diff, the first method designed to analyze ChIP-seq data to detect changes in histone modifications between two cancer samples of different genetic backgrounds, or between a cancer sample and a normal control. HMCan-diff explicitly corrects for copy number bias, and for other biases in the ChIP-seq data, which significantly improves prediction accuracy compared to methods that do not consider such corrections. 
On in silico simulated ChIP-seq data generated using genomes with differences in copy number profiles, HMCan-diff shows a much better performance compared to other methods that have no correction for copy number bias. Additionally, we benchmarked HMCan-diff on four experimental datasets, characterizing two histone marks in two different scenarios. We correlated changes in histone modifications between a cancer and a normal control sample with changes in gene expression. On all experimental datasets, HMCan-diff demonstrated better performance compared to the other methods.}, + file = {/Users/laurent/Documents/bibliography/ChipSeq/Ashoor et al. - 2017 - HMCan-diff a method to detect changes in histone .pdf}, + journal = {Nucleic Acids Research}, + language = {en} +} + +@article{ashtonSingleCellGenomicsBest2019, + title = {Single-{{Cell Genomics}}: {{Best Practices}} and {{New Insights}}}, + shorttitle = {Single-{{Cell Genomics}}}, + author = {Ashton, John M.}, + year = {2019}, + month = dec, + volume = {30}, + pages = {S61--S62}, + issn = {1524-0215}, + abstract = {Single-cell RNA sequencing (scRNA-Seq) offers great new opportunities for increasing our understanding of complex biological processes. In particular, development of an accurate Human Cell Atlas is largely dependent on the rapidly advancing technologies and molecular chemistries employed in scRNA-Seq. These advances have already allowed an increase in throughput for scRNA-Seq from 96 to 80,000 cells on a single instrument run by capturing cells within nano-liter droplets. In addition, use of oligonucleotide labeled antibodies in combination with scRNA-Seq techniques (e.g. 
CITE-Seq, Total-Seq, REAP-Seq) allows for multiplexing and to pair transcriptome information with cell surface protein expression to enhance the value of these data as well as to allow for novel insights into the complex biology. Use of such barcoded antibodies approached has immense value in understanding complex tissues and developmental processes, such as lung and hematopoietic development, as well as understanding the intricate composition of cancer tissue at the cellular level to help inform treatment strategies. Here we present some initial findings on best practices for use of barcoded antibodies with 10X Genomics technology and provided some data on the added value that can be extracted by using cell surface protein markers to help gain insights into complex tissue organization.}, + file = {/Users/laurent/Zotero/storage/6EVV8EER/Ashton - 2019 - Single-Cell Genomics Best Practices and New Insig.pdf}, + journal = {Journal of Biomolecular Techniques : JBT}, + number = {Suppl}, + pmcid = {PMC6938105}, + pmid = {31897032} +} + +@book{associationStressAmericaCoping2017, + title = {Stress in {{America}}: {{Coping}} with {{Change}}}, + author = {{American Psychological Association}}, + year = {2017}, + publisher = {{Stress in America TM Survey}} +} + +@book{associationStressAmericaGeneration2018, + title = {Stress in {{America}}: {{Generation Z}}}, + author = {{American Psychological Association}}, + year = {2018}, + publisher = {{Stress in America TM Survey}} +} + +@article{athanasiadouCompleteStatisticalModel2019, + title = {A Complete Statistical Model for Calibration of {{RNA}}-Seq Counts Using External Spike-Ins and Maximum Likelihood Theory}, + author = {Athanasiadou, Rodoniki and Neymotin, Benjamin and Brandt, Nathan and Wang, Wei and Christiaen, Lionel and Gresham, David and Tranchina, Daniel}, + year = {2019}, + month = mar, + volume = {15}, + pages = {e1006794}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1006794}, + abstract = {A fundamental 
assumption, common to the vast majority of high-throughput transcriptome analyses, is that the expression of most genes is unchanged among samples and that total cellular RNA remains constant. As the number of analyzed experimental systems increases however, different independent studies demonstrate that this assumption is often violated. We present a calibration method using RNA spike-ins that allows for the measurement of absolute cellular abundance of RNA molecules. We apply the method to pooled RNA from cell populations of known sizes. For each transcript, we compute a nominal abundance that can be converted to absolute by dividing by a scale factor determined in separate experiments: the yield coefficient of the transcript relative to that of a reference spike-in measured with the same protocol. The method is derived by maximum likelihood theory in the context of a complete statistical model for sequencing counts contributed by cellular RNA and spike-ins. The counts are based on a sample from a fixed number of cells to which a fixed population of spike-in molecules has been added. We illustrate and evaluate the method with applications to two global expression data sets, one from the model eukaryote Saccharomyces cerevisiae, proliferating at different growth rates, and differentiating cardiopharyngeal cell lineages in the chordate Ciona robusta. We tested the method in a technical replicate dilution study, and in a k-fold validation study.}, + file = {/Users/laurent/Zotero/storage/MK7V7ENI/Athanasiadou et al. 
- 2019 - A complete statistical model for calibration of RN.pdf;/Users/laurent/Zotero/storage/D9NUPB9T/article.html}, + journal = {PLOS Computational Biology}, + keywords = {cDNA libraries,Gene expression,Ribosomal RNA,RNA extraction,RNA sequencing,Statistical models,Total cell counting,Yeast}, + language = {en}, + number = {3} +} + +@article{avsecModelingPositionalEffects2018, + title = {Modeling Positional Effects of Regulatory Sequences with Spline Transformations Increases Prediction Accuracy of Deep Neural Networks}, + author = {Avsec, {\v Z}iga and Barekatain, Mohammadamin and Cheng, Jun and Gagneur, Julien}, + year = {2018}, + month = apr, + volume = {34}, + pages = {1261--1269}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx727}, + abstract = {Motivation: Regulatory sequences are not solely defined by their nucleic acid sequence but also by their relative distances to genomic landmarks such as transcription start site, exon boundaries or polyadenylation site. Deep learning has become the approach of choice for modeling regulatory sequences because of its strength to learn complex sequence features. However, modeling relative distances to genomic landmarks in deep neural networks has not been addressed.}, + file = {/Users/laurent/Documents/bibliography/to_read/Avsec et al. - 2018 - Modeling positional effects of regulatory sequence.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {8} +} + +@misc{BabrahamBioinformaticsFastQC, + title = {Babraham {{Bioinformatics}} - {{FastQC A Quality Control}} Tool for {{High Throughput Sequence Data}}}, + file = {/Users/laurent/Zotero/storage/PDFS2RSA/fastqc.html}, + howpublished = {http://www.bioinformatics.babraham.ac.uk/projects/fastqc/} +} + +@article{babtieLearningRegulatoryModels2017, + title = {Learning Regulatory Models for Cell Development from Single Cell Transcriptomic Data}, + author = {Babtie, Ann C. and Chan, Thalia E. 
and Stumpf, Michael P. H.}, + year = {2017}, + month = oct, + volume = {5}, + pages = {72--81}, + issn = {2452-3100}, + doi = {10.1016/j.coisb.2017.07.013}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Babtie et al. - 2017 - Learning regulatory models for cell development fr.pdf;/Users/laurent/Zotero/storage/5SMUQBHY/Babtie et al. - 2017 - Learning regulatory models for cell development fr.pdf;/Users/laurent/Zotero/storage/GZNDVIIP/Babtie et al. - 2017 - Learning regulatory models for cell development fr.pdf;/Users/laurent/Zotero/storage/WUPCBJ3N/Babtie et al. - 2017 - Learning regulatory models for cell development fr.pdf}, + journal = {Current Opinion in Systems Biology}, + language = {en} +} + +@article{bacherDesignComputationalAnalysis2016, + title = {Design and Computational Analysis of Single-Cell {{RNA}}-Sequencing Experiments}, + author = {Bacher, Rhonda and Kendziorski, Christina}, + year = {2016}, + month = dec, + volume = {17}, + issn = {1474-760X}, + doi = {10.1186/s13059-016-0927-y}, + abstract = {Single-cell RNA-sequencing (scRNA-seq) has emerged as a revolutionary tool that allows us to address scientific questions that eluded examination just a few years ago. With the advantages of scRNA-seq come computational challenges that are just beginning to be addressed. 
In this article, we highlight the computational methods available for the design and analysis of scRNA-seq experiments, their advantages and disadvantages in various settings, the open questions for which novel methods are needed, and expected future developments in this exciting area.}, + file = {/Users/laurent/Zotero/storage/BU6CFY58/Bacher and Kendziorski - 2016 - Design and computational analysis of single-cell R.pdf;/Users/laurent/Zotero/storage/UCGI6M4U/Bacher and Kendziorski - 2016 - Design and computational analysis of single-cell R.pdf}, + journal = {Genome Biology}, + language = {en}, + number = {1} +} + +@article{bacherSCnormRobustNormalization2017, + title = {{{SCnorm}}: Robust Normalization of Single-Cell {{RNA}}-Seq Data}, + shorttitle = {{{SCnorm}}}, + author = {Bacher, Rhonda and Chu, Li-Fang and Leng, Ning and Gasch, Audrey P and Thomson, James A and Stewart, Ron M and Newton, Michael and Kendziorski, Christina}, + year = {2017}, + month = apr, + volume = {14}, + pages = {584--586}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/nmeth.4263}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Bacher et al. - 2017 - SCnorm robust normalization of single-cell RNA-se 2.pdf;/Users/laurent/Documents/bibliography/scRNASeq/Bacher et al. - 2017 - SCnorm robust normalization of single-cell RNA-se 3.pdf;/Users/laurent/Documents/bibliography/scRNASeq/Bacher et al. - 2017 - SCnorm robust normalization of single-cell RNA-se.pdf;/Users/laurent/Zotero/storage/B33J25SQ/Bacher et al. - 2017 - SCnorm robust normalization of single-cell RNA-se.pdf;/Users/laurent/Zotero/storage/CCNUK68B/Bacher et al. - 2017 - SCnorm robust normalization of single-cell RNA-se.pdf;/Users/laurent/Zotero/storage/FCRXXPBV/Bacher et al. - 2017 - SCnorm robust normalization of single-cell RNA-se.pdf;/Users/laurent/Zotero/storage/NJDN6DYR/Bacher et al. - 2017 - SCnorm robust normalization of single-cell RNA-se.pdf;/Users/laurent/Zotero/storage/RDYGBVIE/Bacher et al. 
- 2017 - SCnorm robust normalization of single-cell RNA-se.pdf;/Users/laurent/Zotero/storage/RWWLDIFY/Bacher et al. - 2017 - SCnorm robust normalization of single-cell RNA-se.pdf;/Users/laurent/Zotero/storage/WHDK2ITV/Bacher et al. - 2017 - SCnorm robust normalization of single-cell RNA-se.pdf;/Users/laurent/Zotero/storage/XSBMLA2J/Bacher et al. - 2017 - SCnorm robust normalization of single-cell RNA-se.pdf;/Users/laurent/Zotero/storage/YWC7T4HJ/Bacher et al. - 2017 - SCnorm robust normalization of single-cell RNA-se.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {6} +} + +@article{bachtrogChromosomeEvolutionEmerging2013, + title = {Y Chromosome Evolution: Emerging Insights into Processes of {{Y}} Chromosome Degeneration}, + shorttitle = {Y Chromosome Evolution}, + author = {Bachtrog, Doris}, + year = {2013}, + month = feb, + volume = {14}, + pages = {113--124}, + issn = {1471-0056}, + doi = {10.1038/nrg3366}, + abstract = {The human Y chromosome is intriguing not only because it harbours the master-switch gene determining gender but also because of its unusual evolutionary trajectory. Previously an autosome, Y chromosome evolution has been characterized by massive gene decay. Recent whole-genome and transcriptome analyses of Y chromosomes in humans and other primates, in Drosophila species as well as in plants have shed light on the current gene content of the Y, its origins and its long-term fate. Comparative analysis of young and old Y chromosomes have given further insights into the evolutionary and molecular forces triggering Y degeneration and its evolutionary destiny.}, + journal = {Nature reviews. Genetics}, + number = {2}, + pmcid = {PMC4120474}, + pmid = {23329112} +} + +@article{badshaImputationSinglecellGene2019, + title = {Imputation of Single-Cell Gene Expression with an Autoencoder Neural Network}, + author = {Badsha, Md. Bahadur and Li, Rui and Liu, Boxiang and Li, Yang I. and Xian, Min and Banovich, Nicholas E. 
and Fu, Audrey Qiuyan}, + year = {2019}, + month = nov, + doi = {10.1101/504977}, + file = {/Users/laurent/Zotero/storage/NJ8TSIG7/Badsha et al. - 2019 - Imputation of single-cell gene expression with an .pdf}, + journal = {bioRxiv}, + language = {en} +} + +@article{baileyIdentifyingDriversParallel2018, + title = {Identifying {{Drivers}} of {{Parallel Evolution}}: {{A Regression Model Approach}}}, + shorttitle = {Identifying {{Drivers}} of {{Parallel Evolution}}}, + author = {Bailey, Susan F. and Guo, Qianyun and Bataillon, Thomas}, + year = {2018}, + month = oct, + volume = {10}, + pages = {2801--2812}, + doi = {10.1093/gbe/evy210}, + abstract = {Abstract. Parallel evolution, defined as identical changes arising in independent populations, is often attributed to similar selective pressures favoring the}, + file = {/Users/laurent/Zotero/storage/ZDFCMTUX/Bailey et al. - 2018 - Identifying Drivers of Parallel Evolution A Regre.pdf;/Users/laurent/Zotero/storage/UPYV8YKA/5106663.html}, + journal = {Genome Biology and Evolution}, + language = {en}, + number = {10} +} + +@article{baisScdsComputationalAnnotation2019, + title = {Scds: {{Computational Annotation}} of {{Doublets}} in {{Single Cell RNA Sequencing Data}}}, + shorttitle = {Scds}, + author = {Bais, Abha S and Kostka, Dennis}, + year = {2019}, + month = feb, + doi = {10.1101/564021}, + abstract = {Motivation: Single cell RNA sequencing (scRNA-seq) technologies enable the study of transcriptional heterogeneity at the resolution of individual cells and have an increasing impact on biomedical research. Specifically, high-throughput approaches that employ micro-fluidics in combination with unique molecular identifiers (UMIs) are capable of assaying many thousands of cells per experiment and are rapidly becoming commonplace. However, it is known that these methods sometimes wrongly consider two or more cells as single cells, and that a number of so-called doublets is present in the output of such experiments. 
Treating doublets as single cells in downstream analyses can severely bias a study's conclusions, and therefore computational strategies for the identification of doublets are needed. Here we present single cell doublet scoring (scds), a software tool for the in silico identification of doublets in scRNA-seq data.}, + file = {/Users/laurent/Zotero/storage/DZHIXLGF/Bais and Kostka - 2019 - scds Computational Annotation of Doublets in Sing.pdf;/Users/laurent/Zotero/storage/NDEMVDJW/Bais and Kostka - 2019 - scds Computational Annotation of Doublets in Sing.pdf;/Users/laurent/Zotero/storage/TY49IMQF/Bais and Kostka - 2019 - scds Computational Annotation of Doublets in Sing.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@article{baisScdsComputationalAnnotation2020, + title = {Scds: Computational Annotation of Doublets in Single-Cell {{RNA}} Sequencing Data}, + shorttitle = {Scds}, + author = {Bais, Abha S. and Kostka, Dennis}, + year = {2020}, + month = feb, + volume = {36}, + pages = {1150--1158}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz698}, + abstract = {Motivation: Single-cell RNA sequencing (scRNA-seq) technologies enable the study of transcriptional heterogeneity at the resolution of individual cells}, + file = {/Users/laurent/Zotero/storage/XYEFM7EG/Bais and Kostka - 2020 - scds computational annotation of doublets in sing.pdf;/Users/laurent/Zotero/storage/4AXV6MU2/5566507.html}, + journal = {Bioinformatics}, + language = {en}, + number = {4} +} + +@article{baiStatisticalTestStructured2019, + title = {Statistical Test of Structured Continuous Trees Based on Discordance Matrix}, + author = {Bai, Xiangqi and Ma, Liang and Wan, Lin}, + editor = {Schwartz, Russell}, + year = {2019}, + month = may, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btz425}, + abstract = {Motivation: Cell fate determination is a continuous process in which one cell type diversifies to other cell types following a hierarchical path. 
Advancements in single-cell technologies provide the opportunity to reveal the continuum of cell progression which forms a structured continuous tree. Computational algorithms, which are usually based on a priori assumptions on the hidden structures, have previously been proposed as a means of recovering pseudo-trajectory along cell differentiation process. However, there still lack of statistical framework on the assessments of intrinsic structure embedded in high-dimensional gene expression profile. Inherit noise and cell-to-cell variation underlie the single-cell data, on the other hand, pose grand challenges to testing even basic structures, such as linear versus bifurcation.}, + file = {/Users/laurent/Zotero/storage/44D3TP69/Bai et al. - 2019 - Statistical test of structured continuous trees ba.pdf;/Users/laurent/Zotero/storage/RGVICJW4/Bai et al. - 2019 - Statistical test of structured continuous trees ba.pdf}, + journal = {Bioinformatics}, + language = {en} +} + +@article{baldiGenomewideMeasurementLocal2018, + title = {Genome-Wide Measurement of Local Nucleosome Array Regularity and Spacing by Nanopore Sequencing}, + author = {Baldi, Sandro and Krebs, Stefan and Blum, Helmut and Becker, Peter B.}, + year = {2018}, + month = sep, + volume = {25}, + pages = {894}, + issn = {1545-9985}, + doi = {10.1038/s41594-018-0110-0}, + abstract = {A new approach to map nucleosome array regularity and spacing reveals modulation of array regularity and nucleosome repeat length depending on functional chromatin states.}, + copyright = {2018 The Author(s)}, + file = {/Users/laurent/Zotero/storage/UW7C37HH/Baldi et al. 
- 2018 - Genome-wide measurement of local nucleosome array .pdf;/Users/laurent/Zotero/storage/T4N33FPR/s41594-018-0110-0.html}, + journal = {Nature Structural \& Molecular Biology}, + language = {En}, + number = {9} +} + +@article{bankevichSPAdesNewGenome2012, + title = {{{SPAdes}}: {{A New Genome Assembly Algorithm}} and {{Its Applications}} to {{Single}}-{{Cell Sequencing}}}, + shorttitle = {{{SPAdes}}}, + author = {Bankevich, Anton and Nurk, Sergey and Antipov, Dmitry and Gurevich, Alexey A. and Dvorkin, Mikhail and Kulikov, Alexander S. and Lesin, Valery M. and Nikolenko, Sergey I. and Pham, Son and Prjibelski, Andrey D. and Pyshkin, Alexey V. and Sirotkin, Alexander V. and Vyahhi, Nikolay and Tesler, Glenn and Alekseyev, Max A. and Pevzner, Pavel A.}, + year = {2012}, + month = may, + volume = {19}, + pages = {455--477}, + issn = {1066-5277}, + doi = {10.1089/cmb.2012.0021}, + abstract = {The lion's share of bacteria in various environments cannot be cloned in the laboratory and thus cannot be sequenced using existing technologies. A major goal of single-cell genomics is to complement gene-centric metagenomic data with whole-genome assemblies of uncultivated organisms. Assembly of single-cell data is challenging because of highly non-uniform read coverage as well as elevated levels of sequencing errors and chimeric reads. We describe SPAdes, a new assembler for both single-cell and standard (multicell) assembly, and demonstrate that it improves on the recently released E+V-SC assembler (specialized for single-cell data) and on popular assemblers Velvet and SoapDeNovo (for multicell data). SPAdes generates single-cell assemblies, providing information about genomes of uncultivatable bacteria that vastly exceeds what may be obtained via traditional metagenomics studies. SPAdes is available online (http://bioinf.spbau.ru/spades). 
It is distributed as open source software.}, + journal = {Journal of Computational Biology}, + number = {5}, + pmcid = {PMC3342519}, + pmid = {22506599} +} + +@article{baoProbabilisticNaturalMapping2018, + title = {Probabilistic Natural Mapping of Gene-Level Tests for Genome-Wide Association Studies}, + author = {Bao, Feng and Deng, Yue and Du, Mulong and Ren, Zhiquan and Zhang, Qingzhao and Zhao, Yanyu and Suo, Jinli and Zhang, Zhengdong and Wang, Meilin and Dai, Qionghai}, + year = {2018}, + month = jul, + volume = {19}, + pages = {545--553}, + issn = {1467-5463}, + doi = {10.1093/bib/bbx002}, + abstract = {Abstract. Genome-wide association studies (GWASs) generally focus on a single marker, which limits the elucidation of the genetic architecture of complex trait}, + file = {/Users/laurent/Zotero/storage/YTDRNWWW/Bao et al. - 2018 - Probabilistic natural mapping of gene-level tests .pdf;/Users/laurent/Zotero/storage/BPWUEHRJ/2997209.html}, + journal = {Briefings in Bioinformatics}, + language = {en}, + number = {4} +} + +@article{baoReMILOReferenceAssisted2018, + title = {{{ReMILO}}: Reference Assisted Misassembly Detection Algorithm Using Short and Long Reads}, + shorttitle = {{{ReMILO}}}, + author = {Bao, Ergude and Song, Changjin and Lan, Lingxiao}, + year = {2018}, + month = jan, + volume = {34}, + pages = {24--32}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx524}, + abstract = {Motivation: Contigs assembled from the second generation sequencing short reads may contain misassemblies, and thus complicate downstream analysis or even lead to incorrect analysis results. Fortunately, with more and more sequenced species available, it becomes possible to use the reference genome of a closely related species to detect misassemblies. In addition, long reads of the third generation sequencing technology have been more and more widely used, and can also help detect misassemblies. 
Results: Here, we introduce ReMILO, a reference assisted misassembly detection algorithm that uses both short reads and PacBio SMRT long reads. ReMILO aligns the initial short reads to both the contigs and reference genome, and then constructs a novel data structure called red-black multipositional de Bruijn graph to detect misassemblies. In addition, ReMILO also aligns the contigs to long reads and find their differences from the long reads to detect more misassemblies. In our performance test on short read assemblies of human chromosome 14 data, ReMILO can detect 41.8-77.9\% extensive misassemblies and 33.6-54.5\% local misassemblies. On hybrid short and long read assemblies of S. pastorianus data, ReMILO can also detect 60.6-70.9\% extensive misassemblies and 28.6-54.0\% local misassemblies.}, + file = {/Users/laurent/Documents/bibliography/to_read/Bao et al. - 2018 - ReMILO reference assisted misassembly detection a.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {1} +} + +@article{baraudLowerBoundsMultiple2018, + title = {About the Lower Bounds for the Multiple Testing Problem}, + author = {Baraud, Yannick}, + year = {2018}, + month = jul, + abstract = {Given an observed random variable X, consider the problem of recovering its distribution among a finite family of candidate ones. The two-point inequality, Fano's lemma and more recently an inequality due to Venkataramanan and Johnson (2018) allow to bound the maximal probability of error over the family from below. 
The aim of this paper is to give a very short and simple proof of all these results simultaneously and improve in passing the inequality of Venkataramanan and Johnson.}, + archivePrefix = {arXiv}, + eprint = {1807.05410}, + eprinttype = {arxiv}, + file = {/Users/laurent/Zotero/storage/XLK82CZY/Baraud - 2018 - About the lower bounds for the multiple testing pr.pdf}, + journal = {arXiv:1807.05410 [math, stat]}, + keywords = {Mathematics - Statistics Theory}, + language = {en}, + primaryClass = {math, stat} +} + +@article{barronSparseDifferentialClustering2018, + title = {A Sparse Differential Clustering Algorithm for Tracing Cell Type Changes via Single-Cell {{RNA}}-Sequencing Data}, + author = {Barron, Martin and Zhang, Siyuan and Li, Jun}, + year = {2018}, + month = feb, + volume = {46}, + pages = {e14}, + issn = {0305-1048, 1362-4962}, + doi = {10.1093/nar/gkx1113}, + abstract = {Cell types in cell populations change as the condition changes: some cell types die out, new cell types may emerge and surviving cell types evolve to adapt to the new condition. Using single-cell RNA-sequencing data that measure the gene expression of cells before and after the condition change, we propose an algorithm, SparseDC, which identifies cell types, traces their changes across conditions and identifies genes which are marker genes for these changes. By solving a unified optimization problem, SparseDC completes all three tasks simultaneously. SparseDC is highly computationally efficient and demonstrates its accuracy on both simulated and real data.}, + file = {/Users/laurent/Documents/bibliography/to_read/Barron et al. 
- 2018 - A sparse differential clustering algorithm for tra.pdf}, + journal = {Nucleic Acids Research}, + language = {en}, + number = {3} +} + +@article{baruzzoSimulationbasedComprehensiveBenchmarking2017, + title = {Simulation-Based Comprehensive Benchmarking of {{RNA}}-Seq Aligners}, + author = {Baruzzo, Giacomo and Hayer, Katharina E and Kim, Eun Ji and Di Camillo, Barbara and FitzGerald, Garret A and Grant, Gregory R}, + year = {2017}, + month = feb, + volume = {14}, + pages = {135--139}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/nmeth.4106}, + file = {/Users/laurent/Documents/bibliography/mapper/Baruzzo et al. - 2017 - Simulation-based comprehensive benchmarking of RNA.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {2} +} + +@article{baruzzoSPARSimSingleCell, + title = {{{SPARSim}} Single Cell: A Count Data Simulator for {{scRNA}}-Seq Data}, + shorttitle = {{{SPARSim}} Single Cell}, + author = {Baruzzo, Giacomo and Patuzzi, Ilaria and Di Camillo, Barbara}, + doi = {10.1093/bioinformatics/btz752}, + abstract = {Motivation: Single cell RNA-seq (scRNA-seq) count data show many differences compared with bulk RNA-seq count data, making the application of many RNA-}, + file = {/Users/laurent/Zotero/storage/LA446JDQ/Baruzzo et al. - SPARSim single cell a count data simulator for sc.pdf;/Users/laurent/Zotero/storage/B48ZZPVK/5584234.html}, + journal = {Bioinformatics}, + language = {en} +} + +@book{BasicLocalAlignment, + title = {Basic Local Alignment Search Tool - {{ScienceDirect}}} +} + +@book{BasicLocalAlignmentb, + title = {Basic Local Alignment Search Tool. 
- {{PubMed}} - {{NCBI}}} +} + +@incollection{basslerBioinformaticToolkitSingleCell2019, + title = {A {{Bioinformatic Toolkit}} for {{Single}}-{{Cell mRNA Analysis}}}, + booktitle = {Single {{Cell Methods}}}, + author = {Ba{\ss}ler, Kevin and G{\"u}nther, Patrick and {Schulte-Schrepping}, Jonas and Becker, Matthias and Biernat, Pawe{\l}}, + editor = {Proserpio, Valentina}, + year = {2019}, + volume = {1979}, + pages = {433--455}, + publisher = {{Springer New York}}, + address = {{New York, NY}}, + doi = {10.1007/978-1-4939-9240-9_26}, + abstract = {The recent technological developments in the field of single-cell RNA-Seq enable us to assay the transcriptome of up to a million single cells in parallel. However, the analyses of such big datasets present a major challenge. During the last decade, a wide variety of strategies have been proposed covering different steps of the analysis. Here, we introduce a selection of computational tools to provide an overview of a generic analysis pipeline.}, + file = {/Users/laurent/Zotero/storage/U9C26S5Q/Baßler et al. - 2019 - A Bioinformatic Toolkit for Single-Cell mRNA Analy.pdf;/Users/laurent/Zotero/storage/W26GD4X8/Baßler et al. - 2019 - A Bioinformatic Toolkit for Single-Cell mRNA Analy.pdf}, + isbn = {978-1-4939-9239-3 978-1-4939-9240-9}, + language = {en} +} + +@article{batesFittingLinearMixedEffects2015, + title = {Fitting {{Linear Mixed}}-{{Effects Models Using}} Lme4}, + author = {Bates, Douglas and M{\"a}chler, Martin and Bolker, Ben and Walker, Steve}, + year = {2015}, + month = oct, + volume = {67}, + pages = {1--48}, + issn = {1548-7660}, + doi = {10.18637/jss.v067.i01}, + copyright = {Copyright (c) 2015 Douglas Bates, Martin M{\"a}chler, Ben Bolker, Steve Walker}, + file = {/Users/laurent/Zotero/storage/XIB8E5MT/Bates et al. 
- 2015 - Fitting Linear Mixed-Effects Models Using lme4.pdf;/Users/laurent/Zotero/storage/YDK7DK9N/v067i01.html}, + journal = {Journal of Statistical Software}, + keywords = {Cholesky decomposition,linear mixed models,penalized least squares,sparse matrix methods}, + language = {en}, + number = {1} +} + +@article{batoolCharacterizationDevelopmentAverage2019, + title = {Characterization and {{Development}} of {{Average Silhouette Width Clustering}}}, + author = {Batool, Fatima and Hennig, Christian}, + year = {2019}, + month = oct, + abstract = {The purpose of this paper is to introduced a new clustering methodology. This paper is divided into three parts. In the first part we have developed the axiomatic theory for the average silhouette width (ASW) index. There are different ways to investigate the quality and characteristics of clustering methods such as validation indices using simulations and real data experiments, model-based theory, and non-model-based theory known as the axiomatic theory. In this work we have not only taken the empirical approach of validation of clustering results through simulations, but also focus on the development of the axiomatic theory. In the second part we have presented a novel clustering methodology based on the optimization of the ASW index. We have considered the problem of estimation of number of clusters and finding clustering against this number simultaneously. Two algorithms are proposed. The proposed algorithms are evaluated against several partitioning and hierarchical clustering methods. An intensive empirical comparison of the different distance metrics on the various clustering methods is conducted. 
In the third part we have considered two application domains\textemdash{}novel single cell RNA sequencing datasets and rainfall data to cluster weather stations.}, + archivePrefix = {arXiv}, + eprint = {1910.11339}, + eprinttype = {arxiv}, + file = {/Users/laurent/Zotero/storage/9IS667ME/Batool and Hennig - 2019 - Characterization and Development of Average Silhou.pdf}, + journal = {arXiv:1910.11339 [cs, stat]}, + keywords = {62H30,Computer Science - Machine Learning,I.5.3,Statistics - Machine Learning}, + language = {en}, + primaryClass = {cs, stat} +} + +@article{batoolCharacterizationDevelopmentAverage2019a, + title = {Characterization and {{Development}} of {{Average Silhouette Width Clustering}}}, + author = {Batool, Fatima and Hennig, Christian}, + year = {2019}, + month = oct, + abstract = {The purpose of this paper is to introduced a new clustering methodology. This paper is divided into three parts. In the first part we have developed the axiomatic theory for the average silhouette width (ASW) index. There are different ways to investigate the quality and characteristics of clustering methods such as validation indices using simulations and real data experiments, model-based theory, and non-model-based theory known as the axiomatic theory. In this work we have not only taken the empirical approach of validation of clustering results through simulations, but also focus on the development of the axiomatic theory. In the second part we have presented a novel clustering methodology based on the optimization of the ASW index. We have considered the problem of estimation of number of clusters and finding clustering against this number simultaneously. Two algorithms are proposed. The proposed algorithms are evaluated against several partitioning and hierarchical clustering methods. An intensive empirical comparison of the different distance metrics on the various clustering methods is conducted. 
In the third part we have considered two application domains\textemdash{}novel single cell RNA sequencing datasets and rainfall data to cluster weather stations.}, + archivePrefix = {arXiv}, + eprint = {1910.11339}, + eprinttype = {arxiv}, + file = {/Users/laurent/Zotero/storage/ZJKFT4HN/Batool and Hennig - 2019 - Characterization and Development of Average Silhou.pdf}, + journal = {arXiv:1910.11339 [cs, stat]}, + keywords = {62H30,Computer Science - Machine Learning,I.5.3,Statistics - Machine Learning}, + language = {en}, + primaryClass = {cs, stat} +} + +@article{bayatImprovedVCFNormalization2016, + title = {Improved {{VCF}} Normalization for Accurate {{VCF}} Comparison}, + author = {Bayat, Arash and Ga{\"e}ta, Bruno and Ignjatovic, Aleksandar and Parameswaran, Sri}, + year = {2016}, + month = dec, + pages = {btw748}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btw748}, + abstract = {Motivation: The Variant Call Format (VCF) is widely used to store data about genetic variation. Variant calling workflows detect potential variants in large numbers of short sequence reads generated by DNA sequencing and report them in VCF format. To evaluate the accuracy of variant callers, it is critical to correctly compare their output against a reference VCF file containing a gold standard set of variants. However, comparing VCF files is a complicated task as an individual genomic variant can be represented in several different ways and is therefore not necessarily reported in a unique way by different software.}, + file = {/Users/laurent/Documents/bibliography/to_read/Bayat et al. 
- 2016 - Improved VCF normalization for accurate VCF compar.pdf}, + journal = {Bioinformatics}, + language = {en} +} + +@article{bayatVariancestabilizedUnitsSequencingbased2020, + title = {Variance-Stabilized Units for Sequencing-Based Genomic Signals}, + author = {Bayat, Faezeh and Libbrecht, Maxwell}, + year = {2020}, + month = feb, + pages = {2020.01.31.929174}, + doi = {10.1101/2020.01.31.929174}, + abstract = {Sequencing-based genomic signals such as ChIP-seq are widely used to measure many types of genomic biochemical activity, such transcription factor binding, chromatin accessibility and histone modification. The processing pipeline for these assays usually outputs a real-valued signal for every position in the genome that measures the strength of activity at that position. This signal is used in downstream applications such as visualization and chromatin state annotation. There are several representations of signal strength at a given that are currently used, including the raw read count, the fold enrichment over control, and log p-value of enrichment relative to control. However, these representations lack the property of variance stabilization. That is, a difference between 100 and 200 reads usually has a very different statistical importance from a difference between 1,100 and 1,200 reads. Here, we propose VSS, variance-stabilized signals for sequencing-based genomic signals. We generate VSS by learning the empirical relationship between the mean and variance of a given signal data set and producing transformed signals that normalize for this dependence. We demonstrate that these variance stabilized units have several desirable properties, including that differences in ChIP-seq signal across cell types indicate a difference in that gene's expression. 
VSS units will eliminate the need for downstream methods to implement complex mean-variance relationship models, and will enable genomic signals to be easily understood by eye.}, + copyright = {\textcopyright{} 2020, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution-NonCommercial 4.0 International), CC BY-NC 4.0, as described at http://creativecommons.org/licenses/by-nc/4.0/}, + file = {/Users/laurent/Zotero/storage/RAH8TBYU/Bayat and Libbrecht - 2020 - Variance-stabilized units for sequencing-based gen.pdf;/Users/laurent/Zotero/storage/QA3LF8V3/2020.01.31.html}, + journal = {bioRxiv}, + language = {en} +} + +@article{beccutiSeqBoxRNAseqChIPseq2018, + title = {{{SeqBox}}: {{RNAseq}}/{{ChIPseq}} Reproducible Analysis on a Consumer Game Computer}, + shorttitle = {{{SeqBox}}}, + author = {Beccuti, Marco and Cordero, Francesca and Arigoni, Maddalena and Panero, Riccardo and Amparore, Elvio G and Donatelli, Susanna and Calogero, Raffaele A}, + editor = {Birol, Inanc}, + year = {2018}, + month = mar, + volume = {34}, + pages = {871--872}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx674}, + abstract = {Summary: Short reads sequencing technology has been used for more than a decade now. However, the analysis of RNAseq and ChIPseq data is still computational demanding and the simple access to raw data does not guarantee results reproducibility between laboratories. To address these two aspects, we developed SeqBox, a cheap, efficient and reproducible RNAseq/ChIPseq hardware/software solution based on NUC6I7KYK mini-PC (an Intel consumer game computer with a fast processor and a high performance SSD disk), and Docker container platform. In SeqBox the analysis of RNAseq and ChIPseq data is supported by a friendly GUI. 
This allows access to fast and reproducible analysis also to scientists with/without scripting experience.}, + file = {/Users/laurent/Documents/bibliography/to_read/Beccuti et al. - 2018 - SeqBox RNAseqChIPseq reproducible analysis on a .pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {5} +} + +@article{benazziAsexualSexualReproduction1994, + title = {Asexual and Sexual Reproduction: {{This}} Basic Distinction Seems to Have Lost Any Belief}, + shorttitle = {Asexual and Sexual Reproduction}, + author = {Benazzi, Mario and Lentati, Giuseppina Benazzi}, + year = {1994}, + month = jan, + volume = {61}, + pages = {147--148}, + issn = {0373-4137}, + doi = {10.1080/11250009409355874}, + abstract = {Parthenogenesis is a reproductive mechanism derived from sexual reproduction, that biologists during the first decades of our century, considered as distinct from asexual reproduction, namely, the vegetative multiplication. Pseudogamy is genetically similar to parthenogenesis. However, recent authors have adopted a different point of view, considering as asexual the mechanisms involved in parthenogenesis and in pseudogamy. Events evidenced in pseudogamic planarians have shown that the latter phenomenon possesses the basic features of sexuality.}, + journal = {Bolletino di zoologia}, + keywords = {Parthenogenesis,Pseudogamy,Sexual mechanisms}, + number = {2} +} + +@article{bendallTelescopeCharacterizationRetrotranscriptome2018, + title = {Telescope: {{Characterization}} of the Retrotranscriptome by Accurate Estimation of Transposable Element Expression}, + shorttitle = {Telescope}, + author = {Bendall, Matthew L. and de Mulder, Miguel and I{\~n}iguez, Luis Pedro and {Lecanda-S{\'a}nchez}, Aar{\'o}n and {P{\'e}rez-Losada}, Marcos and Ostrowski, Mario A. and Jones, Richard B. and Mulder, Lubbertus and {Reyes-Ter{\'a}n}, Gustavo and Crandall, Keith A. and Ormsby, Christopher E. 
and Nixon, Douglas F.}, + year = {2018}, + month = aug, + pages = {398172}, + doi = {10.1101/398172}, + abstract = {Characterization of Human Endogenous Retrovirus (HERV) expression within the transcriptomic landscape using RNA-seq is complicated by uncertainty in fragment assignment because of sequence similarity. We present Telescope, a computational software tool that provides accurate estimation of transposable element expression (retrotranscriptome) resolved to specific genomic locations. Telescope directly addresses uncertainty in fragment assignment by reassigning ambiguously mapped fragments to the most probable source transcript as determined within a Bayesian statistical model. We demonstrate the utility of our approach through single locus analysis of HERV expression in 13 ENCODE cell types. When examined at this resolution, we find that the magnitude and breadth of the retrotranscriptome can be vastly different among cell types. Furthermore, our approach is robust to differences in sequencing technology, and demonstrates that the retrotranscriptome has potential to be used for cell type identification. Telescope performs highly accurate quantification of the retrotranscriptomic landscape in RNA-seq experiments, revealing a differential complexity in the transposable element biology of complex systems not previously observed. Telescope is available at github.com/mlbendall/telescope.}, + copyright = {\textcopyright{} 2018, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/}, + file = {/Users/laurent/Zotero/storage/I93KT6NI/Bendall et al. 
- 2018 - Telescope Characterization of the retrotranscript.pdf;/Users/laurent/Zotero/storage/J4QEWGVL/398172.html}, + journal = {bioRxiv}, + language = {en} +} + +@article{bendallTelescopeCharacterizationRetrotranscriptome2019, + title = {Telescope: {{Characterization}} of the Retrotranscriptome by Accurate Estimation of Transposable Element Expression}, + shorttitle = {Telescope}, + author = {Bendall, Matthew L. and de Mulder, Miguel and I{\~n}iguez, Luis Pedro and {Lecanda-S{\'a}nchez}, Aar{\'o}n and {P{\'e}rez-Losada}, Marcos and Ostrowski, Mario A. and Jones, R. Brad and Mulder, Lubbertus C. F. and {Reyes-Ter{\'a}n}, Gustavo and Crandall, Keith A. and Ormsby, Christopher E. and Nixon, Douglas F.}, + year = {2019}, + month = sep, + volume = {15}, + pages = {e1006453}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1006453}, + abstract = {Characterization of Human Endogenous Retrovirus (HERV) expression within the transcriptomic landscape using RNA-seq is complicated by uncertainty in fragment assignment because of sequence similarity. We present Telescope, a computational software tool that provides accurate estimation of transposable element expression (retrotranscriptome) resolved to specific genomic locations. Telescope directly addresses uncertainty in fragment assignment by reassigning ambiguously mapped fragments to the most probable source transcript as determined within a Bayesian statistical model. We demonstrate the utility of our approach through single locus analysis of HERV expression in 13 ENCODE cell types. When examined at this resolution, we find that the magnitude and breadth of the retrotranscriptome can be vastly different among cell types. Furthermore, our approach is robust to differences in sequencing technology and demonstrates that the retrotranscriptome has potential to be used for cell type identification. 
We compared our tool with other approaches for quantifying transposable element (TE) expression, and found that Telescope has the greatest resolution, as it estimates expression at specific TE insertions rather than at the TE subfamily level. Telescope performs highly accurate quantification of the retrotranscriptomic landscape in RNA-seq experiments, revealing a differential complexity in the transposable element biology of complex systems not previously observed. Telescope is available at https://github.com/mlbendall/telescope.}, + file = {/Users/laurent/Zotero/storage/7YP2T9HQ/Bendall et al. - 2019 - Telescope Characterization of the retrotranscript.pdf;/Users/laurent/Zotero/storage/33ZY4W94/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Genetic loci,Genome analysis,RNA sequencing,Sequence alignment,Software tools,Telescopes,Transcriptome analysis,Transposable elements}, + language = {en}, + number = {9} +} + +@article{benjaminiControllingFalseDiscovery1995, + title = {Controlling the False Discovery Rate: A Practical and Powerful Approach to Multiple Testing}, + shorttitle = {Controlling the False Discovery Rate}, + author = {Benjamini, Yoav and Hochberg, Yosef}, + year = {1995}, + volume = {57}, + pages = {289--300}, + issn = {0035-9246}, + file = {/Users/laurent/Zotero/storage/Q2DKLXG5/mathscinet-getitem.html}, + journal = {Journal of the Royal Statistical Society. Series B. 
Methodological}, + mrnumber = {1325392}, + number = {1} +} + +@inproceedings{bennerModHMMModularSupraBayesian2019, + title = {{{ModHMM}}: {{A Modular Supra}}-{{Bayesian Genome Segmentation Method}}}, + shorttitle = {{{ModHMM}}}, + booktitle = {Research in {{Computational Molecular Biology}}}, + author = {Benner, Philipp and Vingron, Martin}, + editor = {Cowen, Lenore J.}, + year = {2019}, + pages = {35--50}, + publisher = {{Springer International Publishing}}, + address = {{Cham}}, + doi = {10.1007/978-3-030-17083-7_3}, + abstract = {Genome segmentation methods are powerful tools to obtain cell type or tissue specific genome-wide annotations and are frequently used to discover regulatory elements. However, traditional segmentation methods show low predictive accuracy and their data-driven annotations have some undesirable properties. As an alternative, we developed ModHMM, a highly modular genome segmentation method. Inspired by the supra-Bayesian approach, it incorporates predictions from a set of classifiers. This allows to compute genome segmentations by utilizing state-of-the-art methodology. We demonstrate the method on ENCODE data and show that it outperforms traditional segmentation methods not only in terms of predictive performance, but also in qualitative aspects. Therefore, ModHMM is a valuable alternative to study the epigenetic and regulatory landscape across and within cell types or tissues. The software is freely available at https://github.com/pbenner/modhmm.}, + isbn = {978-3-030-17083-7}, + language = {en}, + series = {Lecture {{Notes}} in {{Computer Science}}} +} + +@article{bensonGenBank2013, + title = {{{GenBank}}}, + author = {Benson, Dennis A. and Cavanaugh, Mark and Clark, Karen and {Karsch-Mizrachi}, Ilene and Lipman, David J. 
and Ostell, James and Sayers, Eric W.}, + year = {2013}, + month = jan, + volume = {41}, + pages = {D36--42}, + issn = {1362-4962}, + doi = {10.1093/nar/gks1195}, + abstract = {GenBank\textregistered{} (http://www.ncbi.nlm.nih.gov) is a comprehensive database that contains publicly available nucleotide sequences for almost 260 000 formally described species. These sequences are obtained primarily through submissions from individual laboratories and batch submissions from large-scale sequencing projects, including whole-genome shotgun (WGS) and environmental sampling projects. Most submissions are made using the web-based BankIt or standalone Sequin programs, and GenBank staff assigns accession numbers upon data receipt. Daily data exchange with the European Nucleotide Archive (ENA) and the DNA Data Bank of Japan (DDBJ) ensures worldwide coverage. GenBank is accessible through the NCBI Entrez retrieval system, which integrates data from the major DNA and protein sequence databases along with taxonomy, genome, mapping, protein structure and domain information, and the biomedical journal literature via PubMed. BLAST provides sequence similarity searches of GenBank and other sequence databases. Complete bimonthly releases and daily updates of the GenBank database are available by FTP. 
To access GenBank and its related retrieval and analysis services, begin at the NCBI home page: www.ncbi.nlm.nih.gov.}, + journal = {Nucleic Acids Research}, + keywords = {Base Sequence,Databases,DNA,Genomics,High-Throughput Nucleotide Sequencing,Internet,Molecular Sequence Annotation,Nucleic Acid,Sequence Analysis}, + language = {eng}, + number = {Database issue}, + pmcid = {PMC3531190}, + pmid = {23193287} +} + +@article{bergmanDoesAdaptiveProtein2018, + title = {Does {{Adaptive Protein Evolution Proceed}} by {{Large}} or {{Small Steps}} at the {{Amino Acid Level}}?}, + author = {Bergman, Juraj and {Eyre-Walker}, Adam}, + year = {2018}, + month = jul, + pages = {379073}, + doi = {10.1101/379073}, + abstract = {A long-standing question in evolutionary biology is the relative contribution of large and small effect mutations to the adaptive process. We have investigated this question in proteins by estimating the rate of adaptive evolution between all pairs of amino acids separated by one mutational step using a McDonald-Kreitman type approach and genome-wide data from several Drosophila species. We find that the rate of adaptive evolution is higher amongst amino acids that are more similar. This is partly due to the fact that the proportion of mutations that are adaptive is higher amongst more similar amino acids. We also find that the rate of neutral evolution between amino acids is higher amongst similar amino acids. Overall our results suggest that both the adaptive and non-adaptive evolution of proteins is dominated by substitutions between amino acids that are more similar.}, + copyright = {\textcopyright{} 2018, Posted by Cold Spring Harbor Laboratory. The copyright holder for this pre-print is the author. All rights reserved. 
The material may not be redistributed, re-used or adapted without the author's permission.}, + file = {/Users/laurent/Zotero/storage/QE8EQ458/Bergman and Eyre-Walker - 2018 - Does Adaptive Protein Evolution Proceed by Large o.pdf;/Users/laurent/Zotero/storage/2ZRDW63B/379073.html}, + journal = {bioRxiv}, + language = {en} +} + +@article{bergXPRESSyourselfEnhancingStandardizing2020, + title = {{{XPRESSyourself}}: {{Enhancing}}, Standardizing, and Automating Ribosome Profiling Computational Analyses Yields Improved Insight into Data}, + shorttitle = {{{XPRESSyourself}}}, + author = {Berg, Jordan A. and Belyeu, Jonathan R. and Morgan, Jeffrey T. and Ouyang, Yeyun and Bott, Alex J. and Quinlan, Aaron R. and Gertz, Jason and Rutter, Jared}, + year = {2020}, + month = jan, + volume = {16}, + pages = {e1007625}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1007625}, + abstract = {Ribosome profiling, an application of nucleic acid sequencing for monitoring ribosome activity, has revolutionized our understanding of protein translation dynamics. This technique has been available for a decade, yet the current state and standardization of publicly available computational tools for these data is bleak. We introduce XPRESSyourself, an analytical toolkit that eliminates barriers and bottlenecks associated with this specialized data type by filling gaps in the computational toolset for both experts and non-experts of ribosome profiling. XPRESSyourself automates and standardizes analysis procedures, decreasing time-to-discovery and increasing reproducibility. This toolkit acts as a reference implementation of current best practices in ribosome profiling analysis. We demonstrate this toolkit's performance on publicly available ribosome profiling data by rapidly identifying hypothetical mechanisms related to neurodegenerative phenotypes and neuroprotective mechanisms of the small-molecule ISRIB during acute cellular stress. 
XPRESSyourself brings robust, rapid analysis of ribosome-profiling data to a broad and ever-expanding audience and will lead to more reproducible and accessible measurements of translation regulation. XPRESSyourself software is perpetually open-source under the GPL-3.0 license and is hosted at https://github.com/XPRESSyourself, where users can access additional documentation and report software issues.}, + file = {/Users/laurent/Zotero/storage/GPI4WC73/Berg et al. - 2020 - XPRESSyourself Enhancing, standardizing, and auto.pdf;/Users/laurent/Zotero/storage/E6LFFUS9/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Data processing,Genetic footprinting,Protein translation,Ribosomal RNA,Ribosomes,RNA sequencing,Sequence alignment,Software tools}, + language = {en}, + number = {1} +} + +@article{biddySinglecellMappingLineage2018, + title = {Single-Cell Mapping of Lineage and Identity in Direct Reprogramming}, + author = {Biddy, Brent A. and Kong, Wenjun and Kamimoto, Kenji and Guo, Chuner and Waye, Sarah E. and Sun, Tao and Morris, Samantha A.}, + year = {2018}, + month = dec, + pages = {1}, + issn = {1476-4687}, + doi = {10.1038/s41586-018-0744-4}, + abstract = {Combinatorial tagging of single cells using expressed DNA barcodes, delivered by a lentiviral vector, is used to track individual cells and reconstruct their lineages and trajectories during cell fate reprogramming.}, + copyright = {2018 Springer Nature Limited}, + file = {/Users/laurent/Zotero/storage/LWATEL8V/Biddy et al. - 2018 - Single-cell mapping of lineage and identity in dir.pdf;/Users/laurent/Zotero/storage/7QAGBCK3/s41586-018-0744-4.html}, + journal = {Nature}, + language = {En} +} + +@article{bilodeauDependenceStatisticsMutual2009, + title = {{{A}}-Dependence Statistics for Mutual and Serial Independence of Categorical Variables}, + author = {Bilodeau, M. 
and {Lafaye de Micheaux}, P.}, + year = {2009}, + month = jul, + volume = {139}, + pages = {2407--2419}, + issn = {03783758}, + doi = {10.1016/j.jspi.2008.11.006}, + abstract = {The M{\"o}bius transformation of probability cells in a multi-way contingency table is used to partition the Pearson chi-square test of mutual independence into A-dependence statistics. A similar partition is proposed for a universal and consistent test of serial independence in a stationary sequence of a categorical variable. The partition proposed can be adapted whether using estimated or theoretical marginal probabilities. With the aim of detecting a dependence of high order in a long sequence, A-dependence terms of the partition measuring increasing lagged dependences can be combined in a Box\textendash{}Pierce type test of serial independence. A real data analysis of a nucleotides sequence using the Box\textendash{}Pierce type test is provided.}, + file = {/Users/laurent/Documents/bibliography/stats/Bilodeau and Lafaye de Micheaux - 2009 - -dependence statistics for mutual and serial indep.pdf}, + journal = {Journal of Statistical Planning and Inference}, + language = {en}, + number = {7} +} + +@article{boivinReducingStructureBias, + title = {Reducing the Structure Bias of {{RNA}}-{{Seq}} Reveals a Large Number of Non-Annotated Non-Coding {{RNA}}}, + author = {Boivin, Vincent and Reulet, Gaspard and Boisvert, Olivier and Couture, Sonia and Elela, Sherif Abou and Scott, Michelle S.}, + doi = {10.1093/nar/gkaa028}, + abstract = {Abstract. The study of RNA expression is the fastest growing area of genomic research. However, despite the dramatic increase in the number of sequenced transc}, + file = {/Users/laurent/Zotero/storage/YRMXH746/Boivin et al. 
- Reducing the structure bias of RNA-Seq reveals a l.pdf;/Users/laurent/Zotero/storage/FL58MY2T/5715806.html}, + journal = {Nucleic Acids Research}, + language = {en} +} + +@article{bolandTenSimpleRules2017, + title = {Ten {{Simple Rules}} to {{Enable Multi}}-Site {{Collaborations}} through {{Data Sharing}}}, + author = {Boland, Mary Regina and Karczewski, Konrad J. and Tatonetti, Nicholas P.}, + year = {2017}, + month = jan, + volume = {13}, + pages = {e1005278}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005278}, + file = {/Users/laurent/Documents/bibliography/bioinfo/Boland et al. - 2017 - Ten Simple Rules to Enable Multi-site Collaboratio.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {1} +} + +@book{bolkerR2admbADMBInterface2017, + title = {R2admb: '{{ADMB}}' to {{R Interface Functions}}}, + author = {Bolker, Ben and Skaug, Hans and Laake, Jeff}, + year = {2017} +} + +@book{bolstadPreprocessCoreCollectionPreprocessing2018, + title = {{{preprocessCore}}: {{A}} Collection of Pre-Processing Functions}, + author = {Bolstad, Ben}, + year = {2018} +} + +@article{bottiniRecentComputationalDevelopments2018, + title = {Recent Computational Developments on {{CLIP}}-Seq Data Analysis and {{microRNA}} Targeting Implications}, + author = {Bottini, Silvia and Pratella, David and Grandjean, Valerie and Repetto, Emanuela and Trabucchi, Michele}, + year = {2018}, + month = nov, + volume = {19}, + pages = {1290--1301}, + issn = {1467-5463}, + doi = {10.1093/bib/bbx063}, + abstract = {Abstract. Cross-Linking Immunoprecipitation associated to high-throughput sequencing (CLIP-seq) is a technique used to identify RNA directly bound to RNA-bindi}, + file = {/Users/laurent/Zotero/storage/X9BVRG7G/Bottini et al. 
- 2018 - Recent computational developments on CLIP-seq data.pdf;/Users/laurent/Zotero/storage/H2ERP8B4/3866720.html}, + journal = {Briefings in Bioinformatics}, + language = {en}, + number = {6} +} + +@article{bourneTenSimpleRules2017, + title = {Ten Simple Rules to Consider Regarding Preprint Submission}, + author = {Bourne, Philip E. and Polka, Jessica K. and Vale, Ronald D. and Kiley, Robert}, + year = {2017}, + month = may, + volume = {13}, + pages = {e1005473}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005473}, + file = {/Users/laurent/Documents/bibliography/bioinfo/Bourne et al. - 2017 - Ten simple rules to consider regarding preprint su.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {5} +} + +@book{BRAKER1UnsupervisedRNASeqBased, + title = {{{BRAKER1}}: {{Unsupervised RNA}}-{{Seq}}-{{Based Genome Annotation}} with {{GeneMark}}-{{ET}} and {{AUGUSTUS}}} +} + +@book{BRAKER1UnsupervisedRNASeqBaseda, + title = {{{BRAKER1}}: {{Unsupervised RNA}}-{{Seq}}-{{Based Genome Annotation}} with {{GeneMark}}-{{ET}} and {{AUGUSTUS}}. - {{PubMed}} - {{NCBI}}} +} + +@article{brenneckeAccountingTechnicalNoise2013, + title = {Accounting for Technical Noise in Single-Cell {{RNA}}-Seq Experiments}, + author = {Brennecke, Philip and Anders, Simon and Kim, Jong Kyoung and Ko{\l}odziejczyk, Aleksandra A and Zhang, Xiuwei and Proserpio, Valentina and Baying, Bianka and Benes, Vladimir and Teichmann, Sarah A and Marioni, John C and Heisler, Marcus G}, + year = {2013}, + month = nov, + volume = {10}, + pages = {1093--1095}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/nmeth.2645}, + file = {/Users/laurent/Zotero/storage/PQ7GJDLF/Brennecke et al. 
- 2013 - Accounting for technical noise in single-cell RNA-.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {11} +} + +@article{brownTenQuickTips2018, + title = {Ten Quick Tips for Teaching Programming}, + author = {Brown, Neil C C and Wilson, Greg}, + year = {2018}, + pages = {8}, + file = {/Users/laurent/Documents/bibliography/to_read/Brown and Wilson - 2018 - Ten quick tips for teaching programming.pdf}, + journal = {PLOS Computational Biology}, + language = {en} +} + +@article{buettnerComputationalAnalysisCelltocell2015, + title = {Computational Analysis of Cell-to-Cell Heterogeneity in Single-Cell {{RNA}}-Sequencing Data Reveals Hidden Subpopulations of Cells}, + author = {Buettner, Florian and Natarajan, Kedar N and Casale, F Paolo and Proserpio, Valentina and Scialdone, Antonio and Theis, Fabian J and Teichmann, Sarah A and Marioni, John C and Stegle, Oliver}, + year = {2015}, + month = feb, + volume = {33}, + pages = {155--160}, + issn = {1087-0156, 1546-1696}, + doi = {10.1038/nbt.3102}, + file = {/Users/laurent/Zotero/storage/9XZLFL8K/Buettner et al. - 2015 - Computational analysis of cell-to-cell heterogenei.pdf;/Users/laurent/Zotero/storage/MJF2LXLN/Buettner et al. - 2015 - Computational analysis of cell-to-cell heterogenei.pdf}, + journal = {Nature Biotechnology}, + language = {en}, + number = {2} +} + +@article{buettnerFscLVMScalableVersatile2017, + title = {F-{{scLVM}}: Scalable and Versatile Factor Analysis for Single-Cell {{RNA}}-Seq}, + shorttitle = {F-{{scLVM}}}, + author = {Buettner, Florian and Pratanwanich, Naruemon and McCarthy, Davis J. and Marioni, John C. and Stegle, Oliver}, + year = {2017}, + month = dec, + volume = {18}, + issn = {1474-760X}, + doi = {10.1186/s13059-017-1334-8}, + abstract = {Single-cell RNA-sequencing (scRNA-seq) allows studying heterogeneity in gene expression in large cell populations. 
Such heterogeneity can arise due to technical or biological factors, making decomposing sources of variation difficult. We here describe f-scLVM (factorial single-cell latent variable model), a method based on factor analysis that uses pathway annotations to guide the inference of interpretable factors underpinning the heterogeneity. Our model jointly estimates the relevance of individual factors, refines gene set annotations, and infers factors without annotation. In applications to multiple scRNA-seq datasets, we find that f-scLVM robustly decomposes scRNA-seq datasets into interpretable components, thereby facilitating the identification of novel subpopulations.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Buettner et al. - 2017 - f-scLVM scalable and versatile factor analysis fo.pdf;/Users/laurent/Zotero/storage/5WD6DNM3/Buettner et al. - 2017 - f-scLVM scalable and versatile factor analysis fo.pdf;/Users/laurent/Zotero/storage/B6MJIBB2/Buettner et al. - 2017 - f-scLVM scalable and versatile factor analysis fo.pdf;/Users/laurent/Zotero/storage/HB78X2I4/Buettner et al. - 2017 - f-scLVM scalable and versatile factor analysis fo.pdf}, + journal = {Genome Biology}, + language = {en}, + number = {1} +} + +@article{burgessFullSpeedAhead2018, + title = {Full Speed Ahead for Single-Cell Analysis}, + author = {Burgess, Darren J.}, + year = {2018}, + month = nov, + volume = {19}, + pages = {668}, + issn = {1471-0064}, + doi = {10.1038/s41576-018-0049-3}, + abstract = {A study in Nature describes RNA velocity, which is a computational method to derive dynamic gene expression information from static single-cell RNA sequencing data. 
It provides valuable insights into developmental trajectories of cells.}, + copyright = {2018 Springer Nature Limited}, + file = {/Users/laurent/Zotero/storage/2ZQK3A3C/Burgess - 2018 - Full speed ahead for single-cell analysis.pdf;/Users/laurent/Zotero/storage/SEXXBKKE/Burgess - 2018 - Full speed ahead for single-cell analysis.pdf;/Users/laurent/Zotero/storage/9IGZCI2F/s41576-018-0049-3.html;/Users/laurent/Zotero/storage/IHV56WLW/s41576-018-0049-3.html}, + journal = {Nature Reviews Genetics}, + language = {En}, + number = {11} +} + +@book{BUSCOApplicationsQuality, + title = {{{BUSCO Applications}} from {{Quality Assessments}} to {{Gene Prediction}} and {{Phylogenomics}} \textbar{} {{Molecular Biology}} and {{Evolution}} \textbar{} {{Oxford Academic}}} +} + +@book{BUSCOAssessingGenome, + title = {{{BUSCO}}: Assessing Genome Assembly and Annotation Completeness with Single-Copy Orthologs \textbar{} {{Bioinformatics}} \textbar{} {{Oxford Academic}}} +} + +@article{butkiewiczIntroducingCOCOSCodon2017, + title = {Introducing {{COCOS}}: Codon Consequence Scanner for Annotating Reading Frame Changes Induced by Stop-Lost and Frame Shift Variants}, + shorttitle = {Introducing {{COCOS}}}, + author = {Butkiewicz, Mariusz and Haines, Jonathan L. and Bush, William S.}, + year = {2017}, + month = jan, + pages = {btw820}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btw820}, + abstract = {Summary: Reading frame altering genomic variants can impact gene expression levels and the structure of protein products, thus potentially inducing disease phenotypes. Current annotation approaches report the impact of such variants in the context of altered DNA sequence only; attributes of the resulting transcript, reading frame and translated protein product are not reported. To remedy this shortcoming, we present a new genetic annotation approach termed Codon Consequence Scanner (COCOS). 
Implemented as an Ensembl variant effect predictor (VEP) plugin, COCOS captures amino acid sequence alterations stemming from variants that produce an altered reading frame, such as stop-lost variants and small insertions and deletions (InDels). To highlight its significance, COCOS was applied to data from the 1000 Genomes Project. Transcripts affected by stop-lost variants introduce a median of 15 amino acids, while InDels have a more extensive impact with a median of 66 amino acids being incorporated. Captured sequence alterations are written out in FASTA format and can be further analyzed for impact on the underlying protein structure.}, + file = {/Users/laurent/Documents/bibliography/to_read/Butkiewicz et al. - 2017 - Introducing COCOS codon consequence scanner for a.pdf}, + journal = {Bioinformatics}, + language = {en} +} + +@article{butlerIntegratedAnalysisSingle2017, + title = {Integrated Analysis of Single Cell Transcriptomic Data across Conditions, Technologies, and Species}, + author = {Butler, Andrew and Satija, Rahul}, + year = {2017}, + month = jul, + doi = {10.1101/164889}, + abstract = {Single cell RNA-seq (scRNA-seq) has emerged as a transformative tool to discover and define cellular phenotypes. While computational scRNA-seq methods are currently well suited for experiments representing a single condition, technology, or species, analyzing multiple datasets simultaneously raises new challenges. In particular, traditional analytical workflows struggle to align subpopulations that are present across datasets, limiting the possibility for integrated or comparative analysis. Here, we introduce a new computational strategy for scRNA-seq alignment, utilizing common sources of variation to identify shared subpopulations between datasets as part of our R toolkit Seurat. 
We demonstrate our approach by aligning scRNA-seq datasets of PBMCs under resting and stimulated conditions, hematopoietic progenitors sequenced across two profiling technologies, and pancreatic cell 'atlases' generated from human and mouse islets. In each case, we learn distinct or transitional cell states jointly across datasets, and can identify subpopulations that could not be detected by analyzing datasets independently. We anticipate that these methods will serve not only to correct for batch or technology-dependent effects, but also to facilitate general comparisons of scRNA-seq datasets, potentially deepening our understanding of how distinct cell states respond to perturbation, disease, and evolution.}, + file = {/Users/laurent/Zotero/storage/35ABSJVV/Butler and Satija - 2017 - Integrated analysis of single cell transcriptomic .pdf;/Users/laurent/Zotero/storage/IJU43CGF/Butler and Satija - 2017 - Integrated analysis of single cell transcriptomic .pdf;/Users/laurent/Zotero/storage/JZESSN3V/Butler and Satija - 2017 - Integrated analysis of single cell transcriptomic .pdf;/Users/laurent/Zotero/storage/RGZ7RL2U/Butler and Satija - 2017 - Integrated analysis of single cell transcriptomic .pdf}, + language = {en} +} + +@article{butlerIntegratingSinglecellTranscriptomic2018, + title = {Integrating Single-Cell Transcriptomic Data across Different Conditions, Technologies, and Species}, + author = {Butler, Andrew and Hoffman, Paul and Smibert, Peter and Papalexi, Efthymia and Satija, Rahul}, + year = {2018}, + month = apr, + issn = {1087-0156, 1546-1696}, + doi = {10.1038/nbt.4096}, + file = {/Users/laurent/Documents/bibliography/to_read/Butler et al.
- 2018 - Integrating single-cell transcriptomic data across.pdf}, + journal = {Nature Biotechnology}, + language = {en} +} + +@article{buttnerAssessmentBatchcorrectionMethods2017, + title = {Assessment of Batch-Correction Methods for {{scRNA}}-Seq Data with a New Test Metric}, + author = {B{\"u}ttner, Maren and Miao, Zhichao and Wolf, Alexander and Teichmann, Sarah A and Theis, Fabian J}, + year = {2017}, + month = oct, + doi = {10.1101/200345}, + abstract = {Single-cell transcriptomics is a versatile tool for exploring heterogeneous cell populations. As with all genomics experiments, batch effects can hamper data integration and interpretation. The success of batch effect correction is often evaluated by visual inspection of dimension-reduced representations such as principal component analysis. This is inherently imprecise due to the high number of genes and non-normal distribution of gene expression. Here, we present a k-nearest neighbour batch effect test (kBET, https://github.com/theislab/kBET) to quantitatively measure batch effects. kBET is easier to interpret, more sensitive and more robust than visual evaluation and other measures of batch effects. We use kBET to assess commonly used batch regression and normalisation approaches, and quantify the extent to which they remove batch effects while preserving biological variability. Our results illustrate that batch correction based on log-transformation or scran pooling followed by ComBat reduced the batch effect while preserving structure across data sets. Finally we show that kBET can pinpoint successful data integration methods across multiple data sets, in this case from different publications all charting mouse embryonic development.
This has important implications for future data integration efforts, which will be central to projects such as the Human Cell Atlas where data for the same tissue may be generated in multiple locations around the world.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Buttner et al. - 2017 - Assessment of batch-correction methods for scRNA-s.pdf;/Users/laurent/Zotero/storage/856R7GVW/Buttner et al. - 2017 - Assessment of batch-correction methods for scRNA-s.pdf;/Users/laurent/Zotero/storage/GNMXXNEI/Buttner et al. - 2017 - Assessment of batch-correction methods for scRNA-s.pdf;/Users/laurent/Zotero/storage/LV6PMYS9/Buttner et al. - 2017 - Assessment of batch-correction methods for scRNA-s.pdf}, + language = {en} +} + +@article{calarcoGenomeWideIdentification2018, + title = {Genome {{Wide Identification}} of {{Mutational Hotspots}} in the {{Apicomplexan Parasite Neospora}} Caninum and the {{Implications}} for {{Virulence}}}, + author = {Calarco, Larissa and Barratt, Joel and Ellis, John}, + year = {2018}, + month = sep, + volume = {10}, + pages = {2417--2431}, + doi = {10.1093/gbe/evy188}, + abstract = {Abstract. Neospora caninum is an apicomplexan parasite responsible for neosporosis, a disease causing hind limb paralysis in dogs and abortion in cattle, resul}, + file = {/Users/laurent/Zotero/storage/D3RAWJZ2/Calarco et al. - 2018 - Genome Wide Identification of Mutational Hotspots .pdf;/Users/laurent/Zotero/storage/ELPZS7P9/Calarco et al. 
- 2018 - Genome Wide Identification of Mutational Hotspots .pdf;/Users/laurent/Zotero/storage/9YWRMMWJ/5079401.html;/Users/laurent/Zotero/storage/P5DQ9U2T/5079401.html}, + journal = {Genome Biology and Evolution}, + language = {en}, + number = {9} +} + +@article{camaraMethodsChallengesAnalysis2018, + title = {Methods and Challenges in the Analysis of Single-Cell {{RNA}}-Sequencing Data}, + author = {C{\'a}mara, Pablo G.}, + year = {2018}, + month = feb, + volume = {7}, + pages = {47--53}, + issn = {2452-3100}, + doi = {10.1016/j.coisb.2017.12.007}, + abstract = {The recent advent of highly parallelizable single-cell RNA-sequencing technologies has opened a new window into the study of cell differentiation, commitment, and diversity. Rapid advances in the development of these technologies are being accompanied by the design of computational methods tailored to address the challenges presented by the analysis of single-cell RNA-sequencing data. This review provides a concise overview of some of the steps, algorithms, and approaches that are currently used in the analysis of single-cell RNA-sequencing data, with an emphasis on recent developments.}, + file = {/Users/laurent/Documents/bibliography/to_read/Camara - 2018 - Methods and challenges in the analysis of single-c.pdf}, + journal = {Current Opinion in Systems Biology}, + language = {en} +} + +@article{camaraTopologicalMethodsGenomics2017, + title = {Topological Methods for Genomics: {{Present}} and Future Directions}, + shorttitle = {Topological Methods for Genomics}, + author = {C{\'a}mara, Pablo G.}, + year = {2017}, + month = feb, + volume = {1}, + pages = {95--101}, + issn = {2452-3100}, + doi = {10.1016/j.coisb.2016.12.007}, + abstract = {Topological methods are emerging as a new set of tools for the analysis of large genomic datasets. They are mathematically grounded methods that extract information from the geometric structure of data.
In the last few years, applications to evolutionary biology, cancer genomics, and the analysis of complex diseases have uncovered significant biological results, highlighting their utility for fulfilling some of the current analytic needs of genomics. In this review, the state of the art in the application of topological methods to genomics is summarized, and some of the present limitations and possible future developments are reviewed.}, + file = {/Users/laurent/Zotero/storage/YKNWQ4WZ/Cámara - 2017 - Topological methods for genomics Present and futu.pdf;/Users/laurent/Zotero/storage/U3TYDQYE/S2452310016300270.html}, + journal = {Current Opinion in Systems Biology}, + keywords = {Evolution,Genomics,RNA-seq,Topological Data Analysis}, + series = {Future of {{Systems Biology}} \textbullet{} {{Genomics}} and Epigenomics} +} + +@article{campbellProbabilisticModelingBifurcations2017, + title = {Probabilistic Modeling of Bifurcations in Single-Cell Gene Expression Data Using a {{Bayesian}} Mixture of Factor Analyzers}, + author = {Campbell, Kieran R and Yau, Christopher}, + year = {2017}, + month = mar, + volume = {2}, + pages = {19}, + issn = {2398-502X}, + doi = {10.12688/wellcomeopenres.11087.1}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Campbell and Yau - 2017 - Probabilistic modeling of bifurcations in single-c.pdf;/Users/laurent/Zotero/storage/6RRDHFLH/Campbell and Yau - 2017 - Probabilistic modeling of bifurcations in single-c.pdf;/Users/laurent/Zotero/storage/6ZSL3IC9/Campbell and Yau - 2017 - Probabilistic modeling of bifurcations in single-c.pdf;/Users/laurent/Zotero/storage/KUJ6QKA8/Campbell and Yau - 2017 - Probabilistic modeling of bifurcations in single-c.pdf}, + journal = {Wellcome Open Research}, + language = {en} +} + +@article{campbellUncoveringPseudotemporalTrajectories2018, + title = {Uncovering Pseudotemporal Trajectories with Covariates from Single Cell and Bulk Expression Data}, + author = {Campbell, Kieran R. 
and Yau, Christopher}, + year = {2018}, + month = jun, + volume = {9}, + pages = {2442}, + issn = {2041-1723}, + doi = {10.1038/s41467-018-04696-6}, + abstract = {Cross-sectional omic data often have non-homogeneous genetic, phenotypic, or environmental backgrounds. Here, the authors develop a statistical framework to infer pseudotime trajectories in the presence of such factors as well as their interactions in both single-cell and bulk gene expression analysis}, + copyright = {2018 The Author(s)}, + file = {/Users/laurent/Zotero/storage/8FEMXX6X/Campbell and Yau - 2018 - Uncovering pseudotemporal trajectories with covari.pdf;/Users/laurent/Zotero/storage/D9NSQWWD/s41467-018-04696-6.html}, + journal = {Nature Communications}, + language = {En}, + number = {1} +} + +@book{CanuScalableAccurate, + title = {Canu: Scalable and Accurate Long-Read Assembly via Adaptive k-Mer Weighting and Repeat Separation} +} + +@article{caoSingleCellTranscriptional2019, + title = {The Single Cell Transcriptional Landscape of Mammalian Organogenesis}, + author = {Cao, Junyue and Spielmann, Malte and Qiu, Xiaojie and Huang, Xingfan and Ibrahim, Daniel M. and Hill, Andrew J. and Zhang, Fan and Mundlos, Stefan and Christiansen, Lena and Steemers, Frank J. and Trapnell, Cole and Shendure, Jay}, + year = {2019}, + month = feb, + volume = {566}, + pages = {496--502}, + issn = {0028-0836}, + doi = {10.1038/s41586-019-0969-x}, + abstract = {Mammalian organogenesis is an astonishing process. Within a short window of time, the cells of the three germ layers transform into an embryo that includes most major internal and external organs. Here we set out to investigate the transcriptional dynamics of mouse organogenesis at single cell resolution. With sci-RNA-seq3, we profiled \textasciitilde{}2 million cells, derived from 61 embryos staged between 9.5 and 13.5 days of gestation, in a single experiment. 
The resulting `mouse organogenesis cell atlas' (MOCA) provides a global view of developmental processes during this critical window. We identify hundreds of cell types and 56 trajectories, many of which are detected only because of the depth of cellular coverage, and collectively define thousands of corresponding marker genes. With Monocle 3, we explore the dynamics of gene expression within cell types and trajectories over time, including focused analyses of the apical ectodermal ridge, limb mesenchyme and skeletal muscle.}, + file = {/Users/laurent/Zotero/storage/32V6PSEX/Cao et al. - 2019 - The single cell transcriptional landscape of mamma.pdf}, + journal = {Nature}, + number = {7745}, + pmcid = {PMC6434952}, + pmid = {30787437} +} + +@article{carbonell-caballeroReferenceGenomeAssessment2017, + title = {Reference Genome Assessment from a Population Scale Perspective: An Accurate Profile of Variability and Noise}, + shorttitle = {Reference Genome Assessment from a Population Scale Perspective}, + author = {{Carbonell-Caballero}, Jos{\'e} and Amadoz, Alicia and Alonso, Roberto and Hidalgo, Marta R and {\c C}ubuk, Cankut and Conesa, David and {L{\'o}pez-Qu{\'i}lez}, Antonio and Dopazo, Joaqu{\'i}n}, + year = {2017}, + month = nov, + volume = {33}, + pages = {3511--3517}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx482}, + abstract = {Motivation: Current plant and animal genomic studies are often based on newly assembled genomes that have not been properly consolidated. In this scenario, misassembled regions can easily lead to false-positive findings. Despite quality control scores are included within genotyping protocols, they are usually employed to evaluate individual sample quality rather than reference sequence reliability. We propose a statistical model that combines quality control scores across samples in order to detect incongruent patterns at every genomic region. 
Our model is inherently robust since common artifact signals are expected to be shared between independent samples over misassembled regions of the genome.}, + file = {/Users/laurent/Documents/bibliography/to_read/Carbonell-Caballero et al. - 2017 - Reference genome assessment from a population scal.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {22} +} + +@article{carelSimultaneousDimensionReduction, + title = {Simultaneous {{Dimension Reduction}} and {{Clustering}} via the {{NMF}}-{{EM Algorithm}}}, + author = {Carel, L{\'e}na and Alquier, Pierre}, + pages = {30}, + abstract = {Mixture models are among the most popular tools for model based clustering. However, when the dimension and the number of clusters is large, the estimation as well as the interpretation of the clusters become challenging. We propose a reduced-dimension mixture model, where the K components parameters are combinations of words from a small dictionary - say H words with H $\ll$ K. Including a Nonnegative Matrix Factorization (NMF) in the EM algorithm allows to simultaneously estimate the dictionary and the parameters of the mixture. We propose the acronym NMF-EM for this algorithm. This original approach is motivated by passengers clustering from ticketing data: we apply NMF-EM to ticketing data from two Transdev public transport networks. In this case, the words are easily interpreted as typical slots in a timetable.}, + file = {/Users/laurent/Documents/bibliography/stats/CAREL and ALQUIER - Simultaneous Dimension Reduction and Clustering vi.pdf}, + language = {en} +} + +@article{careyTenSimpleRules2018, + title = {Ten Simple Rules for Biologists Learning to Program}, + author = {Carey, Maureen A.
and Papin, Jason A.}, + editor = {Markel, Scott}, + year = {2018}, + month = jan, + volume = {14}, + pages = {e1005871}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005871}, + file = {/Users/laurent/Documents/bibliography/to_read/Carey and Papin - 2018 - Ten simple rules for biologists learning to progra.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {1} +} + +@article{carronBoostHiCComputationalEnhancement2019, + title = {Boost-{{HiC}}: Computational Enhancement of Long-Range Contacts in Chromosomal Contact Maps}, + shorttitle = {Boost-{{HiC}}}, + author = {Carron, L. and Morlot, J. B. and Matthys, V. and Lesne, A. and Mozziconacci, J.}, + year = {2019}, + month = aug, + volume = {35}, + pages = {2724--2729}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty1059}, + abstract = {AbstractMotivation. Genome-wide chromosomal contact maps are widely used to uncover the 3D organization of genomes. They rely on collecting millions of contact}, + file = {/Users/laurent/Zotero/storage/CHBE58PS/Carron et al. - 2019 - Boost-HiC computational enhancement of long-range.pdf;/Users/laurent/Zotero/storage/UIX46323/5273482.html}, + journal = {Bioinformatics}, + language = {en}, + number = {16} +} + +@article{castellanoNearlyNeutralEvolution2018, + title = {Nearly {{Neutral Evolution}} across the {{Drosophila}} Melanogaster {{Genome}}}, + author = {Castellano, David and James, Jennifer and {Eyre-Walker}, Adam}, + year = {2018}, + month = nov, + volume = {35}, + pages = {2685--2694}, + issn = {0737-4038}, + doi = {10.1093/molbev/msy164}, + abstract = {Abstract. Under the nearly neutral theory of molecular evolution, the proportion of effectively neutral mutations is expected to depend upon the effective popu}, + file = {/Users/laurent/Zotero/storage/ITSL7DQ8/Castellano et al. 
- 2018 - Nearly Neutral Evolution across the Drosophila mel.pdf;/Users/laurent/Zotero/storage/5766CA33/5078937.html}, + journal = {Molecular Biology and Evolution}, + language = {en}, + number = {11} +} + +@article{chanGeneRegulatoryNetwork2017, + title = {Gene Regulatory Network Inference from Single-Cell Data Using Multivariate Information Measures}, + author = {Chan, Thalia E and Stumpf, Michael and Babtie, Ann C}, + year = {2017}, + month = sep, + doi = {10.1101/082099}, + abstract = {While single-cell gene expression experiments present new challenges for data processing, the cell-to-cell variability observed also reveals statistical relationships that can be used by information theory. Here, we use multivariate information theory to explore the statistical dependencies between triplets of genes in single-cell gene expression datasets. We develop PIDC, a fast, efficient algorithm that uses partial information decomposition (PID) to identify regulatory relationships between genes. We thoroughly evaluate the performance of our algorithm and demonstrate that the higher order information captured by PIDC allows it to outperform pairwise mutual information-based algorithms when recovering true relationships present in simulated data. We also infer gene regulatory networks from three experimental single-cell data sets and illustrate how network context, choices made during analysis, and sources of variability affect network inference. PIDC tutorials and open-source software for estimating PID are available here: https://github.com/Tchanders/network\_inference\_tutorials. PIDC should facilitate the identification of putative functional relationships and mechanistic hypotheses from single-cell transcriptomic data.}, + file = {/Users/laurent/Documents/bibliography/networks/Chan et al. 
- 2017 - Gene regulatory network inference from single-cell.pdf}, + language = {en} +} + +@article{chanSegwayGaussianMixture2018, + title = {Segway 2.0: {{Gaussian}} Mixture Models and Minibatch Training}, + shorttitle = {Segway 2.0}, + author = {Chan, Rachel C W and Libbrecht, Maxwell W and Roberts, Eric G and Bilmes, Jeffrey A and Noble, William Stafford and Hoffman, Michael M}, + editor = {Birol, Inanc}, + year = {2018}, + month = feb, + volume = {34}, + pages = {669--671}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx603}, + abstract = {Summary: Segway performs semi-automated genome annotation, discovering joint patterns across multiple genomic signal datasets. We discuss a major new version of Segway and highlight its ability to model data with substantially greater accuracy. Major enhancements in Segway 2.0 include the ability to model data with a mixture of Gaussians, enabling capture of arbitrarily complex signal distributions, and minibatch training, leading to better learned parameters.}, + file = {/Users/laurent/Documents/bibliography/to_read/Chan et al. - 2018 - Segway 2.0 Gaussian mixture models and minibatch .pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {4} +} + +@article{chatzouMultipleSequenceAlignment2016, + title = {Multiple Sequence Alignment Modeling: Methods and Applications}, + shorttitle = {Multiple Sequence Alignment Modeling}, + author = {Chatzou, Maria and Magis, Cedrik and Chang, Jia-Ming and Kemena, Carsten and Bussotti, Giovanni and Erb, Ionas and Notredame, Cedric}, + year = {2016}, + month = nov, + volume = {17}, + pages = {1009--1023}, + issn = {1467-5463, 1477-4054}, + doi = {10.1093/bib/bbv099}, + abstract = {This review provides an overview on the development of Multiple sequence alignment (MSA) methods and their main applications. It is focused on progress made over the past decade. 
The three first sections review recent algorithmic developments for protein, RNA/DNA and genomic alignments. The fourth section deals with benchmarks and explores the relationship between empirical and simulated data, along with the impact on method developments. The last part of the review gives an overview on available MSA local reliability estimators and their dependence on various algorithmic properties of available methods.}, + file = {/Users/laurent/Documents/bibliography/bioinfo/documentation/Chatzou et al. - 2016 - Multiple sequence alignment modeling methods and .pdf;/Users/laurent/Documents/bibliography/mapper/Chatzou et al. - 2016 - Multiple sequence alignment modeling methods and .pdf}, + journal = {Briefings in Bioinformatics}, + language = {en}, + number = {6} +} + +@article{chechikTimingGeneExpression2009, + title = {Timing of {{Gene Expression Responses}} to {{Environmental Changes}}}, + author = {Chechik, Gal and Koller, Daphne}, + year = {2009}, + month = feb, + volume = {16}, + pages = {279--290}, + issn = {1066-5277, 1557-8666}, + doi = {10.1089/cmb.2008.13TT}, + abstract = {Cells respond to environmental perturbations with changes in their gene expression that are coordinated in magnitude and time. Timing information about individual genes, rather than clusters, provides a refined way to view and analyze responses, but it is hard to estimate accurately. To analyze response timing of individual genes, we developed a parametric model that captures the typical temporal responses: an abrupt early response followed by a second transition to a steady state. This impulse model explicitly represents natural temporal properties such as the onset and the offset time, and can be estimated robustly, as demonstrated by its superior ability to impute missing values in gene expression data. 
Using response time of individual genes, we identify relations between gene function and their response timing, showing, for example, how cytosolic ribosomal genes are only repressed after the mitochondrial ribosome is activated. We further demonstrate a strong relation between the binding affinity of a transcription factor and the activation timing of its targets, suggesting that graded binding affinities could be a widely used mechanism for controlling expression timing. See online Supplementary Material at www.liebertonline.com.}, + file = {/Users/laurent/Documents/bibliography/DEA/Chechik and Koller - 2009 - Timing of Gene Expression Responses to Environment.pdf}, + journal = {Journal of Computational Biology}, + language = {en}, + number = {2} +} + +@article{chenAssessmentComputationalMethods2019, + title = {Assessment of Computational Methods for the Analysis of Single-Cell {{ATAC}}-Seq Data}, + author = {Chen, Huidong and Lareau, Caleb and Andreani, Tommaso and Vinyard, Michael E. and Garcia, Sara P. and Clement, Kendell and {Andrade-Navarro}, Miguel A. and Buenrostro, Jason D. and Pinello, Luca}, + year = {2019}, + month = dec, + volume = {20}, + pages = {1--25}, + issn = {1474-760X}, + doi = {10.1186/s13059-019-1854-5}, + abstract = {Recent innovations in single-cell Assay for Transposase Accessible Chromatin using sequencing (scATAC-seq) enable profiling of the epigenetic landscape of thousands of individual cells. scATAC-seq data analysis presents unique methodological challenges. scATAC-seq experiments sample DNA, which, due to low copy numbers (diploid in humans), lead to inherent data sparsity (1\textendash{}10\% of peaks detected per cell) compared to transcriptomic (scRNA-seq) data (10\textendash{}45\% of expressed genes detected per cell). Such challenges in data generation emphasize the need for informative features to assess cell heterogeneity at the chromatin level. 
We present a benchmarking framework that is applied to 10 computational methods for scATAC-seq on 13 synthetic and real datasets from different assays, profiling cell types from diverse tissues and organisms. Methods for processing and featurizing scATAC-seq data were compared by their ability to discriminate cell types when combined with common unsupervised clustering approaches. We rank evaluated methods and discuss computational challenges associated with scATAC-seq analysis including inherently sparse data, determination of features, peak calling, the effects of sequencing coverage and noise, and clustering performance. Running times and memory requirements are also discussed. This reference summary of scATAC-seq methods offers recommendations for best practices with consideration for both the non-expert user and the methods developer. Despite variation across methods and datasets, SnapATAC, Cusanovich2018, and cisTopic outperform other methods in separating cell populations of different coverages and noise levels in both synthetic and real datasets. Notably, SnapATAC is the only method able to analyze a large dataset ({$>$} 80,000 cells).}, + copyright = {2019 The Author(s).}, + file = {/Users/laurent/Zotero/storage/NSV2IBSJ/Chen et al. - 2019 - Assessment of computational methods for the analys.pdf;/Users/laurent/Zotero/storage/V8KPK366/s13059-019-1854-5.html}, + journal = {Genome Biology}, + language = {en}, + number = {1} +} + +@article{chenBCseqAccurateSingle2018, + title = {{{BCseq}}: Accurate Single Cell {{RNA}}-Seq Quantification with Bias Correction}, + shorttitle = {{{BCseq}}}, + author = {Chen, Liang and Zheng, Sika}, + year = {2018}, + month = aug, + volume = {46}, + pages = {e82}, + issn = {0305-1048}, + doi = {10.1093/nar/gky308}, + abstract = {Abstract.
With rapid technical advances, single cell RNA-seq (scRNA-seq) has been used to detect cell subtypes exhibiting distinct gene expression profiles and}, + file = {/Users/laurent/Zotero/storage/JGTBHAK9/Chen and Zheng - 2018 - BCseq accurate single cell RNA-seq quantification.pdf;/Users/laurent/Zotero/storage/T6C6DT5K/4990025.html}, + journal = {Nucleic Acids Research}, + language = {en}, + number = {14} +} + +@article{chenDensityPathAlgorithmVisualize, + title = {{{DensityPath}}: An Algorithm to Visualize and Reconstruct Cell State-Transition Path on Density Landscape for Single-Cell {{RNA}} Sequencing Data}, + shorttitle = {{{DensityPath}}}, + author = {Chen, Ziwei and An, Shaokun and Bai, Xiangqi and Gong, Fuzhou and Ma, Liang and Wan, Lin}, + doi = {10.1093/bioinformatics/bty1009}, + abstract = {Motivation: Visualizing and reconstructing cell developmental trajectories intrinsically embedded in high-dimensional expression profiles of single-cel}, + file = {/Users/laurent/Zotero/storage/TL4S3TCX/Chen et al. - DensityPath an algorithm to visualize and reconst.pdf;/Users/laurent/Zotero/storage/I3H92GWK/5233001.html}, + journal = {Bioinformatics}, + language = {en} +} + +@article{chenFastpUltrafastAllinone2018, + title = {Fastp: An Ultra-Fast All-in-One {{FASTQ}} Preprocessor}, + shorttitle = {Fastp}, + author = {Chen, Shifu and Zhou, Yanqing and Chen, Yaru and Gu, Jia}, + year = {2018}, + month = sep, + volume = {34}, + pages = {i884--i890}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty560}, + abstract = {Motivation: Quality control and preprocessing of FASTQ files are essential to providing clean data for downstream analysis. Traditionally, a different}, + file = {/Users/laurent/Zotero/storage/KC7ZLKWN/Chen et al. - 2018 - fastp an ultra-fast all-in-one FASTQ preprocessor.pdf;/Users/laurent/Zotero/storage/RTR4UNXK/Chen et al.
- 2018 - fastp an ultra-fast all-in-one FASTQ preprocessor.pdf;/Users/laurent/Zotero/storage/P3CJWZID/5093234.html;/Users/laurent/Zotero/storage/ZVJFARJZ/5093234.html}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{chengNewParadigmsHematopoietic2019, + title = {New Paradigms on Hematopoietic Stem Cell Differentiation}, + author = {Cheng, Hui and Zheng, Zhaofeng and Cheng, Tao}, + year = {2019}, + month = jun, + issn = {1674-8018}, + doi = {10.1007/s13238-019-0633-0}, + abstract = {Ever since hematopoietic stem cells (HSCs) were first identified half a century ago, their differentiation roadmap has been extensively studied. The classical model of hematopoiesis has long held as a dogma that HSCs reside at the top of a hierarchy in which HSCs possess self-renewal capacity and can progressively give rise to all blood lineage cells. However, over the past several years, with advances in single cell technologies, this developmental scheme has been challenged. In this review, we discuss the evidence supporting heterogeneity within HSC and progenitor populations as well as the hierarchical models revised by novel approaches mainly in mouse system. These evolving views provide further understanding of hematopoiesis and highlight the complexity of hematopoietic differentiation.}, + file = {/Users/laurent/Zotero/storage/I55W2ZYT/Cheng et al. 
- 2019 - New paradigms on hematopoietic stem cell different.pdf}, + journal = {Protein \& Cell}, + language = {en} +} + +@article{chenGSAEAutoencoderEmbedded, + title = {{{GSAE}}: An Autoencoder with Embedded Gene-Set Nodes for Genomics Functional Characterization}, + author = {Chen, Hung-I Harry and Chiu, Yu-Chiao and Zhang, Tinghe and Zhang, Songyao and Chen, Yidong}, + pages = {39}, + abstract = {Background Bioinformatics tools have been developed to interpret gene expression data at the gene set level, and these gene set based analyses improve the biologists' capability to discover functional relevance of their experiment design. While elucidating gene set individually, inter gene sets association is rarely taken into consideration. Deep learning, an emerging machine learning technique in computational biology, can be used to generate an unbiased combination of gene set, and to determine the biological relevance and analysis consistency of these combining gene sets by leveraging large genomic data sets. +Results In this study, we proposed a gene superset autoencoder (GSAE), a multi-layer autoencoder model with the incorporation of a priori defined gene sets that retain the crucial biological features in the latent layer. We introduced the concept of the gene superset, an unbiased combination of gene sets with weights trained by the autoencoder, where each node in the latent layer is a superset. Trained with genomic data from TCGA and evaluated with their accompanying clinical parameters, we showed gene supersets' ability of discriminating tumor subtypes and their prognostic capability. We further demonstrated the biological relevance of the top component gene sets in the significant supersets. +Conclusions Using autoencoder model and gene superset at its latent layer, we demonstrated that gene supersets retain sufficient biological information with respect to tumor subtypes and}, + file = {/Users/laurent/Zotero/storage/LWKGNRRW/Chen et al. 
- GSAE an autoencoder with embedded gene-set nodes .pdf}, + language = {en} +} + +@article{chenNovelStatisticalMethod2015, + title = {A Novel Statistical Method for Quantitative Comparison of Multiple {{ChIP}}-Seq Datasets}, + author = {Chen, Li and Wang, Chi and Qin, Zhaohui S. and Wu, Hao}, + year = {2015}, + month = jun, + volume = {31}, + pages = {1889--1896}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btv094}, + abstract = {Motivation: ChIP-seq is a powerful technology to measure the protein binding or histone modification strength in the whole genome scale. Although there are a number of methods available for single ChIP-seq data analysis (e.g. `peak detection'), rigorous statistical method for quantitative comparison of multiple ChIP-seq datasets with the considerations of data from control experiment, signal to noise ratios, biological variations and multiple-factor experimental designs is underdeveloped.}, + file = {/Users/laurent/Documents/bibliography/bioinfo/documentation/Chen et al. - 2015 - A novel statistical method for quantitative compar.pdf;/Users/laurent/Documents/bibliography/ChipSeq/Chen et al. - 2015 - A novel statistical method for quantitative compar.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {12} +} + +@article{chenNovoHaplotypeReconstruction2018, + title = {De Novo Haplotype Reconstruction in Viral Quasispecies Using Paired-End Read Guided Path Finding}, + author = {Chen, Jiao and Zhao, Yingchao and Sun, Yanni}, + year = {2018}, + month = sep, + volume = {34}, + pages = {2927--2935}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty202}, + abstract = {AbstractMotivation. RNA virus populations contain different but genetically related strains, all infecting an individual host. Reconstruction of the viral hapl}, + file = {/Users/laurent/Zotero/storage/SET52CFE/Chen et al. 
- 2018 - De novo haplotype reconstruction in viral quasispe.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{chenPredictingTranscriptionFactor2017, + title = {Predicting {{Transcription Factor Binding Sites}} with {{Convolutional Kernel Networks}}}, + author = {Chen, Dexiong and Jacob, Laurent and Mairal, Julien}, + year = {2017}, + month = nov, + doi = {10.1101/217257}, + abstract = {The growing amount of biological sequences available makes it possible to learn genotype-phenotype relationships from data with increasingly high accuracy. By exploiting large sets of sequences with known phenotypes, machine learning methods can be used to build functions that predict the phenotype of new, unannotated sequences. In particular, deep neural networks have recently obtained good performances on such prediction tasks, but are notoriously difficult to analyze or interpret. Here, we introduce a hybrid approach between kernel methods and convolutional neural networks for sequences, which retains the ability of neural networks to learn good representations for a learning problem at hand, while defining a well characterized Hilbert space to describe prediction functions. Our method outperforms state-of-the-art convolutional neural networks on a transcription factor binding prediction task while being much faster to train and yielding more stable and interpretable results.}, + file = {/Users/laurent/Documents/bibliography/to_read/Chen et al. - 2017 - Predicting Transcription Factor Binding Sites with.pdf}, + language = {en} +} + +@article{chenPyNVRInvestigatingFactors, + title = {{{pyNVR}}: {{Investigating}} Factors Affecting Feature Selection from {{scRNA}}-Seq Data for Lineage Reconstruction}, + shorttitle = {{{pyNVR}}}, + author = {Chen, Bob and Herring, Charles A. and Lau, Ken S.}, + doi = {10.1093/bioinformatics/bty950}, + abstract = {AbstractMotivation. 
The emergence of single-cell RNA-sequencing (scRNA-seq) has enabled analyses that leverage transitioning cell states to reconstruct pseudot}, + file = {/Users/laurent/Zotero/storage/2DBAD3YD/Chen et al. - pyNVR Investigating factors affecting feature sel.pdf;/Users/laurent/Zotero/storage/HHEFTVUA/5184958.html}, + journal = {Bioinformatics}, + language = {en} +} + +@article{chenPyNVRInvestigatingFactors2019, + title = {{{pyNVR}}: Investigating Factors Affecting Feature Selection from {{scRNA}}-Seq Data for Lineage Reconstruction}, + shorttitle = {{{pyNVR}}}, + author = {Chen, Bob and Herring, Charles A. and Lau, Ken S.}, + year = {2019}, + month = jul, + volume = {35}, + pages = {2335--2337}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty950}, + abstract = {AbstractMotivation. The emergence of single-cell RNA-sequencing has enabled analyses that leverage transitioning cell states to reconstruct pseudotemporal traj}, + file = {/Users/laurent/Zotero/storage/N8QMPDT5/Chen et al. - 2019 - pyNVR investigating factors affecting feature sel.pdf;/Users/laurent/Zotero/storage/RUFWREJS/5184958.html}, + journal = {Bioinformatics}, + language = {en}, + number = {13} +} + +@article{chenRobustDistributedLag2018, + title = {Robust Distributed Lag Models Using Data Adaptive Shrinkage}, + author = {Chen, Yin-Hsiu and Mukherjee, Bhramar and Adar, Sara D. and Berrocal, Veronica J. and Coull, Brent A.}, + year = {2018}, + month = oct, + volume = {19}, + pages = {461--478}, + issn = {1465-4644}, + doi = {10.1093/biostatistics/kxx041}, + abstract = {SUMMARY. Distributed lag models (DLMs) have been widely used in environmental epidemiology to quantify the lagged effects of air pollution on an outcome of int}, + file = {/Users/laurent/Zotero/storage/ZED895LT/Chen et al. 
- 2018 - Robust distributed lag models using data adaptive .pdf;/Users/laurent/Zotero/storage/UZ56C6MT/4508801.html}, + journal = {Biostatistics}, + language = {en}, + number = {4} +} + +@article{chenScanProcedureMultiple2018, + title = {A {{Scan Procedure}} for {{Multiple Testing}}}, + author = {Chen, Shiyun and Ying, Andrew and {Arias-Castro}, Ery}, + year = {2018}, + month = aug, + abstract = {In a multiple testing framework, we propose a method that identifies the interval with the highest estimated false discovery rate of P-values and rejects the corresponding null hypotheses. Unlike the Benjamini-Hochberg method, which does the same but over intervals with an endpoint at the origin, the new procedure `scans' all intervals. In parallel with (Storey, Taylor, and Siegmund, 2004), we show that this scan procedure provides strong control of asymptotic false discovery rate. In addition, we investigate its asymptotic false non-discovery rate, deriving conditions under which it outperforms the Benjamini-Hochberg procedure. For example, the scan procedure is superior in power-law location models.}, + archivePrefix = {arXiv}, + eprint = {1808.00631}, + eprinttype = {arxiv}, + file = {/Users/laurent/Zotero/storage/8WI6RNAM/Chen et al. - 2018 - A Scan Procedure for Multiple Testing.pdf}, + journal = {arXiv:1808.00631 [math, stat]}, + keywords = {Mathematics - Statistics Theory}, + language = {en}, + primaryClass = {math, stat} +} + +@article{chenSinglecellTrajectoriesReconstruction2019, + title = {Single-Cell Trajectories Reconstruction, Exploration and Mapping of Omics Data with {{STREAM}}}, + author = {Chen, Huidong and Albergante, Luca and Hsu, Jonathan Y. and Lareau, Caleb A. and Lo Bosco, Giosu{\`e} and Guan, Jihong and Zhou, Shuigeng and Gorban, Alexander N. and Bauer, Daniel E. and Aryee, Martin J. and Langenau, David M. and Zinovyev, Andrei and Buenrostro, Jason D. 
and Yuan, Guo-Cheng and Pinello, Luca}, + year = {2019}, + month = dec, + volume = {10}, + issn = {2041-1723}, + doi = {10.1038/s41467-019-09670-4}, + file = {/Users/laurent/Zotero/storage/9546QNHP/Chen et al. - 2019 - Single-cell trajectories reconstruction, explorati.pdf;/Users/laurent/Zotero/storage/F8BBR4M9/Chen et al. - 2019 - Single-cell trajectories reconstruction, explorati.pdf}, + journal = {Nature Communications}, + language = {en}, + number = {1} +} + +@article{chiharaInductionTranscriptionalRegulation2018, + title = {Induction and Transcriptional Regulation of the Co-Inhibitory Gene Module in {{T}} Cells}, + author = {Chihara, Norio and Madi, Asaf and Kondo, Takaaki and Zhang, Huiyuan and Acharya, Nandini and Singer, Meromit and Nyman, Jackson and Marjanovic, Nemanja D. and Kowalczyk, Monika S. and Wang, Chao and Kurtulus, Sema and Law, Travis and Etminan, Yasaman and Nevin, James and Buckley, Christopher D. and Burkett, Patrick R. and Buenrostro, Jason D. and {Rozenblatt-Rosen}, Orit and Anderson, Ana C. and Regev, Aviv and Kuchroo, Vijay K.}, + year = {2018}, + month = jun, + volume = {558}, + pages = {454--459}, + issn = {1476-4687}, + doi = {10.1038/s41586-018-0206-z}, + abstract = {A module of co-inhibitory T cell receptors, driven by the cytokine IL-27, is identified in mice that is regulated by the transcription factors PRDM1 and c-MAF.}, + copyright = {2018 Macmillan Publishers Ltd., part of Springer Nature}, + file = {/Users/laurent/Zotero/storage/8TV7J3DP/Chihara et al. - 2018 - Induction and transcriptional regulation of the co.pdf;/Users/laurent/Zotero/storage/QD95QHRH/s41586-018-0206-z.html}, + journal = {Nature}, + language = {en}, + number = {7710} +} + +@article{chlisModelbasedBranchingPoint2017, + title = {Model-Based Branching Point Detection in Single-Cell Data by {{K}}-Branches Clustering}, + author = {Chlis, Nikolaos K. and Wolf, F. 
Alexander and Theis, Fabian J.}, + year = {2017}, + month = oct, + volume = {33}, + pages = {3211--3219}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx325}, + abstract = {Motivation: The identification of heterogeneities in cell populations by utilizing single-cell technologies such as single-cell RNA-Seq, enables inference of cellular development and lineage trees. Several methods have been proposed for such inference from high-dimensional single-cell data. They typically assign each cell to a branch in a differentiation trajectory. However, they commonly assume specific geometries such as tree-like developmental hierarchies and lack statistically sound methods to decide on the number of branching events.}, + file = {/Users/laurent/Documents/bibliography/to_read/Chlis et al. - 2017 - Model-based branching point detection in single-ce.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {20} +} + +@article{choGeneralizableScalableVisualization2018, + title = {Generalizable and {{Scalable Visualization}} of {{Single}}-{{Cell Data Using Neural Networks}}}, + author = {Cho, Hyunghoon and Berger, Bonnie and Peng, Jian}, + year = {2018}, + month = aug, + volume = {7}, + pages = {185--191.e4}, + issn = {2405-4712}, + doi = {10.1016/j.cels.2018.05.017}, + abstract = {Summary +Visualization algorithms are fundamental tools for interpreting single-cell data. However, standard methods, such as t-stochastic neighbor embedding (t-SNE), are not scalable to datasets with millions of cells and the resulting visualizations cannot be generalized to analyze new datasets. Here we introduce net-SNE, a generalizable visualization approach that trains a neural network to learn a mapping function from high-dimensional single-cell gene-expression profiles to a low-dimensional visualization. We benchmark net-SNE on 13 different datasets, and show that it achieves visualization quality and clustering accuracy comparable with t-SNE. 
Additionally we show that the mapping function learned by net-SNE can accurately position entire new subtypes of cells from previously unseen datasets and can also be used to reduce the runtime of visualizing 1.3 million cells by 36-fold (from 1.5~days to an hour). Our work provides a framework for bootstrapping single-cell analysis from existing datasets.}, + file = {/Users/laurent/Zotero/storage/AEWLDAWD/S2405471218302357.html}, + journal = {Cell Systems}, + keywords = {data visualization,neural network,single-cell RNA sequencing}, + number = {2} +} + +@techreport{choiBayesianModelSelection2020, + title = {Bayesian Model Selection Reveals Biological Origins of Zero Inflation in Single-Cell Transcriptomics}, + author = {Choi, Kwangbom and Chen, Yang and Skelly, Daniel A. and Churchill, Gary A.}, + year = {2020}, + month = mar, + institution = {{Bioinformatics}}, + doi = {10.1101/2020.03.03.974808}, + abstract = {Single-cell RNA sequencing is a powerful tool for characterizing cellular heterogeneity in gene expression. However, high variability and a large number of zero counts present challenges for analysis and interpretation. There is substantial controversy over the origins and proper treatment of zeros and no consensus on whether zero-inflated count distributions are necessary or even useful. While some studies assume the existence of zero inflation due to technical artifacts and attempt to impute the missing information, other recent studies of argue that there is no zero inflation in scRNA-Seq data. We apply a Bayesian model selection approach to unambiguously demonstrate zero inflation in multiple biologically realistic scRNA-Seq datasets. We show that the primary causes of zero inflation are not technical but rather biological in nature. We also demonstrate that parameter estimates from the zero-inflated negative binomial distribution are an unreliable indicator of zero inflation. 
Despite the existence of zero inflation of scRNA-Seq counts, we recommend the generalized linear model with negative binomial count distribution (not zero-inflated) as a suitable reference model for scRNA-Seq analysis.}, + file = {/Users/laurent/Zotero/storage/VJ84945X/Choi et al. - 2020 - Bayesian model selection reveals biological origin.pdf}, + language = {en}, + type = {Preprint} +} + +@article{choMathematicalModelingSinglecell2019, + title = {Mathematical Modeling with Single-Cell Sequencing Data}, + author = {Cho, Heyrim and Rockne, Russell C.}, + year = {2019}, + month = jul, + pages = {710640}, + doi = {10.1101/710640}, + abstract = {Single-cell sequencing technologies have revolutionized molecular and cellular biology and stimulated the development of computational tools to analyze the data generated from these technology platforms. However, despite the recent explosion of computational analysis tools, relatively few mathematical models have been developed to utilize these data. Here we compare and contrast two approaches for building mathematical models of cell state-transitions with single-cell RNA-sequencing data with hematopoeisis as a model system; by solving partial differential equations on a graph representing discrete cell state relationships, and by solving the equations on a continuous cell state-space. We demonstrate how to calibrate model parameters from single or multiple time-point single-cell sequencing data, and examine the effects of data processing algorithms on the model calibration and predictions. As an application of our approach, we demonstrate how the calibrated models may be used to mathematically perturb normal hematopoeisis to simulate, predict, and study the emergence of novel cell types during the pathogenesis of acute myeloid leukemia. 
The mathematical modeling framework we present is general and can be applied to study cell state-transitions in any single-cell genome sequencing dataset.}, + copyright = {\textcopyright{} 2019, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/}, + file = {/Users/laurent/Zotero/storage/VD4ARV3E/Cho and Rockne - 2019 - Mathematical modeling with single-cell sequencing .pdf;/Users/laurent/Zotero/storage/GAZIEEXT/710640v1.html}, + journal = {bioRxiv}, + language = {en} +} + +@article{choMathematicalModelingSinglecell2019a, + title = {Mathematical Modeling with Single-Cell Sequencing Data}, + author = {Cho, Heyrim and Rockne, Russell C.}, + year = {2019}, + month = jul, + pages = {710640}, + doi = {10.1101/710640}, + abstract = {Single-cell sequencing technologies have revolutionized molecular and cellular biology and stimulated the development of computational tools to analyze the data generated from these technology platforms. However, despite the recent explosion of computational analysis tools, relatively few mathematical models have been developed to utilize these data. Here we compare and contrast two approaches for building mathematical models of cell state-transitions with single-cell RNA-sequencing data with hematopoeisis as a model system; by solving partial differential equations on a graph representing discrete cell state relationships, and by solving the equations on a continuous cell state-space. We demonstrate how to calibrate model parameters from single or multiple time-point single-cell sequencing data, and examine the effects of data processing algorithms on the model calibration and predictions. 
As an application of our approach, we demonstrate how the calibrated models may be used to mathematically perturb normal hematopoeisis to simulate, predict, and study the emergence of novel cell types during the pathogenesis of acute myeloid leukemia. The mathematical modeling framework we present is general and can be applied to study cell state-transitions in any single-cell genome sequencing dataset. Author summary: Here we compare and contrast graph- and continuum-based approaches for constructing mathematical models of cell state-transitions using single-cell RNA-sequencing data. Using two publicly available datasets, we demonstrate how to calibrate mathematical models of hematopoeisis and how to use the models to predict dynamics of acute myeloid leukemia pathogenesis by mathematically perturbing the process of cellular proliferation and differentiation. We apply these modeling approaches to study the effects of perturbing individual or sets of genes in subsets of cells, or by modeling the dynamics of cell state-transitions directly in a reduced dimensional space. We examine the effects of different graph abstraction and trajectory inference algorithms on calibrating the models and the subsequent model predictions. We conclude that both the graph- and continuum-based modeling approaches can be equally well calibrated to data and discuss situations in which one method may be preferable over the other. This work presents a general mathematical modeling framework, applicable to any single-cell sequencing dataset where cell state-transitions are of interest.}, + copyright = {\textcopyright{} 2019, Posted by Cold Spring Harbor Laboratory. 
This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/}, + file = {/Users/laurent/Zotero/storage/XB4YLBHQ/Cho and Rockne - 2019 - Mathematical modeling with single-cell sequencing .pdf;/Users/laurent/Zotero/storage/7NEANELM/710640v1.html}, + journal = {bioRxiv}, + language = {en} +} + +@article{chouAlleleHMMDatadrivenMethod2018, + title = {{{AlleleHMM}}: A Data-Driven Method to Identify Allele-Specific Differences in Distributed Functional Genomic Marks}, + shorttitle = {{{AlleleHMM}}}, + author = {Chou, Shao-Pei and Danko, Charles G.}, + year = {2018}, + month = aug, + pages = {389262}, + doi = {10.1101/389262}, + abstract = {How DNA sequence variation influences gene expression remains poorly understood. Diploid organisms have two homologous copies of their DNA sequence in the same nucleus, providing a rich source of information about how genetic variation affects a wealth of biochemical processes. However, few computational methods have been developed to discover allele-specific differences in functional genomic data. Existing methods either treat each SNP independently, limiting statistical power, or combine SNPs across gene annotations, preventing the discovery of allele specific differences in unexpected genomic regions. Here we introduce AlleleHMM, a new computational method to identify blocks of neighboring SNPs that share similar allele-specific differences in mark abundance. AlleleHMM uses a hidden Markov model to divide the genome among three hidden states based on allele frequencies in genomic data: a symmetric state (state S) which shows no difference between alleles, and regions with a higher signal on the maternal (state M) or paternal (state P) allele. AlleleHMM substantially outperformed naive methods using both simulated and real genomic data, particularly when input data had realistic levels of overdispersion. 
Using PRO-seq data, AlleleHMM identified thousands of allele specific blocks of transcription in both coding and non-coding genomic regions. AlleleHMM is a powerful tool for discovering allele-specific regions in functional genomic datasets.}, + copyright = {\textcopyright{} 2018, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/}, + file = {/Users/laurent/Zotero/storage/4BIYKDTY/Chou and Danko - 2018 - AlleleHMM a data-driven method to identify allele.pdf;/Users/laurent/Zotero/storage/YH7ZYSMS/389262.html}, + journal = {bioRxiv}, + language = {en} +} + +@article{cickovskiConstructingLightweightFlexible2018, + title = {Constructing Lightweight and Flexible Pipelines Using {{Plugin}}-{{Based Microbiome Analysis}} ({{PluMA}})}, + author = {Cickovski, Trevor and Narasimhan, Giri}, + year = {2018}, + month = sep, + volume = {34}, + pages = {2881--2888}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty198}, + abstract = {AbstractMotivation. Software pipelines have become almost standardized tools for microbiome analysis. Currently many pipelines are available, often sharing som}, + file = {/Users/laurent/Zotero/storage/4XTUP4IY/Cickovski and Narasimhan - 2018 - Constructing lightweight and flexible pipelines us.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{clarkALEGenericAssembly2013, + title = {{{ALE}}: A Generic Assembly Likelihood Evaluation Framework for Assessing the Accuracy of Genome and Metagenome Assemblies}, + shorttitle = {{{ALE}}}, + author = {Clark, Scott C. and Egan, Rob and Frazier, Peter I. 
and Wang, Zhong}, + year = {2013}, + month = feb, + volume = {29}, + pages = {435--443}, + issn = {1367-4811}, + doi = {10.1093/bioinformatics/bts723}, + abstract = {MOTIVATION: Researchers need general purpose methods for objectively evaluating the accuracy of single and metagenome assemblies and for automatically detecting any errors they may contain. Current methods do not fully meet this need because they require a reference, only consider one of the many aspects of assembly quality or lack statistical justification, and none are designed to evaluate metagenome assemblies. RESULTS: In this article, we present an Assembly Likelihood Evaluation (ALE) framework that overcomes these limitations, systematically evaluating the accuracy of an assembly in a reference-independent manner using rigorous statistical methods. This framework is comprehensive, and integrates read quality, mate pair orientation and insert length (for paired-end reads), sequencing coverage, read alignment and k-mer frequency. ALE pinpoints synthetic errors in both single and metagenomic assemblies, including single-base errors, insertions/deletions, genome rearrangements and chimeric assemblies presented in metagenomes. At the genome level with real-world data, ALE identifies three large misassemblies from the Spirochaeta smaragdinae finished genome, which were all independently validated by Pacific Biosciences sequencing. At the single-base level with Illumina data, ALE recovers 215 of 222 (97\%) single nucleotide variants in a training set from a GC-rich Rhodobacter sphaeroides genome. Using real Pacific Biosciences data, ALE identifies 12 of 12 synthetic errors in a Lambda Phage genome, surpassing even Pacific Biosciences' own variant caller, EviCons. In summary, the ALE framework provides a comprehensive, reference-independent and statistically rigorous measure of single genome and metagenome assembly accuracy, which can be used to identify misassemblies or to optimize the assembly process. 
AVAILABILITY: ALE is released as open source software under the UoI/NCSA license at http://www.alescore.org. It is implemented in C and Python.}, + file = {/Users/laurent/Zotero/storage/XXI2PEHQ/Clark et al. - 2013 - ALE a generic assembly likelihood evaluation fram.pdf}, + journal = {Bioinformatics}, + keywords = {Bayes Theorem,Escherichia coli,Genetic Variation,Genomics,High-Throughput Nucleotide Sequencing,Humans,Metagenomics,Models,Models; Statistical,Probability,Software,Statistical}, + language = {en}, + number = {4}, + pmid = {23303509} +} + +@article{clarkeGGRaSPRpackageSelecting2018, + title = {{{GGRaSP}}: A {{R}}-Package for Selecting Representative Genomes Using {{Gaussian}} Mixture Models}, + shorttitle = {{{GGRaSP}}}, + author = {Clarke, Thomas H. and Brinkac, Lauren M. and Sutton, Granger and Fouts, Derrick E.}, + year = {2018}, + month = sep, + volume = {34}, + pages = {3032--3034}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty300}, + abstract = {AbstractMotivation. The vast number of available sequenced bacterial genomes occasionally exceeds the facilities of comparative genomic methods or is dominated}, + file = {/Users/laurent/Zotero/storage/JVWY484R/Clarke et al. - 2018 - GGRaSP a R-package for selecting representative g.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{clearyEfficientGenerationTranscriptomic2017, + title = {Efficient {{Generation}} of {{Transcriptomic Profiles}} by {{Random Composite Measurements}}}, + author = {Cleary, Brian and Cong, Le and Cheung, Anthea and Lander, Eric S. and Regev, Aviv}, + year = {2017}, + month = nov, + volume = {171}, + pages = {1424--1436.e18}, + issn = {0092-8674}, + doi = {10.1016/j.cell.2017.10.023}, + abstract = {RNA profiles are an informative phenotype of cellular and tissue states but can be costly to generate at massive scale. 
Here, we describe how gene expression levels can be efficiently acquired with random composite measurements\textemdash{}in which abundances are combined in a random weighted sum. We show (1) that the similarity between pairs of expression profiles can be approximated with very few composite measurements; (2) that by leveraging sparse, modular representations of gene expression, we can use random composite measurements to recover high-dimensional gene expression levels (with 100 times fewer measurements than genes); and (3) that it is possible to blindly recover gene expression from composite measurements, even without access to training data. Our results suggest new compressive modalities as a foundation for massive scaling in high-throughput measurements and new insights into the interpretation of high-dimensional data.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Cleary et al. - 2017 - Efficient Generation of Transcriptomic Profiles by.pdf;/Users/laurent/Zotero/storage/4SR3X6XP/Cleary et al. - 2017 - Efficient Generation of Transcriptomic Profiles by.pdf;/Users/laurent/Zotero/storage/9Y4WF7LV/Cleary et al. - 2017 - Efficient Generation of Transcriptomic Profiles by.pdf;/Users/laurent/Zotero/storage/LPX7UZFS/Cleary et al. - 2017 - Efficient Generation of Transcriptomic Profiles by.pdf}, + journal = {Cell}, + language = {en}, + number = {6} +} + +@article{clementeInferringSexspecificDemographic2018, + title = {Inferring Sex-Specific Demographic History from {{SNP}} Data}, + author = {Clemente, Florian and Gautier, Mathieu and Vitalis, Renaud}, + editor = {King, Nicole}, + year = {2018}, + month = jan, + volume = {14}, + pages = {e1007191}, + issn = {1553-7404}, + doi = {10.1371/journal.pgen.1007191}, + abstract = {The relative female and male contributions to demography are of great importance to better understand the history and dynamics of populations. 
While earlier studies relied on uniparental markers to investigate sex-specific questions, the increasing amount of sequence data now enables us to take advantage of tens to hundreds of thousands of independent loci from autosomes and the X chromosome. Here, we develop a novel method to estimate effective sex ratios or ESR (defined as the female proportion of the effective population) from allele count data for each branch of a rooted tree topology that summarizes the history of the populations of interest. Our method relies on Kimura's time-dependent diffusion approximation for genetic drift, and is based on a hierarchical Bayesian model to integrate over the allele frequencies along the branches. We show via simulations that parameters are inferred robustly, even under scenarios that violate some of the model assumptions. Analyzing bovine SNP data, we infer a strongly female-biased ESR in both dairy and beef cattle, as expected from the underlying breeding scheme. Conversely, we observe a strongly male-biased ESR in early domestication times, consistent with an easier taming and management of cows, and/or introgression from wild auroch males, that would both cause a relative increase in male effective population size. In humans, analyzing a subsample of non-African populations, we find a male-biased ESR in Oceanians that may reflect complex marriage patterns in Aboriginal Australians. Because our approach relies on allele count data, it may be applied on a wide range of species.}, + file = {/Users/laurent/Documents/bibliography/to_read/Clemente et al. - 2018 - Inferring sex-specific demographic history from SN.pdf}, + journal = {PLOS Genetics}, + language = {en}, + number = {1} +} + +@article{coffeyClusteringLongitudinalProfiles2014, + title = {Clustering Longitudinal Profiles Using {{P}}-Splines and Mixed Effects Models Applied to Time-Course Gene Expression Data}, + author = {Coffey, N. and Hinde, J. 
and Holian, E.}, + year = {2014}, + month = mar, + volume = {71}, + pages = {14--29}, + issn = {0167-9473}, + doi = {10.1016/j.csda.2013.04.001}, + abstract = {Longitudinal data is becoming increasingly common and various methods have been developed to analyze this type of data. Profiles from time-course gene expression studies, where cluster analysis plays an important role to identify groups of co-expressed genes over time, are investigated. A number of procedures have been used to cluster time-course gene expression data, however there are many limitations to the techniques previously described. An alternative approach is proposed, which aims to alleviate some of these limitations. The method exploits the connection between the linear mixed effects model and P-spline smoothing to simultaneously smooth the gene expression data to remove any measurement error/noise and cluster the expression profiles using finite mixtures of mixed effects models. This approach has a number of advantages, including decreased computation time and ease of implementation in standard software packages.}, + file = {/Users/laurent/Zotero/storage/BVQSLJHD/Coffey et al. - 2014 - Clustering longitudinal profiles using P-splines a.pdf}, + journal = {Computational Statistics \& Data Analysis}, + keywords = {Clustering,Finite mixture model,Longitudinal profiles,Mixed effects model,Time-course gene expression}, + language = {en} +} + +@article{coifmanGeometricDiffusionsTool2005, + title = {Geometric Diffusions as a Tool for Harmonic Analysis and Structure Definition of Data: {{Diffusion}} Maps}, + shorttitle = {Geometric Diffusions as a Tool for Harmonic Analysis and Structure Definition of Data}, + author = {Coifman, R. R. and Lafon, S. and Lee, A. B. and Maggioni, M. and Nadler, B. and Warner, F. and Zucker, S. 
W.}, + year = {2005}, + month = may, + volume = {102}, + pages = {7426--7431}, + issn = {0027-8424, 1091-6490}, + doi = {10.1073/pnas.0500334102}, + file = {/Users/laurent/Documents/bibliography/stats/Coifman et al. - 2005 - Geometric diffusions as a tool for harmonic analys.pdf}, + journal = {Proceedings of the National Academy of Sciences}, + language = {en}, + number = {21} +} + +@article{colePerformanceAssessmentSelection2017, + title = {Performance {{Assessment}} and {{Selection}} of {{Normalization Procedures}} for {{Single}}-{{Cell RNA}}-{{Seq}}}, + author = {Cole, Michael B and Risso, Davide and Wagner, Allon and DeTomaso, David and Ngai, John and Purdom, Elizabeth and Dudoit, Sandrine and Yosef, Nir}, + year = {2017}, + month = dec, + doi = {10.1101/235382}, + abstract = {Due to the presence of systematic measurement biases, data normalization is an essential preprocessing step in the analysis of single-cell RNA sequencing (scRNA-seq) data. While a variety of normalization procedures are available for bulk RNA-seq, their suitability with respect to single-cell data is still largely unexplored. Furthermore, there may be multiple, competing considerations behind the assessment of normalization performance, some of them study-specific. The choice of normalization method can have a large impact on the results of downstream analyses (e.g., clustering, inference of cell lineages, differential expression analysis), and thus it is critically important to assess the performance of competing methods in order to select a suitable procedure for the study at hand.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Cole et al. - 2017 - Performance Assessment and Selection of Normalizat.pdf;/Users/laurent/Zotero/storage/GG69V9UE/Cole et al. - 2018 - Performance Assessment and Selection of Normalizat.pdf;/Users/laurent/Zotero/storage/H29AL88F/Cole et al. 
- 2017 - Performance Assessment and Selection of Normalizat.pdf;/Users/laurent/Zotero/storage/NJ6ALVNK/Cole et al. - 2018 - Performance Assessment and Selection of Normalizat.pdf}, + language = {en} +} + +@article{colePerformanceAssessmentSelection2019, + title = {Performance {{Assessment}} and {{Selection}} of {{Normalization Procedures}} for {{Single}}-{{Cell RNA}}-{{Seq}}}, + author = {Cole, Michael B. and Risso, Davide and Wagner, Allon and DeTomaso, David and Ngai, John and Purdom, Elizabeth and Dudoit, Sandrine and Yosef, Nir}, + year = {2019}, + month = apr, + volume = {8}, + pages = {315--328.e8}, + issn = {2405-4712}, + doi = {10.1016/j.cels.2019.03.010}, + abstract = {Systematic measurement biases make normalization an essential step in single-cell RNA sequencing (scRNA-seq) analysis. There may be multiple competing considerations behind the assessment of normalization performance, of which some may be study specific. We have developed ``scone''\textemdash{} a flexible framework for assessing performance based on a comprehensive panel of data-driven metrics. Through graphical summaries and quantitative reports, scone summarizes trade-offs and ranks large numbers of normalization methods by panel performance. The method is implemented in the open-source Bioconductor R software package scone. We show that top-performing normalization methods lead to better agreement with independent validation data for a collection of scRNA-seq datasets. scone can be downloaded at http://bioconductor.org/packages/scone/.}, + file = {/Users/laurent/Zotero/storage/ICEQRQK9/Cole et al. - 2019 - Performance Assessment and Selection of Normalizat.pdf;/Users/laurent/Zotero/storage/J8PE52IC/Cole et al. 
- 2019 - Performance Assessment and Selection of Normalizat.pdf}, + journal = {Cell Systems}, + language = {en}, + number = {4} +} + +@article{conwayUpSetRPackageVisualization2017, + title = {{{UpSetR}}: An {{R}} Package for the Visualization of Intersecting Sets and Their Properties}, + shorttitle = {{{UpSetR}}}, + author = {Conway, Jake R. and Lex, Alexander and Gehlenborg, Nils}, + year = {2017}, + month = sep, + volume = {33}, + pages = {2938--2940}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx364}, + abstract = {Motivation: Venn and Euler diagrams are a popular yet inadequate solution for quantitative visualization of set intersections. A scalable alternative to Venn and Euler diagrams for visualizing intersecting sets and their properties is needed.}, + file = {/Users/laurent/Documents/bibliography/to_read/Conway et al. - 2017 - UpSetR an R package for the visualization of inte.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {18} +} + +@article{cooperEndlessConflictsDetecting2019, + title = {Endless {{Conflicts}}: {{Detecting Molecular Arms Races}} in {{Mammalian Genomes}}}, + shorttitle = {Endless {{Conflicts}}}, + author = {Cooper, Jacob C. and Leonard, Christopher J. and Pedersen, Brent S. and Carey, Clayton M. and Quinlan, Aaron R. and Elde, Nels C. and Phadnis, Nitin}, + year = {2019}, + month = jun, + pages = {685321}, + doi = {10.1101/685321}, + abstract = {Recurrent positive selection at the codon level is often a sign that a gene is engaged in a molecular arms race \textendash{} a conflict between the genome of its host and the genome of another species over mutually exclusive access to a resource that has a direct effect on the fitness of both individuals. 
Detecting molecular arms races has led to a better understanding of how evolution changes the molecular interfaces of proteins when organisms compete over time, especially in the realm of host-pathogen interactions. Here, we present a method for detection of gene-level recurrent positive selection across entire genomes for a given phylogenetic group. We deploy this method on five mammalian clades \textendash{} primates, mice, deer mice, dogs, and bats \textendash{} to both detect novel instances of recurrent positive selection and to compare the prevalence of recurrent positive selection between clades. We analyze the frequency at which individual genes are targets of recurrent positive selection in multiple clades. We find that coincidence of selection occurs far more frequently than expected by chance, indicating that all clades experience shared selective pressures. Additionally, we highlight Polymeric Immunoglobulin Receptor (PIGR) as a gene which shares specific amino acids under recurrent positive selection in multiple clades, indicating that it has been locked in a molecular arms race for {$\sim$}100My. These data provide an in-depth comparison of recurrent positive selection across the mammalian phylogeny, and highlights of the power of comparative evolutionary approaches to generate specific hypotheses about the molecular interactions of rapidly evolving genes.}, + copyright = {\textcopyright{} 2019, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution-NonCommercial 4.0 International), CC BY-NC 4.0, as described at http://creativecommons.org/licenses/by-nc/4.0/}, + file = {/Users/laurent/Zotero/storage/RM8TBAYF/Cooper et al. 
- 2019 - Endless Conflicts Detecting Molecular Arms Races .pdf;/Users/laurent/Zotero/storage/HX8C5CY3/685321v1.html}, + journal = {bioRxiv}, + language = {en} +} + +@article{cornishRedPandaNovel2020, + title = {Red {{Panda}}: {{A}} Novel Method for Detecting Variants in Single-Cell {{RNA}} Sequencing}, + shorttitle = {Red {{Panda}}}, + author = {Cornish, Adam and Roychoudhury, Shrabasti and Sarma, Krishna and Pramanik, Suravi and Bhakat, Kishor and Dudley, Andrew and Mishra, Nitish K and Guda, Chittibabu}, + year = {2020}, + month = jan, + doi = {10.1101/2020.01.08.898874}, + abstract = {Single-cell sequencing enables us to better understand genetic diseases, such as cancer or autoimmune disorders, which are often affected by changes in rare cells. Currently, no existing software is aimed at identifying single nucleotide variations or micro (1-50bp) insertions and deletions in single-cell RNA sequencing (scRNA-seq) data. Generating high-quality variant data is vital to the study of the aforementioned diseases, among others. In this study, we report the design and implementation of Red Panda, a novel method to accurately identify variants in scRNA-seq data. Variants were called on scRNA-seq data from human articular chondrocytes, mouse embryonic fibroblasts (MEFs), and simulated data stemming from the MEF alignments. Red Panda had the highest Positive Predictive Value at 45.0\%, while other tools\textemdash{}FreeBayes, GATK HaplotypeCaller, GATK UnifiedGenotyper, Monovar, and Platypus\textemdash{}ranged from 5.8\%-41.53\%. From the simulated data, Red Panda had the highest sensitivity at 72.44\%. We show that our method provides a novel and improved mechanism to identify variants in scRNA-seq as compared to currently-existing software.}, + file = {/Users/laurent/Zotero/storage/XJC3N4GV/Cornish et al. 
- 2020 - Red Panda A novel method for detecting variants i.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@article{costaGraphDDPGraphembeddingApproach2018, + title = {{{GraphDDP}}: A Graph-Embedding Approach to Detect Differentiation Pathways in Single-Cell-Data Using Prior Class Knowledge}, + shorttitle = {{{GraphDDP}}}, + author = {Costa, Fabrizio and Gr{\"u}n, Dominic and Backofen, Rolf}, + year = {2018}, + month = sep, + volume = {9}, + pages = {3685}, + issn = {2041-1723}, + doi = {10.1038/s41467-018-05988-7}, + abstract = {Inference and representation of differentiation trajectories from single cell RNA-seq data remains a challenge. Here, the authors offer a visualization approach that captures both continuous differentiation trajectories and discrete clusters representing metastable states along the trajectories.}, + copyright = {2018 The Author(s)}, + file = {/Users/laurent/Zotero/storage/QS7K3KBJ/Costa et al. - 2018 - GraphDDP a graph-embedding approach to detect dif.pdf;/Users/laurent/Zotero/storage/UE786WTB/login.html}, + journal = {Nature Communications}, + language = {en}, + number = {1} +} + +@article{crowCharacterizingReplicabilityCell2018, + title = {Characterizing the Replicability of Cell Types Defined by Single Cell {{RNA}}-Sequencing Data Using {{MetaNeighbor}}}, + author = {Crow, Megan and Paul, Anirban and Ballouz, Sara and Huang, Z. Josh and Gillis, Jesse}, + year = {2018}, + month = dec, + volume = {9}, + issn = {2041-1723}, + doi = {10.1038/s41467-018-03282-0}, + file = {/Users/laurent/Documents/bibliography/to_read/Crow et al. 
- 2018 - Characterizing the replicability of cell types def.pdf}, + journal = {Nature Communications}, + language = {en}, + number = {1} +} + +@article{crowCoexpressionSingleCellAnalysis2018, + title = {Co-Expression in {{Single}}-{{Cell Analysis}}: {{Saving Grace}} or {{Original Sin}}?}, + shorttitle = {Co-Expression in {{Single}}-{{Cell Analysis}}}, + author = {Crow, Megan and Gillis, Jesse}, + year = {2018}, + month = nov, + volume = {34}, + pages = {823--831}, + issn = {0168-9525}, + doi = {10.1016/j.tig.2018.07.007}, + abstract = {As a fundamental unit of life, the cell has rightfully been the subject of intense investigation throughout the history of biology. Technical innovations now make it possible to assay cellular features at genomic scale, yielding breakthroughs in our understanding of the molecular organization of tissues, and even whole organisms. As these data accumulate we will soon be faced with a new challenge: making sense of the plethora of results. Early investigations into the replicability of cell type profiles inferred from single-cell RNA sequencing data have indicated that this is likely to be surprisingly straightforward due to consistent gene co-expression. In this opinion article we discuss the evidence for this claim and its implications for interpreting cell type-specific gene expression.}, + file = {/Users/laurent/Zotero/storage/NEXZNERP/Crow and Gillis - 2018 - Co-expression in Single-Cell Analysis Saving Grac.pdf;/Users/laurent/Zotero/storage/D65XKSFN/S0168952518301288.html}, + journal = {Trends in Genetics}, + keywords = {cell type,co-expression,replicability,single-cell RNA-seq,transcriptome}, + number = {11} +} + +@article{csalaSparseRedundancyAnalysis2017, + title = {Sparse Redundancy Analysis of High-Dimensional Genetic and Genomic Data}, + author = {Csala, Attila and Voorbraak, Frans P. J. M. and Zwinderman, Aeilko H. 
and Hof, Michel H.}, + year = {2017}, + month = oct, + volume = {33}, + pages = {3228--3234}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx374}, + abstract = {Motivation: Recent technological developments have enabled the possibility of genetic and genomic integrated data analysis approaches, where multiple omics datasets from various biological levels are combined and used to describe (disease) phenotypic variations. The main goal is to explain and ultimately predict phenotypic variations by understanding their genetic basis and the interaction of the associated genetic factors. Therefore, understanding the underlying genetic mechanisms of phenotypic variations is an ever increasing research interest in biomedical sciences. In many situations, we have a set of variables that can be considered to be the outcome variables and a set that can be considered to be explanatory variables. Redundancy analysis (RDA) is an analytic method to deal with this type of directionality. Unfortunately, current implementations of RDA cannot deal optimally with the high dimensionality of omics data (p {$\gg$} n). The existing theoretical framework, based on Ridge penalization, is suboptimal, since it includes all variables in the analysis. As a solution, we propose to use Elastic Net penalization in an iterative RDA framework to obtain a sparse solution.}, + file = {/Users/laurent/Documents/bibliography/GWAS/Csala et al. 
- 2017 - Sparse redundancy analysis of high-dimensional gen.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {20} +} + +@article{dadanehBayesianGammaNegativeBinomial2019, + title = {Bayesian {{Gamma}}-{{Negative Binomial Modeling}} of {{Single}}-{{Cell RNA Sequencing Data}}}, + author = {Dadaneh, Siamak Zamani and {de Figueiredo}, Paul and Sze, Sing-Hoi and Zhou, Mingyuan and Qian, Xiaoning}, + year = {2019}, + month = aug, + abstract = {Background: Single-cell RNA sequencing (scRNA-seq) is a powerful profiling technique at the single-cell resolution. Appropriate analysis of scRNA-seq data can characterize molecular heterogeneity and shed light into the underlying cellular process to better understand development and disease mechanisms. The unique analytic challenge is to appropriately model highly over-dispersed scRNA-seq count data with prevalent dropouts (zero counts), making zero-inflated dimensionality reduction techniques popular for scRNA-seq data analyses. Employing zero-inflated distributions, however, may place extra emphasis on zero counts, leading to potential bias when identifying the latent structure of the data. +Results: In this paper, we propose a fully generative hierarchical gamma-negative binomial (hGNB) model of scRNA-seq data, obviating the need for explicitly modeling zero inflation. At the same time, hGNB can naturally account for covariate effects at both the gene and cell levels to identify complex latent representations of scRNA-seq data, without the need for commonly adopted pre-processing steps such as normalization. Efficient Bayesian model inference is derived by exploiting conditional conjugacy via novel data augmentation techniques. 
+Conclusion: Experimental results on both simulated data and several real-world scRNA-seq datasets suggest that hGNB is a powerful tool for cell cluster discovery as well as cell lineage inference.}, + archivePrefix = {arXiv}, + eprint = {1908.00650}, + eprinttype = {arxiv}, + file = {/Users/laurent/Zotero/storage/VP4QW49E/Dadaneh et al. - 2019 - Bayesian Gamma-Negative Binomial Modeling of Singl.pdf}, + journal = {arXiv:1908.00650 [stat]}, + keywords = {Statistics - Applications}, + language = {en}, + primaryClass = {stat} +} + +@article{daiSequence2VecNovelEmbedding2017, + title = {{{Sequence2Vec}}: A Novel Embedding Approach for Modeling Transcription Factor Binding Affinity Landscape}, + shorttitle = {{{Sequence2Vec}}}, + author = {Dai, Hanjun and Umarov, Ramzan and Kuwahara, Hiroyuki and Li, Yu and Song, Le and Gao, Xin}, + year = {2017}, + month = nov, + volume = {33}, + pages = {3575--3583}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx480}, + abstract = {Motivation: An accurate characterization of transcription factor (TF)-DNA affinity landscape is crucial to a quantitative understanding of the molecular mechanisms underpinning endogenous gene regulation. While recent advances in biotechnology have brought the opportunity for building binding affinity prediction methods, the accurate characterization of TF-DNA binding affinity landscape still remains a challenging problem.}, + file = {/Users/laurent/Documents/bibliography/to_read/Dai et al. - 2017 - Sequence2Vec a novel embedding approach for model.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {22} +} + +@article{dasNovelClusteringMethod2018, + title = {A Novel Clustering Method to Identify Cell Types from Single Cell Transcriptional Profiles}, + author = {Das, Priyojit and Nazeer, K. A. 
Abdul}, + year = {2018}, + month = jan, + volume = {132}, + pages = {983--992}, + issn = {1877-0509}, + doi = {10.1016/j.procs.2018.05.114}, + abstract = {With the advancement of the high throughput single cell techniques, transcriptomics data generation at the single cell level becomes very easy. Analysis of this single cell expression values can reveal lots of unprecedented information about complex cellular heterogeneity and tissue composition. To date, different statistical methods are applied to analyze expression data at the cellular level, but there is still pretty much scope for the development of new bioinformatics tools. In this article, a graph theoretic clustering algorithm is proposed to identify cellular states from single cell gene expression data. The proposed algorithm first generates a shared nearest neighbor graph from the single cell RNA-seq dataset and then applies minimum spanning tree based clustering method to cluster the graph nodes. To compare our proposed algorithm's performance with other unsupervised clustering methods, we used three real scRNA-seq datasets (human cancer cells, human embryonic cells and mouse embryonic cells). 
From the comparison result, it is evident that the proposed algorithm outperforms other standard single cell analysis methods.}, + file = {/Users/laurent/Zotero/storage/BQXQ4LIK/Das and Nazeer - 2018 - A novel clustering method to identify cell types f.pdf;/Users/laurent/Zotero/storage/R9AVYSI8/S1877050918308469.html}, + journal = {Procedia Computer Science}, + keywords = {Clustering,Gene Expression,Hierarchical Clustering,Minimum Spanning Tree,Single Cell RNA-seq,SNN graph}, + series = {International {{Conference}} on {{Computational Intelligence}} and {{Data Science}}} +} + +@article{daveigaleprevostBioContainersOpensourceCommunitydriven2017, + title = {{{BioContainers}}: An Open-Source and Community-Driven Framework for Software Standardization}, + shorttitle = {{{BioContainers}}}, + author = {{da Veiga Leprevost}, Felipe and Gr{\"u}ning, Bj{\"o}rn A. and Alves Aflitos, Saulo and R{\"o}st, Hannes L. and Uszkoreit, Julian and Barsnes, Harald and Vaudel, Marc and Moreno, Pablo and Gatto, Laurent and Weber, Jonas and Bai, Mingze and Jimenez, Rafael C. and Sachsenberg, Timo and Pfeuffer, Julianus and Vera Alvarez, Roberto and Griss, Johannes and Nesvizhskii, Alexey I. and {Perez-Riverol}, Yasset}, + year = {2017}, + month = aug, + volume = {33}, + pages = {2580--2582}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx192}, + file = {/Users/laurent/Documents/bibliography/bioinfo/da Veiga Leprevost et al. 
- 2017 - BioContainers an open-source and community-driven.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {16} +} + +@article{davisSCOPITSampleSize2019, + title = {{{SCOPIT}}: Sample Size Calculations for Single-Cell Sequencing Experiments}, + shorttitle = {{{SCOPIT}}}, + author = {Davis, Alexander and Gao, Ruli and Navin, Nicholas E.}, + year = {2019}, + month = nov, + volume = {20}, + pages = {566}, + issn = {1471-2105}, + doi = {10.1186/s12859-019-3167-9}, + abstract = {Background: In single cell DNA and RNA sequencing experiments, the number of cells to sequence must be decided before running an experiment, and afterwards, it is necessary to decide whether sufficient cells were sampled. These questions can be addressed by calculating the probability of sampling at least a defined number of cells from each subpopulation (cell type or cancer clone). Results: We developed an interactive web application called SCOPIT (Single-Cell One-sided Probability Interactive Tool), which calculates the required probabilities using a multinomial distribution (www.navinlab.com/SCOPIT). In addition, we created an R package called pmultinom for scripting these calculations. Conclusions: Our tool for fast multinomial calculations provide a simple and intuitive procedure for prospectively planning single-cell experiments or retrospectively evaluating if sufficient numbers of cells have been sequenced. The web application can be accessed at navinlab.com/SCOPIT.}, + file = {/Users/laurent/Zotero/storage/V4MQ227S/Davis et al. - 2019 - SCOPIT sample size calculations for single-cell s.pdf}, + journal = {BMC Bioinformatics}, + keywords = {Multinomial distributions,Sample size,Single cell sequencing}, + language = {en}, + number = {1} +} + +@article{deboerBROCKMANDecipheringVariance2018, + title = {{{BROCKMAN}}: Deciphering Variance in Epigenomic Regulators by k-Mer Factorization}, + shorttitle = {{{BROCKMAN}}}, + author = {{de Boer}, Carl G. 
and Regev, Aviv}, + year = {2018}, + month = jul, + volume = {19}, + pages = {253}, + issn = {1471-2105}, + doi = {10.1186/s12859-018-2255-6}, + abstract = {Variation in chromatin organization across single cells can help shed important light on the mechanisms controlling gene expression, but scale, noise, and sparsity pose significant challenges for interpretation of single cell chromatin data. Here, we develop BROCKMAN (Brockman Representation Of Chromatin by K-mers in Mark-Associated Nucleotides), an approach to infer variation in transcription factor (TF) activity across samples through unsupervised analysis of the variation in DNA sequences associated with an epigenomic mark.}, + file = {/Users/laurent/Zotero/storage/WSEKYQ9R/de Boer and Regev - 2018 - BROCKMAN deciphering variance in epigenomic regul.pdf;/Users/laurent/Zotero/storage/WQ5282VW/s12859-018-2255-6.html}, + journal = {BMC Bioinformatics}, + number = {1} +} + +@article{decosterNanoPackVisualizingProcessing2018, + title = {{{NanoPack}}: Visualizing and Processing Long-Read Sequencing Data}, + shorttitle = {{{NanoPack}}}, + author = {De Coster, Wouter and D'Hert, Svenn and Schultz, Darrin T. and Cruts, Marc and Van Broeckhoven, Christine}, + year = {2018}, + month = aug, + volume = {34}, + pages = {2666--2669}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty149}, + abstract = {Summary: Here we describe NanoPack, a set of tools developed for visualization and processing of long-read sequencing data from Oxford Nanopore Technol}, + file = {/Users/laurent/Zotero/storage/6A6VHHUB/De Coster et al. - 2018 - NanoPack visualizing and processing long-read seq.pdf;/Users/laurent/Zotero/storage/6N7LJRB4/De Coster et al. 
- 2018 - NanoPack visualizing and processing long-read seq.pdf;/Users/laurent/Zotero/storage/GXAF2FAL/4934939.html;/Users/laurent/Zotero/storage/X2VTKQUH/4934939.html}, + journal = {Bioinformatics}, + language = {en}, + number = {15} +} + +@article{dehghannasiriAmbiguousSpliceSites2019, + title = {Ambiguous Splice Sites Distinguish {{circRNA}} and Linear Splicing in the Human Genome}, + author = {Dehghannasiri, Roozbeh and Szabo, Linda and Salzman, Julia}, + year = {2019}, + month = apr, + volume = {35}, + pages = {1263--1268}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty785}, + abstract = {Motivation: Identification of splice sites is critical to gene annotation and to determine which sequences control circRNA biogenesis. Full-length RNA}, + file = {/Users/laurent/Zotero/storage/XRM2AQEG/Dehghannasiri et al. - 2019 - Ambiguous splice sites distinguish circRNA and lin.pdf;/Users/laurent/Zotero/storage/373PFEXC/5091181.html}, + journal = {Bioinformatics}, + language = {en}, + number = {8} +} + +@article{dekanterCHETAHSelectiveHierarchical2019, + title = {{{CHETAH}}: A Selective, Hierarchical Cell Type Identification Method for Single-Cell {{RNA}} Sequencing}, + shorttitle = {{{CHETAH}}}, + author = {{de Kanter}, Jurrian Kornelis and Lijnzaad, Philip and Candelli, Tito and Margaritis, Thanasis and Holstege, Frank}, + year = {2019}, + month = feb, + doi = {10.1101/558908}, + abstract = {Cell type identification is essential for single-cell RNA sequencing (scRNA-seq) studies that are currently transforming the life sciences. CHETAH (CHaracterization of cEll Types Aided by Hierarchical clustering) is an accurate cell type identification algorithm that is rapid and selective, including the possibility of intermediate or unassigned categories. Evidence for assignment is based on a classification tree of previously available scRNA-seq reference data and includes a confidence score based on the variance in gene expression per cell type. 
For cell types represented in the reference data, CHETAH's accuracy is as good as existing methods. Its specificity is superior when cells of an unknown type are encountered, such as malignant cells in tumor samples which it pinpoints as intermediate or unassigned. Although designed for tumor samples in particular, the use of unassigned and intermediate types is also valuable in other exploratory studies. This is exemplified in pancreas datasets where CHETAH highlights cell populations not well represented in the reference dataset, including cells with profiles that lie on a continuum between that of acinar and ductal cell types. Having the possibility of unassigned and intermediate cell types is pivotal for preventing misclassification and can yield important biological information for previously unexplored tissues.}, + file = {/Users/laurent/Zotero/storage/5WBAQLLT/de Kanter et al. - 2019 - CHETAH a selective, hierarchical cell type identi.pdf;/Users/laurent/Zotero/storage/LN62FEH3/de Kanter et al. - 2019 - CHETAH a selective, hierarchical cell type identi.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@article{dengRapidTranscriptionalBursts2019, + title = {Rapid Transcriptional Bursts Upregulate the {{X}} Chromosome}, + author = {Deng, Xinxian and Disteche, Christine M.}, + year = {2019}, + month = oct, + volume = {26}, + pages = {851--853}, + issn = {1545-9985}, + doi = {10.1038/s41594-019-0314-y}, + abstract = {Upregulation of the X chromosome compensates for the presence of a single active X chromosome in mammals, but this has been difficult to measure and to understand mechanistically. A study now demonstrates that increased burst frequency boosts the transcriptional output of X-linked genes in male and female cells with a single active X chromosome. 
Interestingly, female embryonic stem cells lack increased burst frequency, which is established only after inactivation of the X chromosome takes place; this finding reveals a switch that can modulate transcriptional bursting.}, + copyright = {2019 Springer Nature America, Inc.}, + file = {/Users/laurent/Zotero/storage/2LSSIB2C/Deng and Disteche - 2019 - Rapid transcriptional bursts upregulate the X chro.pdf;/Users/laurent/Zotero/storage/D5BP7N72/s41594-019-0314-y.html}, + journal = {Nature Structural \& Molecular Biology}, + language = {en}, + number = {10} +} + +@article{dengScalableAnalysisCelltype2019, + title = {Scalable Analysis of Cell-Type Composition from Single-Cell Transcriptomics Using Deep Recurrent Learning}, + author = {Deng, Yue and Bao, Feng and Dai, Qionghai and Wu, Lani F. and Altschuler, Steven J.}, + year = {2019}, + month = apr, + volume = {16}, + pages = {311--314}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/s41592-019-0353-7}, + file = {/Users/laurent/Zotero/storage/6AYXGBFA/Deng et al. - 2019 - Scalable analysis of cell-type composition from si.pdf;/Users/laurent/Zotero/storage/U9GLY5HM/Deng et al. - 2019 - Scalable analysis of cell-type composition from si.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {4} +} + +@article{desvillechabrolDetectionCharacterizationLow2016, + title = {Detection and Characterization of Low and High Genome Coverage Regions Using an Efficient Running Median and a Double Threshold Approach}, + author = {Desvillechabrol, Dimitri and Bouchier, Christiane and Kennedy, Sean and Cokelaer, Thomas}, + year = {2016}, + month = dec, + pages = {092478}, + doi = {10.1101/092478}, + abstract = {Motivation: Next Generation Sequencing (NGS) provides researchers with powerful tools to investigate both prokaryotic and eukaryotic genetics. An accurate assessment of reads mapped to a specific genome consists of inspecting the genome coverage as number of reads mapped to a specific genome location. 
Most current methods use the average of the genome coverage (sequencing depth) to summarize the overall coverage. This metric quickly assesses the sequencing quality but ignores valuable biological information like the presence of repetitive regions or deleted genes. The detection of such information may be challenging due to a wide spectrum of heterogeneous coverage regions, a mixture of underlying models or the presence of a non-constant trend along the genome. Using robust statistics to systematically identify genomic regions with unusual coverage is needed to characterize these regions more precisely. Results: We implemented an efficient running median algorithm to estimate the genome coverage trend. The distribution of the normalized genome coverage is then estimated using a Gaussian mixture model. A z-score statistics is then assigned to each base position and used to separate the central distribution from the regions of interest (ROI) (i.e., under- and over-covered regions). Finally, a double threshold mechanism is used to cluster the genomic ROIs. HTML reports provide a summary with interactive visual representations of the genomic ROIs. Availability: An implementation of the genome coverage characterization is available within the Sequana project. The standalone application is called sequana\_coverage. The source code is available on GitHub (http://github.com/sequana/sequana), and documentation on ReadTheDocs (http://sequana.readthedocs.org). An example of HTML report is provided on http://sequana.github.io.}, + copyright = {\textcopyright{} 2016, Posted by Cold Spring Harbor Laboratory Press. This pre-print is available under a Creative Commons License (Attribution-NonCommercial 4.0 International), CC BY-NC 4.0, as described at http://creativecommons.org/licenses/by-nc/4.0/}, + file = {/Users/laurent/Zotero/storage/TLJHFC2P/Desvillechabrol et al. 
- 2016 - Detection and characterization of low and high gen.pdf;/Users/laurent/Zotero/storage/7SKSFWKZ/092478.html}, + journal = {bioRxiv}, + language = {en} +} + +@article{devaillyInsightsMammalianTranscription2018, + title = {Insights into Mammalian Transcription Control by Systematic Analysis of {{ChIP}} Sequencing Data}, + author = {Devailly, Guillaume and Joshi, Anagha}, + year = {2018}, + month = nov, + volume = {19}, + pages = {409}, + issn = {1471-2105}, + doi = {10.1186/s12859-018-2377-x}, + abstract = {Transcription regulation is a major controller of gene expression dynamics during development and disease, where transcription factors (TFs) modulate expression of genes through direct or indirect DNA interaction. ChIP sequencing has become the most widely used technique to get a genome wide view of TF occupancy in a cell type of interest, mainly due to established standard protocols and a rapid decrease in the cost of sequencing. The number of available ChIP sequencing data sets in public domain is therefore ever increasing, including data generated by individual labs together with consortia such as the ENCODE project.}, + file = {/Users/laurent/Zotero/storage/KQKRC5J7/Devailly and Joshi - 2018 - Insights into mammalian transcription control by s.pdf;/Users/laurent/Zotero/storage/U9XYNURE/s12859-018-2377-x.html}, + journal = {BMC Bioinformatics}, + number = {14} +} + +@article{devenyiTenSimpleRules2018, + title = {Ten Simple Rules for Collaborative Lesson Development}, + author = {Devenyi, Gabriel A. and Emonet, R{\'e}mi and Harris, Rayna M. and Hertweck, Kate L. and Irving, Damien and Milligan, Ian and Wilson, Greg}, + editor = {Markel, Scott}, + year = {2018}, + month = mar, + volume = {14}, + pages = {e1005963}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005963}, + file = {/Users/laurent/Documents/bibliography/to_read/Devenyi et al. 
- 2018 - Ten simple rules for collaborative lesson developm.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {3} +} + +@article{deyVisualizingStructureRNAseq2017, + title = {Visualizing the Structure of {{RNA}}-Seq Expression Data Using Grade of Membership Models}, + author = {Dey, Kushal K and Hsiao, Chiaowen Joyce and Stephens, Matthew}, + year = {2017}, + pages = {22}, + abstract = {Grade of membership models, also known as ``admixture models'', ``topic models'' or ``Latent Dirichlet Allocation'', are a generalization of cluster models that allow each sample to have membership in multiple clusters. These models are widely used in population genetics to model admixed individuals who have ancestry from multiple ``populations'', and in natural language processing to model documents having words from multiple ``topics''. Here we illustrate the potential for these models to cluster samples of RNA-seq gene expression data, measured on either bulk samples or single cells. We also provide methods to help interpret the clusters, by identifying genes that are distinctively expressed in each cluster. By applying these methods to several example RNA-seq applications we demonstrate their utility in identifying and summarizing structure and heterogeneity. Applied to data from the GTEx project on 53 human tissues, the approach highlights similarities among biologically related tissues and identifies distinctively-expressed genes that recapitulate known biology. Applied to single-cell expression data from mouse preimplantation embryos, the approach highlights both discrete and continuous variation through early embryonic development stages, and highlights genes involved in a variety of relevant processes\textemdash{}from germ cell development, through compaction and morula formation, to the formation of inner cell mass and trophoblast at the blastocyst stage. 
The methods are implemented in the Bioconductor package CountClust.}, + file = {/Users/laurent/Documents/bibliography/DEA/Dey et al. - 2017 - Visualizing the structure of RNA-seq expression da.pdf}, + journal = {PLOS Genetics}, + language = {en} +} + +@article{diaz-mejiaEvaluationMethodsAssign2019, + title = {Evaluation of Methods to Assign Cell Type Labels to Cell Clusters from Single-Cell {{RNA}}-Sequencing Data}, + author = {{Diaz-Mejia}, J. Javier and Meng, Elaine C. and Pico, Alexander R. and MacParland, Sonya A. and Ketela, Troy and Pugh, Trevor J. and Bader, Gary D. and Morris, John H.}, + year = {2019}, + month = feb, + doi = {10.1101/562082}, + abstract = {Identification of cell type subpopulations from complex cell mixtures using single-cell RNA-sequencing (scRNAseq) data includes automated computational steps like data normalization, dimensionality reduction and cell clustering. However, assigning cell type labels to cell clusters is still conducted manually by most researchers, resulting in limited documentation, low reproducibility and uncontrolled vocabularies. Two bottlenecks to automating this task are the scarcity of reference cell type gene expression signatures and that some dedicated methods are available only as web servers with limited cell type gene expression signatures. In this study, we benchmarked four methods (CIBERSORT, GSEA, GSVA, and ORA) for the task of assigning cell type labels to cell clusters from scRNA-seq data. We used scRNA-seq datasets from liver, peripheral blood mononuclear cells and retinal neurons for which reference cell type gene expression signatures were available. Our results show that, in general, all four methods show a high performance in the task as evaluated by Receiver Operating Characteristic curve analysis (average AUC = 0.94, sd = 0.036), whereas Precision-Recall curve analyses show a wide variation depending on the method and dataset (average AUC = 0.53, sd = 0.24). 
CIBERSORT and GSVA were the top two performers. Additionally, GSVA was the fastest of the four methods and was more robust in cell type gene expression signature subsampling simulations. We provide an extensible framework to evaluate other methods and datasets at https://github.com/jdime/scRNAseq\_cell\_cluster\_labeling.}, + file = {/Users/laurent/Zotero/storage/2AP6VWAJ/Diaz-Mejia et al. - 2019 - Evaluation of methods to assign cell type labels t.pdf;/Users/laurent/Zotero/storage/566T45KK/Diaz-Mejia et al. - 2019 - Evaluation of methods to assign cell type labels t.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@article{diazSCellIntegratedAnalysis2016, + title = {{{SCell}}: Integrated Analysis of Single-Cell {{RNA}}-Seq Data}, + shorttitle = {{{SCell}}}, + author = {Diaz, Aaron and Liu, Siyuan J. and Sandoval, Carmen and Pollen, Alex and Nowakowski, Tom J. and Lim, Daniel A. and Kriegstein, Arnold}, + year = {2016}, + month = jul, + volume = {32}, + pages = {2219--2220}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btw201}, + abstract = {Summary: Analysis of the composition of heterogeneous tissue has been greatly enabled by recent developments in single-cell transcriptomics. We present SCell, an integrated software tool for quality filtering, normalization, feature selection, iterative dimensionality reduction, clustering and the estimation of gene-expression gradients from large ensembles of single-cell RNA-seq datasets. SCell is open source, and implemented with an intuitive graphical interface. Scripts and protocols for the high-throughput pre-processing of large ensembles of single-cell, RNA-seq datasets are provided as an additional resource.}, + file = {/Users/laurent/Zotero/storage/A4RUF6BM/Diaz et al. - 2016 - SCell integrated analysis of single-cell RNA-seq .pdf;/Users/laurent/Zotero/storage/EM33D5QC/Diaz et al. 
- 2016 - SCell integrated analysis of single-cell RNA-seq .pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {14} +} + +@article{digginsCharacterizingCellSubsets2017, + title = {Characterizing Cell Subsets Using Marker Enrichment Modeling}, + author = {Diggins, Kirsten E and Greenplate, Allison R and Leelatian, Nalin and Wogsland, Cara E and Irish, Jonathan M}, + year = {2017}, + month = mar, + volume = {14}, + pages = {275--278}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/nmeth.4149}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Diggins et al. - 2017 - Characterizing cell subsets using marker enrichmen.pdf;/Users/laurent/Zotero/storage/M5A6MBI5/Diggins et al. - 2017 - Characterizing cell subsets using marker enrichmen.pdf;/Users/laurent/Zotero/storage/X6KBKCGQ/Diggins et al. - 2017 - Characterizing cell subsets using marker enrichmen.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {3} +} + +@article{dilliesComprehensiveEvaluationNormalization2013, + title = {A Comprehensive Evaluation of Normalization Methods for {{Illumina}} High-Throughput {{RNA}} Sequencing Data Analysis}, + author = {Dillies, M.-A. and Rau, A. and Aubert, J. and {Hennequet-Antier}, C. and Jeanmougin, M. and Servant, N. and Keime, C. and Marot, G. and Castel, D. and Estelle, J. and Guernec, G. and Jagla, B. and Jouneau, L. and Laloe, D. and Le Gall, C. and Schaeffer, B. and Le Crom, S. and Guedj, M. and Jaffrezic, F. and {on behalf of The French StatOmique Consortium}}, + year = {2013}, + month = nov, + volume = {14}, + pages = {671--683}, + issn = {1467-5463, 1477-4054}, + doi = {10.1093/bib/bbs046}, + abstract = {During the last 3 years, a number of approaches for the normalization of RNA sequencing data have emerged in the literature, differing both in the type of bias adjustment and in the statistical strategy adopted. 
However, as data continue to accumulate, there has been no clear consensus on the appropriate normalization method to be used or the impact of a chosen method on the downstream analysis. In this work, we focus on a comprehensive comparison of seven recently proposed normalization methods for the differential analysis of RNA-seq data, with an emphasis on the use of varied real and simulated datasets involving different species and experimental designs to represent data characteristics commonly observed in practice. Based on this comparison study, we propose practical recommendations on the appropriate normalization method to be used and its impact on the differential analysis of RNA-seq data.}, + file = {/Users/laurent/Documents/bibliography/RNASeq/Dillies et al. - 2013 - A comprehensive evaluation of normalization method.pdf}, + journal = {Briefings in Bioinformatics}, + language = {en}, + number = {6} +} + +@article{dobinSTARUltrafastUniversal2013, + title = {{{STAR}}: Ultrafast Universal {{RNA}}-Seq Aligner}, + shorttitle = {{{STAR}}}, + author = {Dobin, Alexander and Davis, Carrie A. and Schlesinger, Felix and Drenkow, Jorg and Zaleski, Chris and Jha, Sonali and Batut, Philippe and Chaisson, Mark and Gingeras, Thomas R.}, + year = {2013}, + month = jan, + volume = {29}, + pages = {15--21}, + issn = {1367-4811}, + doi = {10.1093/bioinformatics/bts635}, + abstract = {MOTIVATION: Accurate alignment of high-throughput RNA-seq data is a challenging and yet unsolved problem because of the non-contiguous transcript structure, relatively short read lengths and constantly increasing throughput of the sequencing technologies. Currently available RNA-seq aligners suffer from high mapping error rates, low mapping speed, read length limitation and mapping biases. 
RESULTS: To align our large (\textgreater{}80 billon reads) ENCODE Transcriptome RNA-seq dataset, we developed the Spliced Transcripts Alignment to a Reference (STAR) software based on a previously undescribed RNA-seq alignment algorithm that uses sequential maximum mappable seed search in uncompressed suffix arrays followed by seed clustering and stitching procedure. STAR outperforms other aligners by a factor of \textgreater{}50 in mapping speed, aligning to the human genome 550 million 2 \texttimes{} 76 bp paired-end reads per hour on a modest 12-core server, while at the same time improving alignment sensitivity and precision. In addition to unbiased de novo detection of canonical junctions, STAR can discover non-canonical splices and chimeric (fusion) transcripts, and is also capable of mapping full-length RNA sequences. Using Roche 454 sequencing of reverse transcription polymerase chain reaction amplicons, we experimentally validated 1960 novel intergenic splice junctions with an 80-90\% success rate, corroborating the high precision of the STAR mapping strategy. AVAILABILITY AND IMPLEMENTATION: STAR is implemented as a standalone C++ code. STAR is free open source software distributed under GPLv3 license and can be downloaded from http://code.google.com/p/rna-star/.}, + file = {/Users/laurent/Zotero/storage/6WLPFRCN/Dobin et al. 
- 2013 - STAR ultrafast universal RNA-seq aligner.pdf;/Users/laurent/Zotero/storage/YEUUQ3UW/272537.html}, + journal = {Bioinformatics}, + keywords = {Algorithms,Cluster Analysis,Gene Expression Profiling,Genome,Human,Humans,RNA,RNA Splicing,Sequence Alignment,Sequence Analysis,Software}, + language = {en}, + number = {1}, + pmcid = {PMC3530905}, + pmid = {23104886} +} + +@article{dobribanFlexibleMultipleTesting2018, + title = {Flexible {{Multiple Testing}} with the {{FACT Algorithm}}}, + author = {Dobriban, Edgar}, + year = {2018}, + month = jun, + abstract = {Modern high-throughput science often leads to multiple testing problems: researchers test many hypotheses, wishing to find the significant discoveries. The development of flexible multiple testing methods is thus a central problem in statistics. In this paper, we introduce the new Fast Closed Testing (FACT) method for multiple testing, controlling the family-wise error rate. Our method relies on symmetry and monotonicity to enable the classical closed testing principle in the important setting of large datasets. As the closed testing principle is more than 40 years old, we find it surprising that this simple and fundamental algorithm has not been described before. Our FACT method is general and flexible, and can be used to design powerful new architectures for multiple testing. We showcase it by proposing the Simes-Higher Criticism fusion test, which is powerful for detecting both a few strong signals, and also many moderate signals. 
We illustrate the method in simulations and in a genome-wide association study of coronary artery disease, and obtain more power than with existing methods.}, + archivePrefix = {arXiv}, + eprint = {1806.10163}, + eprinttype = {arxiv}, + file = {/Users/laurent/Zotero/storage/PQKQMMZE/Dobriban - 2018 - Flexible Multiple Testing with the FACT Algorithm.pdf}, + journal = {arXiv:1806.10163 [stat]}, + keywords = {Statistics - Methodology}, + language = {en}, + primaryClass = {stat} +} + +@article{dongTOBMITransomicsBlock2019, + title = {{{TOBMI}}: Trans-Omics Block Missing Data Imputation Using a k-Nearest Neighbor Weighted Approach}, + shorttitle = {{{TOBMI}}}, + author = {Dong, Xuesi and Lin, Lijuan and Zhang, Ruyang and Zhao, Yang and Christiani, David C. and Wei, Yongyue and Chen, Feng}, + year = {2019}, + month = apr, + volume = {35}, + pages = {1278--1283}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty796}, + abstract = {AbstractMotivation. Stitching together trans-omics data is a powerful approach to assess the complex mechanisms of cancer occurrence, progression and treatment}, + file = {/Users/laurent/Zotero/storage/NGG7RFM8/5092930.html}, + journal = {Bioinformatics}, + language = {en}, + number = {8} +} + +@article{droriCircosVCFCircosVisualization2017, + title = {{{CircosVCF}}: Circos Visualization of Whole-Genome Sequence Variations Stored in {{VCF}} Files}, + shorttitle = {{{CircosVCF}}}, + author = {Drori, E. and Levy, D. and {Smirin-Yosef}, P. and Rahimi, O. and {Salmon-Divon}, M.}, + year = {2017}, + month = may, + volume = {33}, + pages = {1392--1393}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btw834}, + abstract = {Summary: Visualization of whole-genomic variations in a meaningful manner assists researchers in gaining new insights into the underlying data, especially when it comes in the context of whole genome comparisons. 
CircosVCF is a web based visualization tool for genome-wide variant data described in VCF files, using circos plots. The user friendly interface of CircosVCF supports an interactive design of the circles in the plot, and the integration of additional information such as experimental data or annotations. The provided visualization capabilities give a broad overview of the genomic relationships between genomes, and allow identification of specific meaningful SNPs regions.}, + file = {/Users/laurent/Documents/bibliography/to_read/Drori et al. - 2017 - CircosVCF circos visualization of whole-genome se.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {9} +} + +@article{duanParallelClusteringSingle, + title = {Parallel Clustering of Single Cell Transcriptomic Data with Split-Merge Sampling on {{Dirichlet}} Process Mixtures}, + author = {Duan, Tiehang and Pinto, Jos{\'e} P. and Xie, Xiaohui}, + doi = {10.1093/bioinformatics/bty702}, + abstract = {AbstractMotivation. With the development of droplet based systems, massive single cell transcriptome data has become available, which enables analysis of cellu}, + file = {/Users/laurent/Zotero/storage/FW46GCI5/Duan et al. - Parallel clustering of single cell transcriptomic .pdf;/Users/laurent/Zotero/storage/HKGS9PL8/Duan et al. - 2019 - Parallel clustering of single cell transcriptomic .pdf;/Users/laurent/Zotero/storage/ZCRUG3RA/Duan et al. - Parallel clustering of single cell transcriptomic .pdf;/Users/laurent/Zotero/storage/ZPPV3SQU/Duan et al. - 2019 - Parallel clustering of single cell transcriptomic .pdf;/Users/laurent/Zotero/storage/G9Z2NVG5/5085373.html;/Users/laurent/Zotero/storage/JFSUL8JU/5085373.html}, + journal = {Bioinformatics}, + language = {en} +} + +@article{duanParallelizedInferenceSingle2018, + title = {Parallelized {{Inference}} for {{Single Cell Transcriptomic Clustering}} with {{Split Merge Sampling}} on {{DPMM Model}}}, + author = {Duan, Tiehang and Pinto, Jos{\'e} P. 
and Xie, Xiaohui}, + year = {2018}, + month = feb, + doi = {10.1101/271163}, + abstract = {Motivation: With the development of droplet based systems, massive single cell transcriptome data has become available, which enables analysis of cellular and molecular processes at single cell resolution and is instrumental to understanding many biological processes. While state-of-the-art clustering methods have been applied to the data, they face challenges in the following aspects: (1) the clustering quality still needs to be improved; (2) most models need prior knowledge on number of clusters, which is not always available; (3) there is a demand for faster computational speed. +Results: We propose to tackle these challenges with Parallelized Split Merge Sampling on Dirichlet Process Mixture Model (the Para-DPMM model). Unlike classic DPMM methods that perform sampling on each single data point, the split merge mechanism samples on the cluster level, which significantly improves convergence and optimality of the result. The model is highly parallelized and can utilize the computing power of high performance computing (HPC) clusters, enabling massive inference on huge datasets. Experiment results show the model achieves about 7\% improvement in clustering accuracy for small datasets and more than 20\% improvement for large challenging datasets compared with current widely used models. In the mean time, the model's computing speed is significantly faster. Availability: The model is implemented as a user friendly Matlab package available at https://github.com/tiehangd/Para\_DPMM/tree/master/Para\_DPMM\_package}, + file = {/Users/laurent/Documents/bibliography/to_read/Duan et al. 
- 2018 - Parallelized Inference for Single Cell Transcripto.pdf}, + language = {en} +} + +@article{duDeconvSeqDeconvolutionCell2019, + title = {{{deconvSeq}}: Deconvolution of Cell Mixture Distribution in Sequencing Data}, + shorttitle = {{{deconvSeq}}}, + author = {Du, Rose and Carey, Vince and Weiss, Scott T.}, + year = {2019}, + month = dec, + volume = {35}, + pages = {5095--5102}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz444}, + abstract = {AbstractMotivation. Although single-cell sequencing is becoming more widely available, many tissue samples such as intracranial aneurysms are both fibrous and}, + file = {/Users/laurent/Zotero/storage/3J7B6ML8/Du et al. - 2019 - deconvSeq deconvolution of cell mixture distribut.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {24} +} + +@article{dudleyQuickGuideDeveloping2009, + title = {A {{Quick Guide}} for {{Developing Effective Bioinformatics Programming Skills}}}, + author = {Dudley, Joel T. and Butte, Atul J.}, + editor = {Lewitter, Fran}, + year = {2009}, + month = dec, + volume = {5}, + pages = {e1000589}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1000589}, + file = {/Users/laurent/Documents/bibliography/bioinfo/documentation/Dudley and Butte - 2009 - A Quick Guide for Developing Effective Bioinformat.pdf;/Users/laurent/Documents/bibliography/bioinfo/Dudley and Butte - 2009 - A Quick Guide for Developing Effective Bioinformat.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {12} +} + +@article{duFactoradjustedMultipleTesting2018, + title = {Factor-Adjusted Multiple Testing of Correlations}, + author = {Du, Lilun and Lan, Wei and Luo, Ronghua and Zhong, Pingshou}, + year = {2018}, + month = dec, + volume = {128}, + pages = {34--47}, + issn = {0167-9473}, + doi = {10.1016/j.csda.2018.06.001}, + abstract = {Both global and multiple testing procedures have previously been proposed to untangle the correlation structures among high-dimensional data. 
In this article, we extend the results of both tests to learn the correlations of the factor-adjusted residuals in an approximate factor model, which can be used to simultaneously detect the highly matched pairs of stocks in finance. The factor-adjusted residuals are not observed and estimated using the method of principal components. We theoretically investigate the effects of estimating the factor-adjusted residuals on the subsequent global and multiple testing procedures. Furthermore, we demonstrate that the correlation structure of the factor-adjusted residuals can be recovered if appropriate thresholds are used in the proposed multiple testing procedure. Extensive simulation studies and a real data analysis are presented in which the proposed method is applied to select stock pairs in China's stock market.}, + file = {/Users/laurent/Zotero/storage/YNVJ38WH/Du et al. - 2018 - Factor-adjusted multiple testing of correlations.pdf;/Users/laurent/Zotero/storage/75D69QGI/S0167947318301397.html}, + journal = {Computational Statistics \& Data Analysis}, + keywords = {Factor-adjusted correlation learning,False discovery rate,Model selection consistency,Pairs trading} +} + +@article{durifHighDimensionalClassification2018, + title = {High Dimensional Classification with Combined Adaptive Sparse {{PLS}} and Logistic Regression}, + author = {Durif, Ghislain and Modolo, Laurent and Michaelsson, Jakob and Mold, Jeff E and {Lambert-Lacroix}, Sophie and Picard, Franck}, + year = {2018}, + month = feb, + volume = {34}, + pages = {485--493}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx571}, + abstract = {Motivation: The high dimensionality of genomic data calls for the development of specific classification methodologies, especially to prevent over-optimistic predictions. This challenge can be tackled by compression and variable selection, which combined constitute a powerful framework for classification, as well as data visualization and interpretation. 
However, current proposed combinations lead to unstable and non convergent methods due to inappropriate computational frameworks. We hereby propose a computationally stable and convergent approach for classification in high dimensional based on sparse Partial Least Squares (sparse PLS).}, + file = {/Users/laurent/Documents/bibliography/stats/Durif et al. - 2018 - High dimensional classification with combined adap.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {3} +} + +@article{durifProbabilisticCountMatrix, + title = {Probabilistic Count Matrix Factorization for Single Cell Expression Data Analysis}, + author = {Durif, Ghislain and Modolo, Laurent and Mold, Jeff E. and {Lambert-Lacroix}, Sophie and Picard, Franck}, + doi = {10.1093/bioinformatics/btz177}, + abstract = {AbstractMotivation. The development of high-throughput single-cell sequencing technologies now allows the investigation of the population diversity of cellular}, + file = {/Users/laurent/Zotero/storage/VRHSQ68Y/Durif et al. - Probabilistic count matrix factorization for singl.pdf;/Users/laurent/Zotero/storage/V2D8Q7YN/5378703.html;/Users/laurent/Zotero/storage/XIXBP9KI/5378703.html}, + journal = {Bioinformatics}, + language = {en} +} + +@article{ekstromSequentialRankAgreement2019, + title = {Sequential Rank Agreement Methods for Comparison of Ranked Lists}, + author = {Ekstr{\o}m, Claus Thorn and Gerds, Thomas Alexander and Jensen, Andreas Kryger}, + year = {2019}, + month = oct, + volume = {20}, + pages = {582--598}, + issn = {1465-4644}, + doi = {10.1093/biostatistics/kxy017}, + abstract = {Summary. The comparison of alternative rankings of a set of items is a general and common task in applied statistics. Predictor variables are ranked according}, + file = {/Users/laurent/Zotero/storage/J4QJQZPB/Ekstrøm et al. 
- 2019 - Sequential rank agreement methods for comparison o.pdf;/Users/laurent/Zotero/storage/8ETTH7V4/5032579.html}, + journal = {Biostatistics}, + language = {en}, + number = {4} +} + +@article{elingChallengesMeasuringUnderstanding2019, + title = {Challenges in Measuring and Understanding Biological Noise}, + author = {Eling, Nils and Morgan, Michael D. and Marioni, John C.}, + year = {2019}, + month = may, + issn = {1471-0056, 1471-0064}, + doi = {10.1038/s41576-019-0130-6}, + abstract = {Biochemical reactions are intrinsically stochastic, leading to variation in the production of mRNAs and proteins within cells. In the scientific literature, this source of variation is typically referred to as `noise'. The observed variability in molecular phenotypes arises from a combination of processes that amplify and attenuate noise. Our ability to quantify cell-to-cell variability in numerous biological contexts has been revolutionized by recent advances in single-cell technology, from imaging approaches through to `omics' strategies. However, defining, accurately measuring and disentangling the stochastic and deterministic components of cell-to-cell variability is challenging. In this Review, we discuss the sources, impact and function of molecular phenotypic variability and highlight future directions to understand its role.}, + file = {/Users/laurent/Zotero/storage/EF6EIUG8/Eling et al. - 2019 - Challenges in measuring and understanding biologic.pdf;/Users/laurent/Zotero/storage/NC4XUEN7/Eling et al. - 2019 - Challenges in measuring and understanding biologic.pdf}, + journal = {Nature Reviews Genetics}, + language = {en} +} + +@article{elingCorrectingMeanVarianceDependency2018, + title = {Correcting the {{Mean}}-{{Variance Dependency}} for {{Differential Variability Testing Using Single}}-{{Cell RNA Sequencing Data}}}, + author = {Eling, Nils and Richard, Arianne C. and Richardson, Sylvia and Marioni, John C. 
and Vallejos, Catalina A.}, + year = {2018}, + month = sep, + volume = {7}, + pages = {284--294.e12}, + issn = {2405-4712}, + doi = {10.1016/j.cels.2018.06.011}, + abstract = {Summary +Cell-to-cell transcriptional variability in otherwise homogeneous cell populations plays an important role in tissue function and development. Single-cell RNA sequencing can characterize this variability in a transcriptome-wide manner. However, technical variation and the confounding between variability and mean expression estimates hinder meaningful comparison of expression variability between cell populations. To address this problem, we introduce an analysis approach that extends the BASiCS statistical framework to derive a residual measure of variability that is not confounded by mean expression. This includes a robust procedure for quantifying technical noise in experiments where technical spike-in molecules are not available. We illustrate how our method provides biological insight into the dynamics of cell-to-cell expression variability, highlighting a synchronization of biosynthetic machinery components in immune cells upon activation. In contrast to the uniform up-regulation of the~biosynthetic machinery, CD4+ T~cells show heterogeneous up-regulation of immune-related and lineage-defining genes during activation and differentiation.}, + file = {/Users/laurent/Zotero/storage/6NACJQ7U/Eling et al. - 2018 - Correcting the Mean-Variance Dependency for Differ.pdf;/Users/laurent/Zotero/storage/AUJYUEXP/Eling et al. - 2018 - Correcting the Mean-Variance Dependency for Differ.pdf;/Users/laurent/Zotero/storage/NX44Z379/Eling et al. 
- 2018 - Correcting the Mean-Variance Dependency for Differ.pdf;/Users/laurent/Zotero/storage/NM6ML3BQ/S2405471218302783.html}, + journal = {Cell Systems}, + keywords = {Bayesian,immune activation,single-cell RNA sequencing,statistics,transcriptional noise,variability}, + number = {3} +} + +@article{eraslanDeepLearningNew2019, + title = {Deep Learning: New Computational Modelling Techniques for Genomics}, + shorttitle = {Deep Learning}, + author = {Eraslan, G{\"o}kcen and Avsec, {\v Z}iga and Gagneur, Julien and Theis, Fabian J.}, + year = {2019}, + month = jul, + volume = {20}, + pages = {389--403}, + issn = {1471-0064}, + doi = {10.1038/s41576-019-0122-6}, + abstract = {This Review describes different deep learning techniques and how they can be applied to extract biologically relevant information from large, complex genomic data sets.}, + copyright = {2019 Springer Nature Limited}, + file = {/Users/laurent/Zotero/storage/7EC2RZVF/Eraslan et al. - 2019 - Deep learning new computational modelling techniq.pdf;/Users/laurent/Zotero/storage/Q2SJGF7A/s41576-019-0122-6.html}, + journal = {Nature Reviews Genetics}, + language = {en}, + number = {7} +} + +@article{eraslanSinglecellRNAseqDenoising2019, + title = {Single-Cell {{RNA}}-Seq Denoising Using a Deep Count Autoencoder}, + author = {Eraslan, G{\"o}kcen and Simon, Lukas M. and Mircea, Maria and Mueller, Nikola S. and Theis, Fabian J.}, + year = {2019}, + month = jan, + volume = {10}, + pages = {390}, + issn = {2041-1723}, + doi = {10.1038/s41467-018-07931-2}, + abstract = {Single-cell RNA sequencing is a powerful method to study gene expression, but noise in the data can obstruct analysis. Here the authors develop a denoising method based on a deep count autoencoder network that scales linearly with the number of cells, and therefore is compatible with large data sets.}, + copyright = {2019 The Author(s)}, + file = {/Users/laurent/Zotero/storage/SL8BJRMW/Eraslan et al. 
- 2019 - Single-cell RNA-seq denoising using a deep count a.pdf;/Users/laurent/Zotero/storage/WIR5Q9VN/s41467-018-07931-2.html}, + journal = {Nature Communications}, + language = {en}, + number = {1} +} + +@article{erhardEstimatingPseudocountsFold2018, + title = {Estimating Pseudocounts and Fold Changes for Digital Expression Measurements}, + author = {Erhard, Florian}, + year = {2018}, + month = dec, + volume = {34}, + pages = {4054--4063}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty471}, + abstract = {AbstractMotivation. Fold changes from count based high-throughput experiments such as RNA-seq suffer from a zero-frequency problem. To circumvent division by z}, + file = {/Users/laurent/Zotero/storage/27LPHJ82/Erhard - 2018 - Estimating pseudocounts and fold changes for digit.pdf;/Users/laurent/Zotero/storage/UC4F7IYL/5040306.html}, + journal = {Bioinformatics}, + language = {en}, + number = {23} +} + +@article{erhardScSLAMseqRevealsCore2019, + title = {{{scSLAM}}-Seq Reveals Core Features of Transcription Dynamics in Single Cells}, + author = {Erhard, Florian and Baptista, Marisa A. P. and Krammer, Tobias and Hennig, Thomas and Lange, Marius and Arampatzi, Panagiota and J{\"u}rges, Christopher S. and Theis, Fabian J. 
and Saliba, Antoine-Emmanuel and D{\"o}lken, Lars}, + year = {2019}, + month = jul, + pages = {1}, + issn = {1476-4687}, + doi = {10.1038/s41586-019-1369-y}, + abstract = {A technique known as scSLAM-seq that combines single-cell RNA sequencing with metabolic RNA labelling and nucleoside conversion is used to study the onset of cytomegalovirus infection in single mouse fibroblasts.}, + copyright = {2019 The Author(s), under exclusive licence to Springer Nature Limited}, + file = {/Users/laurent/Zotero/storage/7WQGC45F/login.html}, + journal = {Nature}, + language = {en} +} + +@misc{EvaluationCellType, + title = {Evaluation of {{Cell Type Deconvolution R Packages}} on {{Single Cell RNA}}-Seq {{Data}} - {{Abstract}} - {{Europe PMC}}}, + file = {/Users/laurent/Zotero/storage/V6K7V6IC/ppr100442.html}, + howpublished = {https://europepmc-org.insb.bib.cnrs.fr/article/ppr/ppr100442} +} + +@article{ewelsMultiQCSummarizeAnalysis2016, + title = {{{MultiQC}}: Summarize Analysis Results for Multiple Tools and Samples in a Single Report}, + shorttitle = {{{MultiQC}}}, + author = {Ewels, Philip and Magnusson, M{\aa}ns and Lundin, Sverker and K{\"a}ller, Max}, + year = {2016}, + month = oct, + volume = {32}, + pages = {3047--3048}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btw354}, + abstract = {Abstract. Motivation: Fast and accurate quality control is essential for studies involving next-generation sequencing data. Whilst numerous tools exist to quan}, + file = {/Users/laurent/Zotero/storage/UZSWIEUN/Ewels et al. 
- 2016 - MultiQC summarize analysis results for multiple t.pdf;/Users/laurent/Zotero/storage/I5LGPHG9/2196507.html}, + journal = {Bioinformatics}, + language = {en}, + number = {19} +} + +@article{fangComparativeAnalysisSinglecell2020, + title = {Comparative Analysis of Single-Cell {{RNA}}-Seq Cluster Methods}, + author = {Fang, Jingwen and Yin, Zhaohua and Guo, Chuang}, + year = {2020}, + month = feb, + volume = {2208}, + pages = {020026}, + issn = {0094-243X}, + doi = {10.1063/5.0000336}, + file = {/Users/laurent/Zotero/storage/X3CG2WSY/5.html}, + journal = {AIP Conference Proceedings}, + number = {1} +} + +@inproceedings{fangComparativeAnalysisSinglecell2020a, + title = {Comparative Analysis of Single-Cell {{RNA}}-Seq Cluster Methods}, + booktitle = {{{2ND INTERNATIONAL CONFERENCE ON FRONTIERS OF BIOLOGICAL SCIENCES AND ENGINEERING}} ({{FSBE}} 2019)}, + author = {Fang, Jingwen and Yin, Zhaohua and Guo, Chuang}, + year = {2020}, + pages = {020026}, + address = {{Jinan City, China}}, + doi = {10.1063/5.0000336}, + abstract = {The emerging Single-cell transcriptome sequencing technologies give rise to new resource for cell biology. Transcriptomic landscapes of heterogenetic samples at the single-cell resolution enable characterization of cell sub-types and reveal gene co-expression pattern. Numerous efficient algorithms have been developed to accurately normalize, cluster and visualize cells from single-cell transcriptome sequencing profiles, including but not limited to Seurat, SC3, SIMLR, and SCANPY. However, systematic comparisons of the performance of these scRNA-seq cluster method are lacking. Here, we use 7 gold-standard scRNA-seq datasets with clear label and Tabula Muris, a dataset of millions of single-cell transcriptomes, to evaluate the 4 scRNA-seq cluster method. Results shows that SCANPY is more time-cost-efficient for large-scale data but SC3 is more precise for cell sub-types recall. 
Our quantitative comparison offers an informed choice among 4 scRNA-seq cluster methods, and it provides a hint for further improvements of scRNA-seq analysis methods.}, + file = {/Users/laurent/Zotero/storage/CZPFR8DQ/Fang et al. - 2020 - Comparative analysis of single-cell RNA-seq cluste.pdf}, + language = {en} +} + +@book{FastGappedreadAlignment, + title = {Fast Gapped-Read Alignment with {{Bowtie}} 2} +} + +@article{faustSAMBLASTERFastDuplicate2014, + title = {{{SAMBLASTER}}: Fast Duplicate Marking and Structural Variant Read Extraction}, + shorttitle = {{{SAMBLASTER}}}, + author = {Faust, Gregory G. and Hall, Ira M.}, + year = {2014}, + month = sep, + volume = {30}, + pages = {2503--2505}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btu314}, + abstract = {Motivation: Illumina DNA sequencing is now the predominant source of raw genomic data, and data volumes are growing rapidly. Bioinformatic analysis pipelines are having trouble keeping pace. A common bottleneck in such pipelines is the requirement to read, write, sort and compress large BAM files multiple times., Results: We present SAMBLASTER, a tool that reduces the number of times such costly operations are performed. SAMBLASTER is designed to mark duplicates in read-sorted SAM files as a piped post-pass on DNA aligner output before it is compressed to BAM. In addition, it can simultaneously output into separate files the discordant read-pairs and/or split-read mappings used for structural variant calling. As an alignment post-pass, its own runtime overhead is negligible, while dramatically reducing overall pipeline complexity and runtime. 
As a stand-alone duplicate marking tool, it performs significantly better than PICARD or SAMBAMBA in terms of both speed and memory usage, while achieving nearly identical results., Availability and implementation: SAMBLASTER is open-source C++ code and freely available for download from https://github.com/GregoryFaust/samblaster., Contact: +imh4y@virginia.edu}, + file = {/Users/laurent/Zotero/storage/4Y2PZSHI/Faust and Hall - 2014 - SAMBLASTER fast duplicate marking and structural .pdf}, + journal = {Bioinformatics}, + number = {17}, + pmcid = {PMC4147885}, + pmid = {24812344} +} + +@article{feiScBatchBatchEffect, + title = {{{scBatch}}: {{Batch Effect Correction}} of {{RNA}}-Seq {{Data}} through {{Sample Distance Matrix Adjustment}}}, + shorttitle = {{{scBatch}}}, + author = {Fei, Teng and Yu, Tianwei}, + doi = {10.1093/bioinformatics/btaa097}, + abstract = {AbstractMotivation. Batch effect is a frequent challenge in deep sequencing data analysis that can lead to misleading conclusions. Existing methods do not corr}, + file = {/Users/laurent/Zotero/storage/PCKXMWI9/Fei and Yu - scBatch Batch Effect Correction of RNA-seq Data t.pdf;/Users/laurent/Zotero/storage/CS8B9BYA/5735411.html}, + journal = {Bioinformatics}, + language = {en} +} + +@article{fengCircViewVisualizationExploration2018, + title = {{{CircView}}: A Visualization and Exploration Tool for Circular {{RNAs}}}, + shorttitle = {{{CircView}}}, + author = {Feng, Jing and Xiang, Yu and Xia, Siyu and Liu, Huan and Wang, Jun and Ozguc, Fatma Muge and Lei, Lijun and Kong, Ruoshan and Diao, Lixia and He, Chunjiang and Han, Leng}, + year = {2018}, + month = nov, + volume = {19}, + pages = {1310--1316}, + issn = {1467-5463}, + doi = {10.1093/bib/bbx070}, + abstract = {Abstract. Circular RNAs (circRNAs) are novel rising stars of noncoding RNAs, which are highly abundant and evolutionarily conserved across species. Number of p}, + file = {/Users/laurent/Zotero/storage/RQ3EQX7Y/Feng et al. 
- 2018 - CircView a visualization and exploration tool for.pdf;/Users/laurent/Zotero/storage/G6FE4PNI/3914758.html}, + journal = {Briefings in Bioinformatics}, + language = {en}, + number = {6} +} + +@article{fengScTIMSeekingCellTypeIndicative, + title = {{{scTIM}}: {{Seeking Cell}}-{{Type}}-{{Indicative Marker}} from Single Cell {{RNA}}-Seq Data by Consensus Optimization}, + shorttitle = {{{scTIM}}}, + author = {Feng, Zhanying and Ren, Xianwen and Fang, Yuan and Yin, Yining and Huang, Chutian and Zhao, Yimin and Wang, Yong}, + doi = {10.1093/bioinformatics/btz936}, + abstract = {AbstractMotivation. Single cell RNA-seq data offers us new resource and resolution to study cell type identity and its conversion. However, data analysis is ch}, + file = {/Users/laurent/Zotero/storage/KIQLAVQG/Feng et al. - scTIM Seeking Cell-Type-Indicative Marker from si.pdf;/Users/laurent/Zotero/storage/K2SQ67TR/5679774.html}, + journal = {Bioinformatics}, + language = {en} +} + +@article{finakMASTFlexibleStatistical2015, + title = {{{MAST}}: A Flexible Statistical Framework for Assessing Transcriptional Changes and Characterizing Heterogeneity in Single-Cell {{RNA}} Sequencing Data}, + shorttitle = {{{MAST}}}, + author = {Finak, Greg and McDavid, Andrew and Yajima, Masanao and Deng, Jingyuan and Gersuk, Vivian and Shalek, Alex K. and Slichter, Chloe K. and Miller, Hannah W. and McElrath, M. Juliana and Prlic, Martin and Linsley, Peter S. and Gottardo, Raphael}, + year = {2015}, + month = dec, + volume = {16}, + issn = {1474-760X}, + doi = {10.1186/s13059-015-0844-5}, + abstract = {Single-cell transcriptomics reveals gene expression heterogeneity but suffers from stochastic dropout and characteristic bimodal expression distributions in which expression is either strongly non-zero or non-detectable. We propose a two-part, generalized linear model for such bimodal data that parameterizes both of these features. 
We argue that the cellular detection rate, the fraction of genes expressed in a cell, should be adjusted for as a source of nuisance variation. Our model provides gene set enrichment analysis tailored to single-cell data. It provides insights into how networks of co-expressed genes evolve across an experimental treatment. MAST is available at https://github.com/RGLab/MAST.}, + file = {/Users/laurent/Documents/bibliography/to_read/Finak et al. - 2015 - MAST a flexible statistical framework for assessi.pdf}, + journal = {Genome Biology}, + language = {en}, + number = {1} +} + +@article{fletcherSCRAMPipelineFast2018, + title = {{{SCRAM}}: A Pipeline for Fast Index-Free Small {{RNA}} Read Alignment and Visualization}, + shorttitle = {{{SCRAM}}}, + author = {Fletcher, Stephen J. and Boden, Mikael and Mitter, Neena and Carroll, Bernard J.}, + year = {2018}, + month = aug, + volume = {34}, + pages = {2670--2672}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty161}, + abstract = {AbstractSummary. Small RNAs play key roles in gene regulation, defense against viral pathogens and maintenance of genome stability, though many aspects of thei}, + file = {/Users/laurent/Zotero/storage/U2JWHCKX/Fletcher et al. - 2018 - SCRAM a pipeline for fast index-free small RNA re.pdf;/Users/laurent/Zotero/storage/6CRL8RFT/4938488.html}, + journal = {Bioinformatics}, + language = {en}, + number = {15} +} + +@article{fletez-brantRemovingUnwantedVariation2017, + title = {Removing Unwanted Variation between Samples in {{Hi}}-{{C}} Experiments}, + author = {{Fletez-Brant}, Kipper and Qiu, Yunjiang and Gorkin, David U. and Hu, Ming and Hansen, Kasper D.}, + year = {2017}, + month = nov, + doi = {10.1101/214361}, + abstract = {Hi-C data is commonly normalized using single sample processing methods, with focus on comparisons between regions within a given contact map. Here, we aim to compare contact maps across different samples. 
We demonstrate that unwanted variation is present in Hi-C data on biological replicates, and that this unwanted variation changes across the contact map. We present BNBC, a method for normalization and batch correction of Hi-C data and show that it substantially improves comparisons across samples.}, + file = {/Users/laurent/Zotero/storage/IE5JC566/Fletez-Brant et al. - 2017 - Removing unwanted variation between samples in Hi-.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@article{fongRankbasedTwosampleTests2018, + title = {Rank-Based Two-Sample Tests for Paired Data with Missing Values}, + author = {Fong, Youyi and Huang, Ying and Lemos, Maria P. and Mcelrath, M. Juliana}, + year = {2018}, + month = jul, + volume = {19}, + pages = {281--294}, + issn = {1465-4644}, + doi = {10.1093/biostatistics/kxx039}, + abstract = {SUMMARY. Two-sample location problem is one of the most encountered problems in statistical practice. The two most commonly studied subtypes of two-sample loca}, + file = {/Users/laurent/Zotero/storage/A86MGXBT/Fong et al. - 2018 - Rank-based two-sample tests for paired data with m.pdf;/Users/laurent/Zotero/storage/MBF7DRVJ/4093659.html}, + journal = {Biostatistics}, + language = {en}, + number = {3} +} + +@article{forrowStatisticalOptimalTransport, + title = {Statistical {{Optimal Transport}} via {{Factored Couplings}}}, + author = {Forrow, Aden and H{\"u}tter, Jan-Christian and Nitzan, Mor and Rigollet, Philippe and Schiebinger, Geoffrey and Weed, Jonathan}, + pages = {29}, + abstract = {We propose a new method to estimate Wasserstein distances and optimal transport plans between two probability distributions from samples in high dimension. Unlike plug-in rules that simply replace the true distributions by their empirical counterparts, our method promotes couplings with low transport rank, a new structural assumption that is similar to the nonnegative rank of a matrix. 
Regularizing based on this assumption leads to drastic improvements on high-dimensional data for various tasks, including domain adaptation in single-cell RNA sequencing data. These findings are supported by a theoretical analysis that indicates that the transport rank is key in overcoming the curse of dimensionality inherent to data-driven optimal transport.}, + file = {/Users/laurent/Zotero/storage/9MU3AVAJ/Forrow et al. - 2018 - Statistical Optimal Transport via Factored Couplin.pdf;/Users/laurent/Zotero/storage/EAUJRWG3/Forrow et al. - Statistical Optimal Transport via Factored Couplin.pdf;/Users/laurent/Zotero/storage/FR7GJ5RX/Forrow et al. - 2018 - Statistical Optimal Transport via Factored Couplin.pdf}, + keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}, + language = {en} +} + +@article{fournierADModelBuilder2012, + title = {{{AD Model Builder}}: Using Automatic Differentiation for Statistical Inference of Highly Parameterized Complex Nonlinear Models}, + author = {Fournier, D. A. and Skaug, H. J. and Ancheta, J. and Ianelli, J. and Magnusson, A. and Maunder, M. N. and Nielsen, A. and Sibert, J.}, + year = {2012}, + volume = {27}, + pages = {233--249}, + journal = {Optim. Methods Softw.} +} + +@article{frishbergCellCompositionAnalysis2019, + title = {Cell Composition Analysis of Bulk Genomics Using Single-Cell Data}, + author = {Frishberg, Amit and {Peshes-Yaloz}, Naama and Cohn, Ofir and Rosentul, Diana and Steuerman, Yael and Valadarsky, Liran and Yankovitz, Gal and Mandelboim, Michal and Iraqi, Fuad A. and Amit, Ido and Mayo, Lior and Bacharach, Eran and {Gat-Viks}, Irit}, + year = {2019}, + month = apr, + volume = {16}, + pages = {327--332}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/s41592-019-0355-5}, + file = {/Users/laurent/Zotero/storage/7THWAPRV/Frishberg et al. - 2019 - Cell composition analysis of bulk genomics using s.pdf;/Users/laurent/Zotero/storage/F77D4QFV/Frishberg et al. 
- 2019 - Cell composition analysis of bulk genomics using s.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {4} +} + +@misc{FrontiersNormalizationMethods, + title = {Frontiers | {{Normalization Methods}} on {{Single}}-{{Cell RNA}}-Seq {{Data}}: {{An Empirical Survey}} | {{Genetics}}}, + file = {/Users/laurent/Zotero/storage/EXBZGZ4H/full.html}, + howpublished = {https://www.frontiersin.org/articles/10.3389/fgene.2020.00041/full} +} + +@article{fuCountingIndividualDNA2011, + title = {Counting Individual {{DNA}} Molecules by the Stochastic Attachment of Diverse Labels}, + author = {Fu, G. K. and Hu, J. and Wang, P.-H. and Fodor, S. P. A.}, + year = {2011}, + month = may, + volume = {108}, + pages = {9026--9031}, + issn = {0027-8424, 1091-6490}, + doi = {10.1073/pnas.1017621108}, + file = {/Users/laurent/Documents/bibliography/to_read/Fu et al. - 2011 - Counting individual DNA molecules by the stochasti.pdf}, + journal = {Proceedings of the National Academy of Sciences}, + language = {en}, + number = {22} +} + +@article{ganelSVScoreImpactPrediction2016, + title = {{{SVScore}}: An Impact Prediction Tool for Structural Variation}, + shorttitle = {{{SVScore}}}, + author = {Ganel, Liron and Abel, Haley J. and {FinMetSeq Consortium} and Hall, Ira M.}, + year = {2016}, + month = dec, + pages = {btw789}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btw789}, + abstract = {Summary: Here we present SVScore, a tool for in silico structural variation (SV) impact prediction. SVScore aggregates per-base single nucleotide polymorphism (SNP) pathogenicity scores across relevant genomic intervals for each SV in a manner that considers variant type, gene features and positional uncertainty. We show that the allele frequency spectrum of high-scoring SVs is strongly skewed toward lower frequencies, suggesting that they are under purifying selection, and that SVScore identifies deleterious variants more effectively than alternative methods. 
Notably, our results also suggest that duplications are under surprisingly strong selection relative to deletions, and that there are a similar number of strongly pathogenic SVs and SNPs in the human population. Availability and Implementation: SVScore is implemented in Perl and available freely at http://www.github.com/lganel/SVScore for use under the MIT license.}, + file = {/Users/laurent/Documents/bibliography/to_read/Ganel et al. - 2016 - SVScore an impact prediction tool for structural .pdf}, + journal = {Bioinformatics}, + language = {en} +} + +@article{gaoClusterMapCompareMultiple2019, + title = {{{ClusterMap}}: Compare Multiple Single Cell {{RNA}}-{{Seq}} Datasets across Different Experimental Conditions}, + shorttitle = {{{ClusterMap}}}, + author = {Gao, Xin and Hu, Deqing and Gogol, Madelaine and Li, Hua}, + year = {2019}, + month = sep, + volume = {35}, + pages = {3038--3045}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz024}, + abstract = {AbstractMotivation. Single cell RNA-Seq (scRNA-Seq) facilitates the characterization of cell type heterogeneity and developmental processes. Further study of s}, + file = {/Users/laurent/Zotero/storage/9VPJSK95/5289328.html}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@techreport{gaoComparisonHighThroughputSingleCell2020, + title = {Comparison of {{High}}-{{Throughput Single}}-{{Cell RNA Sequencing Data Processing Pipelines}}}, + author = {Gao, Mingxuan and Ling, Mingyi and Tang, Xinwei and Wang, Shun and Xiao, Xu and Qiao, Ying and Yang, Wenxian and Yu, Rongshan}, + year = {2020}, + month = feb, + institution = {{Bioinformatics}}, + doi = {10.1101/2020.02.09.940221}, + abstract = {With the development of single-cell RNA sequencing (scRNA-seq) technology, it has become possible to perform large-scale transcript profiling for tens of thousands of cells in a single experiment. 
Many analysis pipelines have been developed for data generated from different high-throughput scRNA-seq platforms, bringing a new challenge to users to choose a proper workflow that is efficient, robust and reliable for a specific sequencing platform. Moreover, as the amount of public scRNA-seq data has increased rapidly, integrated analysis of scRNAseq data from different sources has become increasingly popular. However, it remains unclear whether such integrated analysis would be biased if the data were processed by different upstream pipelines. In this study, we encapsulated seven existing high-throughput scRNA-seq data processing pipelines with Nextflow, a general integrative workflow management framework, and evaluated their performances in terms of running time, computational resource consumption, and data processing consistency using nine public datasets generated from five different high-throughput scRNA-seq platforms. Our work provides a useful guideline for the selection of scRNA-seq data processing pipelines based on their performances on different real datasets. In addition, these guidelines can serve as a performance evaluation framework for future developments in high-throughput scRNA-seq data processing.}, + file = {/Users/laurent/Zotero/storage/WDRN4DPI/Gao et al. - 2020 - Comparison of High-Throughput Single-Cell RNA Sequ.pdf}, + language = {en}, + type = {Preprint} +} + +@article{garrido-martinGgsashimiSashimiPlot2018, + title = {Ggsashimi: {{Sashimi}} Plot Revised for Browser- and Annotation-Independent Splicing Visualization}, + shorttitle = {Ggsashimi}, + author = {{Garrido-Mart{\'i}n}, Diego and Palumbo, Emilio and Guig{\'o}, Roderic and Breschi, Alessandra}, + year = {2018}, + month = aug, + volume = {14}, + pages = {e1006360}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1006360}, + abstract = {We present ggsashimi, a command-line tool for the visualization of splicing events across multiple samples. 
Given a specified genomic region, ggsashimi creates sashimi plots for individual RNA-seq experiments as well as aggregated plots for groups of experiments, a feature unique to this software. Compared to the existing versions of programs generating sashimi plots, it uses popular bioinformatics file formats, it is annotation-independent, and allows the visualization of splicing events even for large genomic regions by scaling down the genomic segments between splice sites. ggsashimi is freely available at https://github.com/guigolab/ggsashimi. It is implemented in python, and internally generates R code for plotting.}, + file = {/Users/laurent/Zotero/storage/EDW9V4UU/Garrido-Martín et al. - 2018 - ggsashimi Sashimi plot revised for browser- and a.pdf;/Users/laurent/Zotero/storage/EEI4D6UM/Garrido-Martín et al. - 2018 - ggsashimi Sashimi plot revised for browser- and a.pdf;/Users/laurent/Zotero/storage/2VEW3CN6/article.html;/Users/laurent/Zotero/storage/LTAZFQYH/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Alternative splicing,Comparative genomics,Data visualization,Genome analysis,Genome annotation,Genomic libraries,Introns,RNA sequencing}, + language = {en}, + number = {8} +} + +@article{gehringHighlyMultiplexedSinglecell2019, + title = {Highly Multiplexed Single-Cell {{RNA}}-Seq by {{DNA}} Oligonucleotide Tagging of Cellular Proteins}, + author = {Gehring, Jase and Park, Jong Hwee and Chen, Sisi and Thomson, Matthew and Pachter, Lior}, + year = {2019}, + month = dec, + pages = {1--4}, + issn = {1546-1696}, + doi = {10.1038/s41587-019-0372-z}, + abstract = {Single-cell RNA sequencing is readily multiplexed by labeling cells with ClickTags.}, + copyright = {2019 The Author(s), under exclusive licence to Springer Nature America, Inc.}, + file = {/Users/laurent/Zotero/storage/J9IVNZC5/Gehring et al. 
- 2019 - Highly multiplexed single-cell RNA-seq by DNA olig.pdf;/Users/laurent/Zotero/storage/SMLFTT9T/s41587-019-0372-z.html}, + journal = {Nature Biotechnology}, + language = {en} +} + +@article{genestSemiparametricEstimationProcedure1995, + title = {A Semiparametric Estimation Procedure of Dependence Parameters in Multivariate Families of Distributions}, + author = {Genest, C. and Ghoudi, K. and Rivest, L.-P.}, + year = {1995}, + volume = {82}, + pages = {543--552}, + issn = {0006-3444, 1464-3510}, + doi = {10.1093/biomet/82.3.543}, + journal = {Biometrika}, + language = {en}, + number = {3} +} + +@misc{GenomewideTranscriptomicAnalysis, + title = {A Genome-Wide Transcriptomic Analysis of Protein-Coding Genes in Human Blood Cells | {{Science}}}, + file = {/Users/laurent/Zotero/storage/I6L6UDHS/eaax9198.html}, + howpublished = {https://science.sciencemag.org/content/366/6472/eaax9198} +} + +@article{gerardEmpiricalBayesShrinkage2020, + title = {Empirical {{Bayes}} Shrinkage and False Discovery Rate Estimation, Allowing for Unwanted Variation}, + author = {Gerard, David and Stephens, Matthew}, + year = {2020}, + month = jan, + volume = {21}, + pages = {15--32}, + issn = {1465-4644}, + doi = {10.1093/biostatistics/kxy029}, + abstract = {Summary. We combine two important ideas in the analysis of large-scale genomics experiments (e.g. 
experiments that aim to identify genes that are differentiall}, + file = {/Users/laurent/Zotero/storage/IVVAVYGW/Gerard and Stephens - 2020 - Empirical Bayes shrinkage and false discovery rate.pdf;/Users/laurent/Zotero/storage/E8R67HR8/5050477.html}, + journal = {Biostatistics}, + language = {en}, + number = {1} +} + +@article{geSupervisedAdversarialAlignment2020b, + title = {Supervised {{Adversarial Alignment}} of {{Single}}-{{Cell RNA}}-Seq {{Data}}}, + author = {Ge, Songwei and Wang, Haohan and Alavi, Amir and Xing, Eric and {Bar-Joseph}, Ziv}, + year = {2020}, + month = jan, + doi = {10.1101/2020.01.06.896621}, + abstract = {Dimensionality reduction is an important first step in the analysis of single cell RNA-seq (scRNA-seq) data. In addition to enabling the visualization of the profiled cells, such representations are used by many downstream analyses methods ranging from pseudo-time reconstruction to clustering to alignment of scRNA-seq data from different experiments, platforms, and labs. Both supervised and unsupervised methods have been proposed to reduce the dimension of scRNA-seq. However, all methods to date are sensitive to batch effects. When batches correlate with cell types, as is often the case, their impact can lead to representations that are batch rather than cell type specific. To overcome this we developed a domain adversarial neural network model for learning a reduced dimension representation of scRNA-seq data. The adversarial model tries to simultaneously optimize two objectives. The first is the accuracy of cell type assignment and the second is the inability to distinguish the batch (domain). We tested the method by using the resulting representation to align several different datasets. As we show, by overcoming batch effects our method was able to correctly separate cell types, improving on several prior methods suggested for this task. 
Analysis of the top features used by the network indicates that by taking the batch impact into account, the reduced representation is much better able to focus on key genes for each cell type.}, + file = {/Users/laurent/Zotero/storage/6M6RGL78/Ge et al. - 2020 - Supervised Adversarial Alignment of Single-Cell RN.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@article{geSupervisedAdversarialAlignment2020c, + title = {Supervised {{Adversarial Alignment}} of {{Single}}-{{Cell RNA}}-Seq {{Data}}}, + author = {Ge, Songwei and Wang, Haohan and Alavi, Amir and Xing, Eric and {Bar-Joseph}, Ziv}, + year = {2020}, + month = jan, + doi = {10.1101/2020.01.06.896621}, + abstract = {Dimensionality reduction is an important first step in the analysis of single cell RNA-seq (scRNA-seq) data. In addition to enabling the visualization of the profiled cells, such representations are used by many downstream analyses methods ranging from pseudo-time reconstruction to clustering to alignment of scRNA-seq data from different experiments, platforms, and labs. Both supervised and unsupervised methods have been proposed to reduce the dimension of scRNA-seq. However, all methods to date are sensitive to batch effects. When batches correlate with cell types, as is often the case, their impact can lead to representations that are batch rather than cell type specific. To overcome this we developed a domain adversarial neural network model for learning a reduced dimension representation of scRNA-seq data. The adversarial model tries to simultaneously optimize two objectives. The first is the accuracy of cell type assignment and the second is the inability to distinguish the batch (domain). We tested the method by using the resulting representation to align several different datasets. As we show, by overcoming batch effects our method was able to correctly separate cell types, improving on several prior methods suggested for this task. 
Analysis of the top features used by the network indicates that by taking the batch impact into account, the reduced representation is much better able to focus on key genes for each cell type.}, + file = {/Users/laurent/Zotero/storage/FZSBR9QI/Ge et al. - 2020 - Supervised Adversarial Alignment of Single-Cell RN.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@book{ghahramaniProceedingsTwentyFourthInternational2007, + title = {Proceedings, {{Twenty}}-{{Fourth International Conference}} on {{Machine Learning}}: {{June}} 20th - 24th, {{Oregon State University}}, {{Corvallis}}, {{Oregon}}, {{USA}} ; Co-Located with the {{International Conference}} on {{Inductive Logic Programming}} ({{ILP}} 2007), {{ICML}} and {{ILP}} Held Joint Sessions on the First Day of {{ICML}} 2007}, + shorttitle = {Proceedings, {{Twenty}}-{{Fourth International Conference}} on {{Machine Learning}}}, + editor = {Ghahramani, Zoubin and {Oregon State University}}, + year = {2007}, + address = {{Madison, Wis}}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Ghahramani and Oregon State University - 2007 - Proceedings, Twenty-Fourth International Conferenc.pdf;/Users/laurent/Zotero/storage/CSBSYG6N/Ghahramani and Oregon State University - 2007 - Proceedings, Twenty-Fourth International Conferenc.pdf;/Users/laurent/Zotero/storage/EXN4JIDP/Ghahramani and Oregon State University - 2007 - Proceedings, Twenty-Fourth International Conferenc.pdf;/Users/laurent/Zotero/storage/IWDFSAA5/Ghahramani and Oregon State University - 2007 - Proceedings, Twenty-Fourth International Conferenc.pdf}, + isbn = {978-1-59593-793-3}, + language = {en}, + note = {OCLC: 255822315} +} + +@article{ghanbariDistancePrecisionMatrix2019, + title = {The {{Distance Precision Matrix}}: Computing Networks from Non-Linear Relationships}, + shorttitle = {The {{Distance Precision Matrix}}}, + author = {Ghanbari, Mahsa and Lasserre, Julia and Vingron, Martin}, + year = {2019}, + month = mar, + volume = {35}, + pages = 
{1009--1017}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty724}, + abstract = {AbstractMotivation. Full-order partial correlation, a fundamental approach for network reconstruction, e.g. in the context of gene regulation, relies on the pr}, + file = {/Users/laurent/Zotero/storage/8Z7J2WC9/Ghanbari et al. - 2019 - The Distance Precision Matrix computing networks .pdf;/Users/laurent/Zotero/storage/IDMIUJT6/5079333.html}, + journal = {Bioinformatics}, + language = {en}, + number = {6} +} + +@article{ghuryeModernTechnologiesAlgorithms2019, + title = {Modern Technologies and Algorithms for Scaffolding Assembled Genomes}, + author = {Ghurye, Jay and Pop, Mihai}, + year = {2019}, + month = jun, + volume = {15}, + pages = {e1006994}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1006994}, + abstract = {The computational reconstruction of genome sequences from shotgun sequencing data has been greatly simplified by the advent of sequencing technologies that generate long reads. In the case of relatively small genomes (e.g., bacterial or viral), complete genome sequences can frequently be reconstructed computationally without the need for further experiments. However, large and complex genomes, such as those of most animals and plants, continue to pose significant challenges. In such genomes, assembly software produces incomplete and fragmented reconstructions that require additional experimentally derived information and manual intervention in order to reconstruct individual chromosome arms. Recent technologies originally designed to capture chromatin structure have been shown to effectively complement sequencing data, leading to much more contiguous reconstructions of genomes than previously possible. 
Here, we survey these technologies and the algorithms used to assemble and analyze large eukaryotic genomes, placed within the historical context of genome scaffolding technologies that have been in existence since the dawn of the genomic era.}, + file = {/Users/laurent/Zotero/storage/3TPWYL88/Ghurye and Pop - 2019 - Modern technologies and algorithms for scaffolding.pdf;/Users/laurent/Zotero/storage/LCV7HC4C/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Chromosome structure and function,Genome analysis,Genome complexity,Genomic medicine,Genomics,Human genomics,Plant genomics,Restriction fragment mapping}, + language = {en}, + number = {6} +} + +@article{gierlinskiStatisticalModelsRNAseq2015, + title = {Statistical Models for {{RNA}}-Seq Data Derived from a Two-Condition 48-Replicate Experiment}, + author = {Gierli{\'n}ski, Marek and Cole, Christian and Schofield, Piet{\`a} and Schurch, Nicholas J. and Sherstnev, Alexander and Singh, Vijender and Wrobel, Nicola and Gharbi, Karim and Simpson, Gordon and {Owen-Hughes}, Tom and Blaxter, Mark and Barton, Geoffrey J.}, + year = {2015}, + month = nov, + volume = {31}, + pages = {3625--3630}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btv425}, + abstract = {High-throughput RNA sequencing (RNA-seq) is now the standard method to determine differential gene expression. Identifying differentially expressed genes crucially depends on estimates of read count variability. These estimates are typically based on statistical models such as the negative binomial distribution, which is employed by the tools edgeR, DESeq and cuffdiff. Until now, the validity of these models has usually been tested on either low-replicate RNA-seq data or simulations. Here, a 48-replicate RNA-seq experiment in yeast was performed and data tested against theoretical models. 
The observed gene read counts were consistent with both log-normal and negative binomial distributions, while the mean-variance relation followed the line of constant dispersion parameter of \textasciitilde{}0.01. The high-replicate data also allowed for strict quality control and screening of ``bad'' replicates, which can drastically affect the gene read-count distribution.}, + file = {/Users/laurent/Documents/bibliography/RNASeq/Gierliński et al. - 2015 - Statistical models for RNA-seq data derived from a.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {22} +} + +@article{giletClusteringFeatureSelection, + title = {Clustering with Feature Selection Using Alternating Minimization. {{Application}} to Computational Biology}, + author = {Gilet, Cyprien and Deprez, Marie and Caillau, Jean-Baptiste and Barlaud, Michel}, + pages = {9}, + abstract = {This paper deals with unsupervised clustering with feature selection. The problem is to estimate both labels and a sparse projection matrix of weights. To address this combinatorial non-convex problem maintaining a strict control on the sparsity of the matrix of weights, we propose an alternating minimization of the Frobenius norm criterion. We provide a new efficient algorithm named K-sparse which alternates k-means with projection-gradient minimization. The projection-gradient step is a method of splitting type, with exact projection on the {$\ell_1$} ball to promote sparsity. The convergence of the gradient-projection step is addressed, and a preliminary analysis of the alternating minimization is made. The Frobenius norm criterion converges as the number of iterates in Algorithm K-sparse goes to infinity. Experiments on Single Cell RNA sequencing datasets show that our method significantly improves the results of PCA k-means, spectral clustering, SIMLR, and Sparcl methods. The complexity of K-sparse is linear in the number of samples (cells), so that the method scales up to large datasets. 
Finally, we extend K-sparse to supervised classification.}, + file = {/Users/laurent/Documents/bibliography/stats/Gilet et al. - Clustering with feature selection using alternatin.pdf;/Users/laurent/Zotero/storage/426CS6B8/Gilet et al. - 2017 - Clustering with feature selection using alternatin.pdf;/Users/laurent/Zotero/storage/N7Z8LNR7/Gilet et al. - 2017 - Clustering with feature selection using alternatin.pdf}, + keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning,Statistics - Machine Learning}, + language = {en} +} + +@article{goldmanComparingDistributionsMultiple, + title = {Comparing Distributions by Multiple Testing across Quantiles or {{CDF}} Values}, + author = {Goldman, Matt and Kaplan, David M}, + pages = {55}, + abstract = {When comparing two distributions, it is often helpful to learn at which quantiles or values there is a statistically significant difference. This provides more information than the binary ``reject'' or ``do not reject'' decision of a global goodness-of-fit test. Framing our question as multiple testing across the continuum of quantiles {$\tau$} {$\in$} (0, 1) or values r {$\in$} R, we show that the Kolmogorov\textendash{}Smirnov test (interpreted as a multiple testing procedure) achieves strong control of the familywise error rate. However, its well-known flaw of low sensitivity in the tails remains. We provide an alternative method that retains such strong control of familywise error rate while also having even sensitivity, i.e., equal pointwise type I error rates at each of n \textrightarrow{} {$\infty$} order statistics across the distribution. Our one-sample method computes instantly, using our new formula that also instantly computes goodness-of-fit p-values and uniform confidence bands. To improve power, we also propose stepdown and pre-test procedures that maintain control of the asymptotic familywise error rate. 
One-sample and two-sample cases are considered, as well as extensions to regression discontinuity designs and conditional distributions. Simulations, empirical examples, and code are provided.}, + file = {/Users/laurent/Documents/bibliography/stats/Goldman and Kaplan - Comparing distributions by multiple testing across.pdf}, + language = {en} +} + +@article{gongDrImputeImputingDropout2018, + title = {{{DrImpute}}: Imputing Dropout Events in Single Cell {{RNA}} Sequencing Data}, + shorttitle = {{{DrImpute}}}, + author = {Gong, Wuming and Kwak, Il-Youp and Pota, Pruthvi and {Koyano-Nakagawa}, Naoko and Garry, Daniel J.}, + year = {2018}, + month = jun, + volume = {19}, + pages = {220}, + issn = {1471-2105}, + doi = {10.1186/s12859-018-2226-y}, + abstract = {The single cell RNA sequencing (scRNA-seq) technique begin a new era by allowing the observation of gene expression at the single cell level. However, there is also a large amount of technical and biological noise. Because of the low number of RNA transcriptomes and the stochastic nature of the gene expression pattern, there is a high chance of missing nonzero entries as zero, which are called dropout events.}, + file = {/Users/laurent/Zotero/storage/EMAB4SFF/Gong et al. - 2018 - DrImpute imputing dropout events in single cell R.pdf;/Users/laurent/Zotero/storage/ZMKEIKEC/s12859-018-2226-y.html}, + journal = {BMC Bioinformatics}, + number = {1} +} + +@article{gongTCMVisualizesTrajectories2018, + title = {{{TCM}} Visualizes Trajectories and Cell Populations from Single Cell Data}, + author = {Gong, Wuming and Kwak, Il-Youp and {Koyano-Nakagawa}, Naoko and Pan, Wei and Garry, Daniel J.}, + year = {2018}, + month = jul, + volume = {9}, + pages = {2749}, + issn = {2041-1723}, + doi = {10.1038/s41467-018-05112-9}, + abstract = {Time series single cell expression data has large variance between time points and is challenging for analysis. 
Here, the authors develop a new dimension reduction and data visualization tool for large scale temporal scRNA-seq data which identifies trajectories and subpopulations.}, + copyright = {2018 The Author(s)}, + file = {/Users/laurent/Zotero/storage/H4QDLXC8/Gong et al. - 2018 - TCM visualizes trajectories and cell populations f.pdf;/Users/laurent/Zotero/storage/NH6897CD/s41467-018-05112-9.html}, + journal = {Nature Communications}, + language = {en}, + number = {1} +} + +@article{gorenBinQuasiPeakDetection2018, + title = {{{BinQuasi}}: A Peak Detection Method for {{ChIP}}-Sequencing Data with Biological Replicates}, + shorttitle = {{{BinQuasi}}}, + author = {Goren, Emily and Liu, Peng and Wang, Chao and Wang, Chong}, + year = {2018}, + month = sep, + volume = {34}, + pages = {2909--2917}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty227}, + abstract = {AbstractMotivation. ChIP-seq experiments that are aimed at detecting DNA-protein interactions require biological replication to draw inferential conclusions, h}, + file = {/Users/laurent/Zotero/storage/VG7AL35W/Goren et al. - 2018 - BinQuasi a peak detection method for ChIP-sequenc.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{goubertNovoAssemblyAnnotation2015, + title = {De {{Novo Assembly}} and {{Annotation}} of the {{Asian Tiger Mosquito}} ({{Aedes albopictus}}) {{Repeatome}} with {{dnaPipeTE}} from {{Raw Genomic Reads}} and {{Comparative Analysis}} with the {{Yellow Fever Mosquito}} ({{Aedes aegypti}})}, + author = {Goubert, Cl{\'e}ment and Modolo, Laurent and Vieira, Cristina and ValienteMoro, Claire and Mavingui, Patrick and Boulesteix, Matthieu}, + year = {2015}, + month = apr, + volume = {7}, + pages = {1192--1205}, + issn = {1759-6653}, + doi = {10.1093/gbe/evv050}, + abstract = {Repetitive DNA, including transposable elements (TEs), is found throughout eukaryotic genomes. 
Annotating and assembling the ``repeatome'' during genome-wide analysis often poses a challenge. To address this problem, we present dnaPipeTE\textemdash{}a new bioinformatics pipeline that uses a sample of raw genomic reads. It produces precise estimates of repeated DNA content and TE consensus sequences, as well as the relative ages of TE families. We shows that dnaPipeTE performs well using very low coverage sequencing in different genomes, losing accuracy only with old TE families. We applied this pipeline to the genome of the Asian tiger mosquito Aedes albopictus, an invasive species of human health interest, for which the genome size is estimated to be over 1 Gbp. Using dnaPipeTE, we showed that this species harbors a large (50\% of the genome) and potentially active repeatome with an overall TE class and order composition similar to that of Aedes aegypti, the yellow fever mosquito. However, intraorder dynamics show clear distinctions between the two species, with differences at the TE family level. Our pipeline's ability to manage the repeatome annotation problem will make it helpful for new or ongoing assembly projects, and our results will benefit future genomic studies of A. albopictus.}, + file = {/Users/laurent/Zotero/storage/8C2P5BW9/Goubert et al. - 2015 - De Novo Assembly and Annotation of the Asian Tiger.pdf;/Users/laurent/Zotero/storage/ILUVI8EK/533768.html}, + journal = {Genome Biology and Evolution}, + language = {en}, + number = {4} +} + +@article{govekClusteringindependentAnalysisGenomic2019, + title = {Clustering-Independent Analysis of Genomic Data Using Spectral Simplicial Theory}, + author = {Govek, Kiya W. and Yamajala, Venkata S. and Camara, Pablo G.}, + editor = {Arsuaga, Javier}, + year = {2019}, + month = nov, + volume = {15}, + pages = {e1007509}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1007509}, + file = {/Users/laurent/Zotero/storage/RJ4MW4SK/Govek et al. 
- 2019 - Clustering-independent analysis of genomic data us.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {11} +} + +@article{GraphDDPGraphembeddingApproach, + title = {{{GraphDDP}}: A Graph-Embedding Approach to Detect Differentiation Pathways in Single-Cell-Data Using Prior Class Knowledge}, + shorttitle = {{{GraphDDP}}}, + author = {Costa, Fabrizio and Gr{\"u}n, Dominic and Backofen, Rolf}, + year = {2018}, + volume = {9}, + issn = {2041-1723}, + doi = {10.1038/s41467-018-05988-7}, + file = {/Users/laurent/Zotero/storage/L9QN76HK/s41467-018-05988-7.html}, + journal = {Nature Communications}, + language = {en}, + number = {1}, + url = {https://www.nature.com/articles/s41467-018-05988-7} +} + +@article{grauDepLogoVisualizingSequence2019, + title = {{{DepLogo}}: Visualizing Sequence Dependencies in {{R}}}, + shorttitle = {{{DepLogo}}}, + author = {Grau, Jan and Nettling, Martin and Keilwagen, Jens}, + year = {2019}, + month = nov, + volume = {35}, + pages = {4812--4814}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz507}, + abstract = {AbstractSummary. Statistical dependencies are present in a variety of sequence data, but are not discernible from traditional sequence logos. Here, we present}, + file = {/Users/laurent/Zotero/storage/K7WFSTEP/Grau et al. - 2019 - DepLogo visualizing sequence dependencies in R.pdf;/Users/laurent/Zotero/storage/9VD9N2ZK/5521622.html}, + journal = {Bioinformatics}, + language = {en}, + number = {22} +} + +@article{grossoPervasiveTranscriptionReadthrough2015, + title = {Pervasive Transcription Read-through Promotes Aberrant Expression of Oncogenes and {{RNA}} Chimeras in Renal Carcinoma}, + author = {Grosso, Ana R and Leite, Ana P and Carvalho, S{\'i}lvia and Matos, Mafalda R and Martins, Filipa B and V{\'i}tor, Alexandra C and Desterro, Joana MP and {Carmo-Fonseca}, Maria and {de Almeida}, S{\'e}rgio F}, + year = {2015}, + month = nov, + volume = {4}, + issn = {2050-084X}, + doi = {10.7554/eLife.09214}, + file = {/Users/laurent/Documents/bibliography/readthrough/Grosso et al. 
- 2015 - Pervasive transcription read-through promotes aber.pdf}, + journal = {eLife}, + language = {en} +} + +@article{grouxSParKMethodPartition, + title = {{{SPar}}-{{K}}: A Method to Partition {{NGS}} Signal Data}, + shorttitle = {{{SPar}}-{{K}}}, + author = {Groux, Romain and Bucher, Philipp}, + doi = {10.1093/bioinformatics/btz416}, + abstract = {AbstractSummary. We present SPar-K (Signal Partitioning with K-means), a method to search for archetypical chromatin architectures by partitioning a set of gen}, + file = {/Users/laurent/Zotero/storage/GPSJ5JZW/Groux and Bucher - SPar-K a method to partition NGS signal data.pdf;/Users/laurent/Zotero/storage/QUR42C2K/5497248.html}, + journal = {Bioinformatics}, + language = {en} +} + +@article{grunValidationNoiseModels2014, + title = {Validation of Noise Models for Single-Cell Transcriptomics}, + author = {Gr{\"u}n, Dominic and Kester, Lennart and {van Oudenaarden}, Alexander}, + year = {2014}, + month = jun, + volume = {11}, + pages = {637--640}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/nmeth.2930}, + file = {/Users/laurent/Zotero/storage/34EQJTTD/Grün et al. - 2014 - Validation of noise models for single-cell transcr.pdf;/Users/laurent/Zotero/storage/JTPV7HPJ/Grün et al. 
- 2014 - Validation of noise models for single-cell transcr.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {6} +} + +@article{guanAEGSIdentifyingAberrantly2018, + title = {{{AEGS}}: Identifying Aberrantly Expressed Gene Sets for Differential Variability Analysis}, + shorttitle = {{{AEGS}}}, + author = {Guan, Jinting and Chen, Moliang and Ye, Congting and Cai, James J and Ji, Guoli}, + editor = {Stegle, Oliver}, + year = {2018}, + month = mar, + volume = {34}, + pages = {881--883}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx646}, + abstract = {Motivation: In gene expression studies, differential expression (DE) analysis has been widely used to identify genes with shifted expression mean between groups. Recently, differential variability (DV) analysis has been increasingly applied as analyzing changed expression variability (e.g., the changes in expression variance) between groups may reveal underlying genetic heterogeneity and undetected interactions, which has great implications in many fields of biology. An easy-to-use tool for DV analysis is needed.}, + file = {/Users/laurent/Documents/bibliography/to_read/Guan et al. - 2018 - AEGS identifying aberrantly expressed gene sets f.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {5} +} + +@article{guoHopLandSinglecellPseudotime2017, + title = {{{HopLand}}: Single-Cell Pseudotime Recovery Using Continuous {{Hopfield}} Network-Based Modeling of {{Waddington}}'s Epigenetic Landscape}, + shorttitle = {{{HopLand}}}, + author = {Guo, Jing and Zheng, Jie}, + year = {2017}, + month = jul, + volume = {33}, + pages = {i102--i109}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx232}, + abstract = {Motivation: The interpretation of transcriptional dynamics in single-cell data, especially pseudotime estimation, could help understand the transition of gene expression profiles. 
The recovery of pseudotime increases the temporal resolution of single-cell transcriptional data, but is challenging due to the high variability in gene expression between individual cells. Here, we introduce HopLand, a pseudotime recovery method using continuous Hopfield network to map cells to a Waddington's epigenetic landscape. It reveals from the single-cell data the combinatorial regulatory interactions among genes that control the dynamic progression through successive cell states.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Guo and Zheng - 2017 - HopLand single-cell pseudotime recovery using con.pdf;/Users/laurent/Zotero/storage/GSLDCJSX/Guo and Zheng - 2017 - HopLand single-cell pseudotime recovery using con.pdf;/Users/laurent/Zotero/storage/TK5QLQNM/Guo and Zheng - 2017 - HopLand single-cell pseudotime recovery using con.pdf;/Users/laurent/Zotero/storage/ZA5XCBNY/Guo and Zheng - 2017 - HopLand single-cell pseudotime recovery using con.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {14} +} + +@article{guoRepLongNovoRepeat2018, + title = {{{RepLong}}: De Novo Repeat Identification Using Long Read Sequencing Data}, + shorttitle = {{{RepLong}}}, + author = {Guo, Rui and Li, Yan-Ran and He, Shan and {Ou-Yang}, Le and Sun, Yiwen and Zhu, Zexuan}, + year = {2018}, + month = apr, + volume = {34}, + pages = {1099--1107}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx717}, + abstract = {Motivation: The identification of repetitive elements is important in genome assembly and phylogenetic analyses. The existing de novo repeat identification methods exploiting the use of short reads are impotent in identifying long repeats. Since long reads are more likely to cover repeat regions completely, using long reads is more favorable for recognizing long repeats.}, + file = {/Users/laurent/Documents/bibliography/to_read/Guo et al. 
- 2018 - RepLong de novo repeat identification using long .pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {7} +} + +@incollection{guoSingleCellTranscriptomeAnalysis2018, + title = {Single-{{Cell Transcriptome Analysis Using SINCERA Pipeline}}}, + booktitle = {Transcriptome {{Data Analysis}}}, + author = {Guo, Minzhe and Xu, Yan}, + editor = {Wang, Yejun and Sun, Ming-an}, + year = {2018}, + volume = {1751}, + pages = {209--222}, + publisher = {{Springer New York}}, + address = {{New York, NY}}, + doi = {10.1007/978-1-4939-7710-9_15}, + abstract = {Genome-scale single-cell biology has recently emerged as a powerful technology with important implications for both basic and medical research. There are urgent needs for the development of computational methods or analytic pipelines to facilitate large amounts of single-cell RNA-Seq data analysis. Here, we present a detailed protocol for SINCERA (SINgle CEll RNA-Seq profiling Analysis), a generally applicable analytic pipeline for processing single-cell data from a whole organ or sorted cells. The pipeline supports the analysis for the identification of major cell types, cell type-specific gene signatures, and driving forces of given cell types. In this chapter, we provide step-by-step instructions for the functions and features of SINCERA together with application examples to provide a practical guide for the research community. 
SINCERA is implemented in R, licensed under the GNU General Public License v3, and freely available from CCHMC PBGE website, https://research.cchmc.org/pbge/sincera.html.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Guo and Xu - 2018 - Single-Cell Transcriptome Analysis Using SINCERA P.pdf;/Users/laurent/Zotero/storage/462CC6X3/Guo and Xu - 2018 - Single-Cell Transcriptome Analysis Using SINCERA P.pdf;/Users/laurent/Zotero/storage/JA5CBQCS/Guo and Xu - 2018 - Single-Cell Transcriptome Analysis Using SINCERA P.pdf;/Users/laurent/Zotero/storage/L3NPJ5RR/Guo and Xu - 2018 - Single-Cell Transcriptome Analysis Using SINCERA P.pdf}, + isbn = {978-1-4939-7709-3 978-1-4939-7710-9}, + language = {en} +} + +@article{gurevichQUASTQualityAssessment2013, + title = {{{QUAST}}: Quality Assessment Tool for Genome Assemblies}, + shorttitle = {{{QUAST}}}, + author = {Gurevich, Alexey and Saveliev, Vladislav and Vyahhi, Nikolay and Tesler, Glenn}, + year = {2013}, + month = apr, + volume = {29}, + pages = {1072--1075}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btt086}, + abstract = {Summary: Limitations of genome sequencing techniques have led to dozens of assembly algorithms, none of which is perfect. A number of methods for comparing assemblers have been developed, but none is yet a recognized benchmark. Further, most existing methods for comparing assemblies are only applicable to new assemblies of finished genomes; the problem of evaluating assemblies of previously unsequenced species has not been adequately considered. Here, we present QUAST\textemdash{}a quality assessment tool for evaluating and comparing genome assemblies. This tool improves on leading assembly comparison software with new ideas and quality metrics. QUAST can evaluate assemblies both with a reference genome, as well as without a reference. QUAST produces many reports, summary tables and plots to help scientists in their research and in their publications. 
In this study, we used QUAST to compare several genome assemblers on three datasets. QUAST tables and plots for all of them are available in the Supplementary Material, and interactive versions of these reports are on the QUAST website. Availability: http://bioinf.spbau.ru/quast. Contact: gurevich@bioinf.spbau.ru. Supplementary information: Supplementary data are available at Bioinformatics online.}, + file = {/Users/laurent/Zotero/storage/L7C4G9ST/Gurevich et al. - 2013 - QUAST quality assessment tool for genome assembli.pdf}, + journal = {Bioinformatics}, + keywords = {Algorithms,Animals,Chromosome Mapping,Contig Mapping,Escherichia coli,Genome,Genomic Structural Variation,Genomics,Humans,Quality Control,Sequence Alignment,Sequence Analysis; DNA,Software}, + number = {8}, + pmcid = {PMC3624806}, + pmid = {23422339} +} + +@article{gwinnerNetworkbasedAnalysisOmics2016, + title = {Network-Based Analysis of Omics Data: {{The LEAN}} Method}, + shorttitle = {Network-Based Analysis of Omics Data}, + author = {Gwinner, Frederik and Boulday, Gw{\'e}nola and Vandiedonck, Claire and Arnould, Minh and Cardoso, C{\'e}cile and Nikolayeva, Iryna and {Guitart-Pla}, Oriol and Denis, C{\'e}cile V. and Christophe, Olivier D. and Beghain, Johann and {Tournier-Lasserve}, Elisabeth and Schwikowski, Benno}, + year = {2016}, + month = oct, + pages = {btw676}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btw676}, + abstract = {Motivation: Most computational approaches for the analysis of omics data in the context of interaction networks have very long running times, provide single or partial, often heuristic, solutions and/or contain user-tuneable parameters.}, + file = {/Users/laurent/Documents/bibliography/networks/Gwinner et al. - 2016 - Network-based analysis of omics data The LEAN met.pdf}, + journal = {Bioinformatics}, + language = {en} +} + +@book{haagEvolutionNematodeSex2005, + title = {The Evolution of Nematode Sex Determination: 
{{C. elegans}} as a Reference Point for Comparative Biology}, + shorttitle = {The Evolution of Nematode Sex Determination}, + author = {Haag, Eric S.}, + year = {2005}, + month = dec, + publisher = {{WormBook}}, + abstract = {Sex determination was a founding topic of C. elegans research. After three decades of research, a complex signal transduction pathway with multiple layers of regulation has been elucidated. This pathway links karyotype to phenotype by coordinating the development of sexually dimorphic tissues. In this article, this pathway is placed in two broader contexts. The first is that of nematodes and animals in general. The important role of C. elegans studies in revealing the first universally conserved component of metazoan sex determination is discussed, as is the role of cooption of genes into the sex determination and dosage compensation pathways. The second context is that of a subset of more closely related species, with emphasis on other members of the genus Caenorhabditis. Studies reviewed here have determined the gene-level conservation of the known pathway and the relative rates of molecular evolution in conserved components, and made substantial progress in the manipulation of gene activity in non-elegans species. Special attention is paid to the origins of hermaphroditism, which evolved from gonochorism through germline-specific changes in sex determination. Recent studies suggest that the most rapidly evolving aspects of sex determination are germline functions related to evolutionary shifts in mating systems, while somatic sex determination is relatively conservative. From all of these studies, a picture emerges in which C. 
elegans utilizes an intriguing mixture of general and species-specific genes and regulatory mechanisms.}, + keywords = {sex nematodes}, + language = {en}, + pmid = {18050417} +} + +@article{hafemeisterNormalizationVarianceStabilization2019, + title = {Normalization and Variance Stabilization of Single-Cell {{RNA}}-Seq Data Using Regularized Negative Binomial Regression}, + author = {Hafemeister, Christoph and Satija, Rahul}, + year = {2019}, + month = mar, + pages = {576827}, + doi = {10.1101/576827}, + abstract = {Single-cell RNA-seq (scRNA-seq) data exhibits significant cell-to-cell variation due to technical factors, including the number of molecules detected in each cell, which can confound biological heterogeneity with technical effects. To address this, we present a modeling framework for the normalization and variance stabilization of molecular count data from scRNA-seq experiments. We propose that the Pearson residuals from 'regularized negative binomial regression', where cellular sequencing depth is utilized as a covariate in a generalized linear model, successfully remove the influence of technical characteristics from downstream analyses while preserving biological heterogeneity. Importantly, we show that an unconstrained negative binomial model may overfit scRNA-seq data, and overcome this by pooling information across genes with similar abundances to obtain stable parameter estimates. Our procedure omits the need for heuristic steps including pseudocount addition or log-transformation, and improves common downstream analytical tasks such as variable gene selection, dimensional reduction, and differential expression. Our approach can be applied to any UMI-based scRNA-seq dataset and is freely available as part of the R package sctransform, with a direct interface to our single-cell toolkit Seurat.}, + copyright = {\textcopyright{} 2019, Posted by Cold Spring Harbor Laboratory. 
This pre-print is available under a Creative Commons License (Attribution-NonCommercial-NoDerivs 4.0 International), CC BY-NC-ND 4.0, as described at http://creativecommons.org/licenses/by-nc-nd/4.0/}, + file = {/Users/laurent/Zotero/storage/68HLIJIL/Hafemeister and Satija - 2019 - Normalization and variance stabilization of single.pdf;/Users/laurent/Zotero/storage/BJQYKWYA/Hafemeister and Satija - 2019 - Normalization and variance stabilization of single.pdf;/Users/laurent/Zotero/storage/ZPSCMTNG/576827v2.html}, + journal = {bioRxiv}, + language = {en} +} + +@article{hagemann-jensenSinglecellRNACounting2019, + title = {Single-Cell {{RNA}} Counting at Allele- and Isoform-Resolution Using {{Smart}}-Seq3}, + author = {{Hagemann-Jensen}, Michael and Ziegenhain, Christoph and Chen, Ping and Ramsk{\"o}ld, Daniel and Hendriks, Gert-Jan and Larsson, Anton J. M. and Faridani, Omid R. and Sandberg, Rickard}, + year = {2019}, + month = oct, + doi = {10.1101/817924}, + abstract = {Large-scale sequencing of RNAs from individual cells can reveal patterns of gene, isoform and allelic expression across cell types and states. However, current single-cell RNA-sequencing (scRNA-seq) methods have limited ability to count RNAs at allele- and isoform resolution, and long-read sequencing techniques lack the depth required for large-scale applications across cells. Here, we introduce Smart-seq3 that combines full-length transcriptome coverage with a 5' unique molecular identifier (UMI) RNA counting strategy that enabled reconstruction of thousands of RNA molecules per cell. Importantly, a large portion of counted and reconstructed RNA molecules could be directly assigned to specific isoforms and allelic origin, and we identified significant transcript isoform regulation in mouse strains and human cell types. Moreover, Smart-seq3 showed a dramatic increase in sensitivity and typically detected thousands more genes per cell than Smart-seq2. 
Altogether, we developed a short-read sequencing strategy for single-cell RNA counting at isoform and allele-resolution applicable to large-scale characterization of cell types and states across tissues and organisms.}, + file = {/Users/laurent/Zotero/storage/BXKVVM2S/Hagemann-Jensen et al. - 2019 - Single-cell RNA counting at allele- and isoform-re.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@article{hagemann-jensenSinglecellRNACounting2019a, + title = {Single-Cell {{RNA}} Counting at Allele- and Isoform-Resolution Using {{Smart}}-Seq3}, + author = {{Hagemann-Jensen}, Michael and Ziegenhain, Christoph and Chen, Ping and Ramsk{\"o}ld, Daniel and Hendriks, Gert-Jan and Larsson, Anton J. M. and Faridani, Omid R. and Sandberg, Rickard}, + year = {2019}, + month = oct, + doi = {10.1101/817924}, + abstract = {Large-scale sequencing of RNAs from individual cells can reveal patterns of gene, isoform and allelic expression across cell types and states. However, current single-cell RNA-sequencing (scRNA-seq) methods have limited ability to count RNAs at allele- and isoform resolution, and long-read sequencing techniques lack the depth required for large-scale applications across cells. Here, we introduce Smart-seq3 that combines full-length transcriptome coverage with a 5' unique molecular identifier (UMI) RNA counting strategy that enabled reconstruction of thousands of RNA molecules per cell. Importantly, a large portion of counted and reconstructed RNA molecules could be directly assigned to specific isoforms and allelic origin, and we identified significant transcript isoform regulation in mouse strains and human cell types. Moreover, Smart-seq3 showed a dramatic increase in sensitivity and typically detected thousands more genes per cell than Smart-seq2. 
Altogether, we developed a short-read sequencing strategy for single-cell RNA counting at isoform and allele-resolution applicable to large-scale characterization of cell types and states across tissues and organisms.}, + file = {/Users/laurent/Zotero/storage/AI9K8WAY/Hagemann-Jensen et al. - 2019 - Single-cell RNA counting at allele- and isoform-re.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@article{hagemann-jensenSinglecellRNACounting2019b, + title = {Single-Cell {{RNA}} Counting at Allele- and Isoform-Resolution Using {{Smart}}-Seq3}, + author = {{Hagemann-Jensen}, Michael and Ziegenhain, Christoph and Chen, Ping and Ramsk{\"o}ld, Daniel and Hendriks, Gert-Jan and Larsson, Anton J. M. and Faridani, Omid R. and Sandberg, Rickard}, + year = {2019}, + month = oct, + doi = {10.1101/817924}, + abstract = {Large-scale sequencing of RNAs from individual cells can reveal patterns of gene, isoform and allelic expression across cell types and states. However, current single-cell RNA-sequencing (scRNA-seq) methods have limited ability to count RNAs at allele- and isoform resolution, and long-read sequencing techniques lack the depth required for large-scale applications across cells. Here, we introduce Smart-seq3 that combines full-length transcriptome coverage with a 5' unique molecular identifier (UMI) RNA counting strategy that enabled reconstruction of thousands of RNA molecules per cell. Importantly, a large portion of counted and reconstructed RNA molecules could be directly assigned to specific isoforms and allelic origin, and we identified significant transcript isoform regulation in mouse strains and human cell types. Moreover, Smart-seq3 showed a dramatic increase in sensitivity and typically detected thousands more genes per cell than Smart-seq2. 
Altogether, we developed a short-read sequencing strategy for single-cell RNA counting at isoform and allele-resolution applicable to large-scale characterization of cell types and states across tissues and organisms.}, + file = {/Users/laurent/Zotero/storage/LENC9GGY/Hagemann-Jensen et al. - 2019 - Single-cell RNA counting at allele- and isoform-re.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@article{hagemann-jensenSinglecellRNACounting2019c, + title = {Single-Cell {{RNA}} Counting at Allele- and Isoform-Resolution Using {{Smart}}-Seq3}, + author = {{Hagemann-Jensen}, Michael and Ziegenhain, Christoph and Chen, Ping and Ramsk{\"o}ld, Daniel and Hendriks, Gert-Jan and Larsson, Anton J. M. and Faridani, Omid R. and Sandberg, Rickard}, + year = {2019}, + month = oct, + doi = {10.1101/817924}, + abstract = {Large-scale sequencing of RNAs from individual cells can reveal patterns of gene, isoform and allelic expression across cell types and states. However, current single-cell RNA-sequencing (scRNA-seq) methods have limited ability to count RNAs at allele- and isoform resolution, and long-read sequencing techniques lack the depth required for large-scale applications across cells. Here, we introduce Smart-seq3 that combines full-length transcriptome coverage with a 5' unique molecular identifier (UMI) RNA counting strategy that enabled reconstruction of thousands of RNA molecules per cell. Importantly, a large portion of counted and reconstructed RNA molecules could be directly assigned to specific isoforms and allelic origin, and we identified significant transcript isoform regulation in mouse strains and human cell types. Moreover, Smart-seq3 showed a dramatic increase in sensitivity and typically detected thousands more genes per cell than Smart-seq2. 
Altogether, we developed a short-read sequencing strategy for single-cell RNA counting at isoform and allele-resolution applicable to large-scale characterization of cell types and states across tissues and organisms.}, + file = {/Users/laurent/Zotero/storage/XV2BSUNT/Hagemann-Jensen et al. - 2019 - Single-cell RNA counting at allele- and isoform-re.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@article{haghverdiBatchEffectsSinglecell2018, + title = {Batch Effects in Single-Cell {{RNA}}-Sequencing Data Are Corrected by Matching Mutual Nearest Neighbors}, + author = {Haghverdi, Laleh and Lun, Aaron T L and Morgan, Michael D and Marioni, John C}, + year = {2018}, + month = apr, + issn = {1087-0156, 1546-1696}, + doi = {10.1038/nbt.4091}, + file = {/Users/laurent/Documents/bibliography/to_read/Haghverdi et al. - 2018 - Batch effects in single-cell RNA-sequencing data a.pdf}, + journal = {Nature Biotechnology}, + language = {en} +} + +@article{hamazakiRAINBOWHaplotypebasedGenomewide2020, + title = {{{RAINBOW}}: {{Haplotype}}-Based Genome-Wide Association Study Using a Novel {{SNP}}-Set Method}, + shorttitle = {{{RAINBOW}}}, + author = {Hamazaki, Kosuke and Iwata, Hiroyoshi}, + year = {2020}, + month = feb, + volume = {16}, + pages = {e1007663}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1007663}, + abstract = {Difficulty in detecting rare variants is one of the problems in conventional genome-wide association studies (GWAS). The problem is closely related to the complex gene compositions comprising multiple alleles, such as haplotypes. Several single nucleotide polymorphism (SNP) set approaches have been proposed to solve this problem. These methods, however, have been rarely discussed in connection with haplotypes. In this study, we developed a novel SNP-set method named ``RAINBOW'' and applied the method to haplotype-based GWAS by regarding a haplotype block as a SNP-set. 
Combining haplotype block estimation and SNP-set GWAS, haplotype-based GWAS can be conducted without prior information of haplotypes. We prepared 100 datasets of simulated phenotypic data and real marker genotype data of Oryza sativa subsp. indica, and performed GWAS of the datasets. We compared the power of our method, the conventional single-SNP GWAS, the conventional haplotype-based GWAS, and the conventional SNP-set GWAS. Our proposed method was shown to be superior to these in three aspects: (1) controlling false positives; (2) in detecting causal variants without relying on the linkage disequilibrium if causal variants were genotyped in the dataset; and (3) it showed greater power than the other methods, i.e., it was able to detect causal variants that were not detected by the others, primarily when the causal variants were located very close to each other, and the directions of their effects were opposite. By using the SNP-set approach as in this study, we expect that detecting not only rare variants but also genes with complex mechanisms, such as genes with multiple causal variants, can be realized. RAINBOW was implemented as an R package named ``RAINBOWR'' and is available from CRAN (https://cran.r-project.org/web/packages/RAINBOWR/index.html) and GitHub (https://github.com/KosukeHamazaki/RAINBOWR).}, + file = {/Users/laurent/Zotero/storage/32XH7T5G/Hamazaki and Iwata - 2020 - RAINBOW Haplotype-based genome-wide association s.pdf;/Users/laurent/Zotero/storage/FLBWWXAT/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Alleles,Genome-wide association studies,Haplotypes,Kernel methods,Rice,Simulation and modeling,Source code,Variant genotypes}, + language = {en}, + number = {2} +} + +@article{hameyDemystifyingBloodStem2017, + title = {Demystifying Blood Stem Cell Fates}, + author = {Hamey, Fiona K. 
and G{\"o}ttgens, Berthold}, + year = {2017}, + month = apr, + volume = {19}, + pages = {261--263}, + issn = {1465-7392, 1476-4679}, + doi = {10.1038/ncb3494}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Hamey and Göttgens - 2017 - Demystifying blood stem cell fates.pdf;/Users/laurent/Zotero/storage/JC2T8BNN/Hamey and Göttgens - 2017 - Demystifying blood stem cell fates.pdf;/Users/laurent/Zotero/storage/YYH8LXXC/Hamey and Göttgens - 2017 - Demystifying blood stem cell fates.pdf;/Users/laurent/Zotero/storage/ZYSXTU5W/Hamey and Göttgens - 2017 - Demystifying blood stem cell fates.pdf}, + journal = {Nature Cell Biology}, + language = {en}, + number = {4} +} + +@article{hanAccurateRapidContinuous2018, + title = {An Accurate and Rapid Continuous Wavelet Dynamic Time Warping Algorithm for End-to-End Mapping in Ultra-Long Nanopore Sequencing}, + author = {Han, Renmin and Li, Yu and Gao, Xin and Wang, Sheng}, + year = {2018}, + month = sep, + volume = {34}, + pages = {i722--i731}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty555}, + abstract = {AbstractMotivation. Long-reads, point-of-care and polymerase chain reaction-free are the promises brought by nanopore sequencing. Among various steps in nanopo}, + file = {/Users/laurent/Zotero/storage/99TT4NUF/Han et al. 
- 2018 - An accurate and rapid continuous wavelet dynamic t.pdf;/Users/laurent/Zotero/storage/RHKZW848/5093233.html}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{hardConbaseSoftwareDiscovery2018, + title = {Conbase: A Software for Discovery of Clonal Somatic Mutations in Single Cells through Read Phasing}, + shorttitle = {Conbase}, + author = {Hard, Joanna and Al Hakim, Ezeddin and Kindblom, Marie and Bjorklund, Asa and Demirci, Ilke and Paterlini, Marta and Sennblad, Bengt and Borgstrom, Erik and Stahl, Patrik L and Michaelsson, Jakob and Mold, Jeff E and Frisen, Jonas}, + year = {2018}, + month = feb, + doi = {10.1101/259994}, + abstract = {Somatic variant analysis in single cells is challenging due to the large fraction of false positive and false negative variant calls. To overcome these obstacles we developed Conbase, a software for calling clonal somatic mutations in genome sequencing data from single cells. Conbase takes advantage of read phasing and observations across the single cell dataset and an unamplified bulk sample, to mitigate effects of amplification errors and other aspects of bioinformatic analysis, including alignment artefacts and an incomplete reference genome with respect to the genome of the donor. This strategy enables determination of presence or absence of mutations despite low read depth and high rates of allelic dropout (ADO) ({$>$}90\%). We demonstrate the potential of Conbase by unambiguously defining the clonal relationships of single cells from two in vivo expanded T cell clones isolated directly from the peripheral blood of a healthy human donor.}, + file = {/Users/laurent/Documents/bibliography/to_read/Hard et al.
- 2018 - Conbase a software for discovery of clonal somati.pdf}, + language = {en} +} + +@article{harmanciMUSICIdentificationEnriched2014, + title = {{{MUSIC}}: Identification of Enriched Regions in {{ChIP}}-{{Seq}} Experiments Using a Mappability-Corrected Multiscale Signal Processing Framework}, + author = {Harmanci, Arif and Rozowsky, Joel and Gerstein, Mark}, + year = {2014}, + pages = {15}, + abstract = {We present MUSIC, a signal processing approach for identification of enriched regions in ChIP-Seq data, available at music.gersteinlab.org. MUSIC first filters the ChIP-Seq read-depth signal for systematic noise from non-uniform mappability, which fragments enriched regions. Then it performs a multiscale decomposition, using median filtering, identifying enriched regions at multiple length scales. This is useful given the wide range of scales probed in ChIP-Seq assays. MUSIC performs favorably in terms of accuracy and reproducibility compared with other methods. In particular, analysis of RNA polymerase II data reveals a clear distinction between the stalled and elongating forms of the polymerase.}, + file = {/Users/laurent/Documents/bibliography/ChipSeq/Harmanci et al. - 2014 - MUSIC identification of enriched regions in ChIP-.pdf}, + language = {en} +} + +@article{hellerSVIMStructuralVariant2019, + title = {{{SVIM}}: Structural Variant Identification Using Mapped Long Reads}, + shorttitle = {{{SVIM}}}, + author = {Heller, David and Vingron, Martin}, + year = {2019}, + month = sep, + volume = {35}, + pages = {2907--2915}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz041}, + abstract = {AbstractMotivation. Structural variants are defined as genomic variants larger than 50~bp. 
They have been shown to affect more bases in any given genome than s}, + file = {/Users/laurent/Zotero/storage/HEIIS4I2/Heller and Vingron - 2019 - SVIM structural variant identification using mapp.pdf;/Users/laurent/Zotero/storage/4JCSDQFJ/5298305.html}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@misc{hengBioAWK2017, + title = {{{BioAWK}}}, + author = {Li, Heng}, + year = {2017} +} + +@article{henriquesDatadrivenReverseEngineering2017, + title = {Data-Driven Reverse Engineering of Signaling Pathways Using Ensembles of Dynamic Models}, + author = {Henriques, David and Villaverde, Alejandro F. and Rocha, Miguel and {Saez-Rodriguez}, Julio and Banga, Julio R.}, + editor = {Tan, Kai}, + year = {2017}, + month = feb, + volume = {13}, + pages = {e1005379}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005379}, + file = {/Users/laurent/Documents/bibliography/networks/Henriques et al. - 2017 - Data-driven reverse engineering of signaling pathw.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {2} +} + +@article{herbachInferringGeneRegulatory2017, + title = {Inferring Gene Regulatory Networks from Single-Cell Data: A Mechanistic Approach}, + shorttitle = {Inferring Gene Regulatory Networks from Single-Cell Data}, + author = {Herbach, Ulysse and Bonnaffoux, Arnaud and Espinasse, Thibault and Gandrillon, Olivier}, + year = {2017}, + month = nov, + volume = {11}, + pages = {105}, + issn = {1752-0509}, + doi = {10.1186/s12918-017-0487-0}, + abstract = {The recent development of single-cell transcriptomics has enabled gene expression to be measured in individual cells instead of being population-averaged. Despite this considerable precision improvement, inferring regulatory networks remains challenging because stochasticity now proves to play a fundamental role in gene expression.
In particular, mRNA synthesis is now acknowledged to occur in a highly bursty manner.}, + file = {/Users/laurent/Zotero/storage/HS6LMTCA/Herbach et al. - 2017 - Inferring gene regulatory networks from single-cel.pdf;/Users/laurent/Zotero/storage/EY8UXRZ3/s12918-017-0487-0.html}, + journal = {BMC Systems Biology}, + number = {1} +} + +@article{hessPartitionedLearningDeep2017, + title = {Partitioned Learning of Deep {{Boltzmann}} Machines for {{SNP}} Data}, + author = {Hess, Moritz and Lenz, Stefan and Bl{\"a}tte, Tamara J. and Bullinger, Lars and Binder, Harald}, + year = {2017}, + month = oct, + volume = {33}, + pages = {3173--3180}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx408}, + abstract = {Motivation: Learning the joint distributions of measurements, and in particular identification of an appropriate low-dimensional manifold, has been found to be a powerful ingredient of deep learning approaches. Yet, such approaches have hardly been applied to single nucleotide polymorphism (SNP) data, probably due to the high number of features typically exceeding the number of studied individuals.}, + file = {/Users/laurent/Documents/bibliography/to_read/Hess et al.
- 2017 - Partitioned learning of deep Boltzmann machines fo.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {20} +} + +@misc{HiCcompareRpackageJoint, + title = {{{HiCcompare}}: An {{R}}-Package for Joint Normalization and Comparison of {{HI}}-{{C}} Datasets}, + doi = {10.1186/s12859-018-2288-x}, + file = {/Users/laurent/Zotero/storage/9NXK65FX/s12859-018-2288-x.html}, + howpublished = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2288-x} +} + +@article{hicksMissingDataTechnical2018, + title = {Missing Data and Technical Variability in Single-Cell {{RNA}}-Sequencing Experiments}, + author = {Hicks, Stephanie C and Townes, F William and Teng, Mingxiang and Irizarry, Rafael A}, + year = {2018}, + month = oct, + volume = {19}, + pages = {562--578}, + issn = {1465-4644, 1468-4357}, + doi = {10.1093/biostatistics/kxx053}, + file = {/Users/laurent/Zotero/storage/9QFSSEKT/Hicks et al. - 2018 - Missing data and technical variability in single-c.pdf;/Users/laurent/Zotero/storage/NVVMPSRU/Hicks et al. - 2018 - Missing data and technical variability in single-c.pdf}, + journal = {Biostatistics}, + language = {en}, + number = {4} +} + +@article{hicksMissingDataTechnical2018a, + title = {Missing Data and Technical Variability in Single-Cell {{RNA}}-Sequencing Experiments}, + author = {Hicks, Stephanie C. and Townes, F. William and Teng, Mingxiang and Irizarry, Rafael A.}, + year = {2018}, + month = oct, + volume = {19}, + pages = {562--578}, + issn = {1465-4644}, + doi = {10.1093/biostatistics/kxx053}, + abstract = {SUMMARY. Until recently, high-throughput gene expression technology, such as RNA-Sequencing (RNA-seq) required hundreds of thousands of cells to produce reliab}, + file = {/Users/laurent/Zotero/storage/34C2RCY5/Hicks et al.
- 2018 - Missing data and technical variability in single-c.pdf;/Users/laurent/Zotero/storage/TIEDNCJD/login.html}, + journal = {Biostatistics}, + language = {en}, + number = {4} +} + +@article{hicksSmoothQuantileNormalization2018, + title = {Smooth Quantile Normalization}, + author = {Hicks, Stephanie C and Okrah, Kwame and Paulson, Joseph N and Quackenbush, John and Irizarry, Rafael A and Bravo, H{\'e}ctor Corrada}, + year = {2018}, + month = apr, + volume = {19}, + pages = {185--198}, + issn = {1465-4644, 1468-4357}, + doi = {10.1093/biostatistics/kxx028}, + abstract = {Between-sample normalization is a critical step in genomic data analysis to remove systematic bias and unwanted technical variation in high-throughput data. Global normalization methods are based on the assumption that observed variability in global properties is due to technical reasons and are unrelated to the biology of interest. For example, some methods correct for differences in sequencing read counts by scaling features to have similar median values across samples, but these fail to reduce other forms of unwanted technical variation. Methods such as quantile normalization transform the statistical distributions across samples to be the same and assume global differences in the distribution are induced by only technical variation. However, it remains unclear how to proceed with normalization if these assumptions are violated, for example, if there are global differences in the statistical distributions between biological conditions or groups, and external information, such as negative or control features, is not available. Here, we introduce a generalization of quantile normalization, referred to as smooth quantile normalization (qsmooth), which is based on the assumption that the statistical distribution of each sample should be the same (or have the same distributional shape) within biological groups or conditions, but allowing that they may differ between groups. 
We illustrate the advantages of our method on several high-throughput datasets with global differences in distributions corresponding to different biological conditions. We also perform a Monte Carlo simulation study to illustrate the bias-variance tradeoff and root mean squared error of qsmooth compared to other global normalization methods. A software implementation is available from https://github.com/stephaniehicks/qsmooth.}, + file = {/Users/laurent/Documents/bibliography/to_read/Hicks et al. - 2018 - Smooth quantile normalization.pdf}, + journal = {Biostatistics}, + language = {en}, + number = {2} +} + +@article{hieEfficientIntegrationHeterogeneous2019, + title = {Efficient Integration of Heterogeneous Single-Cell Transcriptomes Using {{Scanorama}}}, + author = {Hie, Brian and Bryson, Bryan and Berger, Bonnie}, + year = {2019}, + month = may, + issn = {1087-0156, 1546-1696}, + doi = {10.1038/s41587-019-0113-3}, + file = {/Users/laurent/Zotero/storage/B7K344ME/Hie et al. - 2019 - Efficient integration of heterogeneous single-cell.pdf;/Users/laurent/Zotero/storage/D9VCMFSV/Hie et al. - 2019 - Efficient integration of heterogeneous single-cell.pdf}, + journal = {Nature Biotechnology}, + language = {en} +} + +@article{hobzaImpactRepetitiveElements2017, + title = {Impact of {{Repetitive Elements}} on the {{Y Chromosome Formation}} in {{Plants}}}, + author = {Hobza, Roman and Cegan, Radim and Jesionek, Wojciech and Kejnovsky, Eduard and Vyskot, Boris and Kubat, Zdenek}, + year = {2017}, + month = nov, + volume = {8}, + issn = {2073-4425}, + doi = {10.3390/genes8110302}, + abstract = {In contrast to animals, separate sexes and sex chromosomes in plants are very rare. Although the evolution of sex chromosomes has been the subject of numerous studies, the impact of repetitive sequences on sex chromosome architecture is not fully understood. New genomic approaches shed light on the role of satellites and transposable elements in the process of Y chromosome evolution. 
We discuss the impact of repetitive sequences on the structure and dynamics of sex chromosomes with specific focus on Rumex acetosa and Silene latifolia. Recent papers showed that both the expansion and shrinkage of the Y chromosome is influenced by sex-specific regulation of repetitive DNA spread. We present a view that the dynamics of Y chromosome formation is an interplay of genetic and epigenetic processes.}, + journal = {Genes}, + number = {11}, + pmcid = {PMC5704215}, + pmid = {29104214} +} + +@article{hofertLikelihoodInferenceArchimedean2012, + title = {Likelihood Inference for {{Archimedean}} Copulas in High Dimensions under Known Margins}, + author = {Hofert, Marius and M{\"a}chler, Martin and McNeil, Alexander J.}, + year = {2012}, + month = sep, + volume = {110}, + pages = {133--150}, + issn = {0047-259X}, + doi = {10.1016/j.jmva.2012.02.019}, + abstract = {Explicit functional forms for the generator derivatives of well-known one-parameter Archimedean copulas are derived. These derivatives are essential for likelihood inference as they appear in the copula density, conditional distribution functions, and the Kendall distribution function. They are also required for several asymmetric extensions of Archimedean copulas such as Khoudraji-transformed Archimedean copulas. Availability of the generator derivatives in a form that permits fast and accurate computation makes maximum-likelihood estimation for Archimedean copulas feasible, even in large dimensions. It is shown, by large scale simulation of the performance of maximum likelihood estimators under known margins, that the root mean squared error actually decreases with both dimension and sample size at a similar rate. Confidence intervals for the parameter vector are derived under known margins. Moreover, extensions to multi-parameter Archimedean families are given. 
All presented methods are implemented in the R package nacopula and can thus be studied in detail.}, + file = {/Users/laurent/Zotero/storage/6867V3XS/Hofert et al. - 2012 - Likelihood inference for Archimedean copulas in hi.pdf;/Users/laurent/Zotero/storage/9ICECSVK/S0047259X12000607.html}, + journal = {Journal of Multivariate Analysis}, + keywords = {Archimedean copulas,Confidence intervals,Maximum-likelihood estimation,Multi-parameter families}, + language = {en}, + series = {Special {{Issue}} on {{Copula Modeling}} and {{Dependence}}} +} + +@article{hofertLikelihoodInferenceArchimedean2012a, + title = {Likelihood Inference for {{Archimedean}} Copulas in High Dimensions under Known Margins}, + author = {Hofert, Marius and M{\"a}chler, Martin and McNeil, Alexander J.}, + year = {2012}, + month = sep, + volume = {110}, + pages = {133--150}, + issn = {0047-259X}, + doi = {10.1016/j.jmva.2012.02.019}, + abstract = {Explicit functional forms for the generator derivatives of well-known one-parameter Archimedean copulas are derived. These derivatives are essential for likelihood inference as they appear in the copula density, conditional distribution functions, and the Kendall distribution function. They are also required for several asymmetric extensions of Archimedean copulas such as Khoudraji-transformed Archimedean copulas. Availability of the generator derivatives in a form that permits fast and accurate computation makes maximum-likelihood estimation for Archimedean copulas feasible, even in large dimensions. It is shown, by large scale simulation of the performance of maximum likelihood estimators under known margins, that the root mean squared error actually decreases with both dimension and sample size at a similar rate. Confidence intervals for the parameter vector are derived under known margins. Moreover, extensions to multi-parameter Archimedean families are given. 
All presented methods are implemented in the R package nacopula and can thus be studied in detail.}, + file = {/Users/laurent/Zotero/storage/BVV6DAE9/Hofert et al. - 2012 - Likelihood inference for Archimedean copulas in hi.pdf;/Users/laurent/Zotero/storage/U43FLPT3/S0047259X12000607.html}, + journal = {Journal of Multivariate Analysis}, + keywords = {Archimedean copulas,Confidence intervals,Maximum-likelihood estimation,Multi-parameter families}, + language = {en}, + series = {Special {{Issue}} on {{Copula Modeling}} and {{Dependence}}} +} + +@article{hoffBRAKER1UnsupervisedRNASeqBased2016, + title = {{{BRAKER1}}: {{Unsupervised RNA}}-{{Seq}}-{{Based Genome Annotation}} with {{GeneMark}}-{{ET}} and {{AUGUSTUS}}}, + shorttitle = {{{BRAKER1}}}, + author = {Hoff, Katharina J. and Lange, Simone and Lomsadze, Alexandre and Borodovsky, Mark and Stanke, Mario}, + year = {2016}, + month = mar, + volume = {32}, + pages = {767--769}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btv661}, + abstract = {Motivation: Gene finding in eukaryotic genomes is notoriously difficult to automate. The task is to design a work flow with a minimal set of tools that would reach state-of-the-art performance across a wide range of species. GeneMark-ET is a gene prediction tool that incorporates RNA-Seq data into unsupervised training and subsequently generates ab initio gene predictions. AUGUSTUS is a gene finder that usually requires supervised training and uses information from RNA-Seq reads in the prediction step. Complementary strengths of GeneMark-ET and AUGUSTUS provided motivation for designing a new combined tool for automatic gene prediction.}, + file = {/Users/laurent/Zotero/storage/6HFKIFPD/Hoff et al.
- 2016 - BRAKER1 Unsupervised RNA-Seq-Based Genome Annotat.pdf}, + journal = {Bioinformatics}, + keywords = {Eukaryota,Genome,RNA,Sequence Analysis; RNA,Software}, + language = {en}, + number = {5} +} + +@article{hoffmannAccurateMappingTRNA2018, + title = {Accurate Mapping of {{tRNA}} Reads}, + author = {Hoffmann, Anne and Fallmann, J{\"o}rg and Vilardo, Elisa and M{\"o}rl, Mario and Stadler, Peter F and Amman, Fabian}, + year = {2018}, + month = apr, + volume = {34}, + pages = {1116--1124}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx756}, + abstract = {Motivation: Many repetitive DNA elements are transcribed at appreciable expression levels. Mapping the corresponding RNA sequencing reads back to a reference genome is notoriously difficult and error-prone task, however. This is in particular true if chemical modifications introduce systematic mismatches, while at the same time the genomic loci are only approximately identical, as in the case of tRNAs.}, + file = {/Users/laurent/Documents/bibliography/tRNA/Hoffmann et al. - 2018 - Accurate mapping of tRNA reads.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {7} +} + +@article{holtzGeneticMapComparator2017, + title = {The {{Genetic Map Comparator}}: A User-Friendly Application to Display and Compare Genetic Maps}, + shorttitle = {The {{Genetic Map Comparator}}}, + author = {Holtz, Yan and David, Jacques and Ranwez, Vincent}, + year = {2017}, + month = jan, + pages = {btw816}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btw816}, + abstract = {Motivation: Marker-assisted selection strongly relies on genetic maps to accelerate breeding programs. High-density maps are now available for numerous species. Dedicated tools are required to compare several high-density maps on the basis of their key characteristics, while pinpointing their differences and similarities.}, + file = {/Users/laurent/Documents/bibliography/to_read/Holtz et al. 
- 2017 - The Genetic Map Comparator a user-friendly applic.pdf}, + journal = {Bioinformatics}, + language = {en} +} + +@article{howisonMeasurementErrorVariantcalling2019, + title = {Measurement Error and Variant-Calling in Deep {{Illumina}} Sequencing of {{HIV}}}, + author = {Howison, Mark and Coetzer, Mia and Kantor, Rami}, + year = {2019}, + month = jun, + volume = {35}, + pages = {2029--2035}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty919}, + abstract = {AbstractMotivation. Next-generation deep sequencing of viral genomes, particularly on the Illumina platform, is increasingly applied in HIV research. Yet, ther}, + file = {/Users/laurent/Zotero/storage/GG9PCVY2/Howison et al. - 2019 - Measurement error and variant-calling in deep Illu.pdf;/Users/laurent/Zotero/storage/7LPL9FBB/5165375.html}, + journal = {Bioinformatics}, + language = {en}, + number = {12} +} + +@article{huaEstimatingTotalGenome2019, + title = {Estimating the Total Genome Length of a Metagenomic Sample Using K-Mers}, + author = {Hua, Kui and Zhang, Xuegong}, + year = {2019}, + month = apr, + volume = {20}, + pages = {183}, + issn = {1471-2164}, + doi = {10.1186/s12864-019-5467-x}, + abstract = {Metagenomic sequencing is a powerful technology for studying the mixture of microbes or the microbiomes on human and in the environment. One basic task of analyzing metagenomic data is to identify the component genomes in the community. 
This task is challenging due to the complexity of microbiome composition, limited availability of known reference genomes, and usually insufficient sequencing coverage.}, + file = {/Users/laurent/Zotero/storage/FF82LVII/Hua and Zhang - 2019 - Estimating the total genome length of a metagenomi.pdf;/Users/laurent/Zotero/storage/6XNBUQYW/s12864-019-5467-x.html}, + journal = {BMC Genomics}, + number = {2} +} + +@article{huangBRIETranscriptomewideSplicing2017, + title = {{{BRIE}}: Transcriptome-Wide Splicing Quantification in Single Cells}, + shorttitle = {{{BRIE}}}, + author = {Huang, Yuanhua and Sanguinetti, Guido}, + year = {2017}, + month = dec, + volume = {18}, + issn = {1474-760X}, + doi = {10.1186/s13059-017-1248-5}, + abstract = {Single-cell RNA-seq (scRNA-seq) provides a comprehensive measurement of stochasticity in transcription, but the limitations of the technology have prevented its application to dissect variability in RNA processing events such as splicing. Here, we present BRIE (Bayesian regression for isoform estimation), a Bayesian hierarchical model that resolves these problems by learning an informative prior distribution from sequence features. We show that BRIE yields reproducible estimates of exon inclusion ratios in single cells and provides an effective tool for differential isoform quantification between scRNA-seq data sets. 
BRIE, therefore, expands the scope of scRNA-seq experiments to probe the stochasticity of RNA processing.}, + file = {/Users/laurent/Documents/bibliography/to_read/Huang and Sanguinetti - 2017 - BRIE transcriptome-wide splicing quantification i}, + journal = {Genome Biology}, + language = {en}, + number = {1} +} + +@article{huangBRIETranscriptomewideSplicing2017a, + title = {{{BRIE}}: Transcriptome-Wide Splicing Quantification in Single Cells}, + shorttitle = {{{BRIE}}}, + author = {Huang, Yuanhua and Sanguinetti, Guido}, + year = {2017}, + month = dec, + volume = {18}, + pages = {1--11}, + issn = {1474-760X}, + doi = {10.1186/s13059-017-1248-5}, + abstract = {Single-cell RNA-seq (scRNA-seq) provides a comprehensive measurement of stochasticity in transcription, but the limitations of the technology have prevented its application to dissect variability in RNA processing events such as splicing. Here, we present BRIE (Bayesian regression for isoform estimation), a Bayesian hierarchical model that resolves these problems by learning an informative prior distribution from sequence features. We show that BRIE yields reproducible estimates of exon inclusion ratios in single cells and provides an effective tool for differential isoform quantification between scRNA-seq data sets. BRIE, therefore, expands the scope of scRNA-seq experiments to probe the stochasticity of RNA processing.}, + copyright = {2017 The Author(s)}, + file = {/Users/laurent/Zotero/storage/P7AM8AS2/Huang and Sanguinetti - 2017 - BRIE transcriptome-wide splicing quantification i.pdf;/Users/laurent/Zotero/storage/XR7G4MM9/s13059-017-1248-5.html}, + journal = {Genome Biology}, + language = {en}, + number = {1} +} + +@article{huangSAVERGeneExpression2018, + title = {{{SAVER}}: Gene Expression Recovery for Single-Cell {{RNA}} Sequencing}, + shorttitle = {{{SAVER}}}, + author = {Huang, Mo and Wang, Jingshu and Torre, Eduardo and Dueck, Hannah and Shaffer, Sydney and Bonasio, Roberto and Murray, John I. 
and Raj, Arjun and Li, Mingyao and Zhang, Nancy R.}, + year = {2018}, + month = jul, + volume = {15}, + pages = {539--542}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/s41592-018-0033-z}, + file = {/Users/laurent/Zotero/storage/CEYKL2GV/Huang et al. - 2018 - SAVER gene expression recovery for single-cell RN.pdf;/Users/laurent/Zotero/storage/USY63Z6V/Huang et al. - 2018 - SAVER gene expression recovery for single-cell RN.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {7} +} + +@article{huDependencePatternsFinancial2006, + title = {Dependence Patterns across Financial Markets: A Mixed Copula Approach}, + shorttitle = {Dependence Patterns across Financial Markets}, + author = {Hu, Ling}, + year = {2006}, + month = jun, + volume = {16}, + pages = {717--729}, + issn = {0960-3107}, + doi = {10.1080/09603100500426515}, + abstract = {This paper studies the modelling and estimation of dependence across international financial markets, with a focus on the structure of dependence. A new approach is proposed based on a mixed copula model and the model is constructed so that it can capture various patterns of dependence structures. The marginal distribution of asset returns in each market is estimated non-parametrically and a quasi-ML method is used to estimate the mixed copula. The methodology is applied to estimate the dependence across several international stock markets. 
The empirical findings are shown to have some implications that are important for a wide range of multivariate studies in Economics and Finance.}, + file = {/Users/laurent/Zotero/storage/QYIHSGEP/Hu - 2006 - Dependence patterns across financial markets a mi.pdf;/Users/laurent/Zotero/storage/VG3JXDRZ/09603100500426515.html}, + journal = {Applied Financial Economics}, + number = {10} +} + +@article{huHiCNormRemovingBiases2012, + title = {{{HiCNorm}}: Removing Biases in {{Hi}}-{{C}} Data via {{Poisson}} Regression}, + shorttitle = {{{HiCNorm}}}, + author = {Hu, Ming and Deng, Ke and Selvaraj, Siddarth and Qin, Zhaohui and Ren, Bing and Liu, Jun S.}, + year = {2012}, + month = dec, + volume = {28}, + pages = {3131--3133}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bts570}, + abstract = {Summary: We propose a parametric model, HiCNorm, to remove systematic biases in the raw Hi-C contact maps, resulting in a simple, fast, yet accurate normalization procedure. Compared with the existing Hi-C normalization method developed by Yaffe and Tanay, HiCNorm has fewer parameters, runs {$>$}1000 times faster and achieves higher reproducibility. Availability: Freely available on the web at: http://www.people.fas.harvard.edu/{$\sim$}junliu/HiCNorm/. Contact: +jliu@stat.harvard.edu. Supplementary information: Supplementary data are available at Bioinformatics online.}, + file = {/Users/laurent/Zotero/storage/B5DMZJND/Hu et al. - 2012 - HiCNorm removing biases in Hi-C data via Poisson .pdf}, + journal = {Bioinformatics}, + number = {23}, + pmcid = {PMC3509491}, + pmid = {23023982} +} + +@article{huPanoViewIterativeClustering2019, + title = {{{PanoView}}: {{An}} Iterative Clustering Method for Single-Cell {{RNA}} Sequencing Data}, + shorttitle = {{{PanoView}}}, + author = {Hu, Ming-Wen and Kim, Dong Won and Liu, Sheng and Zack, Donald J.
and Blackshaw, Seth and Qian, Jiang}, + year = {2019}, + month = aug, + volume = {15}, + pages = {e1007040}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1007040}, + abstract = {Single-cell RNA-sequencing (scRNA-seq) provides new opportunities to gain a mechanistic understanding of many biological processes. Current approaches for single cell clustering are often sensitive to the input parameters and have difficulty dealing with cell types with different densities. Here, we present Panoramic View (PanoView), an iterative method integrated with a novel density-based clustering, Ordering Local Maximum by Convex hull (OLMC), that uses a heuristic approach to estimate the required parameters based on the input data structures. In each iteration, PanoView will identify the most confident cell clusters and repeat the clustering with the remaining cells in a new PCA space. Without adjusting any parameter in PanoView, we demonstrated that PanoView was able to detect major and rare cell types simultaneously and outperformed other existing methods in both simulated datasets and published single-cell RNA-sequencing datasets. Finally, we conducted scRNA-Seq analysis of embryonic mouse hypothalamus, and PanoView was able to reveal known cell types and several rare cell subpopulations.}, + file = {/Users/laurent/Zotero/storage/EWMUJS7V/Hu et al. 
- 2019 - PanoView An iterative clustering method for singl.pdf;/Users/laurent/Zotero/storage/9W9VAS8T/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Algorithms,Density based clustering,Gene expression,Hypothalamus,Marker genes,Principal component analysis,Simulation and modeling,Transcriptome analysis}, + language = {en}, + number = {8} +} + +@article{huPennDiffDetectingDifferential2018, + title = {{{PennDiff}}: Detecting Differential Alternative Splicing and Transcription by {{RNA}} Sequencing}, + shorttitle = {{{PennDiff}}}, + author = {Hu, Yu and Lin, Jennie and Hu, Jian and Hu, Gang and Wang, Kui and Zhang, Hanrui and Reilly, Muredach P. and Li, Mingyao}, + year = {2018}, + month = jul, + volume = {34}, + pages = {2384--2391}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty097}, + abstract = {AbstractMotivation. Alternative splicing and alternative transcription are a major mechanism for generating transcriptome diversity. Differential alternative s}, + file = {/Users/laurent/Zotero/storage/EGJ335PX/Hu et al. - 2018 - PennDiff detecting differential alternative splic.pdf;/Users/laurent/Zotero/storage/GBZLIJ2A/4883492.html}, + journal = {Bioinformatics}, + language = {en}, + number = {14} +} + +@misc{HybridAssemblyLarge, + title = {Hybrid Assembly of the Large and Highly Repetitive Genome of {{Aegilops tauschii}}, a Progenitor of Bread Wheat, with the {{MaSuRCA}} Mega-Reads Algorithm} +} + +@misc{IntegrativeGenomicsViewer, + title = {Integrative {{Genomics Viewer}}} +} + +@article{jaakkolaComparisonMethodsDetect2016, + title = {Comparison of Methods to Detect Differentially Expressed Genes between Single-Cell Populations}, + author = {Jaakkola, Maria K.
and Seyednasrollah, Fatemeh and Mehmood, Arfa and Elo, Laura L.}, + year = {2016}, + month = jul, + pages = {bbw057}, + issn = {1467-5463, 1477-4054}, + doi = {10.1093/bib/bbw057}, + abstract = {We compared five statistical methods to detect differentially expressed genes between two distinct single-cell populations. Currently, it remains unclear whether differential expression methods developed originally for conventional bulk RNA-seq data can also be applied to single-cell RNA-seq data analysis. Our results in three diverse comparison settings showed marked differences between the different methods in terms of the number of detections as well as their sensitivity and specificity. They, however, did not reveal systematic benefits of the currently available single-cell-specific methods. Instead, our previously introduced reproducibility-optimization method showed good performance in all comparison settings without any single-cell-specific modifications.}, + file = {/Users/laurent/Zotero/storage/M9GQB5D5/Jaakkola et al. - 2017 - Comparison of methods to detect differentially exp.pdf;/Users/laurent/Zotero/storage/P4JHYADV/Jaakkola et al. - 2016 - Comparison of methods to detect differentially exp.pdf;/Users/laurent/Zotero/storage/W97HB4YZ/Jaakkola et al. - 2016 - Comparison of methods to detect differentially exp.pdf}, + journal = {Briefings in Bioinformatics}, + keywords = {comparison,differential expression,Gene Expression,Gene Expression Profiling,reproducibility,Reproducibility of Results,RNA,RNA-seq,Sequence Analysis; RNA,single-cell,Single-Cell Analysis}, + language = {en} +} + +@article{jagannathanTranscomppUnderstandingPhenotypic, + title = {Transcompp: {{Understanding}} Phenotypic Plasticity by Estimating {{Markov}} Transition Rates for Cell State Transitions}, + shorttitle = {Transcompp}, + author = {Jagannathan, N. Suhas and Ihsan, Mario O. and Kin, Xiao Xuan and Welsch, Roy E. 
and Clement, Marie-V{\'e}ronique and {Tucker-Kellogg}, Lisa}, + doi = {10.1093/bioinformatics/btaa021}, + abstract = {AbstractMotivation. Gradual population-level changes in tissues can be driven by stochastic plasticity, meaning rare stochastic transitions of single-cell phen}, + file = {/Users/laurent/Zotero/storage/U2JS897L/Jagannathan et al. - Transcompp Understanding phenotypic plasticity by.pdf;/Users/laurent/Zotero/storage/QBAN2AJE/5714733.html}, + journal = {Bioinformatics}, + language = {en} +} + +@article{jainNanoporeSequencingAssembly2018, + title = {Nanopore Sequencing and Assembly of a Human Genome with Ultra-Long Reads}, + author = {Jain, Miten and Koren, Sergey and Miga, Karen H. and Quick, Josh and Rand, Arthur C. and Sasani, Thomas A. and Tyson, John R. and Beggs, Andrew D. and Dilthey, Alexander T. and Fiddes, Ian T. and Malla, Sunir and Marriott, Hannah and Nieto, Tom and O'Grady, Justin and Olsen, Hugh E. and Pedersen, Brent S. and Rhie, Arang and Richardson, Hollian and Quinlan, Aaron R. and Snutch, Terrance P. and Tee, Louise and Paten, Benedict and Phillippy, Adam M. and Simpson, Jared T. and Loman, Nicholas J. and Loose, Matthew}, + year = {2018}, + month = apr, + volume = {36}, + pages = {338--345}, + issn = {1546-1696}, + doi = {10.1038/nbt.4060}, + abstract = {We report the sequencing and assembly of a reference genome for the human GM12878 Utah/Ceph cell line using the MinION (Oxford Nanopore Technologies) nanopore sequencer. 91.2 Gb of sequence data, representing {$\sim$}30\texttimes{} theoretical coverage, were produced. Reference-based alignment enabled detection of large structural variants and epigenetic modifications. De novo assembly of nanopore reads alone yielded a contiguous assembly (NG50 {$\sim$}3 Mb). We developed a protocol to generate ultra-long reads (N50 \textgreater{} 100 kb, read lengths up to 882 kb).
Incorporating an additional 5\texttimes{} coverage of these ultra-long reads more than doubled the assembly contiguity (NG50 {$\sim$}6.4 Mb). The final assembled genome was 2,867 million bases in size, covering 85.8\% of the reference. Assembly accuracy, after incorporating complementary short-read sequencing data, exceeded 99.8\%. Ultra-long reads enabled assembly and phasing of the 4-Mb major histocompatibility complex (MHC) locus in its entirety, measurement of telomere repeat length, and closure of gaps in the reference human genome assembly GRCh38.}, + copyright = {2018 Nature Publishing Group}, + journal = {Nature Biotechnology}, + language = {en}, + number = {4} +} + +@article{jamiesonBanditApproachMultiple, + title = {A {{Bandit Approach}} to {{Multiple Testing}} with {{False Discovery Control}}}, + author = {Jamieson, Kevin and Jain, Lalit}, + pages = {26}, + abstract = {We propose a new adaptive sampling approach to multiple testing which aims to maximize statistical power while ensuring anytime false discovery control. We consider n distributions whose means are partitioned by whether they are below or equal to a baseline (nulls), versus above the baseline (actual positives). In addition, each distribution can be sequentially and repeatedly sampled. Using techniques from multi-armed bandits, we provide an algorithm that takes as few samples as possible to exceed a target true positive proportion (i.e. proportion of actual positives discovered) while giving anytime control of the false discovery proportion (nulls predicted as actual positives). Our sample complexity results match known information theoretic lower bounds and through simulations we show a substantial performance improvement over uniform sampling and an adaptive elimination style algorithm. 
Given the simplicity of the approach, and its sample efficiency, the method has promise for wide adoption in the biological sciences, clinical testing for drug discovery, and maximization of click through in A/B/n testing problems.}, + file = {/Users/laurent/Zotero/storage/7HQGN635/Jamieson and Jain - A Bandit Approach to Multiple Testing with False D.pdf}, + language = {en} +} + +@article{jangMultiresolutionCorrectionGC2019, + title = {Multiresolution Correction of {{GC}} Bias and Application to Identification of Copy Number Alterations}, + author = {Jang, Ho and Lee, Hyunju}, + year = {2019}, + month = oct, + volume = {35}, + pages = {3890--3897}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz174}, + abstract = {AbstractMotivation. Whole-genome sequencing (WGS) data are affected by various sequencing biases such as GC bias and mappability bias. These biases degrade per}, + file = {/Users/laurent/Zotero/storage/BSUNAVP3/Jang and Lee - 2019 - Multiresolution correction of GC bias and applicat.pdf;/Users/laurent/Zotero/storage/B66NFALU/5378704.html}, + journal = {Bioinformatics}, + language = {en}, + number = {20} +} + +@article{jiaAccountingTechnicalNoise2017, + title = {Accounting for Technical Noise in Differential Expression Analysis of Single-Cell {{RNA}} Sequencing Data}, + author = {Jia, Cheng and Hu, Yu and Kelly, Derek and Kim, Junhyong and Li, Mingyao and Zhang, Nancy R.}, + year = {2017}, + month = nov, + volume = {45}, + pages = {10978--10988}, + issn = {0305-1048, 1362-4962}, + doi = {10.1093/nar/gkx754}, + abstract = {Recent technological breakthroughs have made it possible to measure RNA expression at the single-cell level, thus paving the way for exploring expression heterogeneity among individual cells. Current single-cell RNA sequencing (scRNA-seq) protocols are complex and introduce technical biases that vary across cells, which can bias downstream analysis without proper adjustment.
To account for cell-to-cell technical differences, we propose a statistical framework, TASC (Toolkit for Analysis of Single Cell RNA-seq), an empirical Bayes approach to reliably model the cell-specific dropout rates and amplification bias by use of external RNA spike-ins. TASC incorporates the technical parameters, which reflect cell-to-cell batch effects, into a hierarchical mixture model to estimate the biological variance of a gene and detect differentially expressed genes. More importantly, TASC is able to adjust for covariates to further eliminate confounding that may originate from cell size and cell cycle differences. In simulation and real scRNA-seq data, TASC achieves accurate Type I error control and displays competitive sensitivity and improved robustness to batch effects in differential expression analysis, compared to existing methods. TASC is programmed to be computationally efficient, taking advantage of multi-threaded parallelization. We believe that TASC will provide a robust platform for researchers to leverage the power of scRNA-seq.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Jia et al. - 2017 - Accounting for technical noise in differential exp.pdf;/Users/laurent/Zotero/storage/6RTWMJN6/Jia et al. - 2017 - Accounting for technical noise in differential exp.pdf;/Users/laurent/Zotero/storage/6ZLSZTJ5/Jia et al. - 2017 - Accounting for technical noise in differential exp.pdf;/Users/laurent/Zotero/storage/R6H9E99F/Jia et al.
- 2017 - Accounting for technical noise in differential exp.pdf}, + journal = {Nucleic Acids Research}, + language = {en}, + number = {19} +} + +@article{jiangGiniClustDetectingRare2016, + title = {{{GiniClust}}: Detecting Rare Cell Types from Single-Cell Gene Expression Data with {{Gini}} Index}, + shorttitle = {{{GiniClust}}}, + author = {Jiang, Lan and Chen, Huidong and Pinello, Luca and Yuan, Guo-Cheng}, + year = {2016}, + month = jul, + volume = {17}, + issn = {1474-7596}, + doi = {10.1186/s13059-016-1010-4}, + abstract = {High-throughput single-cell technologies have great potential to discover new cell types; however, it remains challenging to detect rare cell types that are distinct from a large population. We present a novel computational method, called GiniClust, to overcome this challenge. Validation against a benchmark dataset indicates that GiniClust achieves high sensitivity and specificity. Application of GiniClust to public single-cell RNA-seq datasets uncovers previously unrecognized rare cell types, including Zscan4-expressing cells within mouse embryonic stem cells and hemoglobin-expressing cells in the mouse cortex and hippocampus. GiniClust also correctly detects a small number of normal cells that are mixed in a cancer cell population.}, + file = {/Users/laurent/Zotero/storage/ZKVLU3ZS/Jiang et al. - 2016 - GiniClust detecting rare cell types from single-c.pdf}, + journal = {Genome Biology}, + pmcid = {PMC4930624}, + pmid = {27368803} +} + +@article{jiangIntegratingChIPseqOther2018, + title = {Integrating {{ChIP}}-Seq with Other Functional Genomics Data}, + author = {Jiang, Shan and Mortazavi, Ali}, + year = {2018}, + month = mar, + volume = {17}, + pages = {104--115}, + issn = {2041-2649, 2041-2657}, + doi = {10.1093/bfgp/ely002}, + abstract = {Transcription is regulated by transcription factor (TF) binding at promoters and distal regulatory elements and histone modifications that control the accessibility of these elements. 
Chromatin immunoprecipitation followed by sequencing (ChIP-seq) has become the standard assay for identifying genome-wide protein\textendash{}DNA interactions in vitro and in vivo. As large-scale ChIP-seq data sets have been collected for different TFs and histone modifications, their potential to predict gene expression can be used to test hypotheses about the mechanisms of gene regulation. In addition, complementary functional genomics assays provide a global view of chromatin accessibility and long-range cis-regulatory interactions that are being combined with TF binding and histone remodeling to study the regulation of gene expression. Thus, ChIP-seq analysis is now widely integrated with other functional genomics assays to better understand gene regulatory mechanisms. In this review, we discuss advances and challenges in integrating ChIP-seq data to identify context-specific chromatin states associated with gene activity. We describe the overall computational design of integrating ChIP-seq data with other functional genomics assays. We also discuss the challenges of extending these methods to low-input ChIP-seq assays and related single-cell assays.}, + file = {/Users/laurent/Documents/bibliography/to_read/Jiang and Mortazavi - 2018 - Integrating ChIP-seq with other functional genomic.pdf}, + journal = {Briefings in Functional Genomics}, + language = {en}, + number = {2} +} + +@article{jiangModelingAllelespecificGene2017, + title = {Modeling Allele-Specific Gene Expression by Single-Cell {{RNA}} Sequencing}, + author = {Jiang, Yuchao and Zhang, Nancy R and Li, Mingyao}, + year = {2017}, + month = feb, + doi = {10.1101/109629}, + abstract = {Allele-specific expression is traditionally studied by bulk RNA sequencing, which measures average expression across cells. Single-cell RNA sequencing (scRNA-seq) allows the comparison of expression distribution between the two alleles of a diploid organism and thus the characterization of allele-specific bursting.
We propose SCALE to analyze genome-wide allele-specific bursting, with adjustment of technical variability. SCALE detects genes exhibiting allelic differences in bursting parameters, and genes whose alleles burst non-independently. We apply SCALE to mouse blastocyst and human fibroblast cells and find that, globally, cis control in gene expression overwhelmingly manifests as differences in burst frequency.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Jiang et al. - 2017 - Modeling allele-specific gene expression by single.pdf;/Users/laurent/Zotero/storage/LFFNZ6HI/Jiang et al. - 2017 - Modeling allele-specific gene expression by single.pdf;/Users/laurent/Zotero/storage/QH2JGMKR/Jiang et al. - 2017 - Modeling allele-specific gene expression by single.pdf;/Users/laurent/Zotero/storage/Z8LR3RUM/Jiang et al. - 2017 - Modeling allele-specific gene expression by single.pdf}, + language = {en} +} + +@article{jinScAIUnsupervisedApproach2020, + title = {{{scAI}}: An Unsupervised Approach for the Integrative Analysis of Parallel Single-Cell Transcriptomic and Epigenomic Profiles}, + shorttitle = {{{scAI}}}, + author = {Jin, Suoqin and Zhang, Lihua and Nie, Qing}, + year = {2020}, + month = dec, + volume = {21}, + pages = {1--19}, + issn = {1474-760X}, + doi = {10.1186/s13059-020-1932-8}, + abstract = {Simultaneous measurements of transcriptomic and epigenomic profiles in the same individual cells provide an unprecedented opportunity to understand cell fates. However, effective approaches for the integrative analysis of such data are lacking. Here, we present a single-cell aggregation and integration (scAI) method to deconvolute cellular heterogeneity from parallel transcriptomic and epigenomic profiles. Through iterative learning, scAI aggregates sparse epigenomic signals in similar cells learned in an unsupervised manner, allowing coherent fusion with transcriptomic measurements. 
Simulation studies and applications to three real datasets demonstrate its capability of dissecting cellular heterogeneity within both transcriptomic and epigenomic layers and understanding transcriptional regulatory mechanisms.}, + copyright = {2020 The Author(s).}, + file = {/Users/laurent/Zotero/storage/5LF8N3IB/Jin et al. - 2020 - scAI an unsupervised approach for the integrative.pdf;/Users/laurent/Zotero/storage/W8TF64XP/s13059-020-1932-8.html}, + journal = {Genome Biology}, + language = {en}, + number = {1} +} + +@article{jinScEpathEnergyLandscapebased2018, + title = {{{scEpath}}: Energy Landscape-Based Inference of Transition Probabilities and Cellular Trajectories from Single-Cell Transcriptomic Data}, + shorttitle = {{{scEpath}}}, + author = {Jin, Suoqin and MacLean, Adam L. and Peng, Tao and Nie, Qing}, + year = {2018}, + month = jun, + volume = {34}, + pages = {2077--2086}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty058}, + abstract = {AbstractMotivation. Single-cell RNA-sequencing (scRNA-seq) offers unprecedented resolution for studying cellular decision-making processes. Robust inference of}, + file = {/Users/laurent/Zotero/storage/WJPXA7UV/Jin et al. - 2018 - scEpath energy landscape-based inference of trans.pdf;/Users/laurent/Zotero/storage/3ZV44PW4/4838235.html}, + journal = {Bioinformatics}, + language = {en}, + number = {12} +} + +@article{jiSinglecellRegulomeData2017, + title = {Single-Cell Regulome Data Analysis by {{SCRAT}}}, + author = {Ji, Zhicheng and Zhou, Weiqiang and Ji, Hongkai}, + year = {2017}, + month = sep, + volume = {33}, + pages = {2930--2932}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx315}, + abstract = {Summary: Emerging single-cell technologies (e.g. single-cell ATAC-seq, DNase-seq or ChIP-seq) have made it possible to assay regulome of individual cells. Single-cell regulome data are highly sparse and discrete. Analyzing such data is challenging. 
User-friendly software tools are still lacking. We present SCRAT, a Single-Cell Regulome Analysis Toolbox with a graphical user interface, for studying cell heterogeneity using single-cell regulome data. SCRAT can be used to conveniently summarize regulatory activities according to different features (e.g. gene sets, transcription factor binding motif sites, etc.). Using these features, users can identify cell subpopulations in a heterogeneous biological sample, infer cell identities of each subpopulation, and discover distinguishing features such as gene sets and transcription factors that show different activities among subpopulations.}, + file = {/Users/laurent/Documents/bibliography/to_read/Ji et al. - 2017 - Single-cell regulome data analysis by SCRAT.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {18} +} + +@article{johnSpectrumFastDensityaware2020, + title = {Spectrum: Fast Density-Aware Spectral Clustering for Single and Multi-Omic Data}, + shorttitle = {Spectrum}, + author = {John, Christopher R. and Watson, David and Barnes, Michael R. and Pitzalis, Costantino and Lewis, Myles J.}, + year = {2020}, + month = feb, + volume = {36}, + pages = {1159--1166}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz704}, + abstract = {AbstractMotivation. Clustering patient omic data is integral to developing precision medicine because it allows the identification of disease subtypes. A curre}, + file = {/Users/laurent/Zotero/storage/WTNRBURL/John et al. 
- 2020 - Spectrum fast density-aware spectral clustering f.pdf;/Users/laurent/Zotero/storage/JQIR3WSH/5566508.html}, + journal = {Bioinformatics}, + language = {en}, + number = {4} +} + +@article{jungTimesVectorVectorizedClustering2017, + title = {{{TimesVector}}: A Vectorized Clustering Approach to the Analysis of Time Series Transcriptome Data from Multiple Phenotypes}, + shorttitle = {{{TimesVector}}}, + author = {Jung, Inuk and Jo, Kyuri and Kang, Hyejin and Ahn, Hongryul and Yu, Youngjae and Kim, Sun}, + year = {2017}, + month = jan, + pages = {btw780}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btw780}, + abstract = {Motivation: Identifying biologically meaningful gene expression patterns from time series gene expression data is important to understand the underlying biological mechanisms. To identify significantly perturbed gene sets between different phenotypes, analysis of time series transcriptome data requires consideration of time and sample dimensions. Thus, the analysis of such time series data seeks to search gene sets that exhibit similar or different expression patterns between two or more sample conditions, constituting the three-dimensional data, i.e. gene-time-condition. Computational complexity for analyzing such data is very high, compared to the already difficult NP-hard two dimensional biclustering algorithms. Because of this challenge, traditional time series clustering algorithms are designed to capture co-expressed genes with similar expression pattern in two sample conditions.}, + file = {/Users/laurent/Documents/bibliography/to_read/Jung et al. 
- 2017 - TimesVector a vectorized clustering approach to t.pdf}, + journal = {Bioinformatics}, + language = {en} +} + +@article{jurtzIntroductionDeepLearning2017, + title = {An Introduction to Deep Learning on Biological Sequence Data: Examples and Solutions}, + shorttitle = {An Introduction to Deep Learning on Biological Sequence Data}, + author = {Jurtz, Vanessa Isabell and Johansen, Alexander Rosenberg and Nielsen, Morten and Almagro Armenteros, Jose Juan and Nielsen, Henrik and S{\o}nderby, Casper Kaae and Winther, Ole and S{\o}nderby, S{\o}ren Kaae}, + year = {2017}, + month = nov, + volume = {33}, + pages = {3685--3690}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx531}, + abstract = {Motivation: Deep neural network architectures such as convolutional and long short-term memory networks have become increasingly popular as machine learning tools during the recent years. The availability of greater computational resources, more data, new algorithms for training deep models and easy to use libraries for implementation and training of neural networks are the drivers of this development. The use of deep learning has been especially successful in image recognition; and the development of tools, applications and code examples are in most cases centered within this field rather than within biology.}, + file = {/Users/laurent/Documents/bibliography/to_read/Jurtz et al. - 2017 - An introduction to deep learning on biological seq.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {22} +} + +@article{kangCDSeqNovelComplete2019, + title = {{{CDSeq}}: {{A}} Novel Complete Deconvolution Method for Dissecting Heterogeneous Samples Using Gene Expression Data}, + shorttitle = {{{CDSeq}}}, + author = {Kang, Kai and Meng, Qian and Shats, Igor and Umbach, David M. 
and Li, Melissa and Li, Yuanyuan and Li, Xiaoling and Li, Leping}, + year = {2019}, + month = dec, + volume = {15}, + pages = {e1007510}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1007510}, + abstract = {Quantifying cell-type proportions and their corresponding gene expression profiles in tissue samples would enhance understanding of the contributions of individual cell types to the physiological states of the tissue. Current approaches that address tissue heterogeneity have drawbacks. Experimental techniques, such as fluorescence-activated cell sorting, and single cell RNA sequencing are expensive. Computational approaches that use expression data from heterogeneous samples are promising, but most of the current methods estimate either cell-type proportions or cell-type-specific expression profiles by requiring the other as input. Although such partial deconvolution methods have been successfully applied to tumor samples, the additional input required may be unavailable. We introduce a novel complete deconvolution method, CDSeq, that uses only RNA-Seq data from bulk tissue samples to simultaneously estimate both cell-type proportions and cell-type-specific expression profiles. Using several synthetic and real experimental datasets with known cell-type composition and cell-type-specific expression profiles, we compared CDSeq's complete deconvolution performance with seven other established deconvolution methods. Complete deconvolution using CDSeq represents a substantial technical advance over partial deconvolution approaches and will be useful for studying cell mixtures in tissue samples. CDSeq is available at GitHub repository (MATLAB and Octave code): https://github.com/kkang7/CDSeq.}, + file = {/Users/laurent/Zotero/storage/P6MMI768/Kang et al. 
- 2019 - CDSeq A novel complete deconvolution method for d.pdf;/Users/laurent/Zotero/storage/FRXU2JQJ/article.html}, + journal = {PLOS Computational Biology}, + keywords = {B cells,Flow cytometry,Gene expression,Random variables,RNA extraction,Statistical data,T cells,White blood cells}, + language = {en}, + number = {12} +} + +@article{kanterClusterRobustnessScore, + title = {A Cluster Robustness Score for Identifying Cell Subpopulations in Single Cell Gene Expression Datasets from Heterogeneous Tissues and Tumors}, + author = {Kanter, Itamar and Dalerba, Piero and Kalisky, Tomer}, + year = {2019}, + doi = {10.1093/bioinformatics/bty708}, + abstract = {AbstractMotivation. A major aim of single cell biology is to identify important cell types such as stem cells in heterogeneous tissues and tumors. This is typi}, + file = {/Users/laurent/Zotero/storage/JJ3F3JP6/Kanter et al. - 2019 - A cluster robustness score for identifying cell su.pdf;/Users/laurent/Zotero/storage/KAU6Y3J6/Kanter et al. - A cluster robustness score for identifying cell su.pdf;/Users/laurent/Zotero/storage/SDJU76QI/Kanter et al.
- 2019 - A cluster robustness score for identifying cell su.pdf;/Users/laurent/Zotero/storage/82F9CI64/5085371.html;/Users/laurent/Zotero/storage/JSYDTMNZ/5085371.html}, + journal = {Bioinformatics}, + language = {en} +} + +@article{kesterSingleCellTranscriptomicsMeets2018, + title = {Single-{{Cell Transcriptomics Meets Lineage Tracing}}}, + author = {Kester, Lennart and {van Oudenaarden}, Alexander}, + year = {2018}, + month = aug, + volume = {23}, + pages = {166--179}, + issn = {19345909}, + doi = {10.1016/j.stem.2018.04.014}, + file = {/Users/laurent/Zotero/storage/7362TZ75/Kester and van Oudenaarden - 2018 - Single-Cell Transcriptomics Meets Lineage Tracing.pdf;/Users/laurent/Zotero/storage/8JCJVSAC/Kester and van Oudenaarden - 2018 - Single-Cell Transcriptomics Meets Lineage Tracing.pdf}, + journal = {Cell Stem Cell}, + language = {en}, + number = {2} +} + +@article{khalfaouiDropLassoRobustVariant, + title = {{{DropLasso}}: {{A}} Robust Variant of {{Lasso}} for Single Cell {{RNA}}-Seq Data}, + author = {Khalfaoui, Beyrem and Vert, Jean-Philippe}, + pages = {13}, + abstract = {Single-cell RNA sequencing (scRNA-seq) is a fast growing approach to measure the genome-wide transcriptome of many individual cells in parallel, but results in noisy data with many dropout events. Existing methods to learn molecular signatures from bulk transcriptomic data may therefore not be adapted to scRNA-seq data, in order to automatically classify individual cells into predefined classes.}, + file = {/Users/laurent/Documents/bibliography/stats/Khalfaoui and Vert - DropLasso A robust variant of Lasso for single ce.pdf}, + language = {en} +} + +@article{khanChopStitchExonAnnotation2018, + title = {{{ChopStitch}}: Exon Annotation and Splice Graph Construction Using Transcriptome Assembly and Whole Genome Sequencing Data}, + shorttitle = {{{ChopStitch}}}, + author = {Khan, Hamza and Mohamadi, Hamid and Vandervalk, Benjamin P. and Warren, Rene L. 
and Chu, Justin and Birol, Inanc}, + year = {2018}, + month = may, + volume = {34}, + pages = {1697--1704}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btx839}, + abstract = {AbstractMotivation. Sequencing studies on non-model organisms often interrogate both genomes and transcriptomes with massive amounts of short sequences. Such s}, + file = {/Users/laurent/Zotero/storage/DU96ML2L/Khan et al. - 2018 - ChopStitch exon annotation and splice graph const.pdf;/Users/laurent/Zotero/storage/BJUTT4UR/4781691.html}, + journal = {Bioinformatics}, + language = {en}, + number = {10} +} + +@article{kimCellBICBimodalitybasedTopdown2018, + title = {{{CellBIC}}: Bimodality-Based Top-down Clustering of Single-Cell {{RNA}} Sequencing Data Reveals Hierarchical Structure of the Cell Type}, + shorttitle = {{{CellBIC}}}, + author = {Kim, Junil and Stanescu, Diana E. and Won, Kyoung Jae}, + year = {2018}, + month = nov, + volume = {46}, + pages = {e124}, + issn = {0305-1048}, + doi = {10.1093/nar/gky698}, + abstract = {Abstract. Single-cell RNA sequencing (scRNA-seq) is a powerful tool to study heterogeneity and dynamic changes in cell populations. Clustering scRNA-seq is ess}, + file = {/Users/laurent/Zotero/storage/F4TQ7JKC/Kim et al. - 2018 - CellBIC bimodality-based top-down clustering of s.pdf;/Users/laurent/Zotero/storage/PEMH5PRP/Kim et al. - 2018 - CellBIC bimodality-based top-down clustering of s.pdf;/Users/laurent/Zotero/storage/FK3LZ2BY/5068243.html;/Users/laurent/Zotero/storage/MGLINC4T/5068243.html}, + journal = {Nucleic Acids Research}, + language = {en}, + number = {21} +} + +@article{kimScReClassifyPostHoc2019, + title = {{{scReClassify}}: Post Hoc Cell Type Classification of Single-Cell {{rNA}}-Seq Data}, + shorttitle = {{{scReClassify}}}, + author = {Kim, Taiyun and Lo, Kitty and Geddes, Thomas A.
and Kim, Hani Jieun and Yang, Jean Yee Hwa and Yang, Pengyi}, + year = {2019}, + month = dec, + volume = {20}, + pages = {913}, + issn = {1471-2164}, + doi = {10.1186/s12864-019-6305-x}, + abstract = {Single-cell RNA-sequencing (scRNA-seq) is a fast emerging technology allowing global transcriptome profiling on the single cell level. Cell type identification from scRNA-seq data is a critical task in a variety of research such as developmental biology, cell reprogramming, and cancers. Typically, cell type identification relies on human inspection using a combination of prior biological knowledge (e.g. marker genes and morphology) and computational techniques (e.g. PCA and clustering). Due to the incompleteness of our current knowledge and the subjectivity involved in this process, a small amount of cells may be subject to mislabelling.}, + file = {/Users/laurent/Zotero/storage/DUG8WXRQ/Kim et al. - 2019 - scReClassify post hoc cell type classification of.pdf;/Users/laurent/Zotero/storage/9DTVHJ9C/s12864-019-6305-x.html}, + journal = {BMC Genomics}, + number = {9} +} + +@article{kinalisDeconvolutionAutoencodersLearn2019, + title = {Deconvolution of Autoencoders to Learn Biological Regulatory Modules from Single Cell {{mRNA}} Sequencing Data}, + author = {Kinalis, Savvas and Nielsen, Finn Cilius and Winther, Ole and Bagger, Frederik Otzen}, + year = {2019}, + month = dec, + volume = {20}, + pages = {1--9}, + issn = {1471-2105}, + doi = {10.1186/s12859-019-2952-9}, + abstract = {Unsupervised machine learning methods (deep learning) have shown their usefulness with noisy single cell mRNA-sequencing data (scRNA-seq), where the models generalize well, despite the zero-inflation of the data. A class of neural networks, namely autoencoders, has been useful for denoising of single cell data, imputation of missing values and dimensionality reduction. 
Here, we present a striking feature with the potential to greatly increase the usability of autoencoders: With specialized training, the autoencoder is not only able to generalize over the data, but also to tease apart biologically meaningful modules, which we found encoded in the representation layer of the network. Our model can, from scRNA-seq data, delineate biological meaningful modules that govern a dataset, as well as give information as to which modules are active in each single cell. Importantly, most of these modules can be explained by known biological functions, as provided by the Hallmark gene sets. We discover that tailored training of an autoencoder makes it possible to deconvolute biological modules inherent in the data, without any assumptions. By comparisons with gene signatures of canonical pathways we see that the modules are directly interpretable. The scope of this discovery has important implications, as it makes it possible to outline the drivers behind a given effect of a cell. In comparison with other dimensionality reduction methods, or supervised models for classification, our approach has the benefit of both handling well the zero-inflated nature of scRNA-seq, and validating that the model captures relevant information, by establishing a link between input and decoded data. In perspective, our model in combination with clustering methods is able to provide information about which subtype a given single cell belongs to, as well as which biological functions determine that membership.}, + copyright = {2019 The Author(s).}, + file = {/Users/laurent/Zotero/storage/MDCHEK9I/Kinalis et al. 
- 2019 - Deconvolution of autoencoders to learn biological .pdf;/Users/laurent/Zotero/storage/TEQRL6E9/s12859-019-2952-9.html}, + journal = {BMC Bioinformatics}, + language = {en}, + number = {1} +} + +@article{kirkpatrickEfficientComputationKinship2019, + title = {Efficient Computation of the Kinship Coefficients}, + author = {Kirkpatrick, Brent and Ge, Shufei and Wang, Liangliang}, + year = {2019}, + month = mar, + volume = {35}, + pages = {1002--1008}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty725}, + abstract = {AbstractMotivation. For families, kinship coefficients are quantifications of the amount of genetic sharing between a pair of individuals. These coefficients a}, + file = {/Users/laurent/Zotero/storage/UUWP8V7F/Kirkpatrick et al. - 2019 - Efficient computation of the kinship coefficients.pdf;/Users/laurent/Zotero/storage/D9G3U4U6/5085372.html}, + journal = {Bioinformatics}, + language = {en}, + number = {6} +} + +@article{kiselevChallengesUnsupervisedClustering2019, + title = {Challenges in Unsupervised Clustering of Single-Cell {{RNA}}-Seq Data}, + author = {Kiselev, Vladimir Yu and Andrews, Tallulah S. and Hemberg, Martin}, + year = {2019}, + month = may, + volume = {20}, + pages = {273--282}, + issn = {1471-0056, 1471-0064}, + doi = {10.1038/s41576-018-0088-9}, + abstract = {Single-cell RNA sequencing (scRNA-seq) allows researchers to collect large catalogues detailing the transcriptomes of individual cells. Unsupervised clustering is of central importance for the analysis of these data, as it is used to identify putative cell types. However, there are many challenges involved. We discuss why clustering is a challenging problem from a computational point of view and what aspects of the data make it challenging. We also consider the difficulties related to the biological interpretation and annotation of the identified clusters.}, + file = {/Users/laurent/Zotero/storage/BNF6C4ZK/Kiselev et al.
- 2019 - Challenges in unsupervised clustering of single-ce.pdf;/Users/laurent/Zotero/storage/CIWISJXQ/Kiselev et al. - 2019 - Challenges in unsupervised clustering of single-ce.pdf}, + journal = {Nature Reviews Genetics}, + language = {en}, + number = {5} +} + +@article{kiselevSC3ConsensusClustering2017, + title = {{{SC3}}: Consensus Clustering of Single-Cell {{RNA}}-Seq Data}, + shorttitle = {{{SC3}}}, + author = {Kiselev, Vladimir Yu and Kirschner, Kristina and Schaub, Michael T and Andrews, Tallulah and Yiu, Andrew and Chandra, Tamir and Natarajan, Kedar N and Reik, Wolf and Barahona, Mauricio and Green, Anthony R and Hemberg, Martin}, + year = {2017}, + month = mar, + volume = {14}, + pages = {483--486}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/nmeth.4236}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Kiselev et al. - 2017 - SC3 consensus clustering of single-cell RNA-seq d.pdf;/Users/laurent/Zotero/storage/3U8KURD8/Kiselev et al. - 2017 - SC3 consensus clustering of single-cell RNA-seq d.pdf;/Users/laurent/Zotero/storage/CWH7QZFN/Kiselev et al. - 2017 - SC3 consensus clustering of single-cell RNA-seq d.pdf;/Users/laurent/Zotero/storage/PK56JAVI/Kiselev et al. - 2017 - SC3 consensus clustering of single-cell RNA-seq d.pdf;/Users/laurent/Zotero/storage/TK966WYX/Kiselev et al. - 2017 - SC3 consensus clustering of single-cell RNA-seq d.pdf;/Users/laurent/Zotero/storage/W69SFNK2/Kiselev et al. - 2017 - SC3 consensus clustering of single-cell RNA-seq d.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {5} +} + +@article{kiselevScmapProjectionSinglecell2018, + title = {Scmap: Projection of Single-Cell {{RNA}}-Seq Data across Data Sets}, + shorttitle = {Scmap}, + author = {Kiselev, Vladimir Yu and Yiu, Andrew and Hemberg, Martin}, + year = {2018}, + month = apr, + volume = {15}, + pages = {359--362}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/nmeth.4644}, + abstract = {Data availability. 
All data used here are from published studies, and information about their original publication can be found in Supplementary Table 1. Source data for Figures 1 and 2 and Supplementary Figures 1\textendash{}4 are available online.}, + file = {/Users/laurent/Documents/bibliography/to_read/Kiselev et al. - 2018 - scmap projection of single-cell RNA-seq data acro.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {5} +} + +@article{kobakArtUsingTSNE2019, + title = {The Art of Using {{t-SNE}} for Single-Cell Transcriptomics}, + author = {Kobak, Dmitry and Berens, Philipp}, + year = {2019}, + month = nov, + volume = {10}, + pages = {1--14}, + issn = {2041-1723}, + doi = {10.1038/s41467-019-13056-x}, + abstract = {t-SNE is widely used for dimensionality reduction and visualization of high-dimensional single-cell data. Here, the authors introduce a protocol to help avoid common shortcomings of t-SNE, for example, enabling preservation of the global structure of the data.}, + copyright = {2019 The Author(s)}, + file = {/Users/laurent/Zotero/storage/K5ECABTQ/Kobak and Berens - 2019 - The art of using t-SNE for single-cell transcripto.pdf;/Users/laurent/Zotero/storage/ASCYM8HJ/s41467-019-13056-x.html}, + journal = {Nature Communications}, + language = {en}, + number = {1} +} + +@article{kochPowerTCRModelbasedApproach2018, + title = {{{powerTCR}}: A Model-Based Approach to Comparative Analysis of the Clone Size Distribution of the {{T}} Cell Receptor Repertoire}, + shorttitle = {{{powerTCR}}}, + author = {Koch, Hillary and Starenki, Dmytro and Cooper, Sara J and Myers, Richard M and Li, Qunhua}, + year = {2018}, + month = apr, + doi = {10.1101/297119}, + abstract = {Sequencing of the T cell receptor repertoire is a powerful tool for deeper study of immune response, but the unique structure of this type of data makes its meaningful quantification challenging. We introduce a new method, the Gamma-GPD spliced threshold model, to address this difficulty. 
This biologically interpretable model captures the distribution of the TCR repertoire, demonstrates stability across varying sequencing depths, and permits comparative analysis across any number of sampled individuals. We apply our method to several datasets and obtain interesting biological insights. We have implemented our method in the open-source R package powerTCR.}, + file = {/Users/laurent/Documents/bibliography/to_read/Koch et al. - 2018 - powerTCR a model-based approach to comparative an.pdf}, + language = {en} +} + +@article{kojadinovicComparisonThreeSemiparametric2010, + title = {Comparison of Three Semiparametric Methods for Estimating Dependence Parameters in Copula Models}, + author = {Kojadinovic, Ivan and Yan, Jun}, + year = {2010}, + month = aug, + volume = {47}, + pages = {52--63}, + issn = {0167-6687}, + doi = {10.1016/j.insmatheco.2010.03.008}, + abstract = {Three semiparametric methods for estimating dependence parameters in copula models are compared, namely maximum pseudo-likelihood estimation and the two method-of-moment approaches based on the inversion of Spearman's rho and Kendall's tau. For each of these three asymptotically normal estimators, an estimator of their asymptotic (co)variance is stated in three different situations, namely the bivariate one-parameter case, the multivariate one-parameter case and the multivariate multiparameter case. An extensive Monte Carlo study is carried out to compare the finite-sample performance of the three estimators under consideration in these three situations. In the one-parameter case, it involves up to six bivariate and four-variate copula families, and up to five levels of dependence. In the multiparameter case, attention is restricted to trivariate and four-variate normal and t copulas. The maximum pseudo-likelihood estimator appears as the best choice in terms of mean square error in all situations except for small and weakly dependent samples. 
It is followed by the method-of-moment estimator based on Kendall's tau, which overall appears to be significantly better than its analogue based on Spearman's rho. The simulation results are complemented by asymptotic relative efficiency calculations. The numerical computation of Spearman's rho, Kendall's tau and their derivatives in the case of copula families for which explicit expressions are not available is also investigated.}, + file = {/Users/laurent/Zotero/storage/TZH5LJTC/Kojadinovic and Yan - 2010 - Comparison of three semiparametric methods for est.pdf;/Users/laurent/Zotero/storage/48QY6EIB/S0167668710000363.html}, + journal = {Insurance: Mathematics and Economics}, + keywords = {Asymptotic relative efficiency,Kendall’s tau,Numerical approximation,Pseudo-likelihood,Pseudo-observations,Ranks,Spearman’s rho}, + language = {en}, + number = {1} +} + +@article{kojadinovicCopulaInferenceProcedures2017, + title = {Some Copula Inference Procedures Adapted to the Presence of Ties}, + author = {Kojadinovic, Ivan}, + year = {2017}, + month = feb, + abstract = {When modeling the distribution of a multivariate continuous random vector using the so-called \emph{copula approach}, it is not uncommon to have ties in the coordinate samples of the available data because of rounding or lack of measurement precision. Yet, the vast majority of existing inference procedures on the underlying copula were both theoretically derived and practically implemented under the assumption of no ties. Applying them nonetheless can lead to strongly biased results. Some of the existing statistical tests can however be adapted to provide meaningful results in the presence of ties. It is the case of some tests of exchangeability, radial symmetry, extreme-value dependence and goodness of fit. 
Detailed algorithms for computing approximate p-values for the modified tests are provided and their finite-sample behaviors are empirically investigated through extensive Monte Carlo experiments. An illustration on a real-world insurance data set concludes the work.}, + archivePrefix = {arXiv}, + eprint = {1609.05519}, + eprinttype = {arxiv}, + file = {/Users/laurent/Zotero/storage/L79SDSVX/Kojadinovic - 2017 - Some copula inference procedures adapted to the pr.pdf;/Users/laurent/Zotero/storage/XQJU7RXJ/1609.html}, + journal = {arXiv:1609.05519 [stat]}, + keywords = {Statistics - Methodology}, + primaryClass = {stat} +} + +@article{koppImprovedCompoundPoisson2017, + title = {An Improved Compound {{Poisson}} Model for the Number of Motif Hits in {{DNA}} Sequences}, + author = {Kopp, Wolfgang and Vingron, Martin}, + year = {2017}, + month = dec, + volume = {33}, + pages = {3929--3937}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx539}, + abstract = {Motivation: Transcription factors play a crucial role in gene regulation by binding to specific regulatory sequences. The sequence motifs recognized by a transcription factor can be described in terms of position frequency matrices. When scanning a sequence for matches to a position frequency matrix, one needs to determine a cut-off, which then in turn results in a certain number of hits. In this paper we describe how to compute the distribution of match scores and of the number of motif hits, which are the prerequisites to perform motif hit enrichment analysis.}, + file = {/Users/laurent/Documents/bibliography/to_read/Kopp and Vingron - 2017 - An improved compound Poisson model for the number .pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {24} +} + +@article{korenCanuScalableAccurate2017, + title = {Canu: Scalable and Accurate Long-Read Assembly via Adaptive k-Mer Weighting and Repeat Separation}, + shorttitle = {Canu}, + author = {Koren, Sergey and Walenz, Brian P. 
and Berlin, Konstantin and Miller, Jason R. and Bergman, Nicholas H. and Phillippy, Adam M.}, + year = {2017}, + month = may, + volume = {27}, + pages = {722--736}, + issn = {1088-9051}, + doi = {10.1101/gr.215087.116}, + abstract = {Long-read single-molecule sequencing has revolutionized de novo genome assembly and enabled the automated reconstruction of reference-quality genomes. However, given the relatively high error rates of such technologies, efficient and accurate assembly of large repeats and closely related haplotypes remains challenging. We address these issues with Canu, a successor of Celera Assembler that is specifically designed for noisy single-molecule sequences. Canu introduces support for nanopore sequencing, halves depth-of-coverage requirements, and improves assembly continuity while simultaneously reducing runtime by an order of magnitude on large genomes versus Celera Assembler 8.2. These advances result from new overlapping and assembly algorithms, including an adaptive overlapping strategy based on tf-idf weighted MinHash and a sparse assembly graph construction that avoids collapsing diverged repeats and haplotypes. We demonstrate that Canu can reliably assemble complete microbial genomes and near-complete eukaryotic chromosomes using either Pacific Biosciences (PacBio) or Oxford Nanopore technologies and achieves a contig NG50 of {$>$}21 Mbp on both human and Drosophila melanogaster PacBio data sets. For assembly structures that cannot be linearly represented, Canu provides graph-based assembly outputs in graphical fragment assembly (GFA) format for analysis or integration with complementary phasing and scaffolding techniques. 
The combination of such highly resolved assembly graphs with long-range scaffolding information promises the complete and automated assembly of complex genomes.}, + journal = {Genome Research}, + keywords = {hybrid assembly}, + number = {5}, + pmcid = {PMC5411767}, + pmid = {28298431} +} + +@article{korsunskyFastSensitiveAccurate2019, + title = {Fast, Sensitive and Accurate Integration of Single-Cell Data with {{Harmony}}}, + author = {Korsunsky, Ilya and Millard, Nghia and Fan, Jean and Slowikowski, Kamil and Zhang, Fan and Wei, Kevin and Baglaenko, Yuriy and Brenner, Michael and Loh, Po-ru and Raychaudhuri, Soumya}, + year = {2019}, + month = dec, + volume = {16}, + pages = {1289--1296}, + issn = {1548-7105}, + doi = {10.1038/s41592-019-0619-0}, + abstract = {Harmony, for the integration of single-cell transcriptomic data, identifies broad and fine-grained populations, scales to large datasets, and can integrate sequencing- and imaging-based data.}, + copyright = {2019 The Author(s), under exclusive licence to Springer Nature America, Inc.}, + file = {/Users/laurent/Zotero/storage/X4IC8XRZ/Korsunsky et al. - 2019 - Fast, sensitive and accurate integration of single.pdf;/Users/laurent/Zotero/storage/BUSZ5YCD/s41592-019-0619-0.html}, + journal = {Nature Methods}, + language = {en}, + number = {12} +} + +@article{korthauerDetectionAccurateFalse2019, + title = {Detection and Accurate False Discovery Rate Control of Differentially Methylated Regions from Whole Genome Bisulfite Sequencing}, + author = {Korthauer, Keegan and Chakraborty, Sutirtha and Benjamini, Yuval and Irizarry, Rafael A.}, + year = {2019}, + month = jul, + volume = {20}, + pages = {367--383}, + issn = {1465-4644}, + doi = {10.1093/biostatistics/kxy007}, + abstract = {Summary. With recent advances in sequencing technology, it is now feasible to measure DNA methylation at tens of millions of sites across the entire genome. In}, + file = {/Users/laurent/Zotero/storage/PTA2EBM8/Korthauer et al. 
- 2019 - Detection and accurate false discovery rate contro.pdf;/Users/laurent/Zotero/storage/XKEFWRVC/4899074.html}, + journal = {Biostatistics}, + language = {en}, + number = {3} +} + +@article{kosmidisModelbasedClusteringUsing2016, + title = {Model-Based Clustering Using Copulas with Applications}, + author = {Kosmidis, Ioannis and Karlis, Dimitris}, + year = {2016}, + month = sep, + volume = {26}, + pages = {1079--1099}, + issn = {0960-3174, 1573-1375}, + doi = {10.1007/s11222-015-9590-5}, + abstract = {The majority of model-based clustering techniques is based on multivariate Normal models and their variants. In this paper copulas are used for the construction of flexible families of models for clustering applications. The use of copulas in model-based clustering offers two direct advantages over current methods: i) the appropriate choice of copulas provides the ability to obtain a range of exotic shapes for the clusters, and ii) the explicit choice of marginal distributions for the clusters allows the modelling of multivariate data of various modes (either discrete or continuous) in a natural way. This paper introduces and studies the framework of copula-based finite mixture models for clustering applications. Estimation in the general case can be performed using standard EM, and, depending on the mode of the data, more efficient procedures are provided that can fully exploit the copula structure. The closure properties of the mixture models under marginalization are discussed, and for continuous, real-valued data parametric rotations in the sample space are introduced, with a parallel discussion on parameter identifiability depending on the choice of copulas for the components. 
The exposition of the methodology is accompanied and motivated by the analysis of real and artificial data.}, + archivePrefix = {arXiv}, + eprint = {1404.4077}, + eprinttype = {arxiv}, + file = {/Users/laurent/Zotero/storage/MLKLCT7X/Kosmidis and Karlis - 2016 - Model-based clustering using copulas with applicat.pdf}, + journal = {Statistics and Computing}, + keywords = {Statistics - Methodology}, + language = {en}, + number = {5} +} + +@article{krismerIDR2DIdentifiesReproducible2019, + title = {{{IDR2D}} Identifies Reproducible Genomic Interactions}, + author = {Krismer, Konstantin and Guo, Yuchun and Gifford, David K.}, + year = {2019}, + month = jul, + pages = {691295}, + doi = {10.1101/691295}, + abstract = {Chromatin interaction data from protocols such as ChIA-PET and HiChIP provide valuable insights into genome organization and gene regulation, but can include spurious interactions that do not reflect underlying genome biology. We introduce a generalization of the Irreproducible Discovery Rate (IDR) method called IDR2D that identifies replicable interactions shared by experiments. IDR2D provides a principled set of interactions and eliminates artifacts from single experiments.}, + copyright = {\textcopyright{} 2019, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/}, + file = {/Users/laurent/Zotero/storage/FL8DDMYU/Krismer et al. 
- 2019 - IDR2D identifies reproducible genomic interactions.pdf;/Users/laurent/Zotero/storage/HEJIQWF5/691295v1.html}, + journal = {bioRxiv}, + language = {en} +} + +@article{krizanovicEvaluationToolsLong, + title = {Evaluation of Tools for Long Read {{RNA}}-Seq Splice-Aware Alignment}, + author = {Kri{\v z}anovi{\'c}, Kre{\v s}imir and Echchiki, Amina and Roux, Julien and {\v S}iki{\'c}, Mile}, + pages = {7}, + abstract = {Motivation: High\textendash{}throughput sequencing has transformed the study of gene expression levels through RNA-seq, a technique that is now routinely used by various fields, such as genetic research or diagnostics. The advent of third generation sequencing technologies providing significantly longer reads opens up new possibilities. However, the high error rates common to these technologies set new bioinformatics challenges for the gapped alignment of reads to their genomic origin. In this study, we have explored how currently available RNA-seq splice-aware alignment tools cope with increased read lengths and error rates. All tested tools were initially developed for short NGS reads, but some have claimed support for long Pacific Biosciences (PacBio) or even Oxford Nanopore Technologies (ONT) MinION reads.}, + file = {/Users/laurent/Documents/bibliography/to_read/Križanović et al. - Evaluation of tools for long read RNA-seq splice-a.pdf}, + language = {en} +} + +@article{kuosmanenEvaluatingApproachesFind2018, + title = {Evaluating Approaches to Find Exon Chains Based on Long Reads}, + author = {Kuosmanen, Anna and Norri, Tuukka and M{\"a}kinen, Veli}, + year = {2018}, + month = may, + volume = {19}, + pages = {404--414}, + issn = {1467-5463}, + doi = {10.1093/bib/bbw137}, + abstract = {Abstract. Transcript prediction can be modeled as a graph problem where exons are modeled as nodes and reads spanning two or more exons are modeled as exon cha}, + file = {/Users/laurent/Zotero/storage/U7353ED9/Kuosmanen et al. 
- 2018 - Evaluating approaches to find exon chains based on.pdf;/Users/laurent/Zotero/storage/8JFW2X84/2876831.html}, + journal = {Briefings in Bioinformatics}, + language = {en}, + number = {3} +} + +@article{lafond-lapalmeNewMethodDecontamination2016, + title = {A New Method for Decontamination of {\emph{de novo}} Transcriptomes Using a Hierarchical Clustering Algorithm}, + author = {{Lafond-Lapalme}, Jo{\"e}l and Duceppe, Marc-Olivier and Wang, Shengrui and Moffett, Peter and Mimee, Benjamin}, + year = {2016}, + month = dec, + pages = {btw793}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btw793}, + abstract = {Motivation: The identification of contaminating sequences in a de novo assembly is challenging because of the absence of information on the target species. For sample types where the target organism is impossible to isolate from its matrix, such as endoparasites, endosymbionts and soil-harvested samples, contamination is unavoidable. A few post-assembly decontamination methods are currently available but are based only on alignments to databases, which can lead to poor decontamination.}, + file = {/Users/laurent/Documents/bibliography/to_read/Lafond-Lapalme et al. 
- 2016 - A new method for decontamination of ide novoi.pdf}, + journal = {Bioinformatics}, + language = {en} +} + +@article{lafziTutorialGuidelinesExperimental2018, + title = {Tutorial: Guidelines for the Experimental Design of Single-Cell {{RNA}} Sequencing Studies}, + shorttitle = {Tutorial}, + author = {Lafzi, Atefeh and Moutinho, Catia and Picelli, Simone and Heyn, Holger}, + year = {2018}, + month = dec, + volume = {13}, + pages = {2742}, + issn = {1750-2799}, + doi = {10.1038/s41596-018-0073-y}, + abstract = {In this tutorial, the authors provide a comprehensive description of the considerations for designing single-cell transcriptomics studies, from sample preparation and single-cell RNA sequencing methodologies through data processing and analysis.}, + copyright = {2018 Springer Nature Limited}, + file = {/Users/laurent/Zotero/storage/WNJI38G5/Lafzi et al. - 2018 - Tutorial guidelines for the experimental design o.pdf;/Users/laurent/Zotero/storage/2ILPQUC3/s41596-018-0073-y.html}, + journal = {Nature Protocols}, + language = {en}, + number = {12} +} + +@article{langmeadFastGappedreadAlignment2012, + title = {Fast Gapped-Read Alignment with {{Bowtie}} 2}, + author = {Langmead, Ben and Salzberg, Steven L.}, + year = {2012}, + month = apr, + volume = {9}, + pages = {357--359}, + issn = {1548-7105}, + doi = {10.1038/nmeth.1923}, + abstract = {As the rate of sequencing increases, greater throughput is demanded from read aligners. The full-text minute index is often used to make alignment very fast and memory-efficient, but the approach is ill-suited to finding longer, gapped alignments. 
Bowtie 2 combines the strengths of the full-text minute index with the flexibility and speed of hardware-accelerated dynamic programming algorithms to achieve a combination of high speed, sensitivity and accuracy.}, + copyright = {2012 Nature Publishing Group}, + file = {/Users/laurent/Zotero/storage/3KGIH8D7/Langmead and Salzberg - 2012 - Fast gapped-read alignment with Bowtie 2.pdf;/Users/laurent/Zotero/storage/WK689CBE/nmeth.html}, + journal = {Nature Methods}, + language = {en}, + number = {4} +} + +@article{lareauDiffloopComputationalFramework2018, + title = {Diffloop: A Computational Framework for Identifying and Analyzing Differential {{DNA}} Loops from Sequencing Data}, + shorttitle = {Diffloop}, + author = {Lareau, Caleb A and Aryee, Martin J}, + editor = {Berger, Bonnie}, + year = {2018}, + month = feb, + volume = {34}, + pages = {672--674}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx623}, + abstract = {Summary: The 3D architecture of DNA within the nucleus is a key determinant of interactions between genes, regulatory elements, and transcriptional machinery. As a result, differences in DNA looping structure are associated with variation in gene expression and cell state. To systematically assess changes in DNA looping architecture between samples, we introduce diffloop, an R/ Bioconductor package that provides a suite of functions for the quality control, statistical testing, annotation, and visualization of DNA loops. We demonstrate this functionality by detecting differences between ENCODE ChIA-PET samples and relate looping to variability in epigenetic state.}, + file = {/Users/laurent/Documents/bibliography/to_read/Lareau and Aryee - 2018 - diffloop a computational framework for identifyin.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {4} +} + +@article{larssonXchromosomeUpregulationDriven2019, + title = {X-Chromosome Upregulation Is Driven by Increased Burst Frequency}, + author = {Larsson, Anton J. M. 
and Coucoravas, Christos and Sandberg, Rickard and Reinius, Bj{\"o}rn}, + year = {2019}, + month = oct, + volume = {26}, + pages = {963--969}, + issn = {1545-9985}, + doi = {10.1038/s41594-019-0306-y}, + abstract = {Analysis of X-chromosome upregulation using single-cell transcriptional kinetics data reveals increased burst frequency of X-linked genes that appear on the active X chromosome when X inactivation takes place.}, + copyright = {2019 The Author(s), under exclusive licence to Springer Nature America, Inc.}, + file = {/Users/laurent/Zotero/storage/XV5BFCB7/Larsson et al. - 2019 - X-chromosome upregulation is driven by increased b.pdf;/Users/laurent/Zotero/storage/CMUYUJE2/s41594-019-0306-y.html}, + journal = {Nature Structural \& Molecular Biology}, + language = {en}, + number = {10} +} + +@article{lee-sixPopulationDynamicsNormal2018, + title = {Population Dynamics of Normal Human Blood Inferred from Somatic Mutations}, + author = {{Lee-Six}, Henry and {\O}bro, Nina Friesgaard and Shepherd, Mairi S. and Grossmann, Sebastian and Dawson, Kevin and Belmonte, Miriam and Osborne, Robert J. and Huntly, Brian J. P. and Martincorena, Inigo and Anderson, Elizabeth and O'Neill, Laura and Stratton, Michael R. and Laurenti, Elisa and Green, Anthony R. and Kent, David G. and Campbell, Peter J.}, + year = {2018}, + month = sep, + volume = {561}, + pages = {473}, + issn = {1476-4687}, + doi = {10.1038/s41586-018-0497-0}, + abstract = {Analysis of blood from a healthy human show that haematopoietic stem cells increase rapidly in numbers through early life, reaching a stable plateau in adulthood, and contribute to myeloid and B lymphocyte populations throughout life.}, + copyright = {2018 Springer Nature Limited}, + file = {/Users/laurent/Zotero/storage/2AMQY2JH/Lee-Six et al. 
- 2018 - Population dynamics of normal human blood inferred.pdf;/Users/laurent/Zotero/storage/MYL6VBAG/s41586-018-0497-0.html}, + journal = {Nature}, + language = {en}, + number = {7724} +} + +@article{leeAltHapAlignRImprovedAccuracy2018, + title = {{{AltHapAlignR}}: Improved Accuracy of {{RNA}}-Seq Analyses through the Use of Alternative Haplotypes}, + shorttitle = {{{AltHapAlignR}}}, + author = {Lee, Wanseon and Plant, Katharine and Humburg, Peter and Knight, Julian C.}, + year = {2018}, + month = jul, + volume = {34}, + pages = {2401--2408}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty125}, + abstract = {Motivation: Reliance on mapping to a single reference haplotype currently limits accurate estimation of allele or haplotype-specific expression using R}, + file = {/Users/laurent/Zotero/storage/84ZZJJV5/Lee et al. - 2018 - AltHapAlignR improved accuracy of RNA-seq analyse.pdf;/Users/laurent/Zotero/storage/VZERXN7L/4921174.html}, + journal = {Bioinformatics}, + language = {en}, + number = {14} +} + +@article{leipzigReviewBioinformaticPipeline2016, + title = {A Review of Bioinformatic Pipeline Frameworks}, + author = {Leipzig, Jeremy}, + year = {2016}, + month = mar, + pages = {bbw020}, + issn = {1467-5463, 1477-4054}, + doi = {10.1093/bib/bbw020}, + abstract = {High-throughput bioinformatic analyses increasingly rely on pipeline frameworks to process sequence and metadata. Modern implementations of these frameworks differ on three key dimensions: using an implicit or explicit syntax, using a configuration, convention or class-based design paradigm and offering a command line or workbench interface. Here I survey and compare the design philosophies of several current pipeline frameworks. 
I provide practical recommendations based on analysis requirements and the user base.}, + file = {/Users/laurent/Documents/bibliography/bioinfo/Leipzig - 2016 - A review of bioinformatic pipeline frameworks.pdf}, + journal = {Briefings in Bioinformatics}, + language = {en} +} + +@article{leungInferenceHumanPolyadenylation2018, + title = {Inference of the Human Polyadenylation Code}, + author = {Leung, Michael K. K. and Delong, Andrew and Frey, Brendan J.}, + year = {2018}, + month = sep, + volume = {34}, + pages = {2889--2898}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty211}, + abstract = {Motivation: Processing of transcripts at the 3{${'}$}-end involves cleavage at a polyadenylation site followed by the addition of a poly(A)-tail. By selectin}, + file = {/Users/laurent/Zotero/storage/K5LC33GQ/Leung et al. - 2018 - Inference of the human polyadenylation code.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{leungOMToolsSoftwarePackage2017, + title = {{{OMTools}}: A Software Package for Visualizing and Processing Optical Mapping Data}, + shorttitle = {{{OMTools}}}, + author = {Leung, Alden King-Yung and Jin, Nana and Yip, Kevin Y. and Chan, Ting-Fung}, + year = {2017}, + month = sep, + volume = {33}, + pages = {2933--2935}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx317}, + abstract = {Summary: Optical mapping is a molecular technique capturing specific patterns of fluorescent labels along DNA molecules. It has been widely applied in assisted-scaffolding in sequence assemblies, microbial strain typing and detection of structural variations. Various computational methods have been developed to analyze optical mapping data. However, existing tools for processing and visualizing optical map data still have many shortcomings. Here, we present OMTools, an efficient and intuitive data processing and visualization suite to handle and explore large-scale optical mapping profiles. 
OMTools includes modules for visualization (OMView), data processing and simulation. These modules together form an accessible and convenient pipeline for optical mapping analyses.}, + file = {/Users/laurent/Documents/bibliography/to_read/Leung et al. - 2017 - OMTools a software package for visualizing and pr.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {18} +} + +@article{levitskySingleChIPseqDataset2019, + title = {A Single {{ChIP}}-Seq Dataset Is Sufficient for Comprehensive Analysis of Motifs Co-Occurrence with {{MCOT}} Package}, + author = {Levitsky, Victor and Zemlyanskaya, Elena and Oshchepkov, Dmitry and Podkolodnaya, Olga and Ignatieva, Elena and Grosse, Ivo and Mironova, Victoria and Merkulova, Tatyana}, + year = {2019}, + month = dec, + volume = {47}, + pages = {e139}, + issn = {0305-1048}, + doi = {10.1093/nar/gkz800}, + abstract = {Abstract. Recognition of composite elements consisting of two transcription factor binding sites gets behind the studies of tissue-, stage- and condition-speci}, + file = {/Users/laurent/Zotero/storage/QY7WV463/Levitsky et al. 
- 2019 - A single ChIP-seq dataset is sufficient for compre.pdf;/Users/laurent/Zotero/storage/YKCNVWUZ/5570700.html}, + journal = {Nucleic Acids Research}, + language = {en}, + number = {21} +} + +@article{liAccurateRobustImputation2018, + title = {An Accurate and Robust Imputation Method {{scImpute}} for Single-Cell {{RNA}}-Seq Data}, + author = {Li, Wei Vivian and Li, Jingyi Jessica}, + year = {2018}, + month = dec, + volume = {9}, + issn = {2041-1723}, + doi = {10.1038/s41467-018-03405-7}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Li and Li - 2018 - An accurate and robust imputation method scImpute .pdf;/Users/laurent/Zotero/storage/E3LKPC66/Li and Li - 2018 - An accurate and robust imputation method scImpute .pdf;/Users/laurent/Zotero/storage/GP5L5HMN/Li and Li - 2018 - An accurate and robust imputation method scImpute .pdf;/Users/laurent/Zotero/storage/UJTGL2PE/Li and Li - 2018 - An accurate and robust imputation method scImpute .pdf}, + journal = {Nature Communications}, + language = {en}, + number = {1} +} + +@article{liaoFeatureCountsEfficientGeneral2014, + title = {{{featureCounts}}: An Efficient General Purpose Program for Assigning Sequence Reads to Genomic Features}, + shorttitle = {{{featureCounts}}}, + author = {Liao, Y. and Smyth, G. K. and Shi, W.}, + year = {2014}, + month = apr, + volume = {30}, + pages = {923--930}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btt656}, + abstract = {Motivation: Next-generation sequencing technologies generate millions of short sequence reads, which are usually aligned to a reference genome. In many applications, the key information required for downstream analysis is the number of reads mapping to each genomic feature, for example to each exon or each gene. The process of counting reads is called read summarization. 
Read summarization is required for a great variety of genomic analyses but has so far received relatively little attention in the literature.}, + file = {/Users/laurent/Documents/bibliography/RNASeq/Liao et al. - 2014 - featureCounts an efficient general purpose progra.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {7} +} + +@article{liCopulaModelingData2016, + title = {Copula {{Modeling}} for {{Data}} with {{Ties}}}, + author = {Li, Yan and Li, Yang and Qin, Yichen and Yan, Jun}, + year = {2016}, + month = dec, + abstract = {Copula modeling has gained much attention in many fields recently with the advantage of separating dependence structure from marginal distributions. In real data, however, serious ties are often present in one or multiple margins, which cause problems to many rank-based statistical methods developed under the assumption of continuous data with no ties. Simple methods such as breaking the ties at random or using average rank introduce independence into the data and, hence, lead to biased estimation. We propose an estimation method that treats the ranks of tied data as being interval censored and maximizes a pseudo-likelihood based on interval censored pseudo-observations. A parametric bootstrap procedure that preserves the observed tied ranks in the data is adapted to assess the estimation uncertainty and perform goodness-of-fit tests. The proposed approach is shown to be very competitive in comparison to the simple treatments in a large scale simulation study. Application to a bivariate insurance data illustrates the methodology.}, + archivePrefix = {arXiv}, + eprint = {1612.06968}, + eprinttype = {arxiv}, + file = {/Users/laurent/Zotero/storage/85JWXJFY/Li et al. 
- 2016 - Copula Modeling for Data with Ties.pdf}, + journal = {arXiv:1612.06968 [stat]}, + keywords = {Statistics - Methodology}, + language = {en}, + primaryClass = {stat} +} + +@article{liDeepSimulatorDeepSimulator2018, + title = {{{DeepSimulator}}: A Deep Simulator for {{Nanopore}} Sequencing}, + shorttitle = {{{DeepSimulator}}}, + author = {Li, Yu and Han, Renmin and Bi, Chongwei and Li, Mo and Wang, Sheng and Gao, Xin}, + year = {2018}, + month = sep, + volume = {34}, + pages = {2899--2908}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty223}, + abstract = {Motivation: Oxford Nanopore sequencing is a rapidly developed sequencing technology in recent years. To keep pace with the explosion of the downstream}, + file = {/Users/laurent/Zotero/storage/K8LYYIU3/Li et al. - 2018 - DeepSimulator a deep simulator for Nanopore seque.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{liDropoutImputationBatch2019, + title = {Dropout Imputation and Batch Effect Correction for Single-Cell {{RNA}} Sequencing Data}, + author = {Li, Gang and Yang, Yuchen and Van Buren, Eric and Li, Yun}, + year = {2019}, + month = dec, + volume = {2}, + pages = {169--177}, + issn = {2096-5672}, + doi = {10.1097/JBR.0000000000000053}, + abstract = {Single-cell RNA sequencing (scRNA-seq) allows researchers to examine the transcriptome at the single-cell level and has been increasingly employed as technologies continue to advance.
Due to technical and biological reasons unique to scRNA-seq data, denoising and batch effect correction are}, + file = {/Users/laurent/Zotero/storage/VHW9PULQ/02070904.html}, + journal = {Journal of Bio-X Research}, + language = {en}, + number = {4} +} + +@article{liDropoutImputationBatch2019a, + title = {Dropout Imputation and Batch Effect Correction for Single-Cell {{RNA}} Sequencing Data}, + author = {Li, Gang and Yang, Yuchen and Van Buren, Eric and Li, Yun}, + year = {2019}, + month = dec, + volume = {2}, + pages = {169--177}, + issn = {2096-5672}, + doi = {10.1097/JBR.0000000000000053}, + abstract = {Single-cell RNA sequencing (scRNA-seq) allows researchers to examine the transcriptome at the single-cell level and has been increasingly employed as technologies continue to advance. Due to technical and biological reasons unique to scRNA-seq data, denoising and batch effect correction are almost indispensable to ensure valid and powerful data analysis. However, various aspects of scRNA-seq data pose grand challenges for such essential tasks pertaining to data pre-processing, normalization or harmonization. In this review, we first discuss properties of scRNA-seq data that contribute to the challenges for denoising and batch effect correction from a computational perspective. We then focus on reviewing several state-of-the-art methods for dropout imputation and batch effect correction, comparing their strengths and weaknesses.
Finally, we benchmarked three widely used correction tools using two hematopoietic scRNA-seq datasets to show their performance in a real data application.}, + file = {/Users/laurent/Zotero/storage/GD5KZKMJ/Dropout_imputation_and_batch_effect_correction_for.4.html}, + journal = {Journal of Bio-X Research}, + language = {en-US}, + number = {4} +} + +@article{liDropoutImputationBatch2019b, + title = {Dropout Imputation and Batch Effect Correction for Single-Cell {{RNA}} Sequencing Data}, + author = {Li, Gang and Yang, Yuchen and Van Buren, Eric and Li, Yun}, + year = {2019}, + month = dec, + volume = {2}, + pages = {169--177}, + issn = {2096-5672}, + doi = {10.1097/JBR.0000000000000053}, + abstract = {Single-cell RNA sequencing (scRNA-seq) allows researchers to examine the transcriptome at the single-cell level and has been increasingly employed as technologies continue to advance. Due to technical and biological reasons unique to scRNA-seq data, denoising and batch effect correction are almost indispensable to ensure valid and powerful data analysis. However, various aspects of scRNA-seq data pose grand challenges for such essential tasks pertaining to data pre-processing, normalization or harmonization. In this review, we first discuss properties of scRNA-seq data that contribute to the challenges for denoising and batch effect correction from a computational perspective. We then focus on reviewing several state-of-the-art methods for dropout imputation and batch effect correction, comparing their strengths and weaknesses.
Finally, we benchmarked three widely used correction tools using two hematopoietic scRNA-seq datasets to show their performance in a real data application.}, + file = {/Users/laurent/Zotero/storage/HNPZUN7D/Dropout_imputation_and_batch_effect_correction_for.4.html}, + journal = {Journal of Bio-X Research}, + language = {en-US}, + number = {4} +} + +@article{liForestQCQualityControl2019, + title = {{{ForestQC}}: {{Quality}} Control on Genetic Variants from next-Generation Sequencing Data Using Random Forest}, + shorttitle = {{{ForestQC}}}, + author = {Li, Jiajin and Jew, Brandon and Zhan, Lingyu and Hwang, Sungoo and Coppola, Giovanni and Freimer, Nelson B. and Sul, Jae Hoon}, + year = {2019}, + month = dec, + volume = {15}, + pages = {e1007556}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1007556}, + abstract = {Next-generation sequencing technology (NGS) enables the discovery of nearly all genetic variants present in a genome. A subset of these variants, however, may have poor sequencing quality due to limitations in NGS or variant callers. In genetic studies that analyze a large number of sequenced individuals, it is critical to detect and remove those variants with poor quality as they may cause spurious findings. In this paper, we present ForestQC, a statistical tool for performing quality control on variants identified from NGS data by combining a traditional filtering approach and a machine learning approach. Our software uses the information on sequencing quality, such as sequencing depth, genotyping quality, and GC contents, to predict whether a particular variant is likely to be false-positive. To evaluate ForestQC, we applied it to two whole-genome sequencing datasets where one dataset consists of related individuals from families while the other consists of unrelated individuals. 
Results indicate that ForestQC outperforms widely used methods for performing quality control on variants such as VQSR of GATK by considerably improving the quality of variants to be included in the analysis. ForestQC is also very efficient, and hence can be applied to large sequencing datasets. We conclude that combining a machine learning algorithm trained with sequencing quality information and the filtering approach is a practical approach to perform quality control on genetic variants from sequencing data.}, + file = {/Users/laurent/Zotero/storage/JYMPXVJQ/Li et al. - 2019 - ForestQC Quality control on genetic variants from.pdf;/Users/laurent/Zotero/storage/93Q6G67A/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Data processing,Genome-wide association studies,Genotyping,Machine learning,Machine learning algorithms,Microarrays,Next-generation sequencing,Variant genotypes}, + language = {en}, + number = {12} +} + +@article{liLncDIFFNovelQuasilikelihood2019, + title = {{{lncDIFF}}: A Novel Quasi-Likelihood Method for Differential Expression Analysis of Non-Coding {{RNA}}}, + shorttitle = {{{lncDIFF}}}, + author = {Li, Qian and Yu, Xiaoqing and Chaudhary, Ritu and Slebos, Robbert J. C. and Chung, Christine H. and Wang, Xuefeng}, + year = {2019}, + month = jul, + volume = {20}, + pages = {539}, + issn = {1471-2164}, + doi = {10.1186/s12864-019-5926-4}, + abstract = {Long non-coding RNA (lncRNA) expression data have been increasingly used in finding diagnostic and prognostic biomarkers in cancer studies. Existing differential analysis tools for RNA sequencing do not effectively accommodate low abundant genes, as commonly observed in lncRNAs.}, + file = {/Users/laurent/Zotero/storage/XVKJQDQ4/Li et al. 
- 2019 - lncDIFF a novel quasi-likelihood method for diffe.pdf;/Users/laurent/Zotero/storage/EGYAYRXB/s12864-019-5926-4.html}, + journal = {BMC Genomics}, + number = {1} +} + +@article{liMeasuringReproducibilityHighthroughput2011, + title = {Measuring Reproducibility of High-Throughput Experiments}, + author = {Li, Qunhua and Brown, James B. and Huang, Haiyan and Bickel, Peter J.}, + year = {2011}, + month = sep, + volume = {5}, + pages = {1752--1779}, + issn = {1932-6157, 1941-7330}, + doi = {10.1214/11-AOAS466}, + abstract = {Reproducibility is essential to reliable scientific discovery in high-throughput experiments. In this work we propose a unified approach to measure the reproducibility of findings identified from replicate experiments and identify putative discoveries using reproducibility. Unlike the usual scalar measures of reproducibility, our approach creates a curve, which quantitatively assesses when the findings are no longer consistent across replicates. Our curve is fitted by a copula mixture model, from which we derive a quantitative reproducibility score, which we call the ``irreproducible discovery rate'' (IDR) analogous to the FDR. This score can be computed at each set of paired replicate ranks and permits the principled setting of thresholds both for assessing reproducibility and combining replicates. Since our approach permits an arbitrary scale for each replicate, it provides useful descriptive measures in a wide variety of situations to be explored. We study the performance of the algorithm using simulations and give a heuristic analysis of its theoretical properties. We demonstrate the effectiveness of our method in a ChIP-seq experiment.}, + file = {/Users/laurent/Zotero/storage/RXFPP3Y8/Li et al. 
- 2011 - Measuring reproducibility of high-throughput exper.pdf;/Users/laurent/Zotero/storage/QPBULL3S/1318514284.html}, + journal = {The Annals of Applied Statistics}, + keywords = {association,copula,genomics,high-throughput experiment,irreproducible discovery rate,iterative algorithm,mixture model,Reproducibility}, + language = {EN}, + mrnumber = {MR2884921}, + number = {3}, + zmnumber = {1231.62124} +} + +@article{liMeasuringReproducibilityHighthroughput2011a, + title = {Measuring Reproducibility of High-Throughput Experiments}, + author = {Li, Qunhua and Brown, James B. and Huang, Haiyan and Bickel, Peter J.}, + year = {2011}, + month = sep, + volume = {5}, + pages = {1752--1779}, + issn = {1932-6157, 1941-7330}, + doi = {10.1214/11-AOAS466}, + abstract = {Reproducibility is essential to reliable scientific discovery in high-throughput experiments. In this work we propose a unified approach to measure the reproducibility of findings identified from replicate experiments and identify putative discoveries using reproducibility. Unlike the usual scalar measures of reproducibility, our approach creates a curve, which quantitatively assesses when the findings are no longer consistent across replicates. Our curve is fitted by a copula mixture model, from which we derive a quantitative reproducibility score, which we call the ``irreproducible discovery rate'' (IDR) analogous to the FDR. This score can be computed at each set of paired replicate ranks and permits the principled setting of thresholds both for assessing reproducibility and combining replicates. Since our approach permits an arbitrary scale for each replicate, it provides useful descriptive measures in a wide variety of situations to be explored. We study the performance of the algorithm using simulations and give a heuristic analysis of its theoretical properties. We demonstrate the effectiveness of our method in a ChIP-seq experiment.}, + file = {/Users/laurent/Zotero/storage/236Z62BA/Li et al. 
- 2011 - Measuring reproducibility of high-throughput exper.pdf;/Users/laurent/Zotero/storage/5VJAVKWZ/1318514284.html}, + journal = {The Annals of Applied Statistics}, + keywords = {association,copula,genomics,high-throughput experiment,irreproducible discovery rate,iterative algorithm,mixture model,Reproducibility}, + language = {EN}, + mrnumber = {MR2884921}, + number = {3}, + zmnumber = {1231.62124} +} + +@article{liMinimap2PairwiseAlignment2018, + title = {Minimap2: Pairwise Alignment for Nucleotide Sequences}, + shorttitle = {Minimap2}, + author = {Li, Heng}, + year = {2018}, + month = sep, + volume = {34}, + pages = {3094--3100}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty191}, + abstract = {Motivation: Recent advances in sequencing technologies promise ultra-long reads of {$\sim$}100 kb in average, full-length mRNA or cDNA reads in high throughpu}, + file = {/Users/laurent/Zotero/storage/W68HA2S4/Li - 2018 - Minimap2 pairwise alignment for nucleotide sequen.pdf;/Users/laurent/Zotero/storage/AP9R5BKJ/4994778.html}, + journal = {Bioinformatics}, + language = {en}, + number = {18} +} + +@article{linContinuousstateHMMsModeling2019, + title = {Continuous-State {{HMMs}} for Modeling Time-Series Single-Cell {{RNA}}-{{Seq}} Data}, + author = {Lin, Chieh and {Bar-Joseph}, Ziv}, + year = {2019}, + month = nov, + volume = {35}, + pages = {4707--4715}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz296}, + abstract = {Motivation:
Methods for reconstructing developmental trajectories from time-series single-cell RNA-Seq (scRNA-Seq) data can be largely divided into two}, + file = {/Users/laurent/Zotero/storage/UJEECJVB/5481957.html}, + journal = {Bioinformatics}, + language = {en}, + number = {22} +} + +@article{linContinuousstateHMMsModeling2019a, + title = {Continuous-State {{HMMs}} for Modeling Time-Series Single-Cell {{RNA}}-{{Seq}} Data}, + author = {Lin, Chieh and {Bar-Joseph}, Ziv}, + year = {2019}, + month = nov, + volume = {35}, + pages = {4707--4715}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz296}, + abstract = {Motivation: Methods for reconstructing developmental trajectories from time-series single-cell RNA-Seq (scRNA-Seq) data can be largely divided into two}, + file = {/Users/laurent/Zotero/storage/I7P8H2SH/Lin and Bar-Joseph - 2019 - Continuous-state HMMs for modeling time-series sin.pdf;/Users/laurent/Zotero/storage/XASZRXPG/5481957.html}, + journal = {Bioinformatics}, + language = {en}, + number = {22} +} + +@article{linDARTFastAccurate2018, + title = {{{DART}}: A Fast and Accurate {{RNA}}-Seq Mapper with a Partitioning Strategy}, + shorttitle = {{{DART}}}, + author = {Lin, Hsin-Nan and Hsu, Wen-Lian}, + year = {2018}, + month = jan, + volume = {34}, + pages = {190--197}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx558}, + abstract = {Motivation: In recent years, the massively parallel cDNA sequencing (RNA-Seq) technologies have become a powerful tool to provide high resolution measurement of expression and high sensitivity in detecting low abundance transcripts. However, RNA-seq data requires a huge amount of computational efforts. The very fundamental and critical step is to align each sequence fragment against the reference genome. Various de novo spliced RNA aligners have been developed in recent years. Though these aligners can handle spliced alignment and detect splice junctions, some challenges still remain to be solved.
With the advances in sequencing technologies and the ongoing collection of sequencing data in the ENCODE project, more efficient alignment algorithms are highly demanded. Most read mappers follow the conventional seed-and-extend strategy to deal with inexact matches for sequence alignment. However, the extension is much more time consuming than the seeding step.}, + file = {/Users/laurent/Documents/bibliography/to_read/Lin and Hsu - 2018 - DART a fast and accurate RNA-seq mapper with a pa.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {2} +} + +@article{linDeepAdversarialVariational2020, + title = {A Deep Adversarial Variational Autoencoder Model for Dimensionality Reduction in Single-Cell {{RNA}} Sequencing Analysis}, + author = {Lin, Eugene and Mukherjee, Sudipto and Kannan, Sreeram}, + year = {2020}, + month = dec, + volume = {21}, + pages = {1--11}, + issn = {1471-2105}, + doi = {10.1186/s12859-020-3401-5}, + abstract = {Single-cell RNA sequencing (scRNA-seq) is an emerging technology that can assess the function of an individual cell and cell-to-cell variability at the single cell level in an unbiased manner. Dimensionality reduction is an essential first step in downstream analysis of the scRNA-seq data. However, the scRNA-seq data are challenging for traditional methods due to their high dimensional measurements as well as an abundance of dropout events (that is, zero expression measurements). To overcome these difficulties, we propose DR-A (Dimensionality Reduction with Adversarial variational autoencoder), a data-driven approach to fulfill the task of dimensionality reduction. DR-A leverages a novel adversarial variational autoencoder-based framework, a variant of generative adversarial networks. DR-A is well-suited for unsupervised learning tasks for the scRNA-seq data, where labels for cell types are costly and often impossible to acquire. 
Compared with existing methods, DR-A is able to provide a more accurate low dimensional representation of the scRNA-seq data. We illustrate this by utilizing DR-A for clustering of scRNA-seq data. Our results indicate that DR-A significantly enhances clustering performance over state-of-the-art methods.}, + copyright = {2020 The Author(s).}, + file = {/Users/laurent/Zotero/storage/AHDPZVWE/Lin et al. - 2020 - A deep adversarial variational autoencoder model f.pdf}, + journal = {BMC Bioinformatics}, + language = {en}, + number = {1} +} + +@article{lindermanEFFICIENTALGORITHMSTDISTRIBUTED, + title = {Efficient {{Algorithms}} for t-{{Distributed Stochastic Neighborhood Embedding}}}, + author = {Linderman, George C and Rachh, Manas and Hoskins, Jeremy G and Steinerberger, Stefan and Kluger, Yuval}, + pages = {14}, + file = {/Users/laurent/Documents/bibliography/stats/LINDERMAN et al. - EFFICIENT ALGORITHMS FOR T-DISTRIBUTED STOCHASTIC .pdf}, + language = {en} +} + +@article{liNetworkEmbeddingbasedRepresentation2017, + title = {Network Embedding-Based Representation Learning for Single Cell {{RNA}}-Seq Data}, + author = {Li, Xiangyu and Chen, Weizheng and Chen, Yang and Zhang, Xuegong and Gu, Jin and Zhang, Michael Q.}, + year = {2017}, + month = nov, + volume = {45}, + pages = {e166}, + issn = {0305-1048, 1362-4962}, + doi = {10.1093/nar/gkx750}, + abstract = {Single cell RNA-seq (scRNA-seq) techniques can reveal valuable insights of cell-to-cell heterogeneities. Projection of high-dimensional data into a low-dimensional subspace is a powerful strategy in general for mining such big data. However, scRNA-seq suffers from higher noise and lower coverage than traditional bulk RNA-seq, hence bringing in new computational difficulties. One major challenge is how to deal with the frequent drop-out events.
The events, usually caused by the stochastic burst effect in gene transcription and the technical failure of RNA transcript capture, often render traditional dimension reduction methods work inefficiently. To overcome this problem, we have developed a novel Single Cell Representation Learning (SCRL) method based on network embedding. This method can efficiently implement data-driven non-linear projection and incorporate prior biological knowledge (such as pathway information) to learn more meaningful low-dimensional representations for both cells and genes. Benchmark results show that SCRL outperforms other dimensional reduction methods on several recent scRNA-seq datasets.}, + file = {/Users/laurent/Documents/bibliography/to_read/Li et al. - 2017 - Network embedding-based representation learning fo.pdf}, + journal = {Nucleic Acids Research}, + language = {en}, + number = {19} +} + +@article{linInferringTFActivation2020, + title = {Inferring {{TF}} Activation Order in Time Series {{scRNA}}-{{Seq}} Studies}, + author = {Lin, Chieh and Ding, Jun and {Bar-Joseph}, Ziv}, + year = {2020}, + month = feb, + volume = {16}, + pages = {e1007644}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1007644}, + abstract = {Methods for the analysis of time series single cell expression data (scRNA-Seq) either do not utilize information about transcription factors (TFs) and their targets or only study these as a post-processing step. Using such information can both, improve the accuracy of the reconstructed model and cell assignments, while at the same time provide information on how and when the process is regulated. We developed the Continuous-State Hidden Markov Models TF (CSHMM-TF) method which integrates probabilistic modeling of scRNA-Seq data with the ability to assign TFs to specific activation points in the model.
TFs are assumed to influence the emission probabilities for cells assigned to later time points allowing us to identify not just the TFs controlling each path but also their order of activation. We tested CSHMM-TF on several mouse and human datasets. As we show, the method was able to identify known and novel TFs for all processes, assigned time of activation agrees with both expression information and prior knowledge and combinatorial predictions are supported by known interactions. We also show that CSHMM-TF improves upon prior methods that do not utilize TF-gene interaction.}, + file = {/Users/laurent/Zotero/storage/PT3BIEIP/Lin et al. - 2020 - Inferring TF activation order in time series scRNA.pdf;/Users/laurent/Zotero/storage/S59ZDER7/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Cell differentiation,Data processing,Gene expression,Gene regulation,Hidden Markov models,Lung development,Neurons,Transcription factors}, + language = {en}, + number = {2} +} + +@article{linInferringTFActivation2020a, + title = {Inferring {{TF}} Activation Order in Time Series {{scRNA}}-{{Seq}} Studies}, + author = {Lin, Chieh and Ding, Jun and {Bar-Joseph}, Ziv}, + editor = {Aerts, Stein}, + year = {2020}, + month = feb, + volume = {16}, + pages = {e1007644}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1007644}, + abstract = {Methods for the analysis of time series single cell expression data (scRNA-Seq) either do not utilize information about transcription factors (TFs) and their targets or only study these as a post-processing step. Using such information can both, improve the accuracy of the reconstructed model and cell assignments, while at the same time provide information on how and when the process is regulated. We developed the Continuous-State Hidden Markov Models TF (CSHMM-TF) method which integrates probabilistic modeling of scRNA-Seq data with the ability to assign TFs to specific activation points in the model. 
TFs are assumed to influence the emission probabilities for cells assigned to later time points allowing us to identify not just the TFs controlling each path but also their order of activation. We tested CSHMM-TF on several mouse and human datasets. As we show, the method was able to identify known and novel TFs for all processes, assigned time of activation agrees with both expression information and prior knowledge and combinatorial predictions are supported by known interactions. We also show that CSHMM-TF improves upon prior methods that do not utilize TF-gene interaction.}, + file = {/Users/laurent/Zotero/storage/V4H7BNIA/Lin et al. - 2020 - Inferring TF activation order in time series scRNA.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {2} +} + +@article{linKartDivideandconquerAlgorithm2017, + title = {Kart: A Divide-and-Conquer Algorithm for {{NGS}} Read Alignment}, + shorttitle = {Kart}, + author = {Lin, Hsin-Nan and Hsu, Wen-Lian}, + year = {2017}, + month = aug, + volume = {33}, + pages = {2281--2287}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx189}, + abstract = {Motivation: Next-generation sequencing (NGS) provides a great opportunity to investigate genome-wide variation at nucleotide resolution. Due to the huge amount of data, NGS applications require very fast and accurate alignment algorithms. 
Most existing algorithms for read mapping basically adopt seed-and-extend strategy, which is sequential in nature and takes much longer time on longer reads.}, + file = {/Users/laurent/Documents/bibliography/to_read/Lin and Hsu - 2017 - Kart a divide-and-conquer algorithm for NGS read .pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {15} +} + +@article{linMixtureProportionEstimation, + title = {Mixture {{Proportion Estimation}} for {{Positive}}\textendash{{Unlabeled Learning}} via {{Classifier Dimension Reduction}}}, + author = {Lin, Zhenfeng}, + pages = {24}, + abstract = {Positive\textendash{}unlabeled (PU) learning considers two samples, a positive set P with observations from only one class and an unlabeled set U with observations from two classes. The goal is to classify observations in U . Class mixture proportion estimation (MPE) in U is a key step in PU learning. In this paper, we show that PU learning is a generalization of local False Discovery Rate estimation. Further we show that PU learning MPE can be reduced to a one\textendash{}dimensional problem via construction of a classifier trained on the P and U data sets. These observations enable application of methodology from the multiple testing literature to the PU learning problem. In particular we adapt ideas from Storey [2002] and Patra and Sen [2015] to address parameter identifiability and MPE. 
We prove consistency of two mixture proportion estimators using bounds from empirical process theory, develop tuning parameter free implementations, and demonstrate that they have competitive performance on simulated waveform data and a protein signaling problem.}, + file = {/Users/laurent/Documents/bibliography/stats/Lin - Mixture Proportion Estimation for Positive–Unlabel.pdf}, + language = {en} +} + +@article{linScMergeLeveragesFactor2019, + title = {{{scMerge}} Leverages Factor Analysis, Stable Expression, and Pseudoreplication to Merge Multiple Single-Cell {{RNA}}-Seq Datasets}, + author = {Lin, Yingxin and Ghazanfar, Shila and Wang, Kevin Y. X. and {Gagnon-Bartsch}, Johann A. and Lo, Kitty K. and Su, Xianbin and Han, Ze-Guang and Ormerod, John T. and Speed, Terence P. and Yang, Pengyi and Yang, Jean Yee Hwa}, + year = {2019}, + month = apr, + pages = {9775--9784}, + issn = {0027-8424, 1091-6490}, + doi = {10.1073/pnas.1820006116}, + file = {/Users/laurent/Zotero/storage/ADKVG24T/Lin et al. - 2019 - scMerge leverages factor analysis, stable expressi.pdf;/Users/laurent/Zotero/storage/PL6K7CPB/Lin et al. - 2019 - scMerge leverages factor analysis, stable expressi.pdf}, + journal = {Proceedings of the National Academy of Sciences}, + language = {en} +} + +@article{liReferenceComponentAnalysis2017, + title = {Reference Component Analysis of Single-Cell Transcriptomes Elucidates Cellular Heterogeneity in Human Colorectal Tumors}, + author = {Li, Huipeng and Courtois, Elise T and Sengupta, Debarka and Tan, Yuliana and Chen, Kok Hao and Goh, Jolene Jie Lin and Kong, Say Li and Chua, Clarinda and Hon, Lim Kiat and Tan, Wah Siew and Wong, Mark and Choi, Paul Jongjoon and Wee, Lawrence J K and Hillmer, Axel M and Tan, Iain Beehuat and Robson, Paul and Prabhakar, Shyam}, + year = {2017}, + month = mar, + volume = {49}, + pages = {708--718}, + issn = {1061-4036, 1546-1718}, + doi = {10.1038/ng.3818}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Li et al.
- 2017 - Reference component analysis of single-cell transc.pdf;/Users/laurent/Zotero/storage/IJH56HMD/Li et al. - 2017 - Reference component analysis of single-cell transc.pdf;/Users/laurent/Zotero/storage/NFNGXVSJ/Li et al. - 2017 - Reference component analysis of single-cell transc.pdf;/Users/laurent/Zotero/storage/W8Y3TQP5/Li et al. - 2017 - Reference component analysis of single-cell transc.pdf}, + journal = {Nature Genetics}, + language = {en}, + number = {5} +} + +@article{liSequenceAlignmentMap2009, + title = {The {{Sequence Alignment}}/{{Map}} Format and {{SAMtools}}}, + author = {Li, Heng and Handsaker, Bob and Wysoker, Alec and Fennell, Tim and Ruan, Jue and Homer, Nils and Marth, Gabor and Abecasis, Goncalo and Durbin, Richard and {1000 Genome Project Data Processing Subgroup}}, + year = {2009}, + month = aug, + volume = {25}, + pages = {2078--2079}, + issn = {1367-4811}, + doi = {10.1093/bioinformatics/btp352}, + abstract = {SUMMARY: The Sequence Alignment/Map (SAM) format is a generic alignment format for storing read alignments against reference sequences, supporting short and long reads (up to 128 Mbp) produced by different sequencing platforms. It is flexible in style, compact in size, efficient in random access and is the format in which alignments from the 1000 Genomes Project are released. SAMtools implements various utilities for post-processing alignments in the SAM format, such as indexing, variant caller and alignment viewer, and thus provides universal tools for processing read alignments. +AVAILABILITY: http://samtools.sourceforge.net.}, + file = {/Users/laurent/Zotero/storage/6SKP2NZB/Li et al. 
- 2009 - The Sequence AlignmentMap format and SAMtools.pdf}, + journal = {Bioinformatics}, + keywords = {Algorithms,Base Sequence,Computational Biology,Genome,Genomics,Molecular Sequence Data,Sequence Alignment,Sequence Analysis; DNA,Software}, + language = {eng}, + number = {16}, + pmcid = {PMC2723002}, + pmid = {19505943} +} + +@article{liSinglecellRNAseqInterpretations2019, + title = {Single-Cell {{RNA}}-Seq Interpretations Using Evolutionary Multiobjective Ensemble Pruning}, + author = {Li, Xiangtao and Zhang, Shixiong and Wong, Ka-Chun}, + year = {2019}, + month = aug, + volume = {35}, + pages = {2809--2817}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty1056}, + abstract = {Motivation: In recent years, single-cell RNA sequencing enables us to discover cell types or even subtypes. Its increasing availability provides opport}, + file = {/Users/laurent/Zotero/storage/HR8XT2GD/Li et al. - 2019 - Single-cell RNA-seq interpretations using evolutio.pdf;/Users/laurent/Zotero/storage/E5HLJ37Z/5265329.html}, + journal = {Bioinformatics}, + language = {en}, + number = {16} +} + +@article{liStatisticalFrameworkSNP2011, + title = {A Statistical Framework for {{SNP}} Calling, Mutation Discovery, Association Mapping and Population Genetical Parameter Estimation from Sequencing Data}, + author = {Li, Heng}, + year = {2011}, + month = nov, + volume = {27}, + pages = {2987--2993}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btr509}, + abstract = {Motivation: Most existing methods for DNA sequence analysis rely on accurate sequences or genotypes. However, in applications of the next-generation sequencing (NGS), accurate genotypes may not be easily obtained (e.g. multi-sample low-coverage sequencing or somatic mutation discovery).
These applications press for the development of new methods for analyzing sequence data with uncertainty.}, + file = {/Users/laurent/Documents/bibliography/to_read/Li - 2011 - A statistical framework for SNP calling, mutation .pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {21} +} + +@article{liStatisticalSimulatorScDesign2019, + title = {A Statistical Simulator {{scDesign}} for Rational {{scRNA}}-Seq Experimental Design}, + author = {Li, Wei Vivian and Li, Jingyi Jessica}, + year = {2019}, + month = jul, + volume = {35}, + pages = {i41--i50}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz321}, + abstract = {Motivation: Single-cell RNA sequencing (scRNA-seq) has revolutionized biological sciences by revealing genome-wide gene expression levels within indivi}, + file = {/Users/laurent/Zotero/storage/ULX2I3X7/Li and Li - 2019 - A statistical simulator scDesign for rational scRN.pdf;/Users/laurent/Zotero/storage/ZRYTD7IH/5529133.html}, + journal = {Bioinformatics}, + language = {en}, + number = {14} +} + +@article{listTenSimpleRules2017, + title = {Ten {{Simple Rules}} for {{Developing Usable Software}} in {{Computational Biology}}}, + author = {List, Markus and Ebert, Peter and Albrecht, Felipe}, + editor = {Markel, Scott}, + year = {2017}, + month = jan, + volume = {13}, + pages = {e1005265}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005265}, + file = {/Users/laurent/Documents/bibliography/bioinfo/List et al. - 2017 - Ten Simple Rules for Developing Usable Software in.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {1} +} + +@article{liuHicGANInfersSuper2019, + title = {{{hicGAN}} Infers Super Resolution {{Hi}}-{{C}} Data with Generative Adversarial Networks}, + author = {Liu, Qiao and Lv, Hairong and Jiang, Rui}, + year = {2019}, + month = jul, + volume = {35}, + pages = {i99--i107}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz317}, + abstract = {Motivation:
Hi-C is a genome-wide technology for investigating 3D chromatin conformation by measuring physical contacts between pairs of genomic region}, + file = {/Users/laurent/Zotero/storage/SQW3YHRL/Liu et al. - 2019 - hicGAN infers super resolution Hi-C data with gene.pdf;/Users/laurent/Zotero/storage/MCDARYQP/5529246.html}, + journal = {Bioinformatics}, + language = {en}, + number = {14} +} + +@article{liuHiCNNVeryDeep2019, + title = {{{HiCNN}}: A Very Deep Convolutional Neural Network to Better Enhance the Resolution of {{Hi}}-{{C}} Data}, + shorttitle = {{{HiCNN}}}, + author = {Liu, Tong and Wang, Zheng}, + year = {2019}, + month = nov, + volume = {35}, + pages = {4222--4228}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz251}, + abstract = {AbstractMotivation. High-resolution Hi-C data are indispensable for the studies of three-dimensional (3D) genome organization at kilobase level. However, gener}, + file = {/Users/laurent/Zotero/storage/A7C43KH3/Liu and Wang - 2019 - HiCNN a very deep convolutional neural network to.pdf;/Users/laurent/Zotero/storage/QJWMJTW4/5436129.html}, + journal = {Bioinformatics}, + language = {en}, + number = {21} +} + +@article{liuLaggedKernelMachine2018, + title = {Lagged Kernel Machine Regression for Identifying Time Windows of Susceptibility to Exposures of Complex Mixtures}, + author = {Liu, Shelley H. and Bobb, Jennifer F. and Lee, Kyu Ha and Gennings, Chris and Claus Henn, Birgit and Bellinger, David and Austin, Christine and Schnaas, Lourdes and {Tellez-Rojo}, Martha M. and Hu, Howard and Wright, Robert O. and Arora, Manish and Coull, Brent A.}, + year = {2018}, + month = jul, + volume = {19}, + pages = {325--341}, + issn = {1465-4644}, + doi = {10.1093/biostatistics/kxx036}, + abstract = {SUMMARY. The impact of neurotoxic chemical mixtures on children's health is a critical public health concern. It is well known that during early life, toxic ex}, + file = {/Users/laurent/Zotero/storage/DAWAJCJQ/Liu et al. 
- 2018 - Lagged kernel machine regression for identifying t.pdf;/Users/laurent/Zotero/storage/QGTQ7K78/4105006.html}, + journal = {Biostatistics}, + language = {en}, + number = {3} +} + +@article{liuLLRLatentLowrank2017, + title = {{{LLR}}: A Latent Low-Rank Approach to Colocalizing Genetic Risk Variants in Multiple {{GWAS}}}, + shorttitle = {{{LLR}}}, + author = {Liu, Jin and Wan, Xiang and Wang, Chaolong and Yang, Chao and Zhou, Xiaowei and Yang, Can}, + year = {2017}, + month = dec, + volume = {33}, + pages = {3878--3886}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx512}, + abstract = {Motivation: Genome-wide association studies (GWAS), which genotype millions of single nucleotide polymorphisms (SNPs) in thousands of individuals, are widely used to identify the risk SNPs underlying complex human phenotypes (quantitative traits or diseases). Most conventional statistical methods in GWAS only investigate one phenotype at a time. However, an increasing number of reports suggest the ubiquity of pleiotropy, i.e., many complex phenotypes sharing common genetic bases. This motivated us to leverage pleiotropy to develop new statistical approaches to joint analysis of multiple GWAS.}, + file = {/Users/laurent/Documents/bibliography/to_read/Liu et al. - 2017 - LLR a latent low-rank approach to colocalizing ge.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {24} +} + +@article{liuScRNABatchQCMultisamplesQuality, + title = {{{scRNABatchQC}}: Multi-Samples Quality Control for Single Cell {{RNA}}-Seq Data}, + shorttitle = {{{scRNABatchQC}}}, + author = {Liu, Qi and Sheng, Quanhu and Ping, Jie and Ramirez, Marisol Adelina and Lau, Ken S. and Coffey, Robert J. and Shyr, Yu}, + doi = {10.1093/bioinformatics/btz601}, + abstract = {AbstractSummary. Single cell RNA sequencing is a revolutionary technique to characterize inter-cellular transcriptomics heterogeneity. 
However, the data are no}, + file = {/Users/laurent/Zotero/storage/RXVVZCFE/Liu et al. - scRNABatchQC multi-samples quality control for si.pdf;/Users/laurent/Zotero/storage/X4UHTLUE/5542946.html}, + journal = {Bioinformatics}, + language = {en} +} + +@article{liuScRNABatchQCMultisamplesQuality2019, + title = {{{scRNABatchQC}}: Multi-Samples Quality Control for Single Cell {{RNA}}-Seq Data}, + shorttitle = {{{scRNABatchQC}}}, + author = {Liu, Qi and Sheng, Quanhu and Ping, Jie and Ramirez, Marisol Adelina and Lau, Ken S. and Coffey, Robert J. and Shyr, Yu}, + year = {2019}, + month = dec, + volume = {35}, + pages = {5306--5308}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz601}, + abstract = {AbstractSummary. Single cell RNA sequencing is a revolutionary technique to characterize inter-cellular transcriptomics heterogeneity. However, the data are no}, + journal = {Bioinformatics}, + language = {en}, + number = {24} +} + +@article{liuScRNABatchQCMultisamplesQuality2019a, + title = {{{scRNABatchQC}}: Multi-Samples Quality Control for Single Cell {{RNA}}-Seq Data}, + shorttitle = {{{scRNABatchQC}}}, + author = {Liu, Qi and Sheng, Quanhu and Ping, Jie and Ramirez, Marisol Adelina and Lau, Ken S. and Coffey, Robert J. and Shyr, Yu}, + year = {2019}, + month = dec, + volume = {35}, + pages = {5306--5308}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz601}, + abstract = {AbstractSummary. Single cell RNA sequencing is a revolutionary technique to characterize inter-cellular transcriptomics heterogeneity. However, the data are no}, + file = {/Users/laurent/Zotero/storage/FBU7MU37/Liu et al. 
- 2019 - scRNABatchQC multi-samples quality control for si.pdf;/Users/laurent/Zotero/storage/5C4KC4KH/5542946.html}, + journal = {Bioinformatics}, + language = {en}, + number = {24} +} + +@article{liuScRNAssSinglecellRNAseq2019, + title = {{{scRNAss}}: A Single-Cell {{RNA}}-Seq Assembler via Imputing Dropouts and Combing Junctions}, + shorttitle = {{{scRNAss}}}, + author = {Liu, Juntao and Liu, Xiangyu and Ren, Xianwen and Li, Guojun}, + editor = {Berger, Bonnie}, + year = {2019}, + month = apr, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btz240}, + abstract = {Motivation: Full-length transcript reconstruction is essential for single-cell RNA-seq data analysis, but dropout events, which can cause transcripts discarded completely or broken into pieces, pose great challenges for transcript assembly. Currently available RNA-seq assemblers are generally designed for bulk RNA sequencing. To fill the gap, we introduce single-cell RNA-seq assembler, a method that applies explicit strategies to impute lost information caused by dropout events and a combing strategy to infer transcripts using scRNA-seq.}, + file = {/Users/laurent/Zotero/storage/4H33ZTE4/Liu et al. - 2019 - scRNAss a single-cell RNA-seq assembler via imput.pdf;/Users/laurent/Zotero/storage/NBZQFUVW/Liu et al. - 2019 - scRNAss a single-cell RNA-seq assembler via imput.pdf}, + journal = {Bioinformatics}, + language = {en} +} + +@article{liuScRNAssSinglecellRNAseq2019a, + title = {{{scRNAss}}: A Single-Cell {{RNA}}-Seq Assembler via Imputing Dropouts and Combing Junctions}, + shorttitle = {{{scRNAss}}}, + author = {Liu, Juntao and Liu, Xiangyu and Ren, Xianwen and Li, Guojun}, + year = {2019}, + month = nov, + volume = {35}, + pages = {4264--4271}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz240}, + abstract = {AbstractMotivation. 
Full-length transcript reconstruction is essential for single-cell RNA-seq data analysis, but dropout events, which can cause transcripts d}, + file = {/Users/laurent/Zotero/storage/JW4WZK97/Liu et al. - 2019 - scRNAss a single-cell RNA-seq assembler via imput.pdf;/Users/laurent/Zotero/storage/WMZ3XGK3/5429354.html}, + journal = {Bioinformatics}, + language = {en}, + number = {21} +} + +@article{liuStrawberryFastAccurate2017, + title = {Strawberry: {{Fast}} and Accurate Genome-Guided Transcript Reconstruction and Quantification from {{RNA}}-{{Seq}}}, + shorttitle = {Strawberry}, + author = {Liu, Ruolin and Dickerson, Julie}, + editor = {Tan, Kai}, + year = {2017}, + month = nov, + volume = {13}, + pages = {e1005851}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005851}, + abstract = {We propose a novel method and software tool, Strawberry, for transcript reconstruction and quantification from RNA-Seq data under the guidance of genome alignment and independent of gene annotation. Strawberry consists of two modules: assembly and quantification. The novelty of Strawberry is that the two modules use different optimization frameworks but utilize the same data graph structure, which allows a highly efficient, expandable and accurate algorithm for dealing large data. The assembly module parses aligned reads into splicing graphs, and uses network flow algorithms to select the most likely transcripts. The quantification module uses a latent class model to assign read counts from the nodes of splicing graphs to transcripts. Strawberry simultaneously estimates the transcript abundances and corrects for sequencing bias through an EM algorithm. Based on simulations, Strawberry outperforms Cufflinks and StringTie in terms of both assembly and quantification accuracies. 
Under the evaluation of a real data set, the estimated transcript expression by Strawberry has the highest correlation with Nanostring probe counts, an independent experiment measure for transcript expression. Availability: Strawberry is written in C++14, and is available as open source software at https://github.com/ruolin/strawberry under the MIT license.}, + file = {/Users/laurent/Documents/bibliography/to_read/Liu and Dickerson - 2017 - Strawberry Fast and accurate genome-guided transc.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {11} +} + +@article{lopezDeepGenerativeModel, + title = {A Deep Generative Model for Gene Expression Profiles from Single-Cell {{RNA}} Sequencing}, + author = {Lopez, Romain and Regier, Jeffrey and Cole, Michael and Jordan, Michael and Yosef, Nir}, + pages = {6}, + file = {/Users/laurent/Documents/bibliography/stats/Lopez et al. - A deep generative model for gene expression profile.pdf}, + language = {en} +} + +@article{lopezDeepGenerativeModeling2018, + title = {Deep Generative Modeling for Single-Cell Transcriptomics}, + author = {Lopez, Romain and Regier, Jeffrey and Cole, Michael B. and Jordan, Michael I. and Yosef, Nir}, + year = {2018}, + month = dec, + volume = {15}, + pages = {1053--1058}, + issn = {1548-7105}, + doi = {10.1038/s41592-018-0229-2}, + abstract = {scVI is a ready-to-use generative deep learning tool for large-scale single-cell RNA-seq data that enables raw data processing and a wide range of rapid and accurate downstream analyses.}, + copyright = {2018 The Author(s), under exclusive licence to Springer Nature America, Inc.}, + file = {/Users/laurent/Zotero/storage/GXWKPHT3/Lopez et al. - 2018 - Deep generative modeling for single-cell transcrip.pdf;/Users/laurent/Zotero/storage/VDKM69W4/Lopez et al.
- 2018 - Deep generative modeling for single-cell transcrip.pdf;/Users/laurent/Zotero/storage/ZP72PSVV/s41592-018-0229-2.html}, + journal = {Nature Methods}, + language = {en}, + number = {12} +} + +@article{lopezExploreEditLeverage2019, + title = {Explore, Edit and Leverage Genomic Annotations Using {{Python GTF}} Toolkit}, + author = {Lopez, F. and Charbonnier, G. and Kermezli, Y. and Belhocine, M. and Ferr{\'e}, Q. and Zweig, N. and Aribi, M. and Gonzalez, A. and Spicuglia, S. and Puthier, D.}, + year = {2019}, + month = sep, + volume = {35}, + pages = {3487--3488}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz116}, + abstract = {AbstractMotivation. While Python has become very popular in bioinformatics, a limited number of libraries exist for fast manipulation of gene coordinates in En}, + file = {/Users/laurent/Zotero/storage/CWLXRLT4/Lopez et al. - 2019 - Explore, edit and leverage genomic annotations usi.pdf;/Users/laurent/Zotero/storage/M3TJIENI/5320559.html}, + journal = {Bioinformatics}, + language = {en}, + number = {18} +} + +@article{lopezJointModelUnpaired2019, + title = {A Joint Model of Unpaired Data from {{scRNA}}-Seq and Spatial Transcriptomics for Imputing Missing Gene Expression Measurements}, + author = {Lopez, Romain and Nazaret, Achille and Langevin, Maxime and Samaran, Jules and Regier, Jeffrey and Jordan, Michael I. and Yosef, Nir}, + year = {2019}, + month = may, + abstract = {Spatial studies of transcriptome provide biologists with gene expression maps of heterogeneous and complex tissues. However, most experimental protocols for spatial transcriptomics suffer from the need to select beforehand a small fraction of genes to be quantified over the entire transcriptome. Standard single-cell RNA sequencing (scRNA-seq) is more prevalent, easier to implement and can in principle capture any gene but cannot recover the spatial location of the cells.
In this manuscript, we focus on the problem of imputation of missing genes in spatial transcriptomic data based on (unpaired) standard scRNA-seq data from the same biological tissue. Building upon domain adaptation work, we propose gimVI, a deep generative model for the integration of spatial transcriptomic data and scRNA-seq data that can be used to impute missing genes. After describing our generative model and an inference procedure for it, we compare gimVI to alternative methods from computational biology or domain adaptation on real datasets and outperform Seurat Anchors, Liger and CORAL to impute held-out genes.}, + archivePrefix = {arXiv}, + eprint = {1905.02269}, + eprinttype = {arxiv}, + file = {/Users/laurent/Zotero/storage/SSC73TIV/Lopez et al. - 2019 - A joint model of unpaired data from scRNA-seq and .pdf;/Users/laurent/Zotero/storage/VT7X74UD/Lopez et al. - 2019 - A joint model of unpaired data from scRNA-seq and .pdf}, + journal = {arXiv:1905.02269 [cs, q-bio, stat]}, + keywords = {Computer Science - Machine Learning,Quantitative Biology - Genomics,Statistics - Machine Learning}, + language = {en}, + primaryClass = {cs, q-bio, stat} +} + +@article{lortieTenSimpleRules2017, + title = {Ten Simple Rules for Short and Swift Presentations}, + author = {Lortie, Christopher J.}, + year = {2017}, + month = mar, + volume = {13}, + pages = {e1005373}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005373}, + file = {/Users/laurent/Documents/bibliography/bioinfo/Lortie - 2017 - Ten simple rules for short and swift presentations.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {3} +} + +@article{lotfollahiScGenPredictsSinglecell2019, + title = {{{scGen}} Predicts Single-Cell Perturbation Responses}, + author = {Lotfollahi, Mohammad and Wolf, F. 
Alexander and Theis, Fabian J.}, + year = {2019}, + month = aug, + volume = {16}, + pages = {715--721}, + issn = {1548-7105}, + doi = {10.1038/s41592-019-0494-8}, + abstract = {scGen predicts cellular responses to phenomena absent from the training data. Such out-of-sample predictions are shown across cell types and species.}, + copyright = {2019 The Author(s), under exclusive licence to Springer Nature America, Inc.}, + file = {/Users/laurent/Zotero/storage/NBPMU2EY/Lotfollahi et al. - 2019 - scGen predicts single-cell perturbation responses.pdf;/Users/laurent/Zotero/storage/YVI2U97W/s41592-019-0494-8.html}, + journal = {Nature Methods}, + language = {en}, + number = {8} +} + +@article{loveModeratedEstimationFold2014, + title = {Moderated Estimation of Fold Change and Dispersion for {{RNA}}-Seq Data with {{DESeq2}}}, + author = {Love, Michael I and Huber, Wolfgang and Anders, Simon}, + year = {2014}, + month = dec, + volume = {15}, + issn = {1474-760X}, + doi = {10.1186/s13059-014-0550-8}, + abstract = {In comparative high-throughput sequencing assays, a fundamental task is the analysis of count data, such as read counts per gene in RNA-seq, for evidence of systematic changes across experimental conditions. Small replicate numbers, discreteness, large dynamic range and the presence of outliers require a suitable statistical approach. We present DESeq2, a method for differential analysis of count data, using shrinkage estimation for dispersions and fold changes to improve stability and interpretability of estimates. This enables a more quantitative analysis focused on the strength rather than the mere presence of differential expression. The DESeq2 package is available at http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html.}, + file = {/Users/laurent/Documents/bibliography/DEA/Love et al. - 2014 - Moderated estimation of fold change and dispersion.pdf;/Users/laurent/Documents/bibliography/DEA/Love et al.
- Moderated estimation of fold change and dispersion.pdf}, + journal = {Genome Biology}, + language = {en}, + number = {12} +} + +@article{loveModeratedEstimationFold2014a, + title = {Moderated Estimation of Fold Change and Dispersion for {{RNA}}-Seq Data with {{DESeq2}}}, + author = {Love, Michael I and Huber, Wolfgang and Anders, Simon}, + year = {2014}, + month = dec, + volume = {15}, + issn = {1474-760X}, + doi = {10.1186/s13059-014-0550-8}, + abstract = {In comparative high-throughput sequencing assays, a fundamental task is the analysis of count data, such as read counts per gene in RNA-seq, for evidence of systematic changes across experimental conditions. Small replicate numbers, discreteness, large dynamic range and the presence of outliers require a suitable statistical approach. We present DESeq2, a method for differential analysis of count data, using shrinkage estimation for dispersions and fold changes to improve stability and interpretability of estimates. This enables a more quantitative analysis focused on the strength rather than the mere presence of differential expression. The DESeq2 package is available at http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html.}, + file = {/Users/laurent/Zotero/storage/VI7WGGVW/Love et al. - 2014 - Moderated estimation of fold change and dispersion.pdf}, + journal = {Genome Biology}, + language = {en}, + number = {12} +} + +@article{luClusteringTemporalGene2019, + title = {Clustering of Temporal Gene Expression Data with Mixtures of Mixed Effects Models with a Penalized Likelihood}, + author = {Lu, Darlene and Tripodis, Yorghos and Gerstenfeld, Louis C. and Demissie, Serkalem}, + year = {2019}, + month = mar, + volume = {35}, + pages = {778--786}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty696}, + abstract = {AbstractMotivation.
Clustering algorithms like K-Means and standard Gaussian mixture models (GMM) fail to account for the structure of variability of replicate}, + file = {/Users/laurent/Zotero/storage/9XHF6QHU/Lu et al. - 2019 - Clustering of temporal gene expression data with m.pdf;/Users/laurent/Zotero/storage/AR8SGGHI/5068161.html}, + journal = {Bioinformatics}, + language = {en}, + number = {5} +} + +@article{luczakSurveyEvaluationsHistogrambased2019, + title = {A Survey and Evaluations of Histogram-Based Statistics in Alignment-Free Sequence Comparison}, + author = {Luczak, Brian B. and James, Benjamin T. and Girgis, Hani Z.}, + year = {2019}, + month = jul, + volume = {20}, + pages = {1222--1237}, + issn = {1467-5463}, + doi = {10.1093/bib/bbx161}, + abstract = {AbstractMotivation. Since the dawn of the bioinformatics field, sequence alignment scores have been the main method for comparing sequences. However, alignment}, + file = {/Users/laurent/Zotero/storage/3HPTKUZN/Luczak et al. - 2019 - A survey and evaluations of histogram-based statis.pdf;/Users/laurent/Zotero/storage/TUKVZ68P/4696316.html}, + journal = {Briefings in Bioinformatics}, + language = {en}, + number = {4} +} + +@article{lueckenCurrentBestPractices2019, + title = {Current Best Practices in Single-Cell {{RNA}}-Seq Analysis: A Tutorial}, + shorttitle = {Current Best Practices in Single-Cell {{RNA}}-Seq Analysis}, + author = {Luecken, Malte D and Theis, Fabian J}, + year = {2019}, + month = jun, + volume = {15}, + pages = {e8746}, + issn = {1744-4292}, + doi = {10.15252/msb.20188746}, + abstract = {Abstract Single-cell RNA-seq has enabled gene expression to be studied at an unprecedented resolution. The promise of this technology is attracting a growing user base for single-cell analysis methods. As more analysis tools are becoming available, it is becoming increasingly difficult to navigate this landscape and produce an up-to-date workflow to analyse one's data. 
Here, we detail the steps of a typical single-cell RNA-seq analysis, including pre-processing (quality control, normalization, data correction, feature selection, and dimensionality reduction) and cell- and gene-level downstream analysis. We formulate current best-practice recommendations for these steps based on independent comparison studies. We have integrated these best-practice recommendations into a workflow, which we apply to a public dataset to further illustrate how these steps work in practice. Our documented case study can be found at https://www.github.com/theislab/single-cell-tutorial. This review will serve as a workflow tutorial for new entrants into the field, and help established users update their analysis pipelines.}, + file = {/Users/laurent/Zotero/storage/YC984422/Luecken and Theis - 2019 - Current best practices in single-cell RNA-seq anal.pdf;/Users/laurent/Zotero/storage/PTUIYVE9/msb.html}, + journal = {Molecular Systems Biology}, + keywords = {analysis pipeline development,computational biology,data analysis tutorial,single-cell RNA-seq}, + number = {6} +} + +@article{luoIntegrativeSinglecellOmics2018, + title = {Integrative Single-Cell Omics Analyses Reveal Epigenetic Heterogeneity in Mouse Embryonic Stem Cells}, + author = {Luo, Yanting and He, Jianlin and Xu, Xiguang and Sun, Ming-an and Wu, Xiaowei and Lu, Xuemei and Xie, Hehuang}, + editor = {Ioshikhes, Ilya}, + year = {2018}, + month = mar, + volume = {14}, + pages = {e1006034}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1006034}, + abstract = {Embryonic stem cells (ESCs) consist of a population of self-renewing cells displaying extensive phenotypic and functional heterogeneity. Research towards the understanding of the epigenetic mechanisms underlying the heterogeneity among ESCs is still in its initial stage. 
Key issues, such as how to identify cell-subset specifically methylated loci and how to interpret the biological meanings of methylation variations remain largely unexplored. To fill in the research gap, we implemented a computational pipeline to analyze single-cell methylome and to perform an integrative analysis with single-cell transcriptome data. According to the origins of variation in DNA methylation, we determined the genomic loci associated with allelic-specific methylation or asymmetric DNA methylation, and explored a beta mixture model to infer the genomic loci exhibiting cell-subset specific methylation (CSM). We observed that the putative CSM loci in ESCs are significantly enriched in CpG island (CGI) shelves and regions with histone marks for promoter and enhancer, and the genes hosting putative CSM loci show wide-ranging expression among ESCs. More interestingly, the putative CSM loci may be clustered into co-methylated modules enriching the binding motifs of distinct sets of transcription factors. Taken together, our study provided a novel tool to explore single-cell methylome and transcriptome to reveal the underlying transcriptional regulatory networks associated with epigenetic heterogeneity of ESCs.}, + file = {/Users/laurent/Documents/bibliography/to_read/Luo et al. 
- 2018 - Integrative single-cell omics analyses reveal epig.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {3} +} + +@article{lynchFrailtyAdaptiveHypotheses2007, + title = {The Frailty of Adaptive Hypotheses for the Origins of Organismal Complexity}, + author = {Lynch, M.}, + year = {2007}, + month = may, + volume = {104}, + pages = {8597--8604}, + issn = {0027-8424, 1091-6490}, + doi = {10.1073/pnas.0702207104}, + file = {/Users/laurent/Documents/bibliography/evolution/Lynch - 2007 - The frailty of adaptive hypotheses for the origins.pdf}, + journal = {Proceedings of the National Academy of Sciences}, + language = {en}, + number = {Supplement 1} +} + +@article{lynchGeneticDriftSelection2016, + title = {Genetic Drift, Selection and the Evolution of the Mutation Rate}, + author = {Lynch, Michael and Ackerman, Matthew S. and Gout, Jean-Francois and Long, Hongan and Sung, Way and Thomas, W. Kelley and Foster, Patricia L.}, + year = {2016}, + month = nov, + volume = {17}, + pages = {704--714}, + issn = {1471-0056, 1471-0064}, + doi = {10.1038/nrg.2016.104}, + abstract = {As one of the few cellular traits that can be quantified across the tree of life, DNA-replication fidelity provides an excellent platform for understanding fundamental evolutionary processes. Furthermore, because mutation is the ultimate source of all genetic variation, clarifying why mutation rates vary is crucial for understanding all areas of biology. A potentially revealing hypothesis for mutation-rate evolution is that natural selection primarily operates to improve replication fidelity, with the ultimate limits to what can be achieved set by the power of random genetic drift. 
This drift-barrier hypothesis is consistent with comparative measures of mutation rates, provides a simple explanation for the existence of error-prone polymerases and yields a formal counter-argument to the view that selection fine-tunes gene-specific mutation rates.}, + file = {/Users/laurent/Documents/bibliography/evolution/Lynch et al. - 2016 - Genetic drift, selection and the evolution of the .pdf}, + journal = {Nature Reviews Genetics}, + language = {en}, + number = {11} +} + +@article{lyuConditionadaptiveFusedGraphical2018, + title = {Condition-Adaptive Fused Graphical Lasso ({{CFGL}}): {{An}} Adaptive Procedure for Inferring Condition-Specific Gene Co-Expression Network}, + shorttitle = {Condition-Adaptive Fused Graphical Lasso ({{CFGL}})}, + author = {Lyu, Yafei and Xue, Lingzhou and Zhang, Feipeng and Koch, Hillary and Saba, Laura and Kechris, Katerina and Li, Qunhua}, + year = {2018}, + month = sep, + volume = {14}, + pages = {e1006436}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1006436}, + abstract = {Co-expression network analysis provides useful information for studying gene regulation in biological processes. Examining condition-specific patterns of co-expression can provide insights into the underlying cellular processes activated in a particular condition. One challenge in this type of analysis is that the sample sizes in each condition are usually small, making the statistical inference of co-expression patterns highly underpowered. A joint network construction that borrows information from related structures across conditions has the potential to improve the power of the analysis. One possible approach to constructing the co-expression network is to use the Gaussian graphical model. Though several methods are available for joint estimation of multiple graphical models, they do not fully account for the heterogeneity between samples and between co-expression patterns introduced by condition specificity. 
Here we develop the condition-adaptive fused graphical lasso (CFGL), a data-driven approach to incorporate condition specificity in the estimation of co-expression networks. We show that this method improves the accuracy with which networks are learned. The application of this method on a rat multi-tissue dataset and The Cancer Genome Atlas (TCGA) breast cancer dataset provides interesting biological insights. In both analyses, we identify numerous modules enriched for Gene Ontology functions and observe that the modules that are upregulated in a particular condition are often involved in condition-specific activities. Interestingly, we observe that the genes strongly associated with survival time in the TCGA dataset are less likely to be network hubs, suggesting that genes associated with cancer progression are likely to govern specific functions or execute final biological functions in pathways, rather than regulating a large number of biological processes. Additionally, we observed that the tumor-specific hub genes tend to have few shared edges with normal tissue, revealing tumor-specific regulatory mechanism.}, + file = {/Users/laurent/Zotero/storage/JUCNWPF3/Lyu et al. - 2018 - Condition-adaptive fused graphical lasso (CFGL) A.pdf;/Users/laurent/Zotero/storage/XAYLSIFD/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Breast cancer,Covariance,Gene expression,Gene regulation,Genetic networks,Heart,Network analysis,Simulation and modeling}, + language = {en}, + number = {9} +} + +@article{maACTINNAutomatedIdentification, + title = {{{ACTINN}}: Automated Identification of Cell Types in Single Cell {{RNA}} Sequencing}, + shorttitle = {{{ACTINN}}}, + author = {Ma, Feiyang and Pellegrini, Matteo}, + doi = {10.1093/bioinformatics/btz592}, + abstract = {AbstractMotivation. Cell type identification is one of the major goals in single cell RNA sequencing (scRNA-seq). 
Current methods for assigning cell types typi}, + file = {/Users/laurent/Zotero/storage/UGFXK6DA/Ma and Pellegrini - ACTINN automated identification of cell types in .pdf;/Users/laurent/Zotero/storage/BBFEE9DD/5540320.html}, + journal = {Bioinformatics}, + language = {en} +} + +@article{madanitonekaboniSIGNSimilarityIdentification2019, + title = {{{SIGN}}: Similarity Identification in Gene Expression}, + shorttitle = {{{SIGN}}}, + author = {Madani Tonekaboni, Seyed Ali and Manem, Venkata Satya Kumar and {El-Hachem}, Nehme and {Haibe-Kains}, Benjamin}, + year = {2019}, + month = nov, + volume = {35}, + pages = {4830--4833}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz485}, + abstract = {AbstractMotivation. High-throughput molecular profiles of human cells have been used in predictive computational approaches for stratification of healthy and m}, + file = {/Users/laurent/Zotero/storage/8I4DXPVI/Madani Tonekaboni et al. - 2019 - SIGN similarity identification in gene expression.pdf;/Users/laurent/Zotero/storage/5EJRCQXT/5518919.html}, + journal = {Bioinformatics}, + language = {en}, + number = {22} +} + +@article{magiNanoporeSequencingData2018, + title = {Nanopore Sequencing Data Analysis: State of the Art, Applications and Challenges}, + shorttitle = {Nanopore Sequencing Data Analysis}, + author = {Magi, Alberto and Semeraro, Roberto and Mingrino, Alessandra and Giusti, Betti and D'Aurizio, Romina}, + year = {2018}, + month = nov, + volume = {19}, + pages = {1256--1272}, + issn = {1467-5463}, + doi = {10.1093/bib/bbx062}, + abstract = {Abstract. The nanopore sequencing process is based on the transit of a DNA molecule through a nanoscopic pore, and since the 90s is considered as one of the mo}, + file = {/Users/laurent/Zotero/storage/VJ57KPX2/Magi et al. 
- 2018 - Nanopore sequencing data analysis state of the ar.pdf;/Users/laurent/Zotero/storage/UPIXRP5C/3869205.html}, + journal = {Briefings in Bioinformatics}, + language = {en}, + number = {6} +} + +@article{makalowskiTransposableElementsClassification2019, + title = {Transposable {{Elements}}: {{Classification}}, {{Identification}}, and {{Their Use As}} a {{Tool For Comparative Genomics}}}, + shorttitle = {Transposable {{Elements}}}, + author = {Maka{\l}owski, Wojciech and Gotea, Valer and Pande, Amit and Maka{\l}owska, Izabela}, + year = {2019}, + volume = {1910}, + pages = {177--207}, + issn = {1940-6029}, + doi = {10.1007/978-1-4939-9074-0_6}, + abstract = {Most genomes are populated by hundreds of thousands of sequences originated from mobile elements. On the one hand, these sequences present a real challenge in the process of genome analysis and annotation. On the other hand, they are very interesting biological subjects involved in many cellular processes. Here we present an overview of transposable elements biodiversity, and we discuss different approaches to transposable elements detection and analyses.}, + file = {/Users/laurent/Zotero/storage/THBM9DAF/Makałowski et al. - 2019 - Transposable Elements Classification, Identificat.pdf}, + journal = {Methods in Molecular Biology (Clifton, N.J.)}, + keywords = {Genome analysis,Genome evolution,Mobile elements,Repetitive elements,Transposable elements,Transposons}, + language = {eng}, + pmid = {31278665} +} + +@article{makrodimitrisMetricLearningExpression2019, + title = {Metric Learning on Expression Data for Gene Function Prediction}, + author = {Makrodimitris, Stavros and Reinders, Marcel J T and {van Ham}, Roeland C H J}, + editor = {Robinson, Mark}, + year = {2019}, + month = sep, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btz731}, + abstract = {Motivation: Co-expression of two genes across different conditions is indicative of their involvement in the same biological process. 
However, when using RNA-Seq datasets with many experimental conditions from diverse sources, only a subset of the experimental conditions is expected to be relevant for finding genes related to a particular Gene Ontology (GO) term. Therefore, we hypothesize that when the purpose is to find similarly functioning genes, the co-expression of genes should not be determined on all samples but only on those samples informative for the GO term of interest.}, + file = {/Users/laurent/Zotero/storage/5V7TGIMX/Makrodimitris et al. - 2019 - Metric learning on expression data for gene functi.pdf}, + journal = {Bioinformatics}, + language = {en} +} + +@article{makrodimitrisMetricLearningExpression2020, + title = {Metric Learning on Expression Data for Gene Function Prediction}, + author = {Makrodimitris, Stavros and Reinders, Marcel J. T. and {van Ham}, Roeland C. H. J.}, + year = {2020}, + month = feb, + volume = {36}, + pages = {1182--1190}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz731}, + abstract = {AbstractMotivation. Co-expression of two genes across different conditions is indicative of their involvement in the same biological process. However, when usi}, + file = {/Users/laurent/Zotero/storage/XTX6MRJ3/Makrodimitris et al. - 2020 - Metric learning on expression data for gene functi.pdf;/Users/laurent/Zotero/storage/ZESXVTEX/5575758.html}, + journal = {Bioinformatics}, + language = {en}, + number = {4} +} + +@article{malcomSexChromosomesFrogs2014, + title = {The {{Sex Chromosomes}} of {{Frogs}}: {{Variability}} and {{Tolerance Offer Clues}} to {{Genome Evolution}} and {{Function}}}, + shorttitle = {The {{Sex Chromosomes}} of {{Frogs}}}, + author = {Malcom, Jacob W. and Kudra, Randal S. 
and Malone, John H.}, + year = {2014}, + month = mar, + volume = {2}, + pages = {68--76}, + issn = {1839-9940}, + doi = {10.7150/jgen.8044}, + abstract = {Frog sex chromosomes offer an ideal system for advancing our understanding of genome evolution and function because of the variety of sex determination systems in the group, the diversity of sex chromosome maturation states, the ease of experimental manipulation during early development. After briefly reviewing sex chromosome biology generally, we focus on what is known about frog sex determination, sex chromosome evolution, and recent, genomics-facilitated advances in the field. In closing we highlight gaps in our current knowledge of frog sex chromosomes, and suggest priorities for future research that can advance broad knowledge of gene dose and sex chromosome evolution.}, + journal = {Journal of Genomics}, + pmcid = {PMC4091447}, + pmid = {25031658} +} + +@article{malikGrouperGraphbasedClustering2018, + title = {Grouper: Graph-Based Clustering and Annotation for Improved de Novo Transcriptome Analysis}, + shorttitle = {Grouper}, + author = {Malik, Laraib and Almodaresi, Fatemeh and Patro, Rob}, + year = {2018}, + month = oct, + volume = {34}, + pages = {3265--3272}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty378}, + abstract = {AbstractMotivation. De novo transcriptome analysis using RNA-seq offers a promising means to study gene expression in non-model organisms. Yet, the difficulty}, + file = {/Users/laurent/Zotero/storage/ZVBVG49Q/Malik et al. 
- 2018 - Grouper graph-based clustering and annotation for.pdf;/Users/laurent/Zotero/storage/FK2DCNYJ/4994263.html}, + journal = {Bioinformatics}, + language = {en}, + number = {19} +} + +@article{mandricRepeatawareEvaluationScaffolding2018, + title = {Repeat-Aware Evaluation of Scaffolding Tools}, + author = {Mandric, Igor and Knyazev, Sergey and Zelikovsky, Alex}, + year = {2018}, + month = aug, + volume = {34}, + pages = {2530--2537}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty131}, + abstract = {AbstractSummary. Genomic sequences are assembled into a variable, but large number of contigs that should be scaffolded (ordered and oriented) for facilitating}, + file = {/Users/laurent/Zotero/storage/NYD3M6MW/Mandric et al. - 2018 - Repeat-aware evaluation of scaffolding tools.pdf;/Users/laurent/Zotero/storage/84R82NFN/4934936.html}, + journal = {Bioinformatics}, + language = {en}, + number = {15} +} + +@article{mangulChallengesRecommendationsImprove2019, + title = {Challenges and Recommendations to Improve the Installability and Archival Stability of Omics Computational Tools}, + author = {Mangul, Serghei and Mosqueiro, Thiago and Abdill, Richard J. and Duong, Dat and Mitchell, Keith and Sarwal, Varuni and Hill, Brian and Brito, Jaqueline and Littman, Russell Jared and Statz, Benjamin and Lam, Angela Ka-Mei and Dayama, Gargi and Grieneisen, Laura and Martin, Lana S. and Flint, Jonathan and Eskin, Eleazar and Blekhman, Ran}, + year = {2019}, + month = jun, + volume = {17}, + pages = {e3000333}, + issn = {1545-7885}, + doi = {10.1371/journal.pbio.3000333}, + abstract = {Developing new software tools for analysis of large-scale biological data is a key component of advancing modern biomedical research. Scientific reproduction of published findings requires running computational tools on data generated by such studies, yet little attention is presently allocated to the installability and archival stability of computational software tools. 
Scientific journals require data and code sharing, but none currently require authors to guarantee the continuing functionality of newly published tools. We have estimated the archival stability of computational biology software tools by performing an empirical analysis of the internet presence for 36,702 omics software resources published from 2005 to 2017. We found that almost 28\% of all resources are currently not accessible through uniform resource locators (URLs) published in the paper they first appeared in. Among the 98 software tools selected for our installability test, 51\% were deemed ``easy to install,'' and 28\% of the tools failed to be installed at all because of problems in the implementation. Moreover, for papers introducing new software, we found that the number of citations significantly increased when authors provided an easy installation process. We propose for incorporation into journal policy several practical solutions for increasing the widespread installability and archival stability of published bioinformatics software.}, + file = {/Users/laurent/Zotero/storage/UIFJ6BSD/Mangul et al. - 2019 - Challenges and recommendations to improve the inst.pdf;/Users/laurent/Zotero/storage/2PUQWGZS/article.html}, + journal = {PLOS Biology}, + keywords = {Altmetrics,Bioinformatics,Computational biology,Genome analysis,Internet,Software design,Software development,Software tools}, + language = {en}, + number = {6} +} + +@article{manimaranBatchQCInteractiveSoftware2016, + title = {{{BatchQC}}: Interactive Software for Evaluating Sample and Batch Effects in Genomic Data}, + shorttitle = {{{BatchQC}}}, + author = {Manimaran, Solaiappan and Selby, Heather Marie and Okrah, Kwame and Ruberman, Claire and Leek, Jeffrey T. and Quackenbush, John and {Haibe-Kains}, Benjamin and Bravo, Hector Corrada and Johnson, W. 
Evan}, + year = {2016}, + month = dec, + volume = {32}, + pages = {3836--3838}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btw538}, + abstract = {Sequencing and microarray samples often are collected or processed in multiple batches or at different times. This often produces technical biases that can lead to incorrect results in the downstream analysis. There are several existing batch adjustment tools for `-omics' data, but they do not indicate a priori whether adjustment needs to be conducted or how correction should be applied. We present a software pipeline, BatchQC, which addresses these issues using interactive visualizations and statistics that evaluate the impact of batch effects in a genomic dataset. BatchQC can also apply existing adjustment tools and allow users to evaluate their benefits interactively. We used the BatchQC pipeline on both simulated and real data to demonstrate the effectiveness of this software toolkit.}, + file = {/Users/laurent/Documents/bibliography/bioinfo/documentation/Manimaran et al. - 2016 - BatchQC interactive software for evaluating sampl.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {24} +} + +@article{mannoRNAVelocitySingle2018, + title = {{{RNA}} Velocity of Single Cells}, + author = {La Manno, Gioele and Soldatov, Ruslan and Zeisel, Amit and Braun, Emelie and Hochgerner, Hannah and Petukhov, Viktor and Lidschreiber, Katja and Kastriti, Maria E. and L{\"o}nnerberg, Peter and Furlan, Alessandro and Fan, Jean and Borm, Lars E. 
and Liu, Zehua and van Bruggen, David and Guo, Jimin and He, Xiaoling and Barker, Roger and Sundstr{\"o}m, Erik and {Castelo-Branco}, Gon{\c c}alo and Cramer, Patrick and Adameyko, Igor and Linnarsson, Sten and Kharchenko, Peter V.}, + year = {2018}, + month = aug, + volume = {560}, + pages = {494}, + issn = {1476-4687}, + doi = {10.1038/s41586-018-0414-6}, + abstract = {RNA velocity, estimated in single cells by comparison of spliced and unspliced mRNA, is a good indicator of transcriptome dynamics and will provide a useful tool for analysis of developmental lineage.}, + copyright = {2018 Springer Nature Limited}, + file = {/Users/laurent/Zotero/storage/AXYLPA3G/La Manno et al. - 2018 - RNA velocity of single cells.pdf;/Users/laurent/Zotero/storage/GLCG9Z3K/La Manno et al. - 2018 - RNA velocity of single cells.pdf;/Users/laurent/Zotero/storage/WJLI22I3/Manno et al. - 2018 - RNA velocity of single cells.pdf;/Users/laurent/Zotero/storage/XSE8ZSVP/Manno et al. - 2018 - RNA velocity of single cells.pdf;/Users/laurent/Zotero/storage/G28K5LYG/s41586-018-0414-6.html;/Users/laurent/Zotero/storage/L89LRTIF/s41586-018-0414-6.html}, + journal = {Nature}, + language = {En}, + number = {7719} +} + +@misc{ManualTitleLanguage, + title = {@{{Manual}}\{, Title = \{\vphantom{\}\}}{{R}}: {{A Language}} and {{Environment}} for {{Statistical Computing}}\vphantom\{\}, Author = \{\{\vphantom{\}\}}{{R Core Team}}\vphantom\{\}\vphantom\{\}, Organization = \{\vphantom\}{{R Foundation}} for {{Statistical Computing}}\vphantom\{\}, Address = \{\vphantom\}{{Vienna}}, {{Austria}}\vphantom\{\}, Year = \{2013\}, Note = \{\{\vphantom{\}\}}{{ISBN}}\vphantom\{\} 3-900051-07-0\vphantom\{\}, Url = \{\vphantom\}{{http://www.R-project.org/\vphantom\{\},}} \vphantom\{\}} +} + +@article{maoNonnegativeIndependentFactor2020, + title = {Non-Negative {{Independent Factor Analysis}} for Single Cell {{RNA}}-Seq}, + author = {Mao, Weiguang and Pouyan, Maziyar Baran and Kostka, Dennis and Chikina, Maria}, + year 
= {2020}, + month = feb, + pages = {2020.01.31.927921}, + doi = {10.1101/2020.01.31.927921}, + abstract = {Single cell RNA sequencing (scRNA-seq) enables transcriptional profiling at the level of individual cells. With the emergence of high-throughput platforms datasets comprising tens of thousands or more cells have become routine, and the technology is having an impact across a wide range of biomedical subject areas. However, scRNA-seq data are high-dimensional and affected by noise, so that scalable and robust computational techniques are needed for meaningful analysis, visualization and interpretation. Specifically, a range of matrix factorization techniques have been employed to aid scRNA-seq data analysis. In this context we note that sources contributing to biological variability between cells can be discrete (or multi-modal, for instance cell-types), or continuous (e.g. pathway activity). However, no current matrix factorization approach is set up to jointly infer such mixed sources of variability. To address this shortcoming, we present a new probabilistic single-cell factor analysis model, Non-negative Independent Factor Analysis (NIFA), that combines features of complementary approaches like Independent Component Analysis (ICA), Principal Component Analysis (PCA), and Non-negative Matrix Factorization (NMF). NIFA simultaneously models uni- and multi-modal latent factors and can so isolate discrete cell-type identity and continuous pathway-level variations into separate components. Similar to NMF, NIFA constrains factor loadings to be non-negative in order to increase biological interpretability. We apply our approach to a range of data sets where cell-type identity is known, and we show that NIFA-derived factors outperform results from ICA, PCA and NMF in terms of cell-type identification and biological interpretability. 
Studying an immunotherapy dataset in detail, we show that NIFA identifies biomedically meaningful sources of variation, derive an improved expression signature for regulatory T-cells, and identify a novel myeloid cell subtype associated with treatment response. Overall, NIFA is a general approach advancing scRNA-seq analysis capabilities and it allows researchers to better take advantage of their data. NIFA is available at https://github.com/wgmao/NIFA.}, + copyright = {\textcopyright{} 2020, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution-NonCommercial-NoDerivs 4.0 International), CC BY-NC-ND 4.0, as described at http://creativecommons.org/licenses/by-nc-nd/4.0/}, + file = {/Users/laurent/Zotero/storage/WUCUQGME/Mao et al. - 2020 - Non-negative Independent Factor Analysis for singl.pdf;/Users/laurent/Zotero/storage/5YV5JLWM/2020.01.31.html}, + journal = {bioRxiv}, + language = {en} +} + +@article{marcaisMUMmer4FastVersatile2018, + title = {{{MUMmer4}}: {{A}} Fast and Versatile Genome Alignment System}, + shorttitle = {{{MUMmer4}}}, + author = {Mar{\c c}ais, Guillaume and Delcher, Arthur L. and Phillippy, Adam M. and Coston, Rachel and Salzberg, Steven L. and Zimin, Aleksey}, + editor = {Darling, Aaron E.}, + year = {2018}, + month = jan, + volume = {14}, + pages = {e1005944}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005944}, + file = {/Users/laurent/Documents/bibliography/to_read/Marçais et al. - 2018 - MUMmer4 A fast and versatile genome alignment sys.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {1} +} + +@article{maroufRealisticSilicoGeneration2020, + title = {Realistic in Silico Generation and Augmentation of Single-Cell {{RNA}}-Seq Data Using Generative Adversarial Networks}, + author = {Marouf, Mohamed and Machart, Pierre and Bansal, Vikas and Kilian, Christoph and Magruder, Daniel S. and Krebs, Christian F. 
and Bonn, Stefan}, + year = {2020}, + month = jan, + volume = {11}, + pages = {166}, + issn = {2041-1723}, + doi = {10.1038/s41467-019-14018-z}, + abstract = {A fundamental problem in biomedical research is the low number of observations available, mostly due to a lack of available biosamples, prohibitive costs, or ethical reasons. Augmenting few real observations with generated in silico samples could lead to more robust analysis results and a higher reproducibility rate. Here, we propose the use of conditional single-cell generative adversarial neural networks (cscGAN) for the realistic generation of single-cell RNA-seq data. cscGAN learns non-linear gene-gene dependencies from complex, multiple cell type samples and uses this information to generate realistic cells of defined types. Augmenting sparse cell populations with cscGAN generated cells improves downstream analyses such as the detection of marker genes, the robustness and reliability of classifiers, the assessment of novel analysis algorithms, and might reduce the number of animal experiments and costs in consequence. cscGAN outperforms existing methods for single-cell RNA-seq data generation in quality and hold great promise for the realistic generation and augmentation of other biomedical data types.}, + journal = {Nature Communications}, + language = {eng}, + number = {1}, + pmid = {31919373} +} + +@article{maroufRealisticSilicoGeneration2020a, + title = {Realistic in Silico Generation and Augmentation of Single-Cell {{RNA}}-Seq Data Using Generative Adversarial Networks}, + author = {Marouf, Mohamed and Machart, Pierre and Bansal, Vikas and Kilian, Christoph and Magruder, Daniel S. and Krebs, Christian F. and Bonn, Stefan}, + year = {2020}, + month = jan, + volume = {11}, + pages = {1--12}, + issn = {2041-1723}, + doi = {10.1038/s41467-019-14018-z}, + abstract = {Low sample numbers often limit the robustness of analyses in biomedical research. 
Here, the authors introduce a method to generate realistic scRNA-seq data using GANs that learn gene expression dependencies from complex samples, and show that augmenting spare cell populations improves downstream analyses.}, + copyright = {2020 The Author(s)}, + file = {/Users/laurent/Zotero/storage/8DBZGH42/Marouf et al. - 2020 - Realistic in silico generation and augmentation of.pdf;/Users/laurent/Zotero/storage/NCTLW6DU/s41467-019-14018-z.html}, + journal = {Nature Communications}, + language = {en}, + number = {1} +} + +@article{marquesClusterdvSimpleDensitybased2019, + title = {Clusterdv: A Simple Density-Based Clustering Method That Is Robust, General and Automatic}, + shorttitle = {Clusterdv}, + author = {Marques, Jo{\~a}o C. and Orger, Michael B.}, + year = {2019}, + month = jun, + volume = {35}, + pages = {2125--2132}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty932}, + abstract = {AbstractMotivation. How to partition a dataset into a set of distinct clusters is a ubiquitous and challenging problem. The fact that data vary widely in featu}, + file = {/Users/laurent/Zotero/storage/8TLEPAMV/Marques and Orger - 2019 - Clusterdv a simple density-based clustering metho.pdf;/Users/laurent/Zotero/storage/4KW6B5X8/5165379.html}, + journal = {Bioinformatics}, + language = {en}, + number = {12} +} + +@article{martinCutadaptRemovesAdapter2011, + title = {Cutadapt Removes Adapter Sequences from High-Throughput Sequencing Reads}, + author = {Martin, Marcel}, + year = {2011}, + month = may, + volume = {17}, + pages = {10--12}, + issn = {2226-6089}, + doi = {10.14806/ej.17.1.200}, + abstract = {When small RNA is sequenced on current sequencing machines, the resulting reads are usually longer than the RNA and therefore contain parts of the 3' adapter. That adapter must be found and removed error-tolerantly from each read before read mapping. 
Previous solutions are either hard to use or do not offer required features, in particular support for color space data. As an easy to use alternative, we developed the command-line tool cutadapt, which supports 454, Illumina and SOLiD (color space) data, offers two adapter trimming algorithms, and has other useful features. Cutadapt, including its MIT-licensed source code, is available for download at http://code.google.com/p/cutadapt/}, + copyright = {Copyright (c)}, + file = {/Users/laurent/Zotero/storage/AES8FX6P/Martin - 2011 - Cutadapt removes adapter sequences from high-throu.pdf;/Users/laurent/Zotero/storage/YIRDGZMX/200.html}, + journal = {EMBnet.journal}, + keywords = {adapter removal,microRNA,next generation sequencing,small RNA}, + language = {en}, + number = {1} +} + +@article{marxHowDeduplicatePCR2017, + title = {How to Deduplicate {{PCR}}}, + author = {Marx, Vivien}, + year = {2017}, + month = apr, + volume = {14}, + pages = {473--476}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/nmeth.4268}, + file = {/Users/laurent/Documents/bibliography/to_read/Marx - 2017 - How to deduplicate PCR.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {5} +} + +@article{masumTenSimpleRules2013, + title = {Ten {{Simple Rules}} for {{Cultivating Open Science}} and {{Collaborative R}}\&{{D}}}, + author = {Masum, Hassan and Rao, Aarthi and Good, Benjamin M. and Todd, Matthew H. and Edwards, Aled M. and Chan, Leslie and Bunin, Barry A. and Su, Andrew I. and Thomas, Zakir and Bourne, Philip E.}, + editor = {Lewitter, Fran}, + year = {2013}, + month = sep, + volume = {9}, + pages = {e1003244}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1003244}, + abstract = {Washington, D.C.: The National Academies Press. 4. Woelfle M, Olliaro P, Todd MH (2011) Open science is a research accelerator. Nat Chem 3: 745\textendash{}748. doi:10.1038/nchem.1149.}, + file = {/Users/laurent/Documents/bibliography/bioinfo/Masum et al. 
- 2013 - Ten Simple Rules for Cultivating Open Science and .pdf}, + journal = {PLoS Computational Biology}, + language = {en}, + number = {9} +} + +@article{matsumotoSCODEEfficientRegulatory2017, + title = {{{SCODE}}: An Efficient Regulatory Network Inference Algorithm from Single-Cell {{RNA}}-{{Seq}} during Differentiation}, + shorttitle = {{{SCODE}}}, + author = {Matsumoto, Hirotaka and Kiryu, Hisanori and Furusawa, Chikara and Ko, Minoru S. H. and Ko, Shigeru B. H. and Gouda, Norio and Hayashi, Tetsutaro and Nikaido, Itoshi}, + year = {2017}, + month = aug, + volume = {33}, + pages = {2314--2321}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx194}, + abstract = {Motivation: The analysis of RNA-Seq data from individual differentiating cells enables us to reconstruct the differentiation process and the degree of differentiation (in pseudo-time) of each cell. Such analyses can reveal detailed expression dynamics and functional relationships for differentiation. To further elucidate differentiation processes, more insight into gene regulatory networks is required. The pseudo-time can be regarded as time information and, therefore, single-cell RNASeq data are time-course data with high time resolution. Although time-course data are useful for inferring networks, conventional inference algorithms for such data suffer from high time complexity when the number of samples and genes is large. Therefore, a novel algorithm is necessary to infer networks from single-cell RNA-Seq during differentiation.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Matsumoto et al. - 2017 - SCODE an efficient regulatory network inference a.pdf;/Users/laurent/Zotero/storage/CBWME9AC/Matsumoto et al. - 2017 - SCODE an efficient regulatory network inference a.pdf;/Users/laurent/Zotero/storage/I7IG5PYW/Matsumoto et al. - 2017 - SCODE an efficient regulatory network inference a.pdf;/Users/laurent/Zotero/storage/MCRN9FAI/Matsumoto et al. 
- 2017 - SCODE an efficient regulatory network inference a.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {15} +} + +@article{matsumotoSCOUPProbabilisticModel2016, + title = {{{SCOUP}}: A Probabilistic Model Based on the {{Ornstein}}\textendash{{Uhlenbeck}} Process to Analyze Single-Cell Expression Data during Differentiation}, + shorttitle = {{{SCOUP}}}, + author = {Matsumoto, Hirotaka and Kiryu, Hisanori}, + year = {2016}, + month = dec, + volume = {17}, + issn = {1471-2105}, + doi = {10.1186/s12859-016-1109-3}, + abstract = {Background: Single-cell technologies make it possible to quantify the comprehensive states of individual cells, and have the power to shed light on cellular differentiation in particular. Although several methods have been developed to fully analyze the single-cell expression data, there is still room for improvement in the analysis of differentiation. +Results: In this paper, we propose a novel method SCOUP to elucidate differentiation process. Unlike previous dimension reduction-based approaches, SCOUP describes the dynamics of gene expression throughout differentiation directly, including the degree of differentiation of a cell (in pseudo-time) and cell fate. SCOUP is superior to previous methods with respect to pseudo-time estimation, especially for single-cell RNA-seq. SCOUP also successfully estimates cell lineage more accurately than previous method, especially for cells at an early stage of bifurcation. In addition, SCOUP can be applied to various downstream analyses. As an example, we propose a novel correlation calculation method for elucidating regulatory relationships among genes. We apply this method to a single-cell RNA-seq data and detect a candidate of key regulator for differentiation and clusters in a correlation network which are not detected with conventional correlation analysis. 
+Conclusions: We develop a stochastic process-based method SCOUP to analyze single-cell expression data throughout differentiation. SCOUP can estimate pseudo-time and cell lineage more accurately than previous methods. We also propose a novel correlation calculation method based on SCOUP. SCOUP is a promising approach for further single-cell analysis and available at https://github.com/hmatsu1226/SCOUP.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Matsumoto and Kiryu - 2016 - SCOUP a probabilistic model based on the Ornstein.pdf;/Users/laurent/Zotero/storage/7CRGCJQG/Matsumoto and Kiryu - 2016 - SCOUP a probabilistic model based on the Ornstein.pdf;/Users/laurent/Zotero/storage/NGSG9N93/Matsumoto and Kiryu - 2016 - SCOUP a probabilistic model based on the Ornstein.pdf;/Users/laurent/Zotero/storage/ZEDXCCWU/Matsumoto and Kiryu - 2016 - SCOUP a probabilistic model based on the Ornstein.pdf}, + journal = {BMC Bioinformatics}, + language = {en}, + number = {1} +} + +@article{matsutaniDiscoveringNovelMutation2019, + title = {Discovering Novel Mutation Signatures by Latent {{Dirichlet}} Allocation with Variational {{Bayes}} Inference}, + author = {Matsutani, Taro and Ueno, Yuki and Fukunaga, Tsukasa and Hamada, Michiaki}, + year = {2019}, + month = nov, + volume = {35}, + pages = {4543--4552}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz266}, + abstract = {AbstractMotivation. A cancer genome includes many mutations derived from various mutagens and mutational processes, leading to specific mutation patterns. It i}, + file = {/Users/laurent/Zotero/storage/NRTFK7F9/Matsutani et al. 
- 2019 - Discovering novel mutation signatures by latent Di.pdf;/Users/laurent/Zotero/storage/3NI5DKPY/5472341.html}, + journal = {Bioinformatics}, + language = {en}, + number = {22} +} + +@article{mcdavidContributionCellCycle2016, + title = {The Contribution of Cell Cycle to Heterogeneity in Single-Cell {{RNA}}-Seq Data}, + author = {McDavid, Andrew and Finak, Greg and Gottardo, Raphael}, + year = {2016}, + month = jun, + volume = {34}, + pages = {591--593}, + issn = {1087-0156, 1546-1696}, + doi = {10.1038/nbt.3498}, + file = {/Users/laurent/Zotero/storage/3MR3EALC/McDavid et al. - 2016 - The contribution of cell cycle to heterogeneity in.pdf}, + journal = {Nature Biotechnology}, + language = {en}, + number = {6} +} + +@article{mcdowellClusteringGeneExpression2018, + title = {Clustering Gene Expression Time Series Data Using an Infinite {{Gaussian}} Process Mixture Model}, + author = {McDowell, Ian C. and Manandhar, Dinesh and Vockley, Christopher M. and Schmid, Amy K. and Reddy, Timothy E. and Engelhardt, Barbara E.}, + editor = {Nie, Qing}, + year = {2018}, + month = jan, + volume = {14}, + pages = {e1005896}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005896}, + abstract = {Transcriptome-wide time series expression profiling is used to characterize the cellular response to environmental perturbations. The first step to analyzing transcriptional response data is often to cluster genes with similar responses. Here, we present a nonparametric model-based method, Dirichlet process Gaussian process mixture model (DPGP), which jointly models data clusters with a Dirichlet process and temporal dependencies with Gaussian processes. We demonstrate the accuracy of DPGP in comparison to state-of-the-art approaches using hundreds of simulated data sets. 
To further test our method, we apply DPGP to published microarray data from a microbial model organism exposed to stress and to novel RNA-seq data from a human cell line exposed to the glucocorticoid dexamethasone. We validate our clusters by examining local transcription factor binding and histone modifications. Our results demonstrate that jointly modeling cluster number and temporal dependencies can reveal shared regulatory mechanisms. DPGP software is freely available online at https://github.com/PrincetonUniversity/DP\_GP\_cluster.}, + file = {/Users/laurent/Documents/bibliography/to_read/McDowell et al. - 2018 - Clustering gene expression time series data using .pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {1} +} + +@article{mcdowellClusteringGeneExpression2018a, + title = {Clustering Gene Expression Time Series Data Using an Infinite {{Gaussian}} Process Mixture Model}, + author = {McDowell, Ian C. and Manandhar, Dinesh and Vockley, Christopher M. and Schmid, Amy K. and Reddy, Timothy E. and Engelhardt, Barbara E.}, + year = {2018}, + month = jan, + volume = {14}, + pages = {e1005896}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005896}, + abstract = {Transcriptome-wide time series expression profiling is used to characterize the cellular response to environmental perturbations. The first step to analyzing transcriptional response data is often to cluster genes with similar responses. Here, we present a nonparametric model-based method, Dirichlet process Gaussian process mixture model (DPGP), which jointly models data clusters with a Dirichlet process and temporal dependencies with Gaussian processes. We demonstrate the accuracy of DPGP in comparison to state-of-the-art approaches using hundreds of simulated data sets. 
To further test our method, we apply DPGP to published microarray data from a microbial model organism exposed to stress and to novel RNA-seq data from a human cell line exposed to the glucocorticoid dexamethasone. We validate our clusters by examining local transcription factor binding and histone modifications. Our results demonstrate that jointly modeling cluster number and temporal dependencies can reveal shared regulatory mechanisms. DPGP software is freely available online at https://github.com/PrincetonUniversity/DP\_GP\_cluster.}, + file = {/Users/laurent/Zotero/storage/AQKA7R3V/McDowell et al. - 2018 - Clustering gene expression time series data using .pdf;/Users/laurent/Zotero/storage/8UVG3R7D/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Cell cycle and cell division,Covariance,DNA transcription,Gene expression,Gene regulation,Histone modification,Probability distribution,Simulation and modeling}, + language = {en}, + number = {1} +} + +@article{mcfaline-figueroaPooledSinglecellGenetic2019, + title = {A Pooled Single-Cell Genetic Screen Identifies Regulatory Checkpoints in the Continuum of the Epithelial-to-Mesenchymal Transition}, + author = {{McFaline-Figueroa}, Jos{\'e} L. and Hill, Andrew J. and Qiu, Xiaojie and Jackson, Dana and Shendure, Jay and Trapnell, Cole}, + year = {2019}, + month = sep, + volume = {51}, + pages = {1389--1398}, + issn = {1546-1718}, + doi = {10.1038/s41588-019-0489-5}, + abstract = {Transcriptional profiling of 61,052 cells undergoing epithelial-to-mesenchymal transition (EMT) shows continuous waves of gene regulation, not discrete stages. A CRISPR screen orders EMT regulators along EMT progression.}, + copyright = {2019 The Author(s), under exclusive licence to Springer Nature America, Inc.}, + file = {/Users/laurent/Zotero/storage/AFJYGXNK/McFaline-Figueroa et al. 
- 2019 - A pooled single-cell genetic screen identifies reg.pdf;/Users/laurent/Zotero/storage/D6FR29E2/s41588-019-0489-5.html}, + journal = {Nature Genetics}, + language = {en}, + number = {9} +} + +@article{mcinnesUMAPUniformManifold2018, + title = {{{UMAP}}: {{Uniform Manifold Approximation}} and {{Projection}} for {{Dimension Reduction}}}, + shorttitle = {{{UMAP}}}, + author = {McInnes, Leland and Healy, John and Melville, James}, + year = {2018}, + month = feb, + abstract = {UMAP (Uniform Manifold Approximation and Projection) is a novel manifold learning technique for dimension reduction. UMAP is constructed from a theoretical framework based in Riemannian geometry and algebraic topology. The result is a practical scalable algorithm that applies to real world data. The UMAP algorithm is competitive with t-SNE for visualization quality, and arguably preserves more of the global structure with superior run time performance. Furthermore, UMAP has no computational restrictions on embedding dimension, making it viable as a general purpose dimension reduction technique for machine learning.}, + archivePrefix = {arXiv}, + eprint = {1802.03426}, + eprinttype = {arxiv}, + file = {/Users/laurent/Zotero/storage/3HZ66U8V/McInnes et al. - 2018 - UMAP Uniform Manifold Approximation and Projectio.pdf}, + journal = {arXiv:1802.03426 [cs, stat]}, + keywords = {Computer Science - Computational Geometry,Computer Science - Machine Learning,Statistics - Machine Learning}, + language = {en}, + primaryClass = {cs, stat} +} + +@article{mehdiVariationalInfiniteHeterogeneous2019, + title = {Variational Infinite Heterogeneous Mixture Model for Semi-Supervised Clustering of Heart Enhancers}, + author = {Mehdi, Tahmid F. and Singh, Gurdeep and Mitchell, Jennifer A. and Moses, Alan M.}, + year = {2019}, + month = sep, + volume = {35}, + pages = {3232--3239}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz064}, + abstract = {AbstractMotivation. 
Mammalian genomes can contain thousands of enhancers but only a subset are actively driving gene expression in a given cellular context. In}, + file = {/Users/laurent/Zotero/storage/C2PMNTIZ/Mehdi et al. - 2019 - Variational infinite heterogeneous mixture model f.pdf;/Users/laurent/Zotero/storage/49Y972CM/5308600.html}, + journal = {Bioinformatics}, + language = {en}, + number = {18} +} + +@article{merinoBenchmarkingWorkflowsDetecting2019, + title = {A Benchmarking of Workflows for Detecting Differential Splicing and Differential Expression at Isoform Level in Human {{RNA}}-Seq Studies}, + author = {Merino, Gabriela A and Conesa, Ana and Fern{\'a}ndez, Elmer A}, + year = {2019}, + month = mar, + volume = {20}, + pages = {471--481}, + issn = {1477-4054}, + doi = {10.1093/bib/bbx122}, + abstract = {Over the last few years, RNA-seq has been used to study alterations in alternative splicing related to several diseases. Bioinformatics workflows used to perform these studies can be divided into two groups, those finding changes in the absolute isoform expression and those studying differential splicing. Many computational methods for transcriptomics analysis have been developed, evaluated and compared; however, there are not enough reports of systematic and objective assessment of processing pipelines as a whole. Moreover, comparative studies have been performed considering separately the changes in absolute or relative isoform expression levels. Consequently, no consensus exists about the best practices and appropriate workflows to analyse alternative and differential splicing. To assist the adequate pipeline choice, we present here a benchmarking of nine commonly used workflows to detect differential isoform expression and splicing. We evaluated the workflows performance over different experimental scenarios where changes in absolute and relative isoform expression occurred simultaneously. 
In addition, the effect of the number of isoforms per gene, and the magnitude of the expression change over pipeline performances were also evaluated. Our results suggest that workflow performance is influenced by the number of replicates per condition and the conditions heterogeneity. In general, workflows based on DESeq2, DEXSeq, Limma and NOISeq performed well over a wide range of transcriptomics experiments. In particular, we suggest the use of workflows based on Limma when high precision is required, and DESeq2 and DEXseq pipelines to prioritize sensitivity. When several replicates per condition are available, NOISeq and Limma pipelines are indicated.}, + file = {/Users/laurent/Zotero/storage/52BZQP32/Merino et al. - 2019 - A benchmarking of workflows for detecting differen.pdf;/Users/laurent/Zotero/storage/LGHLE7UV/Merino et al. - 2019 - A benchmarking of workflows for detecting differen.pdf}, + journal = {Briefings in Bioinformatics}, + language = {en}, + number = {2} +} + +@article{mestdaghPrepaidParameterEstimation2019, + title = {Prepaid Parameter Estimation without Likelihoods}, + author = {Mestdagh, Merijn and Verdonck, Stijn and Meers, Kristof and Loossens, Tim and Tuerlinckx, Francis}, + year = {2019}, + month = sep, + volume = {15}, + pages = {e1007181}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1007181}, + abstract = {In various fields, statistical models of interest are analytically intractable and inference is usually performed using a simulation-based method. However elegant these methods are, they are often painstakingly slow and convergence is difficult to assess. As a result, statistical inference is greatly hampered by computational constraints. However, for a given statistical model, different users, even with different data, are likely to perform similar computations. Computations done by one user are potentially useful for other users with different data sets. 
We propose a pooling of resources across researchers to capitalize on this. More specifically, we preemptively chart out the entire space of possible model outcomes in a prepaid database. Using advanced interpolation techniques, any individual estimation problem can now be solved on the spot. The prepaid method can easily accommodate different priors as well as constraints on the parameters. We created prepaid databases for three challenging models and demonstrate how they can be distributed through an online parameter estimation service. Our method outperforms state-of-the-art estimation techniques in both speed (with a 23,000 to 100,000-fold speed up) and accuracy, and is able to handle previously quasi inestimable models.}, + file = {/Users/laurent/Zotero/storage/9XGNEEX6/Mestdagh et al. - 2019 - Prepaid parameter estimation without likelihoods.pdf;/Users/laurent/Zotero/storage/EEJQ6YSJ/Mestdagh et al. - 2019 - Prepaid parameter estimation without likelihoods.pdf}, + journal = {PLOS Computational Biology}, + keywords = {Covariance,Ellipses,Interpolation,Linear regression analysis,Machine learning,Simulation and modeling,Statistical distributions,Support vector machines}, + language = {en}, + number = {9} +} + +@article{mestdaghPrepaidParameterEstimation2019a, + title = {Prepaid Parameter Estimation without Likelihoods}, + author = {Mestdagh, Merijn and Verdonck, Stijn and Meers, Kristof and Loossens, Tim and Tuerlinckx, Francis}, + year = {2019}, + month = sep, + volume = {15}, + pages = {e1007181}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1007181}, + abstract = {In various fields, statistical models of interest are analytically intractable and inference is usually performed using a simulation-based method. However elegant these methods are, they are often painstakingly slow and convergence is difficult to assess. As a result, statistical inference is greatly hampered by computational constraints. 
However, for a given statistical model, different users, even with different data, are likely to perform similar computations. Computations done by one user are potentially useful for other users with different data sets. We propose a pooling of resources across researchers to capitalize on this. More specifically, we preemptively chart out the entire space of possible model outcomes in a prepaid database. Using advanced interpolation techniques, any individual estimation problem can now be solved on the spot. The prepaid method can easily accommodate different priors as well as constraints on the parameters. We created prepaid databases for three challenging models and demonstrate how they can be distributed through an online parameter estimation service. Our method outperforms state-of-the-art estimation techniques in both speed (with a 23,000 to 100,000-fold speed up) and accuracy, and is able to handle previously quasi inestimable models.}, + file = {/Users/laurent/Zotero/storage/LDY2QSG5/Mestdagh et al. 
- 2019 - Prepaid parameter estimation without likelihoods.pdf;/Users/laurent/Zotero/storage/SCUPUMBW/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Covariance,Ellipses,Interpolation,Linear regression analysis,Machine learning,Simulation and modeling,Statistical distributions,Support vector machines}, + language = {en}, + number = {9} +} + +@misc{MethodsHandlingLongitudinal, + title = {Methods for Handling Longitudinal Outcome Processes Truncated by Dropout and Death}, + file = {/Users/laurent/Zotero/storage/IJESJHE2/4237504.html}, + howpublished = {https://academic.oup.com/biostatistics/article/19/4/407/4237504} +} + +@article{miaoASEluxUltrafastAccurate2018, + title = {{{ASElux}}: An Ultra-Fast and Accurate Allelic Reads Counter}, + shorttitle = {{{ASElux}}}, + author = {Miao, Zong and Alvarez, Marcus and Pajukanta, P{\"a}ivi and Ko, Arthur}, + year = {2018}, + month = apr, + volume = {34}, + pages = {1313--1320}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx762}, + abstract = {Motivation: Mapping bias causes preferential alignment to the reference allele, forming a major obstacle in allele-specific expression (ASE) analysis. The existing methods, such as simulation and SNP-aware alignment, are either inaccurate or relatively slow. To fast and accurately count allelic reads for ASE analysis, we developed a novel approach, ASElux, which utilizes the personal SNP information and counts allelic reads directly from unmapped RNA-sequence (RNA-seq) data. ASElux significantly reduces runtime by disregarding reads outside single nucleotide polymorphisms (SNPs) during the alignment.}, + file = {/Users/laurent/Documents/bibliography/to_read/Miao et al. 
- 2018 - ASElux an ultra-fast and accurate allelic reads c.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {8} +} + +@article{miaoDEsingleDetectingThree2018, + title = {{{DEsingle}} for Detecting Three Types of Differential Expression in Single-Cell {{RNA}}-Seq Data}, + author = {Miao, Zhun and Deng, Ke and Wang, Xiaowo and Zhang, Xuegong}, + year = {2018}, + month = sep, + volume = {34}, + pages = {3223--3224}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty332}, + abstract = {Summary: The excessive amount of zeros in single-cell RNA-seq (scRNA-seq) data includes `real' zeros due to the on-off nature of gene transcription in}, + file = {/Users/laurent/Zotero/storage/YRQKMSF7/Miao et al. - 2018 - DEsingle for detecting three types of differential.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {18} +} + +@article{mieleNineQuickTips2019, + title = {Nine Quick Tips for Analyzing Network Data}, + author = {Miele, Vincent and Matias, Catherine and Robin, St{\'e}phane and Dray, St{\'e}phane}, + year = {2019}, + month = dec, + volume = {15}, + pages = {e1007434}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1007434}, + file = {/Users/laurent/Zotero/storage/KYVKTZZY/Miele et al. - 2019 - Nine quick tips for analyzing network data.pdf;/Users/laurent/Zotero/storage/Y6XYYHF8/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Biologists,Food web structure,Genetic networks,Mathematical models,Network analysis,Neural networks,Protein interaction networks,Software tools}, + language = {en}, + number = {12} +} + +@article{miladiRNAscClustClusteringRNA2017, + title = {{{RNAscClust}}: Clustering {{RNA}} Sequences Using Structure Conservation and Graph Based Motifs}, + shorttitle = {{{RNAscClust}}}, + author = {Miladi, Milad and Junge, Alexander and Costa, Fabrizio and Seemann, Stefan E. 
and Havgaard, Jakob Hull and Gorodkin, Jan and Backofen, Rolf}, + year = {2017}, + month = jul, + volume = {33}, + pages = {2089--2096}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx114}, + abstract = {Motivation: Clustering RNA sequences with common secondary structure is an essential step towards studying RNA function. Whereas structural RNA alignment strategies typically identify common structure for orthologous structured RNAs, clustering seeks to group paralogous RNAs based on structural similarities. However, existing approaches for clustering paralogous RNAs, do not take the compensatory base pair changes obtained from structure conservation in orthologous sequences into account.}, + file = {/Users/laurent/Documents/bibliography/evolution/Miladi et al. - 2017 - RNAscClust clustering RNA sequences using struct.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {14} +} + +@article{millerHybridAssemblyLong2017, + title = {Hybrid Assembly with Long and Short Reads Improves Discovery of Gene Family Expansions}, + author = {Miller, Jason R. and Zhou, Peng and Mudge, Joann and Gurtowski, James and Lee, Hayan and Ramaraj, Thiruvarangan and Walenz, Brian P. and Liu, Junqi and Stupar, Robert M. and Denny, Roxanne and Song, Li and Singh, Namrata and Maron, Lyza G. and McCouch, Susan R. and McCombie, W. Richard and Schatz, Michael C. and Tiffin, Peter and Young, Nevin D. and Silverstein, Kevin A. T.}, + year = {2017}, + month = jul, + volume = {18}, + pages = {541}, + issn = {1471-2164}, + doi = {10.1186/s12864-017-3927-8}, + abstract = {Long-read and short-read sequencing technologies offer competing advantages for eukaryotic genome sequencing projects. 
Combinations of both may be appropriate for surveys of within-species genomic variation.}, + journal = {BMC Genomics}, + number = {1} +} + +@article{millerJustOrthologsFastAccurate2019, + title = {{{JustOrthologs}}: A Fast, Accurate and User-Friendly Ortholog Identification Algorithm}, + shorttitle = {{{JustOrthologs}}}, + author = {Miller, Justin B. and Pickett, Brandon D. and Ridge, Perry G.}, + year = {2019}, + month = feb, + volume = {35}, + pages = {546--552}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty669}, + abstract = {Motivation: Orthologous gene identification is fundamental to all aspects of biology. For example, ortholog identification between species can provide}, + file = {/Users/laurent/Zotero/storage/FI5DYPMY/Miller et al. - 2019 - JustOrthologs a fast, accurate and user-friendly .pdf;/Users/laurent/Zotero/storage/HZF2M8PB/5063405.html}, + journal = {Bioinformatics}, + language = {en}, + number = {4} +} + +@article{milneTabletNextGeneration2010, + title = {Tablet\textemdash{}next Generation Sequence Assembly Visualization}, + author = {Milne, Iain and Bayer, Micha and Cardle, Linda and Shaw, Paul and Stephen, Gordon and Wright, Frank and Marshall, David}, + year = {2010}, + month = feb, + volume = {26}, + pages = {401--402}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btp666}, + abstract = {Summary: Tablet is a lightweight, high-performance graphical viewer for next-generation sequence assemblies and alignments. Supporting a range of input assembly formats, Tablet provides high-quality visualizations showing data in packed or stacked views, allowing instant access and navigation to any region of interest, and whole contig overviews and data summaries. Tablet is both multi-core aware and memory efficient, allowing it to handle assemblies containing millions of reads, even on a 32-bit desktop machine. Availability: Tablet is freely available for Microsoft Windows, Apple Mac OS X, Linux and Solaris. 
Fully bundled installers can be downloaded from http://bioinf.scri.ac.uk/tablet in 32- and 64-bit versions. Contact: tablet@scri.ac.uk}, + journal = {Bioinformatics}, + number = {3}, + pmcid = {PMC2815658}, + pmid = {19965881} +} + +@article{mingLSMMStatisticalApproach2018, + title = {{{LSMM}}: A Statistical Approach to Integrating Functional Annotations with Genome-Wide Association Studies}, + shorttitle = {{{LSMM}}}, + author = {Ming, Jingsi and Dai, Mingwei and Cai, Mingxuan and Wan, Xiang and Liu, Jin and Yang, Can}, + year = {2018}, + month = aug, + volume = {34}, + pages = {2788--2796}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty187}, + abstract = {Motivation: Thousands of risk variants underlying complex phenotypes (quantitative traits and diseases) have been identified in genome-wide association}, + file = {/Users/laurent/Zotero/storage/GCXLK8W7/Ming et al. - 2018 - LSMM a statistical approach to integrating functi.pdf;/Users/laurent/Zotero/storage/G5CSXYPR/4956013.html}, + journal = {Bioinformatics}, + language = {en}, + number = {16} +} + +@article{minMeffilEfficientNormalization2018, + title = {Meffil: Efficient Normalization and Analysis of Very Large {{DNA}} Methylation Datasets}, + shorttitle = {Meffil}, + author = {Min, J. L. and Hemani, G. and Davey Smith, G. and Relton, C. and Suderman, M.}, + year = {2018}, + month = dec, + volume = {34}, + pages = {3983--3989}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty476}, + abstract = {Motivation: DNA methylation datasets are growing ever larger both in sample size and genome coverage. Novel computational solutions are required to eff}, + file = {/Users/laurent/Zotero/storage/C3FVV5MB/Min et al. 
- 2018 - Meffil efficient normalization and analysis of ve.pdf;/Users/laurent/Zotero/storage/8QBJGGEW/5042224.html}, + journal = {Bioinformatics}, + language = {en}, + number = {23} +} + +@article{modoloUrQtEfficientSoftware2015, + title = {{{UrQt}}: An Efficient Software for the {{Unsupervised Quality}} Trimming of {{NGS}} Data}, + shorttitle = {{{UrQt}}}, + author = {Modolo, Laurent and Lerat, Emmanuelle}, + year = {2015}, + month = apr, + volume = {16}, + pages = {137}, + issn = {1471-2105}, + doi = {10.1186/s12859-015-0546-8}, + abstract = {Quality control is a necessary step of any Next Generation Sequencing analysis. Although customary, this step still requires manual interventions to empirically choose tuning parameters according to various quality statistics. Moreover, current quality control procedures that provide a ``good quality'' data set, are not optimal and discard many informative nucleotides. To address these drawbacks, we present a new quality control method, implemented in UrQt software, for Unsupervised Quality trimming of Next Generation Sequencing reads.}, + file = {/Users/laurent/Zotero/storage/KJI3PXV6/Modolo and Lerat - 2015 - UrQt an efficient software for the Unsupervised Q.pdf;/Users/laurent/Zotero/storage/9DKCWWP8/s12859-015-0546-8.html}, + journal = {BMC Bioinformatics}, + number = {1} +} + +@article{moFullyBayesianLatent2018, + title = {A Fully {{Bayesian}} Latent Variable Model for Integrative Clustering Analysis of Multi-Type Omics Data}, + author = {Mo, Qianxing and Shen, Ronglai and Guo, Cui and Vannucci, Marina and Chan, Keith S and Hilsenbeck, Susan G}, + year = {2018}, + month = jan, + volume = {19}, + pages = {71--86}, + issn = {1465-4644, 1468-4357}, + doi = {10.1093/biostatistics/kxx017}, + file = {/Users/laurent/Documents/bibliography/to_read/Mo et al. 
- 2018 - A fully Bayesian latent variable model for integra.pdf}, + journal = {Biostatistics}, + language = {en}, + number = {1} +} + +@article{mohammadiDECODEingSparsityPatterns2018, + title = {{{DECODE}}-Ing Sparsity Patterns in Single-Cell {{RNA}}-Seq}, + author = {Mohammadi, Shahin and {Davila-Velderrain}, Jose and Kellis, Manolis and Grama, Ananth}, + year = {2018}, + month = mar, + doi = {10.1101/241646}, + abstract = {An inherent challenge in interpreting single-cell transcriptomic data is the high frequency of zero values. This phenomenon has been attributed to both biological and technical sources, although the extent of the contribution of each remains unclear. Here, we show that the underlying gene presence/absence sparsity patterns are by themselves highly informative. We develop an algorithm, called DECODE, to assess the extent of joint presence/absence of genes across different cells, and to infer a gene dependency network. We show that this network captures biologically-meaningful pathways, cell-type specific modules, and connectivity patterns characteristic of complex networks. We develop a model that uses this network to discriminate biological vs. technical zeros, by exploiting each gene's local network neighborhood. For inferred non-biological zeros, we build a predictive model that imputes the missing value of each gene based on activity patterns of its most informative neighbors. We show that our framework accurately infers gene-gene functional dependencies, pinpoints technical zeros, and predicts biologically-meaningful missing values in three diverse datasets.}, + file = {/Users/laurent/Documents/bibliography/to_read/Mohammadi et al. 
- 2018 - DECODE-ing sparsity patterns in single-cell RNA-se.pdf}, + language = {en} +} + +@article{molnarSAGE2ParallelHuman2018, + title = {{{SAGE2}}: Parallel Human Genome Assembly}, + shorttitle = {{{SAGE2}}}, + author = {Molnar, Michael and Haghshenas, Ehsan and Ilie, Lucian}, + editor = {Birol, Inanc}, + year = {2018}, + month = feb, + volume = {34}, + pages = {678--680}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx648}, + abstract = {Summary: De novo genome assembly of next-generation sequencing data is a fundamental problem in bioinformatics. There are many programs that assemble small genomes, but very few can assemble whole human genomes. We present a new algorithm for parallel overlap graph construction that is capable of assembling human genomes and improves upon the current state-of-the-art in genome assembly.}, + file = {/Users/laurent/Documents/bibliography/to_read/Molnar et al. - 2018 - SAGE2 parallel human genome assembly.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {4} +} + +@article{moonManifoldLearningbasedMethods2018, + title = {Manifold Learning-Based Methods for Analyzing Single-Cell {{RNA}}-Sequencing Data}, + author = {Moon, Kevin R. and Stanley, Jay S. and Burkhardt, Daniel and {van Dijk}, David and Wolf, Guy and Krishnaswamy, Smita}, + year = {2018}, + month = feb, + volume = {7}, + pages = {36--46}, + issn = {24523100}, + doi = {10.1016/j.coisb.2017.12.008}, + abstract = {Recent advances in single-cell RNA sequencing technologies enable deep insights into cellular development, gene regulation, and phenotypic diversity by measuring gene expression for thousands of cells in a single experiment. While these technologies hold great potential for improving our understanding of cellular states and progression, they also pose new challenges and require advanced mathematical and algorithmic tools to extract underlying biological signals. 
In this review, we cover one of the most promising avenues of research into unlocking the potential of scRNA-seq data: the field of manifold learning, and the related manifold assumption in data analysis. Manifold learning provides a powerful structure for algorithmic approaches to process the data, extract its dynamics, and infer patterns in it. In particular, we cover manifold learning-based methods for denoising the data, revealing gene interactions, extracting pseudotime progressions with model fitting, visualizing the cellular state space via dimensionality reduction, and clustering the data.}, + file = {/Users/laurent/Documents/bibliography/to_read/Moon et al. - 2018 - Manifold learning-based methods for analyzing sing.pdf}, + journal = {Current Opinion in Systems Biology}, + language = {en} +} + +@article{moonVisualizingStructureTransitions2018, + title = {Visualizing {{Structure}} and {{Transitions}} for {{Biological Data Exploration}}}, + author = {Moon, Kevin R. and van Dijk, David and Wang, Zheng and Gigante, Scott and Burkhardt, Daniel and Chen, William S. and Yim, Kristina and van den Elzen, Antonia and Hirn, Matthew J. and Coifman, Ronald R. and Ivanova, Natalia B. and Wolf, Guy and Krishnaswamy, Smita}, + year = {2018}, + month = jun, + pages = {120378}, + doi = {10.1101/120378}, + abstract = {With the advent of high-throughput technologies measuring high-dimensional biological data, there is a pressing need for visualization tools that reveal the structure and emergent patterns of data in an intuitive form. We present PHATE, a visualization method that captures both local and global nonlinear structure in data by an information-geometry distance between datapoints. We perform extensive comparison between PHATE and other tools on a variety of artificial and biological datasets, and find that it consistently preserves a range of patterns in data including continual progressions, branches, and clusters. 
We show that PHATE is applicable to a wide variety of datatypes including mass cytometry, single-cell RNA-sequencing, Hi-C, and gut microbiome data, where it can generate interpretable insights into the underlying systems. Finally, we use PHATE to explore a newly generated scRNA-seq dataset of human germ layer differentiation. Here, PHATE reveals a dynamic picture of the main developmental branches in unparalleled detail.}, + copyright = {\textcopyright{} 2018, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution-NonCommercial-NoDerivs 4.0 International), CC BY-NC-ND 4.0, as described at http://creativecommons.org/licenses/by-nc-nd/4.0/}, + file = {/Users/laurent/Zotero/storage/9LJ4HHNY/Moon et al. - 2018 - Visualizing Structure and Transitions for Biologic.pdf;/Users/laurent/Zotero/storage/2KVQ4IZU/120378v3.html}, + journal = {bioRxiv}, + language = {en} +} + +@article{moonVisualizingStructureTransitions2018a, + title = {Visualizing {{Structure}} and {{Transitions}} for {{Biological Data Exploration}}}, + author = {Moon, Kevin R. and van Dijk, David and Wang, Zheng and Gigante, Scott and Burkhardt, Daniel and Chen, William S. and Yim, Kristina and van den Elzen, Antonia and Hirn, Matthew J. and Coifman, Ronald R. and Ivanova, Natalia B. and Wolf, Guy and Krishnaswamy, Smita}, + year = {2018}, + month = jun, + pages = {120378}, + doi = {10.1101/120378}, + abstract = {With the advent of high-throughput technologies measuring high-dimensional biological data, there is a pressing need for visualization tools that reveal the structure and emergent patterns of data in an intuitive form. We present PHATE, a visualization method that captures both local and global nonlinear structure in data by an information-geometry distance between datapoints. 
We perform extensive comparison between PHATE and other tools on a variety of artificial and biological datasets, and find that it consistently preserves a range of patterns in data including continual progressions, branches, and clusters. We show that PHATE is applicable to a wide variety of datatypes including mass cytometry, single-cell RNA-sequencing, Hi-C, and gut microbiome data, where it can generate interpretable insights into the underlying systems. Finally, we use PHATE to explore a newly generated scRNA-seq dataset of human germ layer differentiation. Here, PHATE reveals a dynamic picture of the main developmental branches in unparalleled detail.}, + copyright = {\textcopyright{} 2018, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution-NonCommercial-NoDerivs 4.0 International), CC BY-NC-ND 4.0, as described at http://creativecommons.org/licenses/by-nc-nd/4.0/}, + file = {/Users/laurent/Zotero/storage/7NHWL2WY/Moon et al. - 2018 - Visualizing Structure and Transitions for Biologic.pdf;/Users/laurent/Zotero/storage/IF778ZB4/120378v3.html}, + journal = {bioRxiv}, + language = {en} +} + +@article{moraLoopPromoterEnhancer2015, + title = {In the Loop: Promoter\textendash{}Enhancer Interactions and Bioinformatics}, + shorttitle = {In the Loop}, + author = {Mora, Antonio and Sandve, Geir Kjetil and Gabrielsen, Odd Stokke and Eskeland, Ragnhild}, + year = {2015}, + month = nov, + pages = {bbv097}, + issn = {1467-5463, 1477-4054}, + doi = {10.1093/bib/bbv097}, + abstract = {Enhancer\textendash{}promoter regulation is a fundamental mechanism underlying differential transcriptional regulation. Spatial chromatin organization brings remote enhancers in contact with target promoters in cis to regulate gene expression. There is considerable evidence for promoter\textendash{}enhancer interactions (PEIs). 
In the recent years, genome-wide analyses have identified signatures and mapped novel enhancers; however, being able to precisely identify their target gene(s) requires massive biological and bioinformatics efforts. In this review, we give a short overview of the chromatin landscape and transcriptional regulation. We discuss some key concepts and problems related to chromatin interaction detection technologies, and emerging knowledge from genome-wide chromatin interaction data sets. Then, we critically review different types of bioinformatics analysis methods and tools related to representation and visualization of PEI data, raw data processing and PEI prediction. Lastly, we provide specific examples of how PEIs have been used to elucidate a functional role of non-coding single-nucleotide polymorphisms. The topic is at the forefront of epigenetic research, and by highlighting some future bioinformatics challenges in the field, this review provides a comprehensive background for future PEI studies.}, + file = {/Users/laurent/Documents/bibliography/bioinfo/documentation/Mora et al. - 2015 - In the loop promoter–enhancer interactions and bi.pdf;/Users/laurent/Documents/bibliography/Hi-C/Mora et al. - 2015 - In the loop promoter–enhancer interactions and bi.pdf}, + journal = {Briefings in Bioinformatics}, + language = {en} +} + +@article{moranSpikeandSlabLassoBiclustering, + title = {Spike-and-{{Slab Lasso Biclustering}}}, + author = {Moran, Gemma E and Ro{\v{c}}kov{\'a}, Veronika and George, Edward I}, + pages = {43}, + abstract = {Biclustering methods aim to group samples using only subsets of their associated features. In this way, biclustering methods differ from traditional clustering methods, which utilize the entire set of features to group samples. 
Motivating applications for biclustering include genomics data, where the goal is to cluster patients or samples by their gene expression profiles; and recommender systems, which seek to group customers based on their product preferences. Biclusters of interest often manifest as rank-1 submatrices of the data matrix. This submatrix detection problem can be viewed as a factor analysis problem in which both the factors and loadings are sparse. In this paper, we propose a new biclustering method called Spike-and-Slab Lasso Biclustering (SSLB) which utilizes the Spike-and-Slab Lasso of Ro{\v{c}}kov{\'a} and George (2018) to find such a sparse factorization of the data matrix. SSLB also incorporates an Indian Buffet Process prior to automatically choose the number of biclusters. Many biclustering methods make assumptions about the size of the latent biclusters; either assuming that the biclusters are all of the same size, or that the biclusters are very large or very small. In contrast, SSLB can adapt to find biclusters which have a continuum of sizes. SSLB is implemented via a fast EM algorithm with a variational step. In a variety of simulation settings, SSLB outperforms other biclustering methods. We apply SSLB to both a microarray dataset and a single-cell RNA-sequencing dataset and highlight that SSLB can recover biologically meaningful structures in the data. The SSLB software is available as an R/C++ package at https://github.com/gemoran/SSLB.}, + file = {/Users/laurent/Zotero/storage/ZCVJTWCH/Moran et al. - Spike-and-Slab Lasso Biclustering.pdf}, + language = {en} +} + +@article{mordeletBaggingSVMLearn2014, + title = {A Bagging {{SVM}} to Learn from Positive and Unlabeled Examples}, + author = {Mordelet, F. 
and Vert, J.-P.}, + year = {2014}, + month = feb, + volume = {37}, + pages = {201--209}, + issn = {01678655}, + doi = {10.1016/j.patrec.2013.06.010}, + abstract = {We consider the problem of learning a binary classifier from a training set of positive and unlabeled examples, both in the inductive and in the transductive setting. This problem, often referred to as PU learning, differs from the standard supervised classification problem by the lack of negative examples in the training set. It corresponds to an ubiquitous situation in many applications such as information retrieval or gene ranking, when we have identified a set of data of interest sharing a particular property, and we wish to automatically retrieve additional data sharing the same property among a large and easily available pool of unlabeled data. We propose a new method for PU learning with a conceptually simple implementation based on bootstrap aggregating (bagging) techniques: the algorithm iteratively trains many binary classifiers to discriminate the known positive examples from random subsamples of the unlabeled set, and averages their predictions. We show theoretically and experimentally that the method can match and even outperform the performance of state-of-the-art methods for PU learning, particularly when the number of positive examples is limited and the fraction of negatives among the unlabeled examples is small. 
The proposed method can also run considerably faster than state-of-the-art methods, particularly when the set of unlabeled examples is large.}, + file = {/Users/laurent/Documents/bibliography/stats/Mordelet and Vert - 2014 - A bagging SVM to learn from positive and unlabeled.pdf;/Users/laurent/Zotero/storage/IGDHU6YI/Mordelet and Vert - 2014 - A bagging SVM to learn from positive and unlabeled.pdf;/Users/laurent/Zotero/storage/6IHFFXW8/S0167865513002432.html}, + journal = {Pattern Recognition Letters}, + keywords = {Bagging,PU learning,SVM}, + language = {en} +} + +@article{moreno-betancurSurvivalAnalysisTimedependent2018, + title = {Survival Analysis with Time-Dependent Covariates Subject to Missing Data or Measurement Error: {{Multiple Imputation}} for {{Joint Modeling}} ({{MIJM}})}, + shorttitle = {Survival Analysis with Time-Dependent Covariates Subject to Missing Data or Measurement Error}, + author = {{Moreno-Betancur}, Margarita and Carlin, John B. and Brilleman, Samuel L. and Tanamas, Stephanie K. and Peeters, Anna and Wolfe, Rory}, + year = {2018}, + month = oct, + volume = {19}, + pages = {479--496}, + issn = {1465-4644}, + doi = {10.1093/biostatistics/kxx046}, + abstract = {SUMMARY. Modern epidemiological studies collect data on time-varying individual-specific characteristics, such as body mass index and blood pressure. Incorpora}, + file = {/Users/laurent/Zotero/storage/5KFNX79F/Moreno-Betancur et al. 
- 2018 - Survival analysis with time-dependent covariates s.pdf;/Users/laurent/Zotero/storage/5UNSHEYN/4461848.html}, + journal = {Biostatistics}, + language = {en}, + number = {4} +} + +@article{morisseHybridCorrectionHighly2018, + title = {Hybrid Correction of Highly Noisy Long Reads Using a Variable-Order de {{Bruijn}} Graph}, + author = {Morisse, Pierre and Lecroq, Thierry and Lefebvre, Arnaud}, + year = {2018}, + month = dec, + volume = {34}, + pages = {4213--4222}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty521}, + abstract = {Motivation: The recent rise of long read sequencing technologies such as Pacific Biosciences and Oxford Nanopore allows to solve assembly problems for}, + file = {/Users/laurent/Zotero/storage/8FMTDQGC/Morisse et al. - 2018 - Hybrid correction of highly noisy long reads using.pdf;/Users/laurent/Zotero/storage/LSH7BEKB/5046256.html}, + journal = {Bioinformatics}, + language = {en}, + number = {24} +} + +@article{mostafaviIdentifyingGeneticVariants2017, + title = {Identifying Genetic Variants That Affect Viability in Large Cohorts}, + author = {Mostafavi, Hakhamanesh and Berisa, Tomaz and Day, Felix R. and Perry, John R. B. and Przeworski, Molly and Pickrell, Joseph K.}, + editor = {Barton, Nick}, + year = {2017}, + month = sep, + volume = {15}, + pages = {e2002458}, + issn = {1545-7885}, + doi = {10.1371/journal.pbio.2002458}, + file = {/Users/laurent/Documents/bibliography/to_read/Mostafavi et al. - 2017 - Identifying genetic variants that affect viability.pdf}, + journal = {PLOS Biology}, + language = {en}, + number = {9} +} + +@article{muggliBuildingLargeUpdatable2019, + title = {Building Large Updatable Colored de {{Bruijn}} Graphs via Merging}, + author = {Muggli, Martin D. and Alipanahi, Bahar and Boucher, Christina}, + year = {2019}, + month = jul, + volume = {35}, + pages = {i51--i60}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz350}, + abstract = {Motivation:
There exist several large genomic and metagenomic data collection efforts, including GenomeTrakr and MetaSub, which are routinely updated w}, + file = {/Users/laurent/Zotero/storage/3VM79KDX/Muggli et al. - 2019 - Building large updatable colored de Bruijn graphs .pdf;/Users/laurent/Zotero/storage/VB4FC4WA/5529124.html}, + journal = {Bioinformatics}, + language = {en}, + number = {14} +} + +@article{mukherjeeAligningOpticalMaps2019, + title = {Aligning Optical Maps to de {{Bruijn}} Graphs}, + author = {Mukherjee, Kingshuk and Alipanahi, Bahar and Kahveci, Tamer and Salmela, Leena and Boucher, Christina}, + year = {2019}, + month = sep, + volume = {35}, + pages = {3250--3256}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz069}, + abstract = {Motivation: Optical maps are high-resolution restriction maps (Rmaps) that give a unique numeric representation to a genome. Used in concert with seque}, + file = {/Users/laurent/Zotero/storage/C4Z8TBE9/Mukherjee et al. - 2019 - Aligning optical maps to de Bruijn graphs.pdf;/Users/laurent/Zotero/storage/3PK6WCIP/5304362.html}, + journal = {Bioinformatics}, + language = {en}, + number = {18} +} + +@article{mukherjeeScalablePreprocessingSparse2018, + title = {Scalable Preprocessing for Sparse {{scRNA}}-Seq Data Exploiting Prior Knowledge}, + author = {Mukherjee, Sumit and Zhang, Yue and Fan, Joshua and Seelig, Georg and Kannan, Sreeram}, + year = {2018}, + month = jul, + volume = {34}, + pages = {i124--i132}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty293}, + abstract = {Motivation +Single cell RNA-seq (scRNA-seq) data contains a wealth of information which has to be inferred computationally from the observed sequencing reads. As the ability to sequence more cells improves rapidly, existing computational tools suffer from three problems. (i) The decreased reads-per-cell implies a highly sparse sample of the true cellular transcriptome.
(ii) Many tools simply cannot handle the size of the resulting datasets. (iii) Prior biological knowledge such as bulk RNA-seq information of certain cell types or qualitative marker information is not taken into account. Here we present UNCURL, a preprocessing framework based on non-negative matrix factorization for scRNA-seq data, that is able to handle varying sampling distributions, scales to very large cell numbers and can incorporate prior knowledge. + +Results +We find that preprocessing using UNCURL consistently improves performance of commonly used scRNA-seq tools for clustering, visualization and lineage estimation, both in the absence and presence of prior knowledge. Finally we demonstrate that UNCURL is extremely scalable and parallelizable, and runs faster than other methods on a scRNA-seq dataset containing 1.3 million cells. + +Availability and implementation +Source code is available at https://github.com/yjzhang/uncurl\_python. + +Supplementary information + +Supplementary data are available at Bioinformatics online.}, + file = {/Users/laurent/Zotero/storage/9EPPBQLX/Mukherjee et al. - 2018 - Scalable preprocessing for sparse scRNA-seq data e.pdf;/Users/laurent/Zotero/storage/WUUS9W8P/Mukherjee et al. - 2018 - Scalable preprocessing for sparse scRNA-seq data e.pdf;/Users/laurent/Zotero/storage/8FTKQE4Z/5045758.html}, + journal = {Bioinformatics}, + number = {13}, + pmcid = {PMC6022691}, + pmid = {29949988} +} + +@article{mulderDevelopmentApplicationBioinformatics2018, + title = {The Development and Application of Bioinformatics Core Competencies to Improve Bioinformatics Training and Education}, + author = {Mulder, Nicola and Schwartz, Russell and Brazas, Michelle D. and Brooksbank, Cath and Gaeta, Bruno and Morgan, Sarah L. and Pauley, Mark A.
and Rosenwald, Anne and Rustici, Gabriella and Sierk, Michael and Warnow, Tandy and Welch, Lonnie}, + editor = {Troyanskaya, Olga G.}, + year = {2018}, + month = feb, + volume = {14}, + pages = {e1005772}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005772}, + abstract = {Bioinformatics is recognized as part of the essential knowledge base of numerous career paths in biomedical research and healthcare. However, there is little agreement in the field over what that knowledge entails or how best to provide it. These disagreements are compounded by the wide range of populations in need of bioinformatics training, with divergent prior backgrounds and intended application areas. The Curriculum Task Force of the International Society of Computational Biology (ISCB) Education Committee has sought to provide a framework for training needs and curricula in terms of a set of bioinformatics core competencies that cut across many user personas and training programs. The initial competencies developed based on surveys of employers and training programs have since been refined through a multiyear process of community engagement. This report describes the current status of the competencies and presents a series of use cases illustrating how they are being applied in diverse training contexts. These use cases are intended to demonstrate how others can make use of the competencies and engage in the process of their continuing refinement and application. The report concludes with a consideration of remaining challenges and future plans.}, + file = {/Users/laurent/Documents/bibliography/to_read/Mulder et al. - 2018 - The development and application of bioinformatics .pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {2} +} + +@article{munroSVPVStructuralVariant2017, + title = {{{SVPV}}: A Structural Variant Prediction Viewer for Paired-End Sequencing Datasets}, + shorttitle = {{{SVPV}}}, + author = {Munro, Jacob E. and Dunwoodie, Sally L. 
and Giannoulatou, Eleni}, + year = {2017}, + month = jul, + volume = {33}, + pages = {2032--2033}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx117}, + abstract = {Motivation: A wide range of algorithms exist for the prediction of structural variants (SVs) from paired-end whole genome sequencing (WGS) alignments. It is essential for the purpose of quality control to be able to visualize, compare and contrast the data underlying the predictions across multiple different algorithms.}, + file = {/Users/laurent/Documents/bibliography/to_read/Munro et al. - 2017 - SVPV a structural variant prediction viewer for p.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {13} +} + +@article{munskyUsingGeneExpression2012, + title = {Using {{Gene Expression Noise}} to {{Understand Gene Regulation}}}, + author = {Munsky, Brian and Neuert, Gregor and van Oudenaarden, Alexander}, + year = {2012}, + month = apr, + volume = {336}, + pages = {183--187}, + issn = {0036-8075, 1095-9203}, + doi = {10.1126/science.1216379}, + abstract = {Phenotypic variation is ubiquitous in biology and is often traceable to underlying genetic and environmental variation. However, even genetically identical cells in identical environments display variable phenotypes. Stochastic gene expression, or gene expression ``noise,'' has been suggested as a major source of this variability, and its physiological consequences have been topics of intense research for the last decade. Several recent studies have measured variability in protein and messenger RNA levels, and they have discovered strong connections between noise and gene regulation mechanisms. When integrated with discrete stochastic models, measurements of cell-to-cell variability provide a sensitive ``fingerprint'' with which to explore fundamental questions of gene regulation.
In this review, we highlight several studies that used gene expression variability to develop a quantitative understanding of the mechanisms and dynamics of gene regulation.}, + copyright = {Copyright \textcopyright{} 2012, American Association for the Advancement of Science}, + file = {/Users/laurent/Zotero/storage/UQIXKWFB/Munsky et al. - 2012 - Using Gene Expression Noise to Understand Gene Reg.pdf;/Users/laurent/Zotero/storage/5UH9UYKC/tab-pdf.html}, + journal = {Science}, + language = {en}, + number = {6078}, + pmid = {22499939} +} + +@article{nacuDeepRNASequencing2011, + title = {Deep {{RNA}} Sequencing Analysis of Readthrough Gene Fusions in Human Prostate Adenocarcinoma and Reference Samples}, + author = {Nacu, Serban and Yuan, Wenlin and Kan, Zhengyan and Bhatt, Deepali and Rivers, Celina Sanchez and Stinson, Jeremy and Peters, Brock A and Modrusan, Zora and Jung, Kenneth and Seshagiri, Somasekar and Wu, Thomas D}, + year = {2011}, + month = dec, + volume = {4}, + issn = {1755-8794}, + doi = {10.1186/1755-8794-4-11}, + abstract = {Background: Readthrough fusions across adjacent genes in the genome, or transcription-induced chimeras (TICs), have been estimated using expressed sequence tag (EST) libraries to involve 4-6\% of all genes. Deep transcriptional sequencing (RNA-Seq) now makes it possible to study the occurrence and expression levels of TICs in individual samples across the genome. +Methods: We performed single-end RNA-Seq on three human prostate adenocarcinoma samples and their corresponding normal tissues, as well as brain and universal reference samples. We developed two bioinformatics methods to specifically identify TIC events: a targeted alignment method using artificial exon-exon junctions within 200,000 bp from adjacent genes, and genomic alignment allowing splicing within individual reads.
We performed further experimental verification and characterization of selected TIC and fusion events using quantitative RT-PCR and comparative genomic hybridization microarrays. +Results: Targeted alignment against artificial exon-exon junctions yielded 339 distinct TIC events, including 32 gene pairs with multiple isoforms. The false discovery rate was estimated to be 1.5\%. Spliced alignment to the genome was less sensitive, finding only 18\% of those found by targeted alignment in 33-nt reads and 59\% of those in 50-nt reads. However, spliced alignment revealed 30 cases of TICs with intervening exons, in addition to distant inversions, scrambled genes, and translocations. Our findings increase the catalog of observed TIC gene pairs by 66\%. We verified 6 of 6 predicted TICs in all prostate samples, and 2 of 5 predicted novel distant gene fusions, both private events among 54 prostate tumor samples tested. Expression of TICs correlates with that of the upstream gene, which can explain the prostate-specific pattern of some TIC events and the restriction of the SLC45A3-ELK4 e4-e2 TIC to ERG-negative prostate samples, as confirmed in 20 matched prostate tumor and normal samples and 9 lung cancer cell lines. +Conclusions: Deep transcriptional sequencing and analysis with targeted and spliced alignment methods can effectively identify TIC events across the genome in individual tissues. Prostate and reference samples exhibit a wide range of TIC events, involving more genes than estimated previously using ESTs. Tissue specificity of TIC events is correlated with expression patterns of the upstream gene. Some TIC events, such as MSMB-NCOA4, may play functional roles in cancer.}, + file = {/Users/laurent/Documents/bibliography/readthrough/Nacu et al. 
- 2011 - Deep RNA sequencing analysis of readthrough gene f.pdf}, + journal = {BMC Medical Genomics}, + language = {en}, + number = {1} +} + +@article{nakamuraSexDeterminationAmphibians2009, + title = {Sex Determination in Amphibians}, + author = {Nakamura, Masahisa}, + year = {2009}, + month = may, + volume = {20}, + pages = {271--282}, + issn = {1084-9521}, + doi = {10.1016/j.semcdb.2008.10.003}, + abstract = {The heterogametic sex is male in all mammals, whereas it is female in almost all birds. By contrast, there are two heterogametic types (XX/XY and ZZ/ZW) for genetic sex determination in amphibians. Though the original heterogametic sex was female in amphibians, the two heterogametic types were probably interchangeable, suggesting that sex chromosomes evolved several times in this lineage. Indeed, the frog Rana rugosa has the XX/XY and ZZ/ZW sex-determining systems within a single species, depending on the local population in Japan. The XY and ZW geographic forms with differentiated sex chromosomes probably have a common origin as undifferentiated sex chromosomes resulted from the hybridization between the primary populations of West Japan and Kanto forms. It is clear that the sex chromosomes are still undergoing evolution in this species group. Regardless of the presence of a sex-determining gene in amphibians, the gonadal sex of some species can be changed by sex steroids. Namely, sex steroids can induce the sex reversal, with estrogens inducing the male-to-female sex reversal, whereas androgens have the opposite effect. In R. rugosa, gonadal activity of CYP19 (P450 aromatase) is correlated with the feminization of gonads. Of particular interest is that high levels of CYP19 expression are observed in indifferent gonads at time before sex determination. 
Increases in the expression of CYP19 in female gonads and CYP17 (P450 17alpha-hydroxylase/C17-20 lyase) in male gonads suggest that the former plays an important role in phenotypic female determination, whereas the latter is needed for male determination. Thus, steroids could be the key factor for sex determination in R. rugosa. In addition to the role of sex steroids in gonadal sex determination in this species, Foxl2 and Sox3 are capable of promoting CYP19 expression. Since both the genes are autosomal, another factor up-regulating CYP19 expression must be recruited. The factor, which may be located on the X or W chromosome, intervenes directly or indirectly, in the transcriptional regulation of the CYP19 gene for feminization in amphibians. A factor up-regulating CYP17 expression remains to be identified.}, + journal = {Seminars in Cell \& Developmental Biology}, + keywords = {Amphibians,Animals,ESD,Female,GSD,Male,Sex chromosome,sex determination amphibians,Sex Determination Processes,Sex reversal,Sex steroids}, + language = {eng}, + number = {3}, + pmid = {18996493} +} + +@article{nakatoSensitiveRobustAssessment2018, + title = {Sensitive and Robust Assessment of {{ChIP}}-Seq Read Distribution Using a Strand-Shift Profile}, + author = {Nakato, Ryuichiro and Shirahige, Katsuhiko}, + year = {2018}, + month = jul, + volume = {34}, + pages = {2356--2363}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty137}, + abstract = {AbstractMotivation. Chromatin immunoprecipitation followed by sequencing (ChIP-seq) can detect read-enriched DNA loci for point-source (e.g. 
transcription fact}, + file = {/Users/laurent/Zotero/storage/QJ7VR93H/Nakato and Shirahige - 2018 - Sensitive and robust assessment of ChIP-seq read d.pdf;/Users/laurent/Zotero/storage/K4KT7TXQ/4924717.html}, + journal = {Bioinformatics}, + language = {en}, + number = {14} +} + +@incollection{nakatoStatisticalAnalysisQuality2018, + title = {Statistical {{Analysis}} and {{Quality Assessment}} of {{ChIP}}-Seq {{Data}} with {{DROMPA}}}, + booktitle = {Genome {{Instability}}: {{Methods}} and {{Protocols}}}, + author = {Nakato, Ryuichiro and Shirahige, Katsuhiko}, + editor = {{Muzi-Falconi}, Marco and Brown, Grant W}, + year = {2018}, + pages = {631--643}, + publisher = {{Springer}}, + address = {{New York, NY}}, + doi = {10.1007/978-1-4939-7306-4_41}, + abstract = {Chromatin immunoprecipitation followed by sequencing (ChIP-seq) analysis can detect protein/DNA-binding and histone-modification sites across an entire genome. As there are various factors during sample preparation that affect the obtained results, multilateral quality assessments are essential. Here, we describe a step-by-step protocol using DROMPA, a program for user-friendly ChIP-seq pipelining. 
DROMPA can be used for quality assessment, data normalization, visualization, peak calling, and multiple statistical analyses.}, + isbn = {978-1-4939-7306-4}, + keywords = {ChIP-seq,Chromatin immunoprecipitation,High-throughput sequencing,Normalization,Quality management,Statistical analysis,Visualization}, + language = {en}, + series = {Methods in {{Molecular Biology}}} +} + +@book{NanoPackVisualizingProcessing, + title = {{{NanoPack}}: Visualizing and Processing Long-Read Sequencing Data | {{Bioinformatics}} | {{Oxford Academic}}} +} + +@article{navarinEfficientGraphKernel2017, + title = {An Efficient Graph Kernel Method for Non-Coding {{RNA}} Functional Prediction}, + author = {Navarin, Nicol{\`o} and Costa, Fabrizio}, + year = {2017}, + month = sep, + volume = {33}, + pages = {2642--2650}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx295}, + abstract = {Motivation: The importance of RNA protein-coding gene regulation is by now well appreciated. Noncoding RNAs (ncRNAs) are known to regulate gene expression at practically every stage, ranging from chromatin packaging to mRNA translation. However the functional characterization of specific instances remains a challenging task in genome scale settings. For this reason, automatic annotation approaches are of interest. Existing computational methods are either efficient but non accurate or they offer increased precision, but present scalability problems.}, + file = {/Users/laurent/Documents/bibliography/to_read/Navarin and Costa - 2017 - An efficient graph kernel method for non-coding RN.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{nawrockiInfernal100foldFaster2013, + title = {Infernal 1.1: 100-Fold Faster {{RNA}} Homology Searches}, + shorttitle = {Infernal 1.1}, + author = {Nawrocki, E. P. and Eddy, S.
R.}, + year = {2013}, + month = nov, + volume = {29}, + pages = {2933--2935}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btt509}, + abstract = {Summary: Infernal builds probabilistic profiles of the sequence and secondary structure of an RNA family called covariance models (CMs) from structurally annotated multiple sequence alignments given as input. Infernal uses CMs to search for new family members in sequence databases and to create potentially large multiple sequence alignments. Version 1.1 of Infernal introduces a new filter pipeline for RNA homology search based on accelerated profile hidden Markov model (HMM) methods and HMM-banded CM alignment methods. This enables 100-fold acceleration over the previous version and 10 000-fold acceleration over exhaustive non-filtered CM searches. Availability: Source code, documentation and the benchmark are downloadable from http://infernal.janelia.org. Infernal is freely licensed under the GNU GPLv3 and should be portable to any POSIX-compliant operating system, including Linux and Mac OS/X. Documentation includes a user's guide with a tutorial, a discussion of file formats and user options and additional details on methods implemented in the software.}, + file = {/Users/laurent/Documents/bibliography/mapper/Nawrocki and Eddy - 2013 - Infernal 1.1 100-fold faster RNA homology searche.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {22} +} + +@article{negriFeminizingWolbachiaZyginidia2006, + title = {Feminizing {{Wolbachia}} in {{Zyginidia}} Pullula ({{Insecta}}, {{Hemiptera}}), a Leafhopper with an {{XX}}/{{X0}} Sex-Determination System}, + author = {Negri, I and Pellecchia, M and Mazzoglio, P. J. and Patetta, A and Alma, A}, + year = {2006}, + month = sep, + volume = {273}, + pages = {2409--2416}, + issn = {0962-8452}, + doi = {10.1098/rspb.2006.3592}, + abstract = {Zyginidia pullula is a grass-dwelling leafhopper characterized by a bisexual reproduction mode.
In this species, some females collected in Northern Italy, when mated with males, gave origin to an exclusively female brood. Here, we demonstrated that in these lineages an infection by a new strain of Wolbachia pipientis\textemdash{}designated as wZygpul\textemdash{}was detected by amplifying and sequencing the wsp and 16S rRNA genes. About half of the female progeny were characterized by intersexual phenotypes, i.e. showing upper pygofer appendages, a typical male secondary sexual feature. The karyological analysis proved that while phenotypically normal females had a female genotype, those with upper pygofer appendages had a male genotype and were thus feminized males. The complete removal of W. pipientis after tetracycline treatment of morphologically normal females, and the consequent re-appearance of males in the brood, permitted us to connect the feminizing effect with the presence of the bacterium. This is the first case of feminization by W. pipientis in an XX/X0 sex-determination system, and is the second case reported in insects.}, + journal = {Proceedings of the Royal Society B: Biological Sciences}, + keywords = {sex system}, + number = {1599}, + pmcid = {PMC1636090}, + pmid = {16928646} +} + +@article{nephBEDOPSHighperformanceGenomic2012, + title = {{{BEDOPS}}: High-Performance Genomic Feature Operations}, + shorttitle = {{{BEDOPS}}}, + author = {Neph, Shane and Kuehn, M. Scott and Reynolds, Alex P. and Haugen, Eric and Thurman, Robert E. and Johnson, Audra K. and Rynes, Eric and Maurano, Matthew T. and Vierstra, Jeff and Thomas, Sean and Sandstrom, Richard and Humbert, Richard and Stamatoyannopoulos, John A.}, + year = {2012}, + month = jul, + volume = {28}, + pages = {1919--1920}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bts277}, + abstract = {Abstract.
Summary: The large and growing number of genome-wide datasets highlights the need for high-performance feature analysis and data comparison methods,}, + file = {/Users/laurent/Zotero/storage/YJFCK3DM/Neph et al. - 2012 - BEDOPS high-performance genomic feature operation.pdf;/Users/laurent/Zotero/storage/HNZN6XC3/218826.html}, + journal = {Bioinformatics}, + language = {en}, + number = {14} +} + +@article{ngoConjoinedDirichletProcess2020, + title = {Conjoined {{Dirichlet Process}}}, + author = {Ngo, Michelle N. and Pluta, Dustin S. and Ngo, Alexander N. and Shahbaba, Babak}, + year = {2020}, + month = feb, + abstract = {Biclustering is a class of techniques that simultaneously clusters the rows and columns of a matrix to sort heterogeneous data into homogeneous blocks. Although many algorithms have been proposed to find biclusters, existing methods suffer from the pre-specification of the number of biclusters or place constraints on the model structure. To address these issues, we develop a novel, non-parametric probabilistic biclustering method based on Dirichlet processes to identify biclusters with strong co-occurrence in both rows and columns. The proposed method utilizes dual Dirichlet process mixture models to learn row and column clusters, with the number of resulting clusters determined by the data rather than pre-specified. Probabilistic biclusters are identified by modeling the mutual dependence between the row and column clusters. We apply our method to two different applications, text mining and gene expression analysis, and demonstrate that our method improves bicluster extraction in many settings compared to existing approaches.}, + archivePrefix = {arXiv}, + eprint = {2002.03223}, + eprinttype = {arxiv}, + file = {/Users/laurent/Zotero/storage/28I243B2/Ngo et al. 
- 2020 - Conjoined Dirichlet Process.pdf}, + journal = {arXiv:2002.03223 [cs, stat]}, + keywords = {Computer Science - Machine Learning,Statistics - Machine Learning,Statistics - Methodology}, + language = {en}, + primaryClass = {cs, stat} +} + +@article{ngueyepHighdimensionalMultivariateAdditive2018, + title = {High-Dimensional Multivariate Additive Regression for Uncovering Contributing Factors to Healthcare Expenditure}, + author = {Ngueyep, Rodrigue and Serban, Nicoleta}, + year = {2018}, + month = jul, + volume = {19}, + pages = {359--373}, + issn = {1465-4644}, + doi = {10.1093/biostatistics/kxx043}, + abstract = {SUMMARY. Many studies in health services research rely on regression models with a large number of covariates or predictors. In this article, we introduce nove}, + file = {/Users/laurent/Zotero/storage/V4TMB6RR/Ngueyep and Serban - 2018 - High-dimensional multivariate additive regression .pdf;/Users/laurent/Zotero/storage/I92HRNB6/4157472.html}, + journal = {Biostatistics}, + language = {en}, + number = {3} +} + +@article{nguyenTenQuickTips2019, + title = {Ten Quick Tips for Effective Dimensionality Reduction}, + author = {Nguyen, Lan Huong and Holmes, Susan}, + year = {2019}, + month = jun, + volume = {15}, + pages = {e1006907}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1006907}, + file = {/Users/laurent/Zotero/storage/HX9W9YE5/Nguyen and Holmes - 2019 - Ten quick tips for effective dimensionality reduct.pdf;/Users/laurent/Zotero/storage/CN4PF3YB/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Aspect ratio,Data visualization,Eigenvalues,Kernel methods,Linear discriminant analysis,Phenols,Principal component analysis,Wine}, + language = {en}, + number = {6} +} + +@article{nicolasWhatShapesEukaryotic2017, + title = {What Shapes Eukaryotic Transcriptional Bursting?}, + author = {Nicolas, Damien and Phillips, Nick E. 
and Naef, Felix}, + year = {2017}, + volume = {13}, + pages = {1280--1290}, + issn = {1742-206X, 1742-2051}, + doi = {10.1039/C7MB00154A}, + file = {/Users/laurent/Zotero/storage/52LHEVMA/Nicolas et al. - 2017 - What shapes eukaryotic transcriptional bursting.pdf;/Users/laurent/Zotero/storage/EPPB6SXN/Nicolas et al. - 2017 - What shapes eukaryotic transcriptional bursting.pdf}, + journal = {Molecular BioSystems}, + language = {en}, + number = {7} +} + +@article{nobleQuickGuideOrganizing2009, + title = {A {{Quick Guide}} to {{Organizing Computational Biology Projects}}}, + author = {Noble, William Stafford}, + editor = {Lewitter, Fran}, + year = {2009}, + month = jul, + volume = {5}, + pages = {e1000424}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1000424}, + file = {/Users/laurent/Documents/bibliography/bioinfo/Noble - 2009 - A Quick Guide to Organizing Computational Biology .pdf}, + journal = {PLoS Computational Biology}, + language = {en}, + number = {7} +} + +@article{nobleTenSimpleRules2017, + title = {Ten Simple Rules for Writing a Response to Reviewers}, + author = {Noble, William Stafford}, + year = {2017}, + month = oct, + volume = {13}, + pages = {e1005730}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005730}, + file = {/Users/laurent/Documents/bibliography/bioinfo/Noble - 2017 - Ten simple rules for writing a response to reviewe.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {10} +} + +@misc{NormalizationVarianceStabilization, + title = {Normalization and Variance Stabilization of Single-Cell {{RNA}}-Seq Data Using Regularized Negative Binomial Regression | {{bioRxiv}}}, + file = {/Users/laurent/Zotero/storage/A82EH8T4/576827v2.html}, + howpublished = {https://www.biorxiv.org/content/10.1101/576827v2} +} + +@book{NovoAssemblyAnnotation, + title = {De {{Novo Assembly}} and {{Annotation}} of the {{Asian Tiger Mosquito}} ({{Aedes}} Albopictus) {{Repeatome}} with {{dnaPipeTE}} from {{Raw Genomic Reads}} and 
{{Comparative Analysis}} with the {{Yellow Fever Mosquito}} ({{Aedes}} Aegypti) | {{Genome Biology}} and {{Evolution}} | {{Oxford Academic}}} +} + +@misc{ObservationWeightsUnlock, + title = {Observation Weights Unlock Bulk {{RNA}}-Seq Tools for Zero Inflation and Single-Cell Applications | {{Genome Biology}} | {{Full Text}}}, + file = {/Users/laurent/Zotero/storage/RA34XQRF/s13059-018-1406-4.html}, + howpublished = {https://genomebiology.biomedcentral.com/articles/10.1186/s13059-018-1406-4} +} + +@article{ochoaDomainPredictionProbabilistic2017, + title = {Domain Prediction with Probabilistic Directional Context}, + author = {Ochoa, Alejandro and Singh, Mona}, + year = {2017}, + month = aug, + volume = {33}, + pages = {2471--2478}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx221}, + abstract = {Motivation: Protein domain prediction is one of the most powerful approaches for sequence-based function prediction. Although domain instances are typically predicted independently of each other, newer approaches have demonstrated improved performance by rewarding domain pairs that frequently co-occur within sequences. However, most of these approaches have ignored the order in which domains preferentially co-occur and have also not modeled domain co-occurrence probabilistically.}, + file = {/Users/laurent/Documents/bibliography/annotation/Ochoa and Singh - 2017 - Domain prediction with probabilistic directional c.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {16} +} + +@article{olarerin-georgeMetaPlotRPerlPipeline2017, + title = {{{MetaPlotR}}: A {{Perl}}/{{R}} Pipeline for Plotting Metagenes of Nucleotide Modifications and Other Transcriptomic Sites}, + shorttitle = {{{MetaPlotR}}}, + author = {{Olarerin-George}, Anthony O.
and Jaffrey, Samie R.}, + year = {2017}, + month = may, + volume = {33}, + pages = {1563--1564}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx002}, + abstract = {Summary: An increasing number of studies are mapping protein binding and nucleotide modifications sites throughout the transcriptome. Often, these sites cluster in certain regions of the transcript, giving clues to their function. Hence, it is informative to summarize where in the transcript these sites occur. A metagene is a simple and effective tool for visualizing the distribution of sites along a simplified transcript model. In this work, we introduce MetaPlotR, a Perl/R pipeline for creating metagene plots.}, + file = {/Users/laurent/Documents/bibliography/to_read/Olarerin-George and Jaffrey - 2017 - MetaPlotR a PerlR pipeline for plotting metagene.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {10} +} + +@article{omidiAutomatedIncorporationPairwise2017, + title = {Automated Incorporation of Pairwise Dependency in Transcription Factor Binding Site Prediction Using Dinucleotide Weight Tensors}, + author = {Omidi, Saeed and Zavolan, Mihaela and Pachkov, Mikhail and Breda, Jeremie and Berger, Severin}, + year = {2017}, + pages = {22}, + file = {/Users/laurent/Documents/bibliography/ChipSeq/Omidi et al. - 2017 - Automated incorporation of pairwise dependency in .pdf}, + journal = {PLOS Computational Biology}, + language = {en} +} + +@article{orabiAlignmentfreeClusteringUMI2019, + title = {Alignment-Free Clustering of {{UMI}} Tagged {{DNA}} Molecules}, + author = {Orabi, Baraa and Erhan, Emre and McConeghy, Brian and Volik, Stanislav V. and Le Bihan, Stephane and Bell, Robert and Collins, Colin C. and Chauve, Cedric and Hach, Faraz}, + year = {2019}, + month = jun, + volume = {35}, + pages = {1829--1836}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty888}, + abstract = {AbstractMotivation. 
Next-Generation Sequencing has led to the availability of massive genomic datasets whose processing raises many challenges, including the h}, + file = {/Users/laurent/Zotero/storage/GQ9CRYZG/Orabi et al. - 2019 - Alignment-free clustering of UMI tagged DNA molecu.pdf;/Users/laurent/Zotero/storage/H2T9UGZN/5142725.html}, + journal = {Bioinformatics}, + language = {en}, + number = {11} +} + +@article{orioliHumanMAF1Targets2016, + title = {Human {{MAF1}} Targets and Represses Active {{RNA}} Polymerase {{III}} Genes by Preventing Recruitment Rather than Inducing Long-Term Transcriptional Arrest}, + author = {Orioli, Andrea and Praz, Viviane and Lh{\^o}te, Philippe and Hernandez, Nouria}, + year = {2016}, + month = may, + volume = {26}, + pages = {624--635}, + issn = {1088-9051, 1549-5469}, + doi = {10.1101/gr.201400.115}, + file = {/Users/laurent/Documents/bibliography/tRNA/Orioli et al. - 2016 - Human MAF1 targets and represses active RNA polyme.pdf}, + journal = {Genome Research}, + language = {en}, + number = {5} +} + +@article{orlandoSVMdependentPairwiseHMM2017, + title = {{{SVM}}-Dependent Pairwise {{HMM}}: An Application to Protein Pairwise Alignments}, + shorttitle = {{{SVM}}-Dependent Pairwise {{HMM}}}, + author = {Orlando, Gabriele and Raimondi, Daniele and Khan, Taushif and Lenaerts, Tom and Vranken, Wim F}, + year = {2017}, + month = dec, + volume = {33}, + pages = {3902--3908}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx391}, + abstract = {Motivation: Methods able to provide reliable protein alignments are crucial for many bioinformatics applications. In the last years many different algorithms have been developed and various kinds of information, from sequence conservation to secondary structure, have been used to improve the alignment performances. This is especially relevant for proteins with highly divergent sequences. 
However, recent works suggest that different features may have different importance in diverse protein classes and it would be an advantage to have more customizable approaches, capable to deal with different alignment definitions.}, + file = {/Users/laurent/Documents/bibliography/to_read/Orlando et al. - 2017 - SVM-dependent pairwise HMM an application to prot.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {24} +} + +@article{osorioSingleCellExpressionVariability2020, + title = {Single-{{Cell Expression Variability Implies Cell Function}}}, + author = {Osorio, Daniel and Yu, Xue and Zhong, Yan and Li, Guanxun and Yu, Peng and Serpedin, Erchin and Huang, Jianhua Z. and Cai, James J.}, + year = {2020}, + month = jan, + volume = {9}, + pages = {14}, + doi = {10.3390/cells9010014}, + abstract = {As single-cell RNA sequencing (scRNA-seq) data becomes widely available, cell-to-cell variability in gene expression, or single-cell expression variability (scEV), has been increasingly appreciated. However, it remains unclear whether this variability is functionally important and, if so, what are its implications for multi-cellular organisms. Here, we analyzed multiple scRNA-seq data sets from lymphoblastoid cell lines (LCLs), lung airway epithelial cells (LAECs), and dermal fibroblasts (DFs) and, for each cell type, selected a group of homogenous cells with highly similar expression profiles. We estimated the scEV levels for genes after correcting the mean-variance dependency in that data and identified 465, 466, and 364 highly variable genes (HVGs) in LCLs, LAECs, and DFs, respectively. Functions of these HVGs were found to be enriched with those biological processes precisely relevant to the corresponding cell type's function, from which the scRNA-seq data used to identify HVGs were generated\textemdash{}e.g., cytokine signaling pathways were enriched in HVGs identified in LCLs, collagen formation in LAECs, and keratinization in DFs. 
We repeated the same analysis with scRNA-seq data from induced pluripotent stem cells (iPSCs) and identified only 79 HVGs with no statistically significant enriched functions; the overall scEV in iPSCs was of negligible magnitude. Our results support the ``variation is function'' hypothesis, arguing that scEV is required for cell type-specific, higher-level system function. Thus, quantifying and characterizing scEV are of importance for our understating of normal and pathological cellular processes.}, + copyright = {http://creativecommons.org/licenses/by/3.0/}, + file = {/Users/laurent/Zotero/storage/3H8NZ7FK/Osorio et al. - 2020 - Single-Cell Expression Variability Implies Cell Fu.pdf;/Users/laurent/Zotero/storage/86P6GJPE/14.html}, + journal = {Cells}, + keywords = {airway epithelial cell,cell-to-cell variation,dermal fibroblast,induced pluripotent stem cell,lymphoblastoid cell line,scRNA-seq,single-cell expression variability,single-cell RNA sequencing}, + language = {en}, + number = {1} +} + +@article{OUPAcceptedManuscript2017, + title = {{{OUP}} Accepted Manuscript}, + year = {2017}, + issn = {1467-5463, 1477-4054}, + doi = {10.1093/bib/bbx008}, + abstract = {RNA-Seq is a widely used method for studying the behavior of genes under different biological conditions. An essential step in an RNA-Seq study is normalization, in which raw data are adjusted to account for factors that prevent direct comparison of expression measures. Errors in normalization can have a significant impact on downstream analysis, such as inflated false positives in differential expression analysis. An underemphasized feature of normalization is the assumptions on which the methods rely and how the validity of these assumptions can have a substantial impact on the performance of the methods. In this article, we explain how assumptions provide the link between raw RNA-Seq read counts and meaningful measures of gene expression. 
We examine normalization methods from the perspective of their assumptions, as an understanding of methodological assumptions is necessary for choosing methods appropriate for the data at hand. Furthermore, we discuss why normalization methods perform poorly when their assumptions are violated and how this causes problems in subsequent analysis. To analyze a biological experiment, researchers must select a normalization method with assumptions that are met and that produces a meaningful measure of expression for the given experiment.}, + file = {/Users/laurent/Documents/bibliography/DEA/2017 - OUP accepted manuscript.pdf}, + journal = {Briefings In Bioinformatics}, + language = {en} +} + +@article{palarea-albaladejoMALDIrppaQualityControl2018, + title = {{{MALDIrppa}}: Quality Control and Robust Analysis for Mass Spectrometry Data}, + shorttitle = {{{MALDIrppa}}}, + author = {{Palarea-Albaladejo}, Javier and Mclean, Kevin and Wright, Frank and Smith, David G E}, + year = {2018}, + month = feb, + volume = {34}, + pages = {522--523}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx628}, + abstract = {Summary: This R package helps to implement a robust approach to deal with mass spectrometry (MS) data. It is aimed at alleviating reproducibility issues and pernicious effects of deviating signals on both data pre-processing and downstream data analysis. Based on robust statistical methods, it facilitates the identification and filtering of low-quality mass spectra and atypical peak profiles as well as monitoring and data handling through pre-processing, which extends existing computational tools for high-throughput data.}, + file = {/Users/laurent/Documents/bibliography/massSpec/Palarea-Albaladejo et al. 
- 2018 - MALDIrppa quality control and robust analysis for.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {3} +} + +@article{panaretosStatisticalAspectsWasserstein2018, + title = {Statistical {{Aspects}} of {{Wasserstein Distances}}}, + author = {Panaretos, Victor M. and Zemel, Yoav}, + year = {2018}, + month = jun, + abstract = {Wasserstein distances are metrics on probability distributions inspired by the problem of optimal mass transportation. Roughly speaking, they measure the minimal effort required to reconfigure the probability mass of one distribution in order to recover the other distribution. They are ubiquitous in mathematics, with a long history that has seen them catalyse core developments in analysis, optimization, and probability. Beyond their intrinsic mathematical richness, they possess attractive features that make them a versatile tool for the statistician: they can be used to derive weak convergence and convergence of moments, and can be easily bounded; they are well-adapted to quantify a natural notion of perturbation of a probability distribution; and they seamlessly incorporate the geometry of the domain of the distributions in question, thus being useful for contrasting complex objects. Consequently, they frequently appear in the development of statistical theory and inferential methodology, and have recently become an object of inference in themselves. 
In this review, we provide a snapshot of the main concepts involved in Wasserstein distances and optimal transportation, and a succinct overview of some of their many statistical aspects.}, + archivePrefix = {arXiv}, + eprint = {1806.05500}, + eprinttype = {arxiv}, + file = {/Users/laurent/Zotero/storage/FL4A9QFE/Panaretos and Zemel - 2018 - Statistical Aspects of Wasserstein Distances.pdf}, + journal = {arXiv:1806.05500 [stat]}, + keywords = {62-00 (primary); 62G99; 62M99 (secondary),Statistics - Methodology}, + language = {en}, + primaryClass = {stat} +} + +@article{pandeySqueakrExactApproximate, + title = {Squeakr: {{An Exact}} and {{Approximate}} k-Mer {{Counting System}}}, + author = {Pandey, Prashant and Bender, Michael A and Johnson, Rob and Patro, Rob}, + pages = {7}, + abstract = {Motivation: k-mer-based algorithms have become increasingly popular in the processing of highthroughput sequencing (HTS) data. These algorithms span the gamut of the analysis pipeline from k-mer counting (e.g., for estimating assembly parameters), to error correction, genome and transcriptome assembly, and even transcript quantification. Yet, these tasks often use very different k-mer representations and data structures. In this paper, we show how to build a k-mer-counting and multiset-representation system using the counting quotient filter (CQF), a feature-rich approximate membership query (AMQ) data structure. We introduce the k-mer-counting/querying system Squeakr (Simple Quotient filter-based Exact and Approximate Kmer Representation), which is based on the CQF. This off-the-shelf data structure turns out to be an efficient (approximate or exact) representation for sets or multisets of k-mers.}, + file = {/Users/laurent/Documents/bibliography/to_read/Pandey et al. 
- Squeakr An Exact and Approximate k-mer Counting S.pdf}, + language = {en} +} + +@article{papiligaoCALISTAClusteringLINEAGE2020, + title = {{{CALISTA}}: {{Clustering}} and {{LINEAGE Inference}} in {{Single}}-{{Cell Transcriptional Analysis}}}, + shorttitle = {{{CALISTA}}}, + author = {Papili Gao, Nan and Hartmann, Thomas and Fang, Tao and Gunawan, Rudiyanto}, + year = {2020}, + volume = {8}, + issn = {2296-4185}, + doi = {10.3389/fbioe.2020.00018}, + abstract = {We present CALISTA (Clustering and Lineage Inference in Single-Cell Transcriptional Analysis), a numerically efficient and highly scalable toolbox for end-to-end analysis of single-cell transcriptomic profiles. CALISTA includes four essential single-cell analyses for cell differentiation studies, including single-cell clustering, reconstruction of cell lineage specification, transition gene identification, and cell pseudotime ordering, which can be applied individually or in a pipeline. In these analyses, we employ a likelihood-based approach where single-cell mRNA counts are described by a probabilistic distribution function associated with stochastic gene transcriptional bursts and random technical dropout events. We illustrate the efficacy of CALISTA using single-cell gene expression datasets from different single-cell transcriptional profiling technologies and from a few hundred to tens of thousands of cells. CALISTA is freely available on https://www.cabselab.com/calista.}, + file = {/Users/laurent/Zotero/storage/IZ6QML74/Papili Gao et al. 
- 2020 - CALISTA Clustering and LINEAGE Inference in Singl.pdf}, + journal = {Frontiers in Bioengineering and Biotechnology}, + keywords = {Cell Differentiation,clustering,Gene Expression,lineage progression,Pseudotime,Random dropout,single cell,transcriptional burst}, + language = {English} +} + +@article{papiligaoSINCERITIESInferringGene2018, + title = {{{SINCERITIES}}: Inferring Gene Regulatory Networks from Time-Stamped Single Cell Transcriptional Expression Profiles}, + shorttitle = {{{SINCERITIES}}}, + author = {Papili Gao, Nan and {Ud-Dean}, S M Minhaz and Gandrillon, Olivier and Gunawan, Rudiyanto}, + year = {2018}, + month = jan, + volume = {34}, + pages = {258--266}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx575}, + abstract = {Motivation: Single cell transcriptional profiling opens up a new avenue in studying the functional role of cell-to-cell variability in physiological processes. The analysis of single cell expression profiles creates new challenges due to the distributive nature of the data and the stochastic dynamics of gene transcription process. The reconstruction of gene regulatory networks (GRNs) using single cell transcriptional profiles is particularly challenging, especially when directed gene-gene relationships are desired.}, + file = {/Users/laurent/Documents/bibliography/to_read/Papili Gao et al. 
- 2018 - SINCERITIES inferring gene regulatory networks fr.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {2} +} + +@misc{PARCUltrafastAccurate, + title = {{{PARC}}: Ultrafast and Accurate Clustering of Phenotypic Data of Millions of Single Cells | {{bioRxiv}}}, + file = {/Users/laurent/Zotero/storage/VNE2LE8D/765628v1.html}, + howpublished = {https://www.biorxiv.org/content/10.1101/765628v1} +} + +@article{parkSpectralClusteringBased2018, + title = {Spectral Clustering Based on Learning Similarity Matrix}, + author = {Park, Seyoung and Zhao, Hongyu}, + editor = {Birol, Inanc}, + year = {2018}, + month = feb, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/bty050}, + abstract = {Motivation: Single-cell RNA-sequencing (scRNA-seq) technology can generate genome-wide expression data at the single-cell levels. One important objective in scRNA-seq analysis is to cluster cells where each cluster consists of cells belonging to the same cell type based on gene expression patterns.}, + file = {/Users/laurent/Documents/bibliography/to_read/Park and Zhao - 2018 - Spectral clustering based on learning similarity m.pdf;/Users/laurent/Zotero/storage/75F8HCNH/Park and Zhao - 2018 - Spectral clustering based on learning similarity m.pdf;/Users/laurent/Zotero/storage/3LLI3JAP/4844126.html}, + journal = {Bioinformatics}, + language = {en} +} + +@article{parodiFunChIPBioconductorPackage2017, + title = {{{FunChIP}}: An {{R}}/{{Bioconductor}} Package for Functional Classification of {{ChIP}}-Seq Shapes}, + shorttitle = {{{FunChIP}}}, + author = {Parodi, Alice C. L. and Sangalli, Laura M. 
and Vantini, Simone and Amati, Bruno and Secchi, Piercesare and Morelli, Marco J.}, + year = {2017}, + month = aug, + volume = {33}, + pages = {2570--2572}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx201}, + abstract = {Summary: Chromatin Immunoprecipitation followed by sequencing (ChIP-seq) generates local accumulations of sequencing reads on the genome (``peaks''), which correspond to specific protein\textendash{}DNA interactions or chromatin modifications. Peaks are detected by considering their total area above a background signal, usually neglecting their shapes, which instead may convey additional biological information. We present FunChIP, an R/Bioconductor package for clustering peaks according to a functional representation of their shapes: after approximating their profiles with cubic B-splines, FunChIP minimizes their functional distance and classifies the peaks applying a kmean alignment and clustering algorithm. The whole pipeline is user-friendly and provides visualization functions for a quick inspection of the results. An application to the transcription factor Myc in 3T9 murine fibroblasts shows that clusters of peaks with different shapes are associated with different genomic locations and different transcriptional regulatory activity.}, + file = {/Users/laurent/Documents/bibliography/ChipSeq/Parodi et al. - 2017 - FunChIP an RBioconductor package for functional .pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {16} +} + +@article{patroSalmonProvidesFast2017, + title = {Salmon Provides Fast and Bias-Aware Quantification of Transcript Expression}, + author = {Patro, Rob and Duggal, Geet and Love, Michael I and Irizarry, Rafael A and Kingsford, Carl}, + year = {2017}, + month = apr, + volume = {14}, + pages = {417--419}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/nmeth.4197}, + file = {/Users/laurent/Documents/bibliography/mapper/Patro et al. 
- 2017 - Salmon provides fast and bias-aware quantification.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {4} +} + +@article{paulsenLongrangeInteractionsTopologically2019, + title = {Long-Range Interactions between Topologically Associating Domains Shape the Four-Dimensional Genome during Differentiation}, + author = {Paulsen, Jonas and Ali, Tharvesh M. Liyakat and Nekrasov, Maxim and Delbarre, Erwan and Baudement, Marie-Odile and Kurscheid, Sebastian and Tremethick, David and Collas, Philippe}, + year = {2019}, + month = may, + volume = {51}, + pages = {835--843}, + issn = {1546-1718}, + doi = {10.1038/s41588-019-0392-0}, + abstract = {The authors identify the formation of dynamic topologically associating domain (TAD) cliques during differentiation and reprogramming. Their analysis indicates that TAD cliques stabilize heterochromatin at the nuclear periphery.}, + copyright = {2019 The Author(s), under exclusive licence to Springer Nature America, Inc.}, + file = {/Users/laurent/Zotero/storage/TAHMCXDF/Paulsen et al. - 2019 - Long-range interactions between topologically asso.pdf;/Users/laurent/Zotero/storage/9DH5PS7E/s41588-019-0392-0.html}, + journal = {Nature Genetics}, + language = {en}, + number = {5} +} + +@article{pedersenCyvcf2FastFlexible2017, + title = {Cyvcf2: Fast, Flexible Variant Analysis with {{Python}}}, + shorttitle = {Cyvcf2}, + author = {Pedersen, Brent S. and Quinlan, Aaron R.}, + year = {2017}, + month = jun, + volume = {33}, + pages = {1867--1869}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx057}, + abstract = {Motivation: Variant call format (VCF) files document the genetic variation observed after DNA sequencing, alignment and variant calling of a sample cohort. 
Given the complexity of the VCF format as well as the diverse variant annotations and genotype metadata, there is a need for fast, flexible methods enabling intuitive analysis of the variant data within VCF and BCF files.}, + file = {/Users/laurent/Documents/bibliography/to_read/Pedersen and Quinlan - 2017 - cyvcf2 fast, flexible variant analysis with Pytho.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {12} +} + +@article{pedersenPanVizInteractiveVisualization2017, + title = {{{PanViz}}: Interactive Visualization of the Structure of Functionally Annotated Pangenomes}, + shorttitle = {{{PanViz}}}, + author = {Pedersen, Thomas Lin and Nookaew, Intawat and Wayne Ussery, David and M{\aa}nsson, Maria}, + year = {2017}, + month = jan, + pages = {btw761}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btw761}, + abstract = {Summary: PanViz is a novel, interactive, visualization tool for pangenome analysis. PanViz allows visualization of changes in gene group (groups of similar genes across genomes) classification as different subsets of pangenomes are selected, as well as comparisons of individual genomes to pangenomes with gene ontology based navigation of gene groups. Furthermore it allows for rich and complex visual querying of gene groups in the pangenome. PanViz visualizations require no external programs and are easily sharable, allowing for rapid pangenome analyses.}, + file = {/Users/laurent/Documents/bibliography/to_read/Pedersen et al. - 2017 - PanViz interactive visualization of the structure.pdf}, + journal = {Bioinformatics}, + language = {en} +} + +@article{penfoldBranchrecombinantGaussianProcesses2018, + title = {Branch-Recombinant {{Gaussian}} Processes for Analysis of Perturbations in Biological Time Series}, + author = {Penfold, Christopher A. and Sybirna, Anastasiya and Reid, John E. and Huang, Yun and Wernisch, Lorenz and Ghahramani, Zoubin and Grant, Murray and Surani, M. 
Azim}, + year = {2018}, + month = sep, + volume = {34}, + pages = {i1005--i1013}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty603}, + abstract = {AbstractMotivation. A common class of behaviour encountered in the biological sciences involves branching and recombination. During branching, a statistical pr}, + file = {/Users/laurent/Zotero/storage/VRXDADX5/Penfold et al. - 2018 - Branch-recombinant Gaussian processes for analysis.pdf;/Users/laurent/Zotero/storage/3ZX6KC76/5093256.html}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{pengComponentOverlappingAttribute2019, + title = {A Component Overlapping Attribute Clustering ({{COAC}}) Algorithm for Single-Cell {{RNA}} Sequencing Data Analysis and Potential Pathobiological Implications}, + author = {Peng, He and Zeng, Xiangxiang and Zhou, Yadi and Zhang, Defu and Nussinov, Ruth and Cheng, Feixiong}, + editor = {Panchenko, Anna R. R.}, + year = {2019}, + month = feb, + volume = {15}, + pages = {e1006772}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1006772}, + abstract = {Recent advances in next-generation sequencing and computational technologies have enabled routine analysis of large-scale single-cell ribonucleic acid sequencing (scRNA-seq) data. However, scRNA-seq technologies have suffered from several technical challenges, including low mean expression levels in most genes and higher frequencies of missing data than bulk population sequencing technologies. Identifying functional gene sets and their regulatory networks that link specific cell types to human diseases and therapeutics from scRNA-seq profiles are daunting tasks. In this study, we developed a Component Overlapping Attribute Clustering (COAC) algorithm to perform the localized (cell subpopulation) gene co-expression network analysis from large-scale scRNA-seq profiles. 
Gene subnetworks that represent specific gene co-expression patterns are inferred from the components of a decomposed matrix of scRNA-seq profiles. We showed that single-cell gene subnetworks identified by COAC from multiple time points within cell phases can be used for cell type identification with high accuracy (83\%). In addition, COAC-inferred subnetworks from melanoma patients' scRNA-seq profiles are highly correlated with survival rate from The Cancer Genome Atlas (TCGA). Moreover, the localized gene subnetworks identified by COAC from individual patients' scRNA-seq data can be used as pharmacogenomics biomarkers to predict drug responses (The area under the receiver operating characteristic curves ranges from 0.728 to 0.783) in cancer cell lines from the Genomics of Drug Sensitivity in Cancer (GDSC) database. In summary, COAC offers a powerful tool to identify potential network-based diagnostic and pharmacogenomics biomarkers from large-scale scRNAseq profiles. COAC is freely available at https://github.com/ChengF-Lab/COAC.}, + file = {/Users/laurent/Zotero/storage/37YHXSK9/Peng et al. - 2019 - A component overlapping attribute clustering (COAC.pdf;/Users/laurent/Zotero/storage/JVS863WF/Peng et al. - 2019 - A component overlapping attribute clustering (COAC.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {2} +} + +@article{pengellyLinkageDisequilibriumMaps2019, + title = {Linkage Disequilibrium Maps to Guide Contig Ordering for Genome Assembly}, + author = {Pengelly, Reuben J. and Collins, Andrew}, + year = {2019}, + month = feb, + volume = {35}, + pages = {541--545}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty687}, + abstract = {AbstractMotivation. 
Efforts to establish reference genome sequences by de novo sequence assembly have to address the difficulty of linking relatively short seq}, + file = {/Users/laurent/Zotero/storage/B8GTGBQW/Pengelly and Collins - 2019 - Linkage disequilibrium maps to guide contig orderi.pdf;/Users/laurent/Zotero/storage/9WHZLNBF/5067858.html}, + journal = {Bioinformatics}, + language = {en}, + number = {4} +} + +@article{pengSCRABBLESinglecellRNAseq2019, + title = {{{SCRABBLE}}: Single-Cell {{RNA}}-Seq Imputation Constrained by Bulk {{RNA}}-Seq Data}, + shorttitle = {{{SCRABBLE}}}, + author = {Peng, Tao and Zhu, Qin and Yin, Penghang and Tan, Kai}, + year = {2019}, + month = dec, + volume = {20}, + issn = {1474-760X}, + doi = {10.1186/s13059-019-1681-8}, + abstract = {Single-cell RNA-seq data contain a large proportion of zeros for expressed genes. Such dropout events present a fundamental challenge for various types of data analyses. Here, we describe the SCRABBLE algorithm to address this problem. SCRABBLE leverages bulk data as a constraint and reduces unwanted bias towards expressed genes during imputation. Using both simulation and several types of experimental data, we demonstrate that SCRABBLE outperforms the existing methods in recovering dropout events, capturing true distribution of gene expression across cells, and preserving gene-gene relationship and cell-cell relationship in the data.}, + file = {/Users/laurent/Zotero/storage/GITV7CYE/Peng et al. - 2019 - SCRABBLE single-cell RNA-seq imputation constrain.pdf;/Users/laurent/Zotero/storage/MU5VMIKW/Peng et al. - 2019 - SCRABBLE single-cell RNA-seq imputation constrain.pdf}, + journal = {Genome Biology}, + language = {en}, + number = {1} +} + +@article{peonaHowCompleteAre2018, + title = {How Complete Are ``Complete'' Genome Assemblies?\textemdash{{An}} Avian Perspective}, + shorttitle = {How Complete Are ``Complete'' Genome Assemblies?}, + author = {Peona, Valentina and Weissensteiner, Matthias H. 
and Suh, Alexander}, + year = {2018}, + month = nov, + volume = {18}, + pages = {1188--1195}, + issn = {1755-0998}, + doi = {10.1111/1755-0998.12933}, + abstract = {The genomics revolution has led to the sequencing of a large variety of nonmodel organisms often referred to as ``whole'' or ``complete'' genome assemblies. But how complete are these, really? Here, we use birds as an example for nonmodel vertebrates and find that, although suitable in principle for genomic studies, the current standard of short-read assemblies misses a significant proportion of the expected genome size (7\% to 42\%; mean 20 {$\pm$} 9\%). In particular, regions with strongly deviating nucleotide composition (e.g., guanine-cytosine-[GC]-rich) and regions highly enriched in repetitive DNA (e.g., transposable elements and satellite DNA) are usually underrepresented in assemblies. However, long-read sequencing technologies successfully characterize many of these underrepresented GC-rich or repeat-rich regions in several bird genomes. For instance, only 2\% of the expected total base pairs are missing in the last chicken reference (galGal5). These assemblies still contain thousands of gaps (i.e., fragmented sequences) because some chromosomal structures (e.g., centromeres) likely contain arrays of repetitive DNA that are too long to bridge with currently available technologies. We discuss how to minimize the number of assembly gaps by combining the latest available technologies with complementary strengths. At last, we emphasize the importance of knowing the location, size and potential content of assembly gaps when making population genetic inferences about adjacent genomic regions.}, + copyright = {\textcopyright{} 2018 The Authors. Molecular Ecology Resources Published by John Wiley \& Sons Ltd.}, + file = {/Users/laurent/Zotero/storage/NCL28UFA/Peona et al. 
- 2018 - How complete are “complete” genome assemblies—An .pdf;/Users/laurent/Zotero/storage/3DFSC2LM/1755-0998.html}, + journal = {Molecular Ecology Resources}, + keywords = {birds,genomics,hybrid assembly,long reads,multiplatform sequencing,repeats}, + language = {en}, + number = {6} +} + +@article{peyreComputationalOptimalTransport2018, + title = {Computational {{Optimal Transport}}}, + author = {Peyr{\'e}, Gabriel and Cuturi, Marco}, + year = {2018}, + month = mar, + abstract = {Optimal Transport (OT) is a mathematical gem at the interface between probability, analysis and optimization. The goal of that theory is to define geometric tools that are useful to compare probability distributions. Earlier contributions originated from Monge's work in the 18th century, to be later rediscovered under a different formalism by Tolstoi in the 1920's, Kantorovich, Hitchcock and Koopmans in the 1940's. The problem was solved numerically by Dantzig in 1949 and others in the 1950's within the framework of linear programming, paving the way for major industrial applications in the second half of the 20th century. OT was later rediscovered under a different light by analysts in the 90's, following important work by Brenier and others, as well as in the computer vision/graphics fields under the name of earth mover's distances. Recent years have witnessed yet another revolution in the spread of OT, thanks to the emergence of approximate solvers that can scale to sizes and dimensions that are relevant to data sciences. Thanks to this newfound scalability, OT is being increasingly used to unlock various problems in imaging sciences (such as color or texture processing), computer vision and graphics (for shape manipulation) or machine learning (for regression,classification and density fitting). 
This short book reviews OT with a bias toward numerical methods and their applications in data sciences, and sheds lights on the theoretical properties of OT that make it particularly useful for some of these applications.}, + archivePrefix = {arXiv}, + eprint = {1803.00567}, + eprinttype = {arxiv}, + file = {/Users/laurent/Zotero/storage/5CMMY75Y/Peyré and Cuturi - 2018 - Computational Optimal Transport.pdf;/Users/laurent/Zotero/storage/KT9T8KRF/Peyré and Cuturi - 2018 - Computational Optimal Transport.pdf;/Users/laurent/Zotero/storage/PT87ZXH8/Peyré and Cuturi - 2018 - Computational Optimal Transport.pdf;/Users/laurent/Zotero/storage/RD37X38L/Peyré and Cuturi - 2018 - Computational Optimal Transport.pdf;/Users/laurent/Zotero/storage/DRQSVQ7Y/1803.html;/Users/laurent/Zotero/storage/M5GAIHKR/1803.html}, + journal = {arXiv:1803.00567 [stat]}, + keywords = {Statistics - Machine Learning}, + primaryClass = {stat} +} + +@article{pfeiferBlockFeSTBayesianCalculation2018, + title = {{{BlockFeST}}: {{Bayesian}} Calculation of Region-Specific {{FST}} to Detect Local Adaptation}, + shorttitle = {{{BlockFeST}}}, + author = {Pfeifer, Bastian and Lercher, Martin J.}, + year = {2018}, + month = sep, + volume = {34}, + pages = {3205--3207}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty299}, + abstract = {AbstractSummary. The fixation index FST can be used to identify non-neutrally evolving loci from genome-scale SNP data across two or more populations. Recent y}, + file = {/Users/laurent/Zotero/storage/GI4ZUSNC/Pfeifer and Lercher - 2018 - BlockFeST Bayesian calculation of region-specific.pdf;/Users/laurent/Zotero/storage/R4L64BLW/4987139.html}, + journal = {Bioinformatics}, + language = {en}, + number = {18} +} + +@article{phillipsGenespecificTranscriptionalMemory2018, + title = {Gene-Specific Transcriptional Memory in Mammalian Cell Lineages}, + author = {Phillips, Nicholas E. 
and Mandic, Aleksandra and Omidi, Saeed and Naef, Felix and Suter, David M.}, + year = {2018}, + month = sep, + pages = {411447}, + doi = {10.1101/411447}, + abstract = {Phenotypically identical mammalian cells often display considerable variability in transcript levels of individual genes. How transcriptional activity propagates in cell lineages, and how this varies across genes is poorly understood. Here we combined live-cell imaging of short-lived transcriptional reporters in mouse embryonic stem cells with mathematical modelling to quantify the propagation of transcriptional activity over time and across cell generations. In sister cells we found mean transcriptional activity to be strongly correlated and transcriptional dynamics tended to be synchronous; both features control how quickly sister cells diverge in a gene-specific manner. Mean transcriptional activity was also highly correlated between mother and daughter cells, leading to multi-generational transcriptional memory whose duration scaled with the spread of transcriptional activities in the population. The resulting family-specific transcriptional levels suggest a potential role of transcriptional memory in patterning tissue gene expression.}, + copyright = {\textcopyright{} 2018, Posted by Cold Spring Harbor Laboratory. The copyright holder for this pre-print is the author. All rights reserved. The material may not be redistributed, re-used or adapted without the author's permission.}, + file = {/Users/laurent/Zotero/storage/5W3D8VRT/Phillips et al. 
- 2018 - Gene-specific transcriptional memory in mammalian .pdf;/Users/laurent/Zotero/storage/PEVNT7MB/411447.html}, + journal = {bioRxiv}, + language = {en} +} + +@incollection{picelliFullLengthSingleCellRNA2019, + title = {Full-{{Length Single}}-{{Cell RNA Sequencing}} with {{Smart}}-Seq2}, + booktitle = {Single {{Cell Methods}}}, + author = {Picelli, Simone}, + editor = {Proserpio, Valentina}, + year = {2019}, + volume = {1979}, + pages = {25--44}, + publisher = {{Springer New York}}, + address = {{New York, NY}}, + doi = {10.1007/978-1-4939-9240-9_3}, + abstract = {In the last few years single-cell RNA sequencing (scRNA-seq) has enabled the investigation of cellular heterogeneity at the transcriptional level, the characterization of rare cell types as well as the detailed analysis of the stochastic nature of gene expression. A large number of methods have been developed, varying in their throughput, sensitivity, and scalability. A major distinction is whether they profile only 5'- or 3'-terminal part of the transcripts or allow for the characterization of the entire length of the transcripts. Among the latter, Smart-seq2 is still considered the ``gold standard'' due to its sensitivity, precision, lower cost, scalability and for being easy to set up on automated platforms. 
In this chapter I describe how to efficiently generate sequencing-ready libraries, highlight common issues and pitfalls, and offer solutions for generating high-quality data.}, + file = {/Users/laurent/Zotero/storage/6L9V4477/Picelli - 2019 - Full-Length Single-Cell RNA Sequencing with Smart-.pdf;/Users/laurent/Zotero/storage/DQNSJQIN/Picelli - 2019 - Full-Length Single-Cell RNA Sequencing with Smart-.pdf}, + isbn = {978-1-4939-9239-3 978-1-4939-9240-9}, + language = {en} +} + +@article{pickettKmerSSRFastExhaustive2017, + title = {Kmer-{{SSR}}: A Fast and Exhaustive {{SSR}} Search Algorithm}, + shorttitle = {Kmer-{{SSR}}}, + author = {Pickett, Brandon D and Miller, Justin B and Ridge, Perry G}, + year = {2017}, + month = dec, + volume = {33}, + pages = {3922--3928}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx538}, + abstract = {Motivation: One of the main challenges with bioinformatics software is that the size and complexity of datasets necessitate trading speed for accuracy, or completeness. To combat this problem of computational complexity, a plethora of heuristic algorithms have arisen that report a `good enough' solution to biological questions. However, in instances such as Simple Sequence Repeats (SSRs), a `good enough' solution may not accurately portray results in population genetics, phylogenetics and forensics, which require accurate SSRs to calculate intra- and inter-species interactions.}, + file = {/Users/laurent/Documents/bibliography/to_read/Pickett et al. 
- 2017 - Kmer-SSR a fast and exhaustive SSR search algorit.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {24} +} + +@article{pijuan-salaSinglecellTranscriptionalProfiling2018, + title = {Single-Cell Transcriptional Profiling: A Window into Embryonic Cell-Type Specification}, + shorttitle = {Single-Cell Transcriptional Profiling}, + author = {{Pijuan-Sala}, Blanca and Guibentif, Carolina and G{\"o}ttgens, Berthold}, + year = {2018}, + month = apr, + issn = {1471-0072, 1471-0080}, + doi = {10.1038/s41580-018-0002-5}, + abstract = {During mammalian embryonic development, a single fertilized egg cell will proliferate and differentiate into all the cell lineages and cell types that eventually form the adult organism. Cell lineage diversification involves repeated cell fate choices that ultimately occur at the level of the individual cell rather than at the cell-population level. Improvements in single-cell technologies are transforming our understanding of mammalian development, not only by overcoming the limitations presented by the extremely low cell numbers of early embryos but also by enabling the study of cell fate specification in greater detail. In this Review, we first discuss recent advances in single-cell transcriptomics and imaging and provide a brief outline of current bioinformatics methods available to analyse the resulting data. We then discuss how these techniques have contributed to our understanding of pre-implantation and early postimplantation development and of in vitro pluripotency. Finally, we overview the current challenges facing single-cell research and highlight the latest advances and potential future avenues.}, + file = {/Users/laurent/Documents/bibliography/to_read/Pijuan-Sala et al. 
- 2018 - Single-cell transcriptional profiling a window in.pdf}, + journal = {Nature Reviews Molecular Cell Biology}, + language = {en} +} + +@article{pimentelDifferentialAnalysisRNAseq2017, + title = {Differential Analysis of {{RNA}}-Seq Incorporating Quantification Uncertainty}, + author = {Pimentel, Harold and Bray, Nicolas L and Puente, Suzette and Melsted, P{\'a}ll and Pachter, Lior}, + year = {2017}, + month = jun, + volume = {14}, + pages = {687--690}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/nmeth.4324}, + file = {/Users/laurent/Documents/bibliography/RNASeq/Pimentel et al. - 2017 - Differential analysis of RNA-seq incorporating qua.pdf;/Users/laurent/Documents/bibliography/to_read/Pimentel et al. - 2017 - Differential analysis of RNA-seq incorporating qua.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {7} +} + +@article{pirklSingleCellNetwork2018, + title = {Single Cell Network Analysis with a Mixture of {{Nested Effects Models}}}, + author = {Pirkl, Martin and Beerenwinkel, Niko}, + year = {2018}, + month = sep, + volume = {34}, + pages = {i964--i971}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty602}, + abstract = {AbstractMotivation. New technologies allow for the elaborate measurement of different traits of single cells under genetic perturbations. These interventional}, + file = {/Users/laurent/Zotero/storage/DLKWTZUB/Pirkl and Beerenwinkel - 2018 - Single cell network analysis with a mixture of Nes.pdf;/Users/laurent/Zotero/storage/RUWYNYZS/Pirkl and Beerenwinkel - 2018 - Single cell network analysis with a mixture of Nes.pdf;/Users/laurent/Zotero/storage/3MKMWXVA/5093248.html;/Users/laurent/Zotero/storage/RANJJBSY/5093248.html}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{poellACEAbsoluteCopy2019, + title = {{{ACE}}: Absolute Copy Number Estimation from Low-Coverage Whole-Genome Sequencing Data}, + shorttitle = {{{ACE}}}, + author = {Poell, Jos B. 
and Mendeville, Matias and Sie, Daoud and Brink, Arjen and Brakenhoff, Ruud H. and Ylstra, Bauke}, + year = {2019}, + month = aug, + volume = {35}, + pages = {2847--2849}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty1055}, + abstract = {AbstractSummary. Chromosomal copy number aberrations can be efficiently detected and quantified using low-coverage whole-genome sequencing, but analysis is ham}, + file = {/Users/laurent/Zotero/storage/UELKC5XF/Poell et al. - 2019 - ACE absolute copy number estimation from low-cove.pdf;/Users/laurent/Zotero/storage/72AF66Z5/5265327.html}, + journal = {Bioinformatics}, + language = {en}, + number = {16} +} + +@article{pontSingleCellSignatureExplorer2019, + title = {Single-{{Cell Signature Explorer}} for Comprehensive Visualization of Single Cell Signatures across {{scRNA}}-Seq Data Sets}, + author = {Pont, Fr{\'e}d{\'e}ric and Tosolini, Marie and Fourni{\'e}, Jean Jacques}, + year = {2019}, + month = apr, + doi = {10.1101/621805}, + abstract = {The momentum of scRNA-seq data sets prompts for simple and powerful tools exploring their meaningful signatures. Here we present Single-Cell\_Signature\_Explorer ( + + ), the first method for high throughput scoring at single cell level of any gene set-based signature and visualization across t-SNE. By scanning data sets for single or combined signatures, it quantitatively and qualitatively maps any multi-gene feature, exemplified here with signatures of cell lineages, biological hallmarks and metabolic pathways in large scRNAseq datasets of human PBMC, lung cancer and adult testis.}, + file = {/Users/laurent/Zotero/storage/K98BBSPB/Pont et al. - 2019 - Single-Cell Signature Explorer for comprehensive v.pdf;/Users/laurent/Zotero/storage/KJGRZE56/Pont et al. 
- 2019 - Single-Cell Signature Explorer for comprehensive v.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@article{postChromosomesFilariae2005, + title = {The Chromosomes of the {{Filariae}}}, + author = {Post, Rory}, + year = {2005}, + month = nov, + volume = {4}, + pages = {10}, + issn = {1475-2883}, + doi = {10.1186/1475-2883-4-10}, + abstract = {An understanding of the nature of the chromosomes of the filariae is expected to greatly assist the future interpretation of genome data. Filarial development is not eutelic, and there does not seem to be a fixed number of cell divisions in the way that there is in Caenorhabditis. It is not clear whether the chromosomes of the filariae have localized centromeres or whether they are holocentric. Sex determination is by a chromosomal "balance" X0 system in most filariae, but in some Onchocercidae there has been a chromosomal fusion to create a neo-XY system. It is presumed that the molecular basis of sex determination in filariae is similar to Caenorhabditis. The ancestral karyotype of the filariae is probably 5A+X0, but in some Onchocercidae this has been reduced to 4A+XY, and in O. volvulus and O. gibsoni it has been further reduced to 3A+XY. Onchocerca volvulus and O. gibsoni both have supernumary (B-) chromosomes and in O. volvulus there is a single active nucleolus organising region near the middle of the long autosome.}, + journal = {Filaria Journal}, + pmcid = {PMC1282586}, + pmid = {16266430} +} + +@article{prakadanScalingShrinkingEmpowering2017, + title = {Scaling by Shrinking: Empowering Single-Cell 'omics' with Microfluidic Devices}, + shorttitle = {Scaling by Shrinking}, + author = {Prakadan, Sanjay M. and Shalek, Alex K. 
and Weitz, David A.}, + year = {2017}, + month = apr, + volume = {18}, + pages = {345--361}, + issn = {1471-0056, 1471-0064}, + doi = {10.1038/nrg.2017.15}, + abstract = {Recent advances in cellular profiling have demonstrated substantial heterogeneity in the behaviour of cells once deemed `identical', challenging fundamental notions of cell `type' and `state'. Not surprisingly, these findings have elicited substantial interest in deeply characterizing the diversity, interrelationships and plasticity among cellular phenotypes. To explore these questions, experimental platforms are needed that can extensively and controllably profile many individual cells. Here, microfluidic structures \textemdash{} whether valve-, droplet- or nanowell-based \textemdash{}have an important role because they can facilitate easy capture and processing of single cells and their components, reducing labour and costs relative to conventional plate-based methods while also improving consistency. In this article, we review the current state-of-the-art methodologies with respect to microfluidics for mammalian single-cell `omics' and discuss challenges and future opportunities.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Prakadan et al. - 2017 - Scaling by shrinking empowering single-cell 'omic.pdf;/Users/laurent/Zotero/storage/3K73LH6U/Prakadan et al. - 2017 - Scaling by shrinking empowering single-cell 'omic.pdf;/Users/laurent/Zotero/storage/EEZIKZD5/Prakadan et al. - 2017 - Scaling by shrinking empowering single-cell 'omic.pdf;/Users/laurent/Zotero/storage/IUQG3VXK/Prakadan et al. 
- 2017 - Scaling by shrinking empowering single-cell 'omic.pdf}, + journal = {Nature Reviews Genetics}, + language = {en}, + number = {6} +} + +@misc{PrePhaser, + title = {Pre-{{Phaser}}}, + file = {/Users/laurent/Zotero/storage/LT2R925P/citation.html}, + howpublished = {https://dl.acm.org/citation.cfm?id=3342174} +} + +@article{priveEfficientAnalysisLargescale2018, + title = {Efficient Analysis of Large-Scale Genome-Wide Data with Two {{R}} Packages: Bigstatsr and Bigsnpr}, + shorttitle = {Efficient Analysis of Large-Scale Genome-Wide Data with Two {{R}} Packages}, + author = {Priv{\'e}, Florian and Aschard, Hugues and Ziyatdinov, Andrey and Blum, Michael G. B.}, + year = {2018}, + month = aug, + volume = {34}, + pages = {2781--2787}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty185}, + abstract = {AbstractMotivation. Genome-wide datasets produced for association studies have dramatically increased in size over the past few years, with modern datasets com}, + file = {/Users/laurent/Zotero/storage/KHSPBE7Y/Privé et al. - 2018 - Efficient analysis of large-scale genome-wide data.pdf;/Users/laurent/Zotero/storage/PRTM868E/Privé et al. 
- 2018 - Efficient analysis of large-scale genome-wide data.pdf;/Users/laurent/Zotero/storage/8JJA8MR7/4956666.html;/Users/laurent/Zotero/storage/SIFECAKU/4956666.html}, + journal = {Bioinformatics}, + language = {en}, + number = {16} +} + +@article{prlicTenSimpleRules2012, + title = {Ten {{Simple Rules}} for the {{Open Development}} of {{Scientific Software}}}, + author = {Prli{\'c}, Andreas and Procter, James B.}, + year = {2012}, + month = dec, + volume = {8}, + pages = {e1002802}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1002802}, + file = {/Users/laurent/Documents/bibliography/bioinfo/Prlić and Procter - 2012 - Ten Simple Rules for the Open Development of Scien.pdf}, + journal = {PLoS Computational Biology}, + language = {en}, + number = {12} +} + +@article{pryszczRedundansAssemblyPipeline2016, + title = {Redundans: An Assembly Pipeline for Highly Heterozygous Genomes}, + shorttitle = {Redundans}, + author = {Pryszcz, Leszek P. and Gabald{\'o}n, Toni}, + year = {2016}, + volume = {44}, + pages = {e113}, + issn = {1362-4962}, + doi = {10.1093/nar/gkw294}, + abstract = {Many genomes display high levels of heterozygosity (i.e. presence of different alleles at the same loci in homologous chromosomes), being those of hybrid organisms an extreme such case. The assembly of highly heterozygous genomes from short sequencing reads is a challenging task because it is difficult to accurately recover the different haplotypes. When confronted with highly heterozygous genomes, the standard assembly process tends to collapse homozygous regions and reports heterozygous regions in alternative contigs. The boundaries between homozygous and heterozygous regions result in multiple assembly paths that are hard to resolve, which leads to highly fragmented assemblies with a total size larger than expected. This, in turn, causes numerous problems in downstream analyses such as fragmented gene models, wrong gene copy number, or broken synteny. 
To circumvent these caveats we have developed a pipeline that specifically deals with the assembly of heterozygous genomes by introducing a step to recognise and selectively remove alternative heterozygous contigs. We tested our pipeline on simulated and naturally-occurring heterozygous genomes and compared its accuracy to other existing tools. Our method is freely available at https://github.com/Gabaldonlab/redundans.}, + journal = {Nucleic Acids Research}, + keywords = {Algorithms,DNA,Genome,Genomics,Heterozygote,High-Throughput Nucleotide Sequencing,Homozygote,Sequence Analysis,Software,Synteny}, + language = {eng}, + number = {12}, + pmcid = {PMC4937319}, + pmid = {27131372} +} + +@misc{PSMNOnlineLaTeX, + title = {{{PSMN}} - {{Online LaTeX Editor ShareLaTeX}}}, + file = {/Users/laurent/Zotero/storage/DI5SFPKG/5afa9ac4f67d127e11475987.pdf;/Users/laurent/Zotero/storage/YL5EQPIN/5afa9ac4f67d127e11475987.pdf}, + howpublished = {https://v2.overleaf.com/project/5afa9ac4f67d127e11475987} +} + +@article{pucholtRecentSexChromosome2017, + title = {Recent {{Sex Chromosome Divergence}} despite {{Ancient Dioecy}} in the {{Willow Salix}} Viminalis}, + author = {Pucholt, Pascal and Wright, Alison E. and Conze, Lei Liu and Mank, Judith E. and Berlin, Sofia}, + year = {2017}, + month = aug, + volume = {34}, + pages = {1991--2001}, + issn = {0737-4038, 1537-1719}, + doi = {10.1093/molbev/msx144}, + abstract = {Sex chromosomes can evolve when recombination is halted between a pair of chromosomes, and this can lead to degeneration of the sex-limited chromosome. In the early stages of differentiation sex chromosomes are homomorphic, and even though homomorphic sex chromosomes are very common throughout animals and plants, we know little about the evolutionary forces shaping these types of sex chromosomes. 
We used DNA- and RNA-Seq data from females and males to explore the sex chromosomes in the female heterogametic willow, Salix viminalis, a species with ancient dioecy but with homomorphic sex chromosomes. We detected no major sex differences in read coverage in the sex determination (SD) region, indicating that the W region has not significantly degenerated. However, single nucleotide polymorphism densities in the SD region are higher in females compared with males, indicating very recent recombination suppression, followed by the accumulation of sex-specific single nucleotide polymorphisms. Interestingly, we identified two female-specific scaffolds that likely represent W-chromosome-specific sequence. We show that genes located in the SD region display a mild excess of male-biased expression in sex-specific tissue, and we use allele-specific gene expression analysis to show that this is the result of masculinization of expression on the Z chromosome rather than degeneration of female-expression on the W chromosome. Together, our results demonstrate that insertion of small DNA fragments and accumulation of sex-biased gene expression can occur before the detectable decay of the sex-limited chromosome.}, + file = {/Users/laurent/Documents/bibliography/SNP/Pucholt et al. - 2017 - Recent Sex Chromosome Divergence despite Ancient D.pdf}, + journal = {Molecular Biology and Evolution}, + language = {en}, + number = {8} +} + +@article{pudloReliableABCModel2014, + title = {Reliable {{ABC}} Model Choice via Random Forests}, + author = {Pudlo, Pierre and Marin, Jean-Michel and Estoup, Arnaud and Cornuet, Jean-Marie and Gautier, Mathieu and Robert, Christian P.}, + year = {2014}, + month = jun, + abstract = {Approximate Bayesian computation (ABC) methods provide an elaborate approach to Bayesian inference on complex models, including model choice. 
Both theoretical arguments and simulation experiments indicate, however, that model posterior probabilities may be poorly evaluated by standard ABC techniques. We propose a novel approach based on a machine learning tool named random forests to conduct selection among the highly complex models covered by ABC algorithms. We thus modify the way Bayesian model selection is both understood and operated, in that we rephrase the inferential goal as a classification problem, first predicting the model that best fits the data with random forests and postponing the approximation of the posterior probability of the predicted MAP for a second stage also relying on random forests. Compared with earlier implementations of ABC model choice, the ABC random forest approach offers several potential improvements: (i) it often has a larger discriminative power among the competing models, (ii) it is more robust against the number and choice of statistics summarizing the data, (iii) the computing effort is drastically reduced (with a gain in computation efficiency of at least fifty), and (iv) it includes an approximation of the posterior probability of the selected model. The call to random forests will undoubtedly extend the range of size of datasets and complexity of models that ABC can handle. We illustrate the power of this novel methodology by analyzing controlled experiments as well as genuine population genetics datasets. The proposed methodologies are implemented in the R package abcrf available on the CRAN.}, + archivePrefix = {arXiv}, + eprint = {1406.6288}, + eprinttype = {arxiv}, + file = {/Users/laurent/Zotero/storage/JJVXTRT8/Pudlo et al. 
- 2014 - Reliable ABC model choice via random forests.pdf;/Users/laurent/Zotero/storage/XI7UE8K8/1406.html}, + journal = {arXiv:1406.6288 [q-bio, stat]}, + keywords = {Quantitative Biology - Populations and Evolution,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology}, + primaryClass = {q-bio, stat} +} + +@article{puigdevallGenomicScoresSeamlessAccess2018, + title = {{{GenomicScores}}: Seamless Access to Genomewide Position-Specific Scores from {{R}} and {{Bioconductor}}}, + shorttitle = {{{GenomicScores}}}, + author = {Puigdevall, Pau and Castelo, Robert}, + year = {2018}, + month = sep, + volume = {34}, + pages = {3208--3210}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty311}, + abstract = {AbstractSummary. Genomewide position-specific scores, such as those estimating conservation, constraint, fitness or mutation tolerance, are ubiquitous in curre}, + file = {/Users/laurent/Zotero/storage/QR9H8DJJ/Puigdevall and Castelo - 2018 - GenomicScores seamless access to genomewide posit.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {18} +} + +@article{qiClusteringClassificationMethods, + title = {Clustering and Classification Methods for Single-Cell {{RNA}}-Sequencing Data}, + author = {Qi, Ren and Ma, Anjun and Ma, Qin and Zou, Quan}, + doi = {10.1093/bib/bbz062}, + abstract = {Abstract. Appropriate ways to measure the similarity between single-cell RNA-sequencing (scRNA-seq) data are ubiquitous in bioinformatics, but using single clu}, + file = {/Users/laurent/Zotero/storage/NZ5SM46W/Qi et al. 
- Clustering and classification methods for single-c.pdf;/Users/laurent/Zotero/storage/N3293RYW/5528236.html}, + journal = {Briefings in Bioinformatics}, + language = {en} +} + +@article{qiuSinglecellMRNAQuantification2017, + title = {Single-Cell {{mRNA}} Quantification and Differential Analysis with {{Census}}}, + author = {Qiu, Xiaojie and Hill, Andrew and Packer, Jonathan and Lin, Dejun and Ma, Yi-An and Trapnell, Cole}, + year = {2017}, + month = mar, + volume = {14}, + pages = {309--315}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/nmeth.4150}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Qiu et al. - 2017 - Single-cell mRNA quantification and differential a 2.pdf;/Users/laurent/Documents/bibliography/scRNASeq/Qiu et al. - 2017 - Single-cell mRNA quantification and differential a.pdf;/Users/laurent/Zotero/storage/8QC2QQUL/Qiu et al. - 2017 - Single-cell mRNA quantification and differential a.pdf;/Users/laurent/Zotero/storage/ABMEL2UA/Qiu et al. - 2017 - Single-cell mRNA quantification and differential a.pdf;/Users/laurent/Zotero/storage/MXQVQ398/Qiu et al. - 2017 - Single-cell mRNA quantification and differential a.pdf;/Users/laurent/Zotero/storage/P8AGXCUN/Qiu et al. - 2017 - Single-cell mRNA quantification and differential a.pdf;/Users/laurent/Zotero/storage/WCMND2FG/Qiu et al. - 2017 - Single-cell mRNA quantification and differential a.pdf;/Users/laurent/Zotero/storage/ZU8YLY6M/Qiu et al. 
- 2017 - Single-cell mRNA quantification and differential a.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {3} +} + +@misc{QuantifyingWaddingtonEpigenetic, + title = {Quantifying {{Waddington}}'s Epigenetic Landscape: A Comparison of Single-Cell Potency Measures}, + file = {/Users/laurent/Zotero/storage/H26BKWJG/5115275.html}, + howpublished = {https://academic.oup.com/bib/article/21/1/248/5115275} +} + +@misc{QUASTQualityAssessment, + title = {{{QUAST}}: Quality Assessment Tool for Genome Assemblies} +} + +@article{raghupathyHierarchicalAnalysisRNAseq2018, + title = {Hierarchical Analysis of {{RNA}}-Seq Reads Improves the Accuracy of Allele-Specific Expression}, + author = {Raghupathy, Narayanan and Choi, Kwangbom and Vincent, Matthew J. and Beane, Glen L. and Sheppard, Keith S. and Munger, Steven C. and Korstanje, Ron and {Pardo-Manuel de Villena}, Fernando and Churchill, Gary A.}, + year = {2018}, + month = jul, + volume = {34}, + pages = {2177--2184}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty078}, + abstract = {AbstractMotivation. Allele-specific expression (ASE) refers to the differential abundance of the allelic copies of a transcript. RNA sequencing (RNA-seq) can p}, + file = {/Users/laurent/Zotero/storage/4YMZV2NX/Raghupathy et al. 
- 2018 - Hierarchical analysis of RNA-seq reads improves th.pdf;/Users/laurent/Zotero/storage/UJNTMKYS/4850941.html}, + journal = {Bioinformatics}, + language = {en}, + number = {13} +} + +@article{rahmanIntegratedMRFRandomForestbased2017, + title = {{{IntegratedMRF}}: Random Forest-Based Framework for Integrating Prediction from Different Data Types}, + shorttitle = {{{IntegratedMRF}}}, + author = {Rahman, Raziur and Otridge, John and Pal, Ranadip}, + year = {2017}, + month = may, + volume = {33}, + pages = {1407--1410}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btw765}, + abstract = {Summary: IntegratedMRF is an open-source R implementation for integrating drug response predictions from various genomic characterizations using univariate or multivariate random forests that includes various options for error estimation techniques. The integrated framework was developed following superior performance of random forest based methods in NCI-DREAM drug sensitivity prediction challenge. The computational framework can be applied to estimate mean and confidence interval of drug response prediction errors based on ensemble approaches with various combinations of genetic and epigenetic characterizations as inputs. The multivariate random forest implementation included in the package incorporates the correlations between output responses in the modeling and has been shown to perform better than existing approaches when the drug responses are correlated. Detailed analysis of the provided features is included in the Supplementary Material.}, + file = {/Users/laurent/Documents/bibliography/to_read/Rahman et al. 
- 2017 - IntegratedMRF random forest-based framework for i.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {9} +} + +@article{rainerEnsembldbPackageCreate2019, + title = {Ensembldb: An {{R}} Package to Create and Use {{Ensembl}}-Based Annotation Resources}, + shorttitle = {Ensembldb}, + author = {Rainer, Johannes and Gatto, Laurent and Weichenberger, Christian X.}, + year = {2019}, + month = sep, + volume = {35}, + pages = {3151--3153}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz031}, + abstract = {AbstractSummary. Bioinformatics research frequently involves handling gene-centric data such as exons, transcripts, proteins and their positions relative to a}, + file = {/Users/laurent/Zotero/storage/25P98KG9/Rainer et al. - 2019 - ensembldb an R package to create and use Ensembl-.pdf;/Users/laurent/Zotero/storage/NAL8ZYBJ/5301311.html}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{ramdasUnifiedTreatmentMultiple, + title = {A {{Unified Treatment}} of {{Multiple Testing}} with {{Prior Knowledge}}}, + author = {Ramdas, Aaditya and Wainwright, Martin J and Barber, Rina Foygel and Jordan, Michael I}, + pages = {29}, + abstract = {A significant literature has arisen to study ways to employing prior knowledge to improve power and precision of multiple testing procedures. Some common forms of prior knowledge may include (a) a priori beliefs about which hypotheses are null, modeled by non-uniform prior weights; (b) differing importances of hypotheses, modeled by differing penalties for false discoveries; (c) partitions of the hypotheses into known groups, indicating (dis)similarity of hypotheses; and (d) knowledge of independence, positive dependence or arbitrary dependence between hypotheses or groups, allowing for more aggressive or conservative procedures. 
We present a general framework for global null testing and false discovery rate (FDR) control that allows the scientist to incorporate all four types of prior knowledge (a)\textendash{}(d) simultaneously. We unify a number of existing procedures, generalize the conditions under which they are known to work, and simplify their proofs of FDR control under independence, positive and arbitrary dependence. We also present an algorithmic framework that strictly generalizes and unifies the classic algorithms of Benjamini and Hochberg [3] and Simes [25], algorithms that guard against unknown dependence [7, 9], algorithms that employ prior weights [17, 15], algorithms that use penalty weights [4], algorithms that incorporate null-proportion adaptivity [26, 27], and algorithms that make use of multiple arbitrary partitions into groups [1]. Unlike this previous work, we can simultaneously incorporate all of the four types of prior knowledge, combined with all of the three forms of dependence.}, + file = {/Users/laurent/Documents/bibliography/stats/Ramdas et al. - A Unified Treatment of Multiple Testing with Prior .pdf}, + language = {en} +} + +@article{ranganLoopcountingMethodCovariatecorrected2018, + title = {A Loop-Counting Method for Covariate-Corrected Low-Rank Biclustering of Gene-Expression and Genome-Wide Association Study Data}, + author = {Rangan, Aaditya V. and McGrouther, Caroline C. 
and Kelsoe, John and Schork, Nicholas and Stahl, Eli and Zhu, Qian and Krishnan, Arjun and Yao, Vicky and Troyanskaya, Olga and Bilaloglu, Seda and Raghavan, Preeti and Bergen, Sarah and Jureus, Anders and Landen, Mikael and {Bipolar Disorders Working Group of the Psychiatric Genomics Consortium}}, + year = {2018}, + month = may, + volume = {14}, + pages = {e1006105}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1006105}, + abstract = {A common goal in data-analysis is to sift through a large data-matrix and detect any significant submatrices (i.e., biclusters) that have a low numerical rank. We present a simple algorithm for tackling this biclustering problem. Our algorithm accumulates information about 2-by-2 submatrices (i.e., `loops') within the data-matrix, and focuses on rows and columns of the data-matrix that participate in an abundance of low-rank loops. We demonstrate, through analysis and numerical-experiments, that this loop-counting method performs well in a variety of scenarios, outperforming simple spectral methods in many situations of interest. Another important feature of our method is that it can easily be modified to account for aspects of experimental design which commonly arise in practice. For example, our algorithm can be modified to correct for controls, categorical- and continuous-covariates, as well as sparsity within the data. We demonstrate these practical features with two examples; the first drawn from gene-expression analysis and the second drawn from a much larger genome-wide-association-study (GWAS).}, + file = {/Users/laurent/Zotero/storage/IPTAHT4C/Rangan et al. - 2018 - A loop-counting method for covariate-corrected low.pdf;/Users/laurent/Zotero/storage/PFRCCCQZ/Rangan et al.
- 2018 - A loop-counting method for covariate-corrected low.pdf;/Users/laurent/Zotero/storage/KD88NLXC/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Algorithms,Experimental design,Genetics of disease,Genome-wide association studies,Human genetics,Sequence alignment,Source code,Structural genomics}, + language = {en}, + number = {5} +} + +@article{rapsomanikiCellCycleTRACERAccountsCell2018, + title = {{{CellCycleTRACER}} Accounts for Cell Cycle and Volume in Mass Cytometry Data}, + author = {Rapsomaniki, Maria Anna and Lun, Xiao-Kang and Woerner, Stefan and Laumanns, Marco and Bodenmiller, Bernd and Rodr{\'i}guez Mart{\'i}nez, Mar{\'i}a}, + year = {2018}, + month = feb, + volume = {9}, + pages = {632}, + issn = {2041-1723}, + doi = {10.1038/s41467-018-03005-5}, + abstract = {Mass cytometry is a powerful method of single cell analysis, but potential confounding effects of cell cycle and cell volume are not taken into account. Here the authors present a combined experimental and computational method to correct for these effects and reveal features of TNF{$\alpha$} stimulation that are otherwise masked.}, + copyright = {2018 The Author(s)}, + file = {/Users/laurent/Zotero/storage/KKQJUPPA/Rapsomaniki et al. - 2018 - CellCycleTRACER accounts for cell cycle and volume.pdf;/Users/laurent/Zotero/storage/DJMDHYXU/s41467-018-03005-5.html}, + journal = {Nature Communications}, + language = {en}, + number = {1} +} + +@article{rashidTASICDeterminingBranching2017, + title = {{{TASIC}}: Determining Branching Models from Time Series Single Cell Data}, + shorttitle = {{{TASIC}}}, + author = {Rashid, Sabrina and Kotton, Darrell N.
and {Bar-Joseph}, Ziv}, + year = {2017}, + month = aug, + volume = {33}, + pages = {2504--2512}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx173}, + abstract = {Motivation: Single cell RNA-Seq analysis holds great promise for elucidating the networks and pathways controlling cellular differentiation and disease. However, the analysis of time series single cell RNA-Seq data raises several new computational challenges. Cells at each time point are often sampled from a mixture of cell types, each of which may be a progenitor of one, or several, specific fates making it hard to determine which cells should be used to reconstruct temporal trajectories. In addition, cells, even from the same time point, may be unsynchronized making it hard to rely on the measured time for determining these trajectories.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Rashid et al. - 2017 - TASIC determining branching models from time seri.pdf;/Users/laurent/Zotero/storage/L39YXHMW/Rashid et al. - 2017 - TASIC determining branching models from time seri.pdf;/Users/laurent/Zotero/storage/L95PW2RY/Rashid et al. - 2017 - TASIC determining branching models from time seri.pdf;/Users/laurent/Zotero/storage/ZZC5C9KC/Rashid et al. - 2017 - TASIC determining branching models from time seri.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {16} +} + +@article{rauschAlfredInteractiveMultisample, + title = {Alfred: {{Interactive}} Multi-Sample {{BAM}} Alignment Statistics, Feature Counting and Feature Annotation for Long- and Short-Read Sequencing}, + author = {Rausch, Tobias and Fritz, Markus Hsi-Yang and Korbel, Jan O}, + pages = {2}, + abstract = {Summary/Motivation: Harmonizing quality control of large-scale second and third-generation sequencing datasets is key for enabling downstream computational and biological analyses. 
We present Alfred, an efficient and versatile command-line application that computes multi-sample quality control metrics in a read-group aware manner, across a wide variety of sequencing assays and technologies. In addition to standard quality control metrics such as GC bias, base composition, insert size and sequencing coverage distributions it supports haplotype-aware and allele-specific feature counting and feature annotation. The versatility of Alfred allows for easy pipeline integration in high-throughput settings, including DNA sequencing facilities and large-scale research initiatives, enabling continuous monitoring of sequence data quality and characteristics across samples. Alfred supports haplo-tagging of BAM/CRAM files to conduct haplotype-resolved analyses in conjunction with a variety of next-generation sequencing based assays. Alfred's companion web application enables interactive exploration of results and comparison to public data sets.}, + file = {/Users/laurent/Zotero/storage/EDWBKU8V/Rausch et al. - Alfred Interactive multi-sample BAM alignment sta.pdf}, + language = {en} +} + +@article{rauTransformationModelChoice2018, + title = {Transformation and Model Choice for {{RNA}}-Seq Co-Expression Analysis}, + author = {Rau, Andrea and {Maugis-Rabusseau}, Cathy}, + year = {2018}, + month = may, + volume = {19}, + pages = {425--436}, + issn = {1467-5463}, + doi = {10.1093/bib/bbw128}, + abstract = {Abstract. 
Although a large number of clustering algorithms have been proposed to identify groups of co-expressed genes from microarray data, the question of if}, + file = {/Users/laurent/Zotero/storage/CPY4ZLXU/Rau and Maugis-Rabusseau - 2018 - Transformation and model choice for RNA-seq co-exp.pdf;/Users/laurent/Zotero/storage/9URMIA55/2870509.html}, + journal = {Briefings in Bioinformatics}, + language = {en}, + number = {3} +} + +@article{rayonSpeciesspecificDevelopmentalTiming2019, + title = {Species-Specific Developmental Timing Is Associated with Global Differences in Protein Stability in Mouse and Human}, + author = {Rayon, Teresa and Stamataki, Despina and {Perez-Carrasco}, Ruben and {Garcia-Perez}, Lorena and Barrington, Christopher and Melchionda, Manuela and Exelby, Katherine and Tybulewicz, Victor and Fisher, Elizabeth M. C. and Briscoe, James}, + year = {2019}, + month = dec, + pages = {2019.12.29.889543}, + doi = {10.1101/2019.12.29.889543}, + abstract = {What determines the pace of embryonic development? Although many molecular mechanisms controlling developmental processes are evolutionarily conserved, the speed at which these operate can vary substantially between species. For example, the same genetic programme, comprising sequential changes in transcriptional states, governs the differentiation of motor neurons in mouse and human, but the tempo at which it operates differs between species. Using in vitro directed differentiation of embryonic stem cells to motor neurons, we show that the programme runs twice as fast in mouse as in human. We provide evidence that this is neither due to differences in signalling, nor the genomic sequence of genes or their regulatory elements. Instead, we find an approximately two-fold increase in protein stability and cell cycle duration in human cells compared to mouse.
This can account for the slower pace of human development, indicating that global differences in key kinetic parameters play a major role in interspecies differences in developmental tempo.}, + copyright = {\textcopyright{} 2019, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/}, + file = {/Users/laurent/Zotero/storage/AB9HGNAR/Rayon et al. - 2019 - Species-specific developmental timing is associate.pdf;/Users/laurent/Zotero/storage/WF7BGMM5/2019.12.29.html}, + journal = {bioRxiv}, + language = {en} +} + +@book{rcoreteamLanguageEnvironmentStatistical2013, + title = {R: {{A Language}} and {{Environment}} for {{Statistical Computing}}}, + author = {{R Core Team}}, + year = {2013}, + publisher = {{R Foundation for Statistical Computing}}, + address = {{Vienna, Austria}} +} + +@misc{RealisticSilicoGeneration, + title = {Realistic in Silico Generation and Augmentation of Single-Cell {{RNA}}-Seq Data Using Generative Adversarial Networks | {{Nature Communications}}}, + file = {/Users/laurent/Zotero/storage/L3D3I55E/s41467-019-14018-z.html}, + howpublished = {https://www.nature.com/articles/s41467-019-14018-z} +} + +@article{redmanGeneticsMatingSex2008, + title = {Genetics of {{Mating}} and {{Sex Determination}} in the {{Parasitic Nematode Haemonchus}} Contortus}, + author = {Redman, Elizabeth and Grillo, Victoria and Saunders, Gary and Packard, Erica and Jackson, Frank and Berriman, Matt and Gilleard, John Stuart}, + year = {2008}, + month = dec, + volume = {180}, + pages = {1877--1887}, + issn = {0016-6731, 1943-2631}, + doi = {10.1534/genetics.108.094623}, + abstract = {Genetic analysis of parasitic nematodes has been a neglected area of research and the basic genetics of this important group of pathogens are poorly understood.
Haemonchus contortus is one of the most economically significant livestock parasites worldwide and is a key experimental model for the strongylid nematode group that includes many important human and animal pathogens. We have undertaken a study of the genetics and the mode of mating of this parasite using microsatellite markers. Inheritance studies with autosomal markers demonstrated obligate dioecious sexual reproduction and polyandrous mating that are reported here for the first time in a parasitic helminth and provide the parasite with a mechanism of increasing genetic diversity. The karyotype of the H. contortus, MHco3(ISE) isolate was determined as 2n = 11 or 12. We have developed a panel of microsatellite markers that are tightly linked on the X chromosome and have used them to determine the sex chromosomal karyotype as XO male and XX female. Haplotype analysis using the X-chromosomal markers also demonstrated polyandry, independent of the autosomal marker analysis, and enabled a more direct estimate of the number of male parental genotypes contributing to each brood. This work provides a basis for future forward genetic analysis on H. contortus and related parasitic nematodes.}, + copyright = {Copyright \textcopyright{} 2008 by the Genetics Society of America}, + journal = {Genetics}, + language = {en}, + number = {4}, + pmid = {18854587} +} + +@article{reiniusAnalysisAllelicExpression2016, + title = {Analysis of Allelic Expression Patterns in Clonal Somatic Cells by Single-Cell {{RNA}}\textendash{}Seq}, + author = {Reinius, Bj{\"o}rn and Mold, Jeff E and Ramsk{\"o}ld, Daniel and Deng, Qiaolin and Johnsson, Per and Micha{\"e}lsson, Jakob and Fris{\'e}n, Jonas and Sandberg, Rickard}, + year = {2016}, + month = nov, + volume = {48}, + pages = {1430--1435}, + issn = {1061-4036, 1546-1718}, + doi = {10.1038/ng.3678}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Reinius et al. 
- 2016 - Analysis of allelic expression patterns in clonal .pdf;/Users/laurent/Zotero/storage/JPZX4KXW/Reinius et al. - 2016 - Analysis of allelic expression patterns in clonal .pdf;/Users/laurent/Zotero/storage/SW6BWY2J/Reinius et al. - 2016 - Analysis of allelic expression patterns in clonal .pdf;/Users/laurent/Zotero/storage/TLFRMTT3/Reinius et al. - 2016 - Analysis of allelic expression patterns in clonal .pdf}, + journal = {Nature Genetics}, + language = {en}, + number = {11} +} + +@article{renSSCCNovelComputational2019, + title = {{{SSCC}}: {{A Novel Computational Framework}} for {{Rapid}} and {{Accurate Clustering Large}}-Scale {{Single Cell RNA}}-Seq {{Data}}}, + shorttitle = {{{SSCC}}}, + author = {Ren, Xianwen and Zheng, Liangtao and Zhang, Zemin}, + year = {2019}, + month = apr, + volume = {17}, + pages = {201--210}, + issn = {1672-0229}, + doi = {10.1016/j.gpb.2018.10.003}, + abstract = {Clustering is a prevalent analytical means to analyze single cell RNA sequencing (scRNA-seq) data but the rapidly expanding data volume can make this process computationally challenging. New methods for both accurate and efficient clustering are of pressing need. Here we proposed Spearman subsampling-clustering-classification (SSCC), a new clustering framework based on random projection and feature construction, for large-scale scRNA-seq data. SSCC greatly improves clustering accuracy, robustness, and computational efficacy for various state-of-the-art algorithms benchmarked on multiple real datasets. On a dataset with 68,578 human blood cells, SSCC achieved 20\% improvement for clustering accuracy and 50-fold acceleration, but only consumed 66\% memory usage, compared to the widely used software package SC3. Compared to k-means, the accuracy improvement of SSCC can reach 3-fold. An R implementation of SSCC is available at https://github.com/Japrin/sscClust.}, + file = {/Users/laurent/Zotero/storage/PJWCXMAP/Ren et al.
- 2019 - SSCC A Novel Computational Framework for Rapid an.pdf;/Users/laurent/Zotero/storage/X8SEKHCY/S1672022918301086.html}, + journal = {Genomics, Proteomics \& Bioinformatics}, + keywords = {Classification,Clustering,RNA-seq,Single cell,Subsampling}, + language = {en}, + number = {2} +} + +@article{reyCopulaMixtureModel, + title = {Copula {{Mixture Model}} for {{Dependency}}-Seeking {{Clustering}}}, + author = {Rey, M{\'e}lanie and Roth, Volker}, + pages = {8}, + abstract = {We introduce a copula mixture model to perform dependency-seeking clustering when cooccurring samples from different data sources are available. The model takes advantage of the great flexibility offered by the copulas framework to extend mixtures of Canonical Correlation Analysis to multivariate data with arbitrary continuous marginal densities. We formulate our model as a non-parametric Bayesian mixture, while providing efficient MCMC inference. Experiments on synthetic and real data demonstrate that the increased flexibility of the copula mixture significantly improves the clustering and the interpretability of the results.}, + file = {/Users/laurent/Zotero/storage/M723WYN2/Rey and Roth - Copula Mixture Model for Dependency-seeking Cluste.pdf}, + language = {en} +} + +@article{rhoadsPacBioSequencingIts2015, + title = {{{PacBio Sequencing}} and {{Its Applications}}}, + author = {Rhoads, Anthony and Au, Kin Fai}, + year = {2015}, + month = oct, + volume = {13}, + pages = {278--289}, + issn = {1672-0229}, + doi = {10.1016/j.gpb.2015.08.002}, + abstract = {Single-molecule, real-time sequencing developed by Pacific BioSciences offers longer read lengths than the second-generation sequencing (SGS) technologies, making it well-suited for unsolved problems in genome, transcriptome, and epigenetics research. The highly-contiguous de novo assemblies using PacBio sequencing can close gaps in current reference assemblies and characterize structural variation (SV) in personal genomes. 
With longer reads, we can sequence through extended repetitive regions and detect mutations, many of which are associated with diseases. Moreover, PacBio transcriptome sequencing is advantageous for the identification of gene isoforms and facilitates reliable discoveries of novel genes and novel isoforms of annotated genes, due to its ability to sequence full-length transcripts or fragments with significant lengths. Additionally, PacBio's sequencing technique provides information that is useful for the direct detection of base modifications, such as methylation. In addition to using PacBio sequencing alone, many hybrid sequencing strategies have been developed to make use of more accurate short reads in conjunction with PacBio long reads. In general, hybrid sequencing strategies are more affordable and scalable especially for small-size laboratories than using PacBio Sequencing alone. The advent of PacBio sequencing has made available much information that could not be obtained via SGS alone.}, + journal = {Genomics, Proteomics \& Bioinformatics}, + keywords = {assembly,Gene isoform detection,Hybrid sequencing,Methylation,pacbio,Third-generation sequencing}, + number = {5}, + series = {{{SI}}: {{Metagenomics}} of {{Marine Environments}}} +} + +@article{risserSpatiallyDependentMultipleTesting, + title = {Spatially-{{Dependent Multiple Testing Under Model Misspecification}}, with {{Application}} to {{Detection}} of {{Anthropogenic Influence}} on {{Extreme Climate Events}}}, + author = {Risser, Mark D}, + pages = {51}, + abstract = {The Weather Risk Attribution Forecast (WRAF) is a forecasting tool that uses output from global climate models to make simultaneous attribution statements about whether and how greenhouse gas emissions have contributed to extreme weather across the globe. 
However, in conducting a large number of simultaneous hypothesis tests, the WRAF is prone to identifying false ``discoveries.'' A common technique for addressing this multiple testing problem is to adjust the procedure in a way that controls the proportion of true null hypotheses that are incorrectly rejected, or the false discovery rate (FDR). Unfortunately, generic FDR procedures suffer from low power when the hypotheses are dependent, and techniques designed to account for dependence are sensitive to misspecification of the underlying statistical model. In this paper, we develop a Bayesian decision theoretic approach for dependent multiple testing that flexibly controls false discovery and is robust to model misspecification. We illustrate the robustness of our procedure to model error with a simulation study, using a framework that accounts for generic spatial dependence and allows the practitioner to flexibly specify the decision criteria. Finally, we outline the best procedure of those considered for use in the WRAF workflow and apply the procedure to several seasonal forecasts.}, + file = {/Users/laurent/Documents/bibliography/stats/Risser - Spatially-Dependent Multiple Testing Under Model M.pdf}, + language = {en} +} + +@article{rissoClusterExperimentRSECBioconductor2018, + title = {{{clusterExperiment}} and {{RSEC}}: {{A Bioconductor}} Package and Framework for Clustering of Single-Cell and Other Large Gene Expression Datasets}, + shorttitle = {{{clusterExperiment}} and {{RSEC}}}, + author = {Risso, Davide and Purvis, Liam and Fletcher, Russell B. and Das, Diya and Ngai, John and Dudoit, Sandrine and Purdom, Elizabeth}, + year = {2018}, + month = sep, + volume = {14}, + pages = {e1006378}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1006378}, + abstract = {Clustering of genes and/or samples is a common task in gene expression analysis. 
The goals in clustering can vary, but an important scenario is that of finding biologically meaningful subtypes within the samples. This is an application that is particularly appropriate when there are large numbers of samples, as in many human disease studies. With the increasing popularity of single-cell transcriptome sequencing (RNA-Seq), many more controlled experiments on model organisms are similarly creating large gene expression datasets with the goal of detecting previously unknown heterogeneity within cells. It is common in the detection of novel subtypes to run many clustering algorithms, as well as rely on subsampling and ensemble methods to improve robustness. We introduce a Bioconductor R package, clusterExperiment, that implements a general and flexible strategy we entitle Resampling-based Sequential Ensemble Clustering (RSEC). RSEC enables the user to easily create multiple, competing clusterings of the data based on different techniques and associated tuning parameters, including easy integration of resampling and sequential clustering, and then provides methods for consolidating the multiple clusterings into a final consensus clustering. The package is modular and allows the user to separately apply the individual components of the RSEC procedure, i.e., apply multiple clustering algorithms, create a consensus clustering or choose tuning parameters, and merge clusters. Additionally, clusterExperiment provides a variety of visualization tools for the clustering process, as well as methods for the identification of possible cluster signatures or biomarkers. The R package clusterExperiment is publicly available through the Bioconductor Project, with a detailed manual (vignette) as well as well documented help pages for each function.}, + file = {/Users/laurent/Zotero/storage/VIK4P9L4/Risso et al. 
- 2018 - clusterExperiment and RSEC A Bioconductor package.pdf;/Users/laurent/Zotero/storage/8F5RFGEY/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Algorithms,Biomarkers,Clustering algorithms,Data visualization,Epithelium,Gene expression,Hypothalamus,Marker genes}, + language = {en}, + number = {9} +} + +@article{rissoClusterExperimentRSECBioconductor2018a, + title = {{{clusterExperiment}} and {{RSEC}}: {{A Bioconductor}} Package and Framework for Clustering of Single-Cell and Other Large Gene Expression Datasets}, + shorttitle = {{{clusterExperiment}} and {{RSEC}}}, + author = {Risso, Davide and Purvis, Liam and Fletcher, Russell and Das, Diya and Ngai, John and Dudoit, Sandrine and Purdom, Elizabeth}, + year = {2018}, + month = mar, + doi = {10.1101/280545}, + abstract = {Clustering of genes and/or samples is a common task in gene expression analysis. The goals in clustering can vary, but an important scenario is that of finding biologically meaningful subtypes within the samples. This is an application that is particularly appropriate when there are large numbers of samples, as in many human disease studies. With the increasing popularity of single-cell transcriptome sequencing (RNA-Seq), many more controlled experiments on model organisms are similarly creating large gene expression datasets with the goal of detecting previously unknown heterogeneity within cells. It is common in the detection of novel subtypes to run many clustering algorithms, as well as rely on subsampling and ensemble methods to improve robustness. We introduce a Bioconductor R package, clusterExperiment, that implements a general and flexible strategy we entitle Resampling-based Sequential Ensemble Clustering (RSEC). 
RSEC enables the user to easily create multiple, competing clusterings of the data based on different techniques and associated tuning parameters, including easy integration of resampling and sequential clustering, and then provides methods for consolidating the multiple clusterings into a final consensus clustering. The package is modular and allows the user to separately apply the individual components of the RSEC procedure, i.e., apply multiple clustering algorithms, create a consensus clustering or choose tuning parameters, and merge clusters. Additionally, clusterExperiment provides a variety of visualization tools for the clustering process, as well as methods for the identification of possible cluster signatures or biomarkers. The package clusterExperiment is publicly available through the Bioconductor Project, with a detailed manual (vignette) as well as well documented help pages for each function.}, + file = {/Users/laurent/Zotero/storage/EAHEZSD5/Risso et al. - 2018 - clusterExperiment and RSEC A Bioconductor package.pdf;/Users/laurent/Zotero/storage/X5WK45YT/Risso et al. - 2018 - clusterExperiment and RSEC A Bioconductor package.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@article{rissoGeneralFlexibleMethod2018, + title = {A General and Flexible Method for Signal Extraction from Single-Cell {{RNA}}-Seq Data}, + author = {Risso, Davide and Perraudeau, Fanny and Gribkova, Svetlana and Dudoit, Sandrine and Vert, Jean-Philippe}, + year = {2018}, + month = dec, + volume = {9}, + issn = {2041-1723}, + doi = {10.1038/s41467-017-02554-5}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Risso et al. - 2018 - A general and flexible method for signal extractio.pdf;/Users/laurent/Zotero/storage/ELFBQHP3/Risso et al. - 2018 - A general and flexible method for signal extractio.pdf;/Users/laurent/Zotero/storage/LDDWKBE9/Risso et al. - 2018 - A general and flexible method for signal extractio.pdf;/Users/laurent/Zotero/storage/MDHTARUJ/Risso et al.
- 2018 - A general and flexible method for signal extractio.pdf;/Users/laurent/Zotero/storage/MF9QE35N/Risso et al. - 2018 - A general and flexible method for signal extractio.pdf;/Users/laurent/Zotero/storage/E8AW3I55/s41467-017-02554-5.html}, + journal = {Nature Communications}, + language = {en}, + number = {1} +} + +@article{rissoZINBWaVEGeneralFlexible2017, + title = {{{ZINB}}-{{WaVE}}: {{A}} General and Flexible Method for Signal Extraction from Single-Cell {{RNA}}-Seq Data}, + shorttitle = {{{ZINB}}-{{WaVE}}}, + author = {Risso, Davide and Perraudeau, Fanny and Gribkova, Svetlana and Dudoit, Sandrine and Vert, Jean-Philippe}, + year = {2017}, + month = nov, + doi = {10.1101/125112}, + abstract = {Single-cell RNA sequencing (scRNA-seq) is a powerful technique that enables researchers to measure gene expression at the resolution of single cells. Because of the low amount of RNA present in a single cell, many genes fail to be detected even though they are expressed; these genes are usually referred to as dropouts. Here, we present a general and flexible zero-inflated negative binomial model (ZINB-WaVE), which leads to low-dimensional representations of the data that account for zero inflation (dropouts), over-dispersion, and the count nature of the data. We demonstrate, with simulations and real data, that the model and its associated estimation procedure are able to give a more stable and accurate low-dimensional representation of the data than principal component analysis (PCA) and zero-inflated factor analysis (ZIFA), without the need for a preliminary normalization step.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Risso et al. - 2017 - ZINB-WaVE A general and flexible method for signa.pdf;/Users/laurent/Zotero/storage/39VKDJJ4/Risso et al. - 2017 - ZINB-WaVE A general and flexible method for signa.pdf;/Users/laurent/Zotero/storage/CI3R7654/Risso et al.
- 2017 - ZINB-WaVE A general and flexible method for signa.pdf;/Users/laurent/Zotero/storage/ZCME2Y3H/Risso et al. - 2017 - ZINB-WaVE A general and flexible method for signa.pdf}, + language = {en} +} + +@article{robertsImprovingRNASeqExpression2011, + title = {Improving {{RNA}}-{{Seq}} Expression Estimates by Correcting for Fragment Bias}, + author = {Roberts, Adam and Trapnell, Cole and Donaghey, Julie and Rinn, John L and Pachter, Lior}, + year = {2011}, + volume = {12}, + pages = {R22}, + issn = {1465-6906}, + doi = {10.1186/gb-2011-12-3-r22}, + abstract = {The biochemistry of RNA-Seq library preparation results in cDNA fragments that are not uniformly distributed within the transcripts they represent. This non-uniformity must be accounted for when estimating expression levels, and we show how to perform the needed corrections using a likelihood based approach. We find improvements in expression estimates as measured by correlation with independently performed qRT-PCR and show that correction of bias leads to improved replicability of results across libraries and sequencing technologies.}, + file = {/Users/laurent/Documents/bibliography/RNASeq/Roberts et al. - 2011 - Improving RNA-Seq expression estimates by correcti.pdf}, + journal = {Genome Biology}, + language = {en}, + number = {3} +} + +@article{robertsStreamingFragmentAssignment2013, + title = {Streaming Fragment Assignment for Real-Time Analysis of Sequencing Experiments}, + author = {Roberts, Adam and Pachter, Lior}, + year = {2013}, + month = jan, + volume = {10}, + pages = {71--73}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/nmeth.2251}, + file = {/Users/laurent/Documents/bibliography/RNASeq/Roberts and Pachter - 2013 - Streaming fragment assignment for real-time analys.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {1} +} + +@article{robinsonIntegrativeGenomicsViewer2011, + title = {Integrative {{Genomics Viewer}}}, + author = {Robinson, James T. 
and Thorvaldsd{\'o}ttir, Helga and Winckler, Wendy and Guttman, Mitchell and Lander, Eric S. and Getz, Gad and Mesirov, Jill P.}, + year = {2011}, + month = jan, + volume = {29}, + pages = {24--26}, + issn = {1087-0156}, + doi = {10.1038/nbt.1754}, + journal = {Nature Biotechnology}, + number = {1}, + pmcid = {PMC3346182}, + pmid = {21221095} +} + +@article{robisonMobileElementsShape2018, + title = {Mobile {{Elements Shape Plastome Evolution}} in {{Ferns}}}, + author = {Robison, Tanner A. and Grusz, Amanda L. and Wolf, Paul G. and Mower, Jeffrey P. and Fauskee, Blake D. and Sosa, Karla and Schuettpelz, Eric}, + year = {2018}, + month = oct, + volume = {10}, + pages = {2558--2571}, + doi = {10.1093/gbe/evy189}, + abstract = {Abstract. Plastid genomes display remarkable organizational stability over evolutionary time. From green algae to angiosperms, most plastid genomes are largely}, + file = {/Users/laurent/Zotero/storage/EQBU8GKX/Robison et al. - 2018 - Mobile Elements Shape Plastome Evolution in Ferns.pdf;/Users/laurent/Zotero/storage/AVPDU3EP/5079404.html}, + journal = {Genome Biology and Evolution}, + language = {en}, + number = {10} +} + +@misc{RobustDistributedLag, + title = {Robust Distributed Lag Models Using Data Adaptive Shrinkage | {{Biostatistics}} | {{Oxford Academic}}}, + file = {/Users/laurent/Zotero/storage/77IG2S23/4508801.html}, + howpublished = {https://academic.oup.com/biostatistics/article/19/4/461/4508801} +} + +@article{roguskiFaStoreSpacesavingSolution2018, + title = {{{FaStore}}: A Space-Saving Solution for Raw Sequencing Data}, + shorttitle = {{{FaStore}}}, + author = {Roguski, {\L}ukasz and Ochoa, Idoia and Hernaez, Mikel and Deorowicz, Sebastian}, + year = {2018}, + month = aug, + volume = {34}, + pages = {2748--2756}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty205}, + abstract = {Motivation.
The affordability of DNA sequencing has led to the generation of unprecedented volumes of raw sequencing data. These data must be stored, p}, + file = {/Users/laurent/Zotero/storage/FWH8PXNI/Roguski et al. - 2018 - FaStore a space-saving solution for raw sequencin.pdf;/Users/laurent/Zotero/storage/VIJDUUYF/4956350.html}, + journal = {Bioinformatics}, + language = {en}, + number = {16} +} + +@article{romanAutomatedDeconvolutionStructured2017, + title = {Automated Deconvolution of Structured Mixtures from Heterogeneous Tumor Genomic Data}, + author = {Roman, Theodore and Xie, Lu and Schwartz, Russell}, + editor = {Raphael, Benjamin J.}, + year = {2017}, + month = oct, + volume = {13}, + pages = {e1005815}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005815}, + file = {/Users/laurent/Documents/bibliography/to_read/Roman et al. - 2017 - Automated deconvolution of structured mixtures fro.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {10} +} + +@article{rosenbergScalingSingleCell2017, + title = {Scaling Single Cell Transcriptomics through Split Pool Barcoding}, + author = {Rosenberg, Alexander B and Roco, Charles and Muscat, Richard A and Kuchina, Anna and Mukherjee, Sumit and Chen, Wei and Peeler, David J and Yao, Zizhen and Tasic, Bosiljka and Sellers, Drew L and Pun, Suzie H and Seelig, Georg}, + year = {2017}, + month = feb, + doi = {10.1101/105163}, + abstract = {Constructing an atlas of cell types in complex organisms will require a collective effort to characterize billions of individual cells. Single cell RNA sequencing (scRNA-seq) has emerged as the main tool for characterizing cellular diversity, but current methods use custom microfluidics or microwells to compartmentalize single cells, limiting scalability and widespread adoption. Here we present Split Pool Ligation-based Transcriptome sequencing (SPLiT-seq), a scRNA-seq method that labels the cellular origin of RNA through combinatorial indexing. 
SPLiT-seq is compatible with fixed cells, scales exponentially, uses only basic laboratory equipment, and costs one cent per cell. We used this approach to analyze 109,069 single cell transcriptomes from an entire postnatal day 5 mouse brain, providing the first global snapshot at this stage of development. We identified 13 main populations comprising different types of neurons, glia, immune cells, endothelia, as well as types in the blood-brain-barrier. Moreover, we resolve substructure within these clusters corresponding to cells at different stages of development. As sequencing capacity increases, SPLiT-seq will enable profiling of billions of cells in a single experiment.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Rosenberg et al. - 2017 - Scaling single cell transcriptomics through split .pdf;/Users/laurent/Zotero/storage/ADACYPSC/Rosenberg et al. - 2017 - Scaling single cell transcriptomics through split .pdf;/Users/laurent/Zotero/storage/H2ZIM7RA/Rosenberg et al. - 2017 - Scaling single cell transcriptomics through split .pdf;/Users/laurent/Zotero/storage/N8S2ME6X/Rosenberg et al. - 2017 - Scaling single cell transcriptomics through split .pdf}, + language = {en} +} + +@article{rubioloExtremeLearningMachines2018, + title = {Extreme Learning Machines for Reverse Engineering of Gene Regulatory Networks from Expression Time Series}, + author = {Rubiolo, M and Milone, D H and Stegmayer, G}, + year = {2018}, + month = apr, + volume = {34}, + pages = {1253--1260}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx730}, + abstract = {Motivation: The reconstruction of gene regulatory networks (GRNs) from genes profiles has a growing interest in bioinformatics for understanding the complex regulatory mechanisms in cellular systems. GRNs explicitly represent the cause\textendash{}effect of regulation among a group of genes and its reconstruction is today a challenging computational problem. 
Several methods were proposed, but most of them require different input sources to provide an acceptable prediction. Thus, it is a great challenge to reconstruct a GRN only from temporal gene expression data.}, + file = {/Users/laurent/Documents/bibliography/to_read/Rubiolo et al. - 2018 - Extreme learning machines for reverse engineering .pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {7} +} + +@article{rudorfEfficiencyProteinSynthesis2019, + title = {Efficiency of Protein Synthesis Inhibition Depends on {{tRNA}} and Codon Compositions}, + author = {Rudorf, Sophia}, + year = {2019}, + month = aug, + volume = {15}, + pages = {e1006979}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1006979}, + abstract = {Regulation and maintenance of protein synthesis are vital to all organisms and are thus key targets of attack and defense at the cellular level. Here, we mathematically analyze protein synthesis for its sensitivity to the inhibition of elongation factor EF-Tu and/or ribosomes in dependence of the system's tRNA and codon compositions. We find that protein synthesis reacts ultrasensitively to a decrease in the elongation factor's concentration for systems with an imbalance between codon usages and tRNA concentrations. For well-balanced tRNA/codon compositions, protein synthesis is impeded more effectively by the inhibition of ribosomes instead of EF-Tu. Our predictions are supported by re-evaluated experimental data as well as by independent computer simulations. Not only does the described ultrasensitivity render EF-Tu a distinguished target of protein synthesis inhibiting antibiotics. It may also enable persister cell formation mediated by toxin-antitoxin systems. 
The strong impact of the tRNA/codon composition provides a basis for tissue-specificities of disorders caused by mutations of human mitochondrial EF-Tu as well as for the potential use of EF-Tu targeting drugs for tissue-specific treatments.}, + file = {/Users/laurent/Zotero/storage/8BASDDRW/Rudorf - 2019 - Efficiency of protein synthesis inhibition depends.pdf;/Users/laurent/Zotero/storage/ZMRTPUHY/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Antibiotics,Mitochondria,Peptide synthesis,Protein synthesis,Protein translation,Ribosomes,Toxins,Transfer RNA}, + language = {en}, + number = {8} +} + +@article{saaryRTKEfficientRarefaction2017, + title = {{{RTK}}: Efficient Rarefaction Analysis of Large Datasets}, + shorttitle = {{{RTK}}}, + author = {Saary, Paul and Forslund, Kristoffer and Bork, Peer and Hildebrand, Falk}, + year = {2017}, + month = aug, + volume = {33}, + pages = {2594--2595}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx206}, + abstract = {Motivation: The rapidly expanding microbiomics field is generating increasingly larger datasets, characterizing the microbiota in diverse environments. Although classical numerical ecology methods provide a robust statistical framework for their analysis, software currently available is inadequate for large datasets and some computationally intensive tasks, like rarefaction and associated analysis.}, + file = {/Users/laurent/Documents/bibliography/stats/Saary et al. 
- 2017 - RTK efficient rarefaction analysis of large datas.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {16} +} + +@article{saelensComparisonSinglecellTrajectory2019, + title = {A Comparison of Single-Cell Trajectory Inference Methods}, + author = {Saelens, Wouter and Cannoodt, Robrecht and Todorov, Helena and Saeys, Yvan}, + year = {2019}, + month = may, + volume = {37}, + pages = {547--554}, + issn = {1087-0156, 1546-1696}, + doi = {10.1038/s41587-019-0071-9}, + file = {/Users/laurent/Zotero/storage/DP4FRIWS/Saelens et al. - 2019 - A comparison of single-cell trajectory inference m.pdf;/Users/laurent/Zotero/storage/QCFCGAE2/Saelens et al. - 2019 - A comparison of single-cell trajectory inference m.pdf}, + journal = {Nature Biotechnology}, + language = {en}, + number = {5} +} + +@article{salmelaAccurateSelfcorrectionErrors2016, + title = {Accurate Self-Correction of Errors in Long Reads Using de {{Bruijn}} Graphs}, + author = {Salmela, Leena and Walve, Riku and Rivals, Eric and Ukkonen, Esko}, + year = {2016}, + month = jun, + pages = {btw321}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btw321}, + abstract = {Motivation: New long read sequencing technologies, like PacBio SMRT and Oxford NanoPore, can produce sequencing reads up to 50,000 bp long but with an error rate of at least 15\%. Reducing the error rate is necessary for subsequent utilisation of the reads in, e.g., de novo genome assembly. The error correction problem has been tackled either by aligning the long reads against each other or by a hybrid approach that uses the more accurate short reads produced by second generation sequencing technologies to correct the long reads.}, + file = {/Users/laurent/Documents/bibliography/long_reads/Salmela et al. 
- 2016 - Accurate self-correction of errors in long reads u.pdf}, + journal = {Bioinformatics}, + language = {en} +} + +@article{sanchez-taltavullBayesianCorrelationRobust2020, + title = {Bayesian Correlation Is a Robust Gene Similarity Measure for Single-Cell {{RNA}}-Seq Data}, + author = {{Sanchez-Taltavull}, Daniel and Perkins, Theodore J. and Dommann, Noelle and Melin, Nicolas and Keogh, Adrian and Candinas, Daniel and Stroka, Deborah and Beldi, Guido}, + year = {2020}, + month = mar, + volume = {2}, + doi = {10.1093/nargab/lqaa002}, + abstract = {Abstract. Assessing similarity is highly important for bioinformatics algorithms to determine correlations between biological information. A common problem is}, + file = {/Users/laurent/Zotero/storage/7XKUHH68/5715215.html}, + journal = {NAR Genomics and Bioinformatics}, + language = {en}, + number = {1} +} + +@article{sanderImpulseDEDetectionDifferentially2016, + title = {{{ImpulseDE}}: Detection of Differentially Expressed Genes in Time Series Data Using Impulse Models}, + shorttitle = {{{ImpulseDE}}}, + author = {Sander, Jil and Schultze, Joachim L. and Yosef, Nir}, + year = {2016}, + month = oct, + pages = {btw665}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btw665}, + abstract = {Summary: Perturbations in the environment lead to distinctive gene expression changes within a cell. Observed over time, those variations can be characterized by single impulse-like progression patterns. ImpulseDE is an R package suited to capture these patterns in high throughput time series datasets. By fitting a representative impulse model to each gene, it reports differentially expressed genes across time points from a single or between two time courses from two experiments. To optimize running time, the code uses clustering and multi-threading. By applying ImpulseDE, we demonstrate its power to represent underlying biology of gene expression in microarray and RNA-Seq data. 
Availability and Implementation: ImpulseDE is available on Bioconductor (https://bioconductor.org/packages/ImpulseDE/).}, + file = {/Users/laurent/Documents/bibliography/DEA/Sander et al. - 2016 - ImpulseDE detection of differentially expressed g.pdf}, + journal = {Bioinformatics}, + language = {en} +} + +@article{sandveTenSimpleRules2013, + title = {Ten {{Simple Rules}} for {{Reproducible Computational Research}}}, + author = {Sandve, Geir Kjetil and Nekrutenko, Anton and Taylor, James and Hovig, Eivind}, + editor = {Bourne, Philip E.}, + year = {2013}, + month = oct, + volume = {9}, + pages = {e1003285}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1003285}, + file = {/Users/laurent/Documents/bibliography/bioinfo/Sandve et al. - 2013 - Ten Simple Rules for Reproducible Computational Re.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {10} +} + +@article{santesmassesComputationalIdentificationSelenocysteine2017, + title = {Computational Identification of the Selenocysteine {{tRNA}} ({{tRNASec}}) in Genomes}, + author = {Santesmasses, Didac and Mariotti, Marco and Guig{\'o}, Roderic}, + editor = {Gough, Julian}, + year = {2017}, + month = feb, + volume = {13}, + pages = {e1005383}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005383}, + abstract = {Selenocysteine (Sec) is known as the 21st amino acid, a cysteine analogue with selenium replacing sulphur. Sec is inserted co-translationally in a small fraction of proteins called selenoproteins. In selenoprotein genes, the Sec specific tRNA (tRNASec) drives the recoding of highly specific UGA codons from stop signals to Sec. Although found in organisms from the three domains of life, Sec is not universal. Many species are completely devoid of selenoprotein genes and lack the ability to synthesize Sec. Since tRNASec is a key component in selenoprotein biosynthesis, its efficient identification in genomes is instrumental to characterize the utilization of Sec across lineages.
Available tRNA prediction methods fail to accurately predict tRNASec, due to its unusual structural fold. Here, we present Secmarker, a method based on manually curated covariance models capturing the specific tRNASec structure in archaea, bacteria and eukaryotes. We exploited the non-universality of Sec to build a proper benchmark set for tRNASec predictions, which is not possible for the predictions of other tRNAs. We show that Secmarker greatly improves the accuracy of previously existing methods constituting a valuable tool to identify tRNASec genes, and to efficiently determine whether a genome contains selenoproteins. We used Secmarker to analyze a large set of fully sequenced genomes, and the results revealed new insights in the biology of tRNASec, led to the discovery of a novel bacterial selenoprotein family, and shed additional light on the phylogenetic distribution of selenoprotein containing genomes. Secmarker is freely accessible for download, or online analysis through a web server at http://secmarker.crg.cat.}, + file = {/Users/laurent/Documents/bibliography/tRNA/Santesmasses et al. - 2017 - Computational identification of the selenocysteine.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {2} +} + +@article{sarkarMinnowPrincipledFramework2019, + title = {Minnow: A Principled Framework for Rapid Simulation of {{dscRNA}}-Seq Data at the Read Level}, + shorttitle = {Minnow}, + author = {Sarkar, Hirak and Srivastava, Avi and Patro, Rob}, + year = {2019}, + month = jul, + volume = {35}, + pages = {i136--i144}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz351}, + abstract = {Summary: With the advancements of high-throughput single-cell RNA-sequencing protocols, there has been a rapid increase in the tools available to perfo}, + file = {/Users/laurent/Zotero/storage/HBZHHEPK/Sarkar et al.
- 2019 - Minnow a principled framework for rapid simulatio.pdf;/Users/laurent/Zotero/storage/E74G23JD/5529127.html}, + journal = {Bioinformatics}, + language = {en}, + number = {14} +} + +@article{savisaarBothMaintenanceAvoidance2017, + title = {Both {{Maintenance}} and {{Avoidance}} of {{RNA}}-{{Binding Protein Interactions Constrain Coding Sequence Evolution}}}, + author = {Savisaar, Rosina and Hurst, Laurence D.}, + year = {2017}, + month = may, + volume = {34}, + pages = {1110--1126}, + issn = {0737-4038}, + doi = {10.1093/molbev/msx061}, + abstract = {While the principal force directing coding sequence (CDS) evolution is selection on protein function, to ensure correct gene expression CDSs must also maintain interactions with RNA-binding proteins (RBPs). Understanding how our genes are shaped by these RNA-level pressures is necessary for diagnostics and for improving transgenes. However, the evolutionary impact of the need to maintain RBP interactions remains unresolved. Are coding sequences constrained by the need to specify RBP binding motifs? If so, what proportion of mutations are affected? Might sequence evolution also be constrained by the need not to specify motifs that might attract unwanted binding, for instance because it would interfere with exon definition? Here, we have scanned human CDSs for motifs that have been experimentally determined to be recognized by RBPs. We observe two sets of motifs\textemdash{}those that are enriched over nucleotide-controlled null and those that are depleted. Importantly, the depleted set is enriched for motifs recognized by non-CDS binding RBPs. Supporting the functional relevance of our observations, we find that motifs that are more enriched are also slower-evolving. The net effect of this selection to preserve is a reduction in the over-all rate of synonymous evolution of 2\textendash{}3\% in both primates and rodents. 
Stronger motif depletion, on the other hand, is associated with stronger selection against motif gain in evolution. The challenge faced by our CDSs is therefore not only one of attracting the right RBPs but also of avoiding the wrong ones, all while also evolving under selection pressures related to protein structure.}, + file = {/Users/laurent/Zotero/storage/ESJP5EV6/Savisaar and Hurst - 2017 - Both Maintenance and Avoidance of RNA-Binding Prot.pdf}, + journal = {Molecular Biology and Evolution}, + number = {5}, + pmcid = {PMC5400389}, + pmid = {28138077} +} + +@misc{ScalableNonlinearProgramming, + title = {Scalable Nonlinear Programming Framework for Parameter Estimation in Dynamic Biological System Models}, + file = {/Users/laurent/Zotero/storage/2R5B37M5/article.html}, + howpublished = {https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1006828} +} + +@article{schaferIntegrativeAnalysisMultiple2017, + title = {Integrative Analysis of Multiple Genomic Variables Using a Hierarchical {{Bayesian}} Model}, + author = {Sch{\"a}fer, Martin and Klein, Hans-Ulrich and Schwender, Holger}, + year = {2017}, + month = oct, + volume = {33}, + pages = {3220--3227}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx356}, + abstract = {Motivation: Genes showing congruent differences in several genomic variables between two biological conditions are crucial to unravel causalities behind phenotypes of interest. Detecting such genes is important in biomedical research, e.g. when identifying genes responsible for cancer development. Small sample sizes common in next-generation sequencing studies are a key challenge, and there are still only very few statistical methods to analyze more than two genomic variables in an integrative, model-based way. 
Here, we present a novel bioinformatics approach to detect congruent differences between two biological conditions in a larger number of different measurements such as various epigenetic marks or mRNA transcript levels.}, + file = {/Users/laurent/Documents/bibliography/to_read/Schäfer et al. - 2017 - Integrative analysis of multiple genomic variables.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {20} +} + +@article{schiebingerOptimalTransportAnalysisSingleCell2019, + title = {Optimal-{{Transport Analysis}} of {{Single}}-{{Cell Gene Expression Identifies Developmental Trajectories}} in {{Reprogramming}}}, + author = {Schiebinger, Geoffrey and Shu, Jian and Tabaka, Marcin and Cleary, Brian and Subramanian, Vidya and Solomon, Aryeh and Gould, Joshua and Liu, Siyan and Lin, Stacie and Berube, Peter and Lee, Lia and Chen, Jenny and Brumbaugh, Justin and Rigollet, Philippe and Hochedlinger, Konrad and Jaenisch, Rudolf and Regev, Aviv and Lander, Eric S.}, + year = {2019}, + month = feb, + volume = {176}, + pages = {928--943.e22}, + issn = {0092-8674}, + doi = {10.1016/j.cell.2019.01.006}, + file = {/Users/laurent/Zotero/storage/RUH28AYX/Schiebinger et al. - 2019 - Optimal-Transport Analysis of Single-Cell Gene Exp.pdf;/Users/laurent/Zotero/storage/XQ9J6RFH/Schiebinger et al.
- 2019 - Optimal-Transport Analysis of Single-Cell Gene Exp.pdf}, + journal = {Cell}, + language = {en}, + number = {4} +} + +@article{schiebingerReconstructionDevelopmentalLandscapes2017, + title = {Reconstruction of Developmental Landscapes by Optimal-Transport Analysis of Single-Cell Gene Expression Sheds Light on Cellular Reprogramming.}, + author = {Schiebinger, Geoffrey and Shu, Jian and Tabaka, Marcin and Cleary, Brian and Subramanian, Vidya and Solomon, Aryeh and Liu, Siyan and Lin, Stacie and Berube, Peter and Lee, Lia and Chen, Jenny and Brumbaugh, Justin and Rigollet, Philippe and Hochedlinger, Konrad and Jaenisch, Rudolf and Regev, Aviv and Lander, Eric}, + year = {2017}, + month = sep, + doi = {10.1101/191056}, + abstract = {Understanding the molecular programs that guide cellular differentiation during development is a major goal of modern biology. Here, we introduce an approach, WADDINGTON-OT, based on the mathematics of optimal transport, for inferring developmental landscapes, probabilistic cellular fates and dynamic trajectories from large-scale single-cell RNA-seq (scRNA-seq) data collected along a time course. We demonstrate the power of WADDINGTON-OT by applying the approach to study 65,781 scRNA-seq profiles collected at 10 time points over 16 days during reprogramming of fibroblasts to iPSCs. We construct a high-resolution map of reprogramming that rediscovers known features; uncovers new alternative cell fates including neural- and placental-like cells; predicts the origin and fate of any cell class; highlights senescent-like cells that may support reprogramming through paracrine signaling; and implicates regulatory models in particular trajectories. Of these findings, we highlight Obox6, which we experimentally show enhances reprogramming efficiency. Our approach provides a general framework for investigating cellular differentiation.}, + file = {/Users/laurent/Documents/bibliography/to_read/Schiebinger et al. 
- 2017 - Reconstruction of developmental landscapes by opti.pdf;/Users/laurent/Zotero/storage/9DSQNM2I/Schiebinger et al. - 2017 - Reconstruction of developmental landscapes by opti.pdf;/Users/laurent/Zotero/storage/ZFPGYK2V/Schiebinger et al. - 2017 - Reconstruction of developmental landscapes by opti.pdf}, + language = {en} +} + +@article{schiffmanSIDEseqCellSimilarity2017, + title = {{{SIDEseq}}: {{A Cell Similarity Measure Defined}} by {{Shared Identified Differentially Expressed Genes}} for {{Single}}-{{Cell RNA}} Sequencing {{Data}}}, + shorttitle = {{{SIDEseq}}}, + author = {Schiffman, Courtney and Lin, Christina and Shi, Funan and Chen, Luonan and Sohn, Lydia and Huang, Haiyan}, + year = {2017}, + month = jun, + volume = {9}, + pages = {200--216}, + issn = {1867-1764, 1867-1772}, + doi = {10.1007/s12561-017-9194-z}, + file = {/Users/laurent/Documents/bibliography/to_read/Schiffman et al. - 2017 - SIDEseq A Cell Similarity Measure Defined by Shar.pdf}, + journal = {Statistics in Biosciences}, + language = {en}, + number = {1} +} + +@article{schovilleModelSpeciesAgricultural2018, + title = {A Model Species for Agricultural Pest Genomics: The Genome of the {{Colorado}} Potato Beetle, {{Leptinotarsa}} Decemlineata ({{Coleoptera}}: {{Chrysomelidae}})}, + shorttitle = {A Model Species for Agricultural Pest Genomics}, + author = {Schoville, Sean D. and Chen, Yolanda H. and Andersson, Martin N. and Benoit, Joshua B. and Bhandari, Anita and Bowsher, Julia H. and Brevik, Kristian and Cappelle, Kaat and Chen, Mei-Ju M. and Childers, Anna K. and Childers, Christopher and Christiaens, Olivier and Clements, Justin and Didion, Elise M. and Elpidina, Elena N. and Engsontia, Patamarerk and Friedrich, Markus and {Garc{\'i}a-Robles}, Inmaculada and Gibbs, Richard A. and Goswami, Chandan and Grapputo, Alessandro and Gruden, Kristina and Grynberg, Marcin and Henrissat, Bernard and Jennings, Emily C. and Jones, Jeffery W. and Kalsi, Megha and Khan, Sher A. 
and Kumar, Abhishek and Li, Fei and Lombard, Vincent and Ma, Xingzhou and Martynov, Alexander and Miller, Nicholas J. and Mitchell, Robert F. and {Munoz-Torres}, Monica and Muszewska, Anna and Oppert, Brenda and Palli, Subba Reddy and Panfilio, Kristen A. and Pauchet, Yannick and Perkin, Lindsey C. and Petek, Marko and Poelchau, Monica F. and Record, {\'E}ric and Rinehart, Joseph P. and Robertson, Hugh M. and Rosendale, Andrew J. and {Ruiz-Arroyo}, Victor M. and Smagghe, Guy and Szendrei, Zsofia and Thomas, Gregg W. C. and Torson, Alex S. and Jentzsch, Iris M. Vargas and Weirauch, Matthew T. and Yates, Ashley D. and Yocum, George D. and Yoon, June-Sun and Richards, Stephen}, + year = {2018}, + month = jan, + volume = {8}, + pages = {1931}, + issn = {2045-2322}, + doi = {10.1038/s41598-018-20154-1}, + abstract = {The Colorado potato beetle is one of the most challenging agricultural pests to manage. It has shown a spectacular ability to adapt to a variety of solanaceaeous plants and variable climates during its global invasion, and, notably, to rapidly evolve insecticide resistance. To examine evidence of rapid evolutionary change, and to understand the genetic basis of herbivory and insecticide resistance, we tested for structural and functional genomic changes relative to other arthropod species using genome sequencing, transcriptomics, and community annotation. Two factors that might facilitate rapid evolutionary change include transposable elements, which comprise at least 17\% of the genome and are rapidly evolving compared to other Coleoptera, and high levels of nucleotide diversity in rapidly growing pest populations. Adaptations to plant feeding are evident in gene expansions and differential expression of digestive enzymes in gut tissues, as well as expansions of gustatory receptors for bitter tasting. Surprisingly, the suite of genes involved in insecticide resistance is similar to other beetles. 
Finally, duplications in the RNAi pathway might explain why Leptinotarsa decemlineata has high sensitivity to dsRNA. The L. decemlineata genome provides opportunities to investigate a broad range of phenotypes and to develop sustainable methods to control this widely successful pest.}, + copyright = {2018 The Author(s)}, + journal = {Scientific Reports}, + language = {en}, + number = {1} +} + +@article{schurchHowManyBiological2016, + title = {How Many Biological Replicates Are Needed in an {{RNA}}-Seq Experiment and Which Differential Expression Tool Should You Use?}, + author = {Schurch, Nicholas J. and Schofield, Piet{\'a} and Gierli{\'n}ski, Marek and Cole, Christian and Sherstnev, Alexander and Singh, Vijender and Wrobel, Nicola and Gharbi, Karim and Simpson, Gordon G. and {Owen-Hughes}, Tom and Blaxter, Mark and Barton, Geoffrey J.}, + year = {2016}, + month = jun, + volume = {22}, + pages = {839--851}, + issn = {1355-8382, 1469-9001}, + doi = {10.1261/rna.053959.115}, + abstract = {RNA-seq is now the technology of choice for genome-wide differential gene expression experiments, but it is not clear how many biological replicates are needed to ensure valid biological interpretation of the results or which statistical tools are best for analyzing the data. An RNA-seq experiment with 48 biological replicates in each of two conditions was performed to answer these questions and provide guidelines for experimental design. With three biological replicates, nine of the 11 tools evaluated found only 20\%\textendash{}40\% of the significantly differentially expressed (SDE) genes identified with the full set of 42 clean replicates. This rises to {$>$}85\% for the subset of SDE genes changing in expression by more than fourfold. To achieve {$>$}85\% for all SDE genes regardless of fold change requires more than 20 biological replicates. 
The same nine tools successfully control their false discovery rate at {$\lesssim$}5\% for all numbers of replicates, while the remaining two tools fail to control their FDR adequately, particularly for low numbers of replicates. For future RNA-seq experiments, these results suggest that at least six biological replicates should be used, rising to at least 12 when it is important to identify SDE genes for all fold changes. If fewer than 12 replicates are used, a superior combination of true positive and false positive performances makes edgeR and DESeq2 the leading tools. For higher replicate numbers, minimizing false positives is more important and DESeq marginally outperforms the other tools.}, + file = {/Users/laurent/Documents/bibliography/DEA/Schurch et al. - 2016 - How many biological replicates are needed in an RN.pdf}, + journal = {RNA}, + language = {en}, + number = {6} +} + +@article{schweigerFactorialHMMFastExact2019, + title = {{{FactorialHMM}}: Fast and Exact Inference in Factorial Hidden {{Markov}} Models}, + shorttitle = {{{FactorialHMM}}}, + author = {Schweiger, Regev and Erlich, Yaniv and Carmi, Shai}, + year = {2019}, + month = jun, + volume = {35}, + pages = {2162--2164}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty944}, + abstract = {Motivation: Hidden Markov models (HMMs) are powerful tools for modeling processes along the genome. In a standard genomic HMM, observations are drawn,}, + file = {/Users/laurent/Zotero/storage/6E2HI4MM/Schweiger et al. - 2019 - FactorialHMM fast and exact inference in factoria.pdf;/Users/laurent/Zotero/storage/N9E8HU57/5184283.html}, + journal = {Bioinformatics}, + language = {en}, + number = {12} +} + +@article{SciHub101038, + title = {Sci-{{Hub}} | | 10.1038/S41592-020-0750-y} +} + +@article{SciHubAccountingTechnical, + title = {Sci-{{Hub}} | {{Accounting}} for Technical Noise in Single-Cell {{RNA}}-Seq Experiments.
{{Nature Methods}}, 10(11), 1093\textendash{}1095 | 10.1038/Nmeth.2645} +} + +@article{SciHubContributionCell, + title = {Sci-{{Hub}} | {{The}} Contribution of Cell Cycle to Heterogeneity in Single-Cell {{RNA}}-Seq Data. {{Nature Biotechnology}}, 34(6), 591\textendash{}593 | 10.1038/Nbt.3498} +} + +@misc{ScRecoverDiscriminatingTrue, + title = {{{scRecover}}: {{Discriminating}} True and False Zeros in Single-Cell {{RNA}}-Seq Data for Imputation - {{Abstract}} - {{Europe PMC}}}, + file = {/Users/laurent/Zotero/storage/WQRQ87AK/ppr82338.html}, + howpublished = {https://europepmc-org.insb.bib.cnrs.fr/article/ppr/ppr82338} +} + +@article{SearchResults2012, + title = {Search Results}, + year = {2012}, + month = dec, + abstract = {View (previous 20 | next 20) (20 | 50 | 100 | 250 | 500)}, + copyright = {Creative Commons Attribution-ShareAlike License}, + file = {/Users/laurent/Zotero/storage/RRTK3KRI/SpecialSearch.html}, + journal = {Wikipedia, the free encyclopedia}, + language = {en} +} + +@article{sekhonDeepDiffDEEPlearningPredicting2018, + title = {{{DeepDiff}}: {{DEEP}}-Learning for Predicting {{DIFFerential}} Gene Expression from Histone Modifications}, + shorttitle = {{{DeepDiff}}}, + author = {Sekhon, Arshdeep and Singh, Ritambhara and Qi, Yanjun}, + year = {2018}, + month = sep, + volume = {34}, + pages = {i891--i900}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty612}, + abstract = {Motivation: Computational methods that predict differential gene expression from histone modification signals are highly desirable for understanding ho}, + file = {/Users/laurent/Zotero/storage/CFDL5XVN/Sekhon et al.
- 2018 - DeepDiff DEEP-learning for predicting DIFFerentia.pdf;/Users/laurent/Zotero/storage/DYTR239I/5093224.html}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{sekulaDetectionDifferentiallyExpressed2019, + title = {Detection of Differentially Expressed Genes in Discrete Single-cell {{RNA}} Sequencing Data Using a Hurdle Model with Correlated Random Effects}, + author = {Sekula, Michael and Gaskins, Jeremy and Datta, Susmita}, + year = {2019}, + month = apr, + issn = {0006-341X, 1541-0420}, + doi = {10.1111/biom.13074}, + abstract = {Single-cell RNA sequencing (scRNA-seq) technologies are revolutionary tools allowing researchers to examine gene expression at the level of a single cell. Traditionally, transcriptomic data have been analyzed from bulk samples, masking the heterogeneity now seen across individual cells. Even within the same cellular population, genes can be highly expressed in some cells but not expressed (or lowly expressed) in others. Therefore, the computational approaches used to analyze bulk RNA sequencing data are not appropriate for the analysis of scRNA-seq data. Here, we present a novel statistical model for high dimensional and zeroinflated scRNA-seq count data to identify differentially expressed genes across cell types. Correlated random effects are employed based on an initial clustering of cells to capture the cell-to-cell variability within treatment groups. Moreover, this model is flexible and can be easily adapted to an independent random effect structure if needed. We apply our proposed methodology to both simulated and real data and compare results to other popular methods designed for detecting differentially expressed genes. 
Due to the hurdle model's ability to detect differences in the proportion of cells expressed and the average expression level (among the expressed cells), our methods naturally identify some genes as differentially expressed that other methods do not, and we demonstrate with real data that these uniquely detected genes are associated with similar biological processes and functions.}, + file = {/Users/laurent/Zotero/storage/B53EH34U/Sekula et al. - 2019 - Detection of differentially expressed genes in dis.pdf;/Users/laurent/Zotero/storage/CSZRN34Q/Sekula et al. - 2019 - Detection of differentially expressed genes in dis.pdf}, + journal = {Biometrics}, + language = {en} +} + +@article{selitskyTDRmapperChallengesSolutions2015, + title = {{{tDRmapper}}: Challenges and Solutions to Mapping, Naming, and Quantifying {{tRNA}}-Derived {{RNAs}} from Human Small {{RNA}}-Sequencing Data}, + shorttitle = {{{tDRmapper}}}, + author = {Selitsky, Sara R. and Sethupathy, Praveen}, + year = {2015}, + month = dec, + volume = {16}, + issn = {1471-2105}, + doi = {10.1186/s12859-015-0800-0}, + abstract = {Background: Small RNA-sequencing has revealed the diversity and high abundance of small RNAs derived from tRNAs, referred to as tRNA-derived RNAs. However, at present, there is no standardized nomenclature and there are no methods for accurate annotation and quantification of these small RNAs. tRNA-derived RNAs have unique features that limit the utility of conventional alignment tools and quantification methods. +Results: We describe here the challenges of mapping, naming, and quantifying tRNA-derived RNAs and present a novel method that addresses them, called tDRmapper. We then use tDRmapper to perform a comparative analysis of tRNA-derived RNA profiles across different human cell types and diseases. 
We found that (1) tRNA-derived RNA profiles can differ dramatically across different cell types and disease states, (2) that positions and types of chemical modifications of tRNA-derived RNAs vary by cell type and disease, and (3) that entirely different tRNA-derived RNA species can be produced from the same parental tRNA depending on the cell type. +Conclusion: tDRmapper not only provides a standardized nomenclature and quantification scheme, but also includes graphical visualization that facilitates the discovery of novel tRNA and tRNA-derived RNA biology.}, + file = {/Users/laurent/Documents/bibliography/tRNA/Selitsky and Sethupathy - 2015 - tDRmapper challenges and solutions to mapping, na.pdf}, + journal = {BMC Bioinformatics}, + language = {en}, + number = {1} +} + +@article{senolcaliNanoporeSequencingTechnology2019, + title = {Nanopore Sequencing Technology and Tools for Genome Assembly: Computational Analysis of the Current State, Bottlenecks and Future Directions}, + shorttitle = {Nanopore Sequencing Technology and Tools for Genome Assembly}, + author = {Senol Cali, Damla and Kim, Jeremie S. and Ghose, Saugata and Alkan, Can and Mutlu, Onur}, + year = {2019}, + month = jul, + volume = {20}, + pages = {1542--1559}, + doi = {10.1093/bib/bby017}, + abstract = {Abstract. Nanopore sequencing technology has the potential to render other sequencing technologies obsolete with its ability to generate long reads and provide}, + file = {/Users/laurent/Zotero/storage/TRKF8P99/Senol Cali et al.
- 2019 - Nanopore sequencing technology and tools for genom.pdf;/Users/laurent/Zotero/storage/XNNFUXMP/4958758.html}, + journal = {Briefings in Bioinformatics}, + language = {en}, + number = {4} +} + +@misc{SequenceClusteringBioinformatics, + title = {Sequence Clustering in Bioinformatics: An Empirical Study}, + file = {/Users/laurent/Zotero/storage/SRYJPPCE/5098604.html}, + howpublished = {https://academic.oup.com/bib/article/21/1/1/5098604} +} + +@article{serraAutomaticAnalysis3Dmodelling2017, + title = {Automatic Analysis and {{3D}}-Modelling of {{Hi}}-{{C}} Data Using {{TADbit}} Reveals Structural Features of the Fly Chromatin Colors}, + author = {Serra, Fran{\c c}ois and Ba{\`u}, Davide and Goodstadt, Mike and Castillo, David and Filion, Guillaume J. and {Marti-Renom}, Marc A.}, + editor = {Prlic, Andreas}, + year = {2017}, + month = jul, + volume = {13}, + pages = {e1005665}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005665}, + file = {/Users/laurent/Documents/bibliography/Hi-C/Serra et al. - 2017 - Automatic analysis and 3D-modelling of Hi-C data u.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {7} +} + +@article{serraRobustSparseCorrelation2018, + title = {Robust and Sparse Correlation Matrix Estimation for the Analysis of High-Dimensional Genomics Data}, + author = {Serra, Angela and Coretto, Pietro and Fratello, Michele and Tagliaferri, Roberto}, + editor = {Stegle, Oliver}, + year = {2018}, + month = feb, + volume = {34}, + pages = {625--634}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx642}, + abstract = {Motivation: Microarray technology can be used to study the expression of thousands of genes across a number of different experimental conditions, usually hundreds.
The underlying principle is that genes sharing similar expression patterns, across different samples, can be part of the same co-expression system, or they may share the same biological functions. Groups of genes are usually identified based on cluster analysis. Clustering methods rely on the similarity matrix between genes. A common choice to measure similarity is to compute the sample correlation matrix. Dimensionality reduction is another popular data analysis task which is also based on covariance/correlation matrix estimates. Unfortunately, covariance/correlation matrix estimation suffers from the intrinsic noise present in high-dimensional data. Sources of noise are: sampling variations, presents of outlying sample units, and the fact that in most cases the number of units is much larger than the number of genes.}, + file = {/Users/laurent/Documents/bibliography/to_read/Serra et al. - 2018 - Robust and sparse correlation matrix estimation fo.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {4} +} + +@article{sethMysteriousCircleMolecular2017, + title = {The Mysterious Circle: {{Molecular}} Curiosities of {{RNA}} Mediated Gene Regulation}, + shorttitle = {The Mysterious Circle}, + author = {Seth, Kunal and {Harish}}, + year = {2017}, + month = dec, + volume = {9}, + pages = {13--19}, + issn = {24520144}, + doi = {10.1016/j.genrep.2017.08.001}, + abstract = {Circular RNA (circRNA) is emerging as a key player because of its role in gene regulation and potential application in RNA-based drugs. Regulated and/or competitive spatiotemporal biogenesis of circRNA occurs through non-canonical splicing mechanism that involves back-splicing. Many functional aspects have been attributed to this newly emerging group of non-coding RNA. Few functional roles identified are miRNA sponging, protein sponging, mRNA trapping and involvement of circRNA in development and diseases. 
The role of circRNA in plants is less explored and has been demonstrated that many circRNAs are involved in modulation of stress responsive genes. In this review, biogenesis and functional aspects of circRNA have been discussed with special focus on plants perspectives. Many unanswered questions which have not been raised before are discussed for future research endeavors.}, + file = {/Users/laurent/Documents/bibliography/circRNA/Seth and Harish - 2017 - The mysterious circle Molecular curiosities of RN.pdf}, + journal = {Gene Reports}, + language = {en} +} + +@article{seton-rogersInvertedArchitecture2018, + title = {Inverted Architecture}, + author = {{Seton-Rogers}, Sarah}, + year = {2018}, + month = may, + pages = {1}, + issn = {1474-1768}, + doi = {10.1038/s41568-018-0025-4}, + abstract = {Single-cell RNA sequencing analysis in paediatric diffuse midline gliomas with histone H3 lysine 27 to methionine mutations indicates that these aggressive tumours contain many stem-like cells and that lineage-based therapeutic targeting might be beneficial.}, + copyright = {2018 Macmillan Publishers Ltd., part of Springer Nature}, + file = {/Users/laurent/Zotero/storage/EWW5IJ84/s41568-018-0025-4.html}, + journal = {Nature Reviews Cancer}, + language = {en} +} + +@article{shahamRemovalBatchEffects2017, + title = {Removal of Batch Effects Using Distribution-Matching Residual Networks}, + author = {Shaham, Uri and Stanton, Kelly P. and Zhao, Jun and Li, Huamin and Raddassi, Khadir and Montgomery, Ruth and Kluger, Yuval}, + year = {2017}, + month = aug, + volume = {33}, + pages = {2539--2546}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx196}, + abstract = {Motivation: Sources of variability in experimentally derived data include measurement error in addition to the physical phenomena of interest. This measurement error is a combination of systematic components, originating from the measuring instrument and random measurement errors. 
Several novel biological technologies, such as mass cytometry and single-cell RNA-seq (scRNAseq), are plagued with systematic errors that may severely affect statistical analysis if the data are not properly calibrated.}, + file = {/Users/laurent/Documents/bibliography/stats/Shaham et al. - 2017 - Removal of batch effects using distribution-matchi.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {16} +} + +@misc{SHARPHyperfastAccurate, + title = {{{SHARP}}: Hyper-Fast and Accurate Processing of Single-Cell {{RNA}}-Seq Data via Ensemble Random Projection}, + file = {/Users/laurent/Zotero/storage/RMYZJZ4Q/gr.254557.119.html}, + howpublished = {https://genome.cshlp.org/content/early/2020/01/28/gr.254557.119.abstract} +} + +@article{shekharComprehensiveClassificationRetinal2016, + title = {Comprehensive {{Classification}} of {{Retinal Bipolar Neurons}} by {{Single}}-{{Cell Transcriptomics}}}, + author = {Shekhar, Karthik and Lapan, Sylvain W. and Whitney, Irene E. and Tran, Nicholas M. and Macosko, Evan Z. and Kowalczyk, Monika and Adiconis, Xian and Levin, Joshua Z. and Nemesh, James and Goldman, Melissa and McCarroll, Steven A. and Cepko, Constance L. and Regev, Aviv and Sanes, Joshua R.}, + year = {2016}, + month = aug, + volume = {166}, + pages = {1308--1323.e30}, + issn = {0092-8674}, + doi = {10.1016/j.cell.2016.07.054}, + abstract = {Patterns of gene expression can be used to characterize and classify neuronal types. It is challenging, however, to generate taxonomies that fulfill the essential criteria of being comprehensive, harmonizing with conventional classification schemes, and lacking superfluous subdivisions of genuine types. To address these challenges, we used massively parallel single-cell RNA profiling and optimized computational methods on a heterogeneous class of neurons, mouse retinal bipolar cells (BCs).
From a population of {$\sim$}25,000 BCs, we derived a molecular classification that identified 15 types, including all types observed previously and two novel types, one of which has a non-canonical morphology and position. We validated the classification scheme and identified dozens of novel markers using methods that match molecular expression to cell morphology. This work provides a systematic methodology for achieving comprehensive molecular classification of neurons, identifies novel neuronal types, and uncovers transcriptional differences that distinguish types within a class.}, + file = {/Users/laurent/Documents/bibliography/to_read/Shekhar et al. - 2016 - Comprehensive Classification of Retinal Bipolar Ne.pdf}, + journal = {Cell}, + language = {en}, + number = {5} +} + +@article{shengEffectiveDetectionVariation2017, + title = {Effective Detection of Variation in Single-Cell Transcriptomes Using {{MATQ}}-Seq}, + author = {Sheng, Kuanwei and Cao, Wenjian and Niu, Yichi and Deng, Qing and Zong, Chenghang}, + year = {2017}, + month = mar, + volume = {14}, + pages = {267--270}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/nmeth.4145}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Sheng et al. - 2017 - Effective detection of variation in single-cell tr.pdf;/Users/laurent/Zotero/storage/D67IFHDC/Sheng et al. - 2017 - Effective detection of variation in single-cell tr.pdf;/Users/laurent/Zotero/storage/EVLF5LWX/Sheng et al. - 2017 - Effective detection of variation in single-cell tr.pdf;/Users/laurent/Zotero/storage/RKFJWF43/Sheng et al.
- 2017 - Effective detection of variation in single-cell tr.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {3} +} + +@article{shenRMATSRobustFlexible2014, + title = {{{rMATS}}: {{Robust}} and Flexible Detection of Differential Alternative Splicing from Replicate {{RNA}}-{{Seq}} Data}, + shorttitle = {{{rMATS}}}, + author = {Shen, Shihao and Park, Juw Won and Lu, Zhi-xiang and Lin, Lan and Henry, Michael D. and Wu, Ying Nian and Zhou, Qing and Xing, Yi}, + year = {2014}, + month = dec, + volume = {111}, + pages = {E5593--E5601}, + issn = {0027-8424, 1091-6490}, + doi = {10.1073/pnas.1419161111}, + abstract = {Ultra-deep RNA sequencing (RNA-Seq) has become a powerful approach for genome-wide analysis of pre-mRNA alternative splicing. We previously developed multivariate analysis of transcript splicing (MATS), a statistical method for detecting differential alternative splicing between two RNA-Seq samples. Here we describe a new statistical model and computer program, replicate MATS (rMATS), designed for detection of differential alternative splicing from replicate RNA-Seq data. rMATS uses a hierarchical model to simultaneously account for sampling uncertainty in individual replicates and variability among replicates. In addition to the analysis of unpaired replicates, rMATS also includes a model specifically designed for paired replicates between sample groups. The hypothesis-testing framework of rMATS is flexible and can assess the statistical significance over any user-defined magnitude of splicing change. The performance of rMATS is evaluated by the analysis of simulated and real RNA-Seq data. rMATS outperformed two existing methods for replicate RNA-Seq data in all simulation settings, and RT-PCR yielded a high validation rate (94\%) in an RNA-Seq dataset of prostate cancer cell lines. Our data also provide guiding principles for designing RNA-Seq studies of alternative splicing.
We demonstrate that it is essential to incorporate biological replicates in the study design. Of note, pooling RNAs or merging RNA-Seq data from multiple replicates is not an effective approach to account for variability, and the result is particularly sensitive to outliers. The rMATS source code is freely available at rnaseq-mats.sourceforge.net/. As the popularity of RNA-Seq continues to grow, we expect rMATS will be useful for studies of alternative splicing in diverse RNA-Seq projects.}, + copyright = {\textcopyright{} . Freely available online through the PNAS open access option.}, + file = {/Users/laurent/Zotero/storage/WG3IIZLT/Shen et al. - 2014 - rMATS Robust and flexible detection of differenti.pdf;/Users/laurent/Zotero/storage/7VXHW2U5/E5593.html}, + journal = {Proceedings of the National Academy of Sciences}, + keywords = {alternative splicing,exon,isoform,RNA sequencing,transcriptome}, + language = {en}, + number = {51}, + pmid = {25480548} +} + +@article{shiAccurateEfficientEstimation2019, + title = {Accurate and Efficient Estimation of Small {{P}}-Values with the Cross-Entropy Method: Applications in Genomic Data Analysis}, + shorttitle = {Accurate and Efficient Estimation of Small {{P}}-Values with the Cross-Entropy Method}, + author = {Shi, Yang and Wang, Mengqiao and Shi, Weiping and Lee, Ji-Hyun and Kang, Huining and Jiang, Hui}, + year = {2019}, + month = jul, + volume = {35}, + pages = {2441--2448}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty1005}, + abstract = {AbstractMotivation. Small P-values are often required to be accurately estimated in large-scale genomic studies for the adjustment of multiple hypothesis tests}, + file = {/Users/laurent/Zotero/storage/9RUDPV5F/Shi et al. 
- 2019 - Accurate and efficient estimation of small P-value.pdf;/Users/laurent/Zotero/storage/B5QTRFEG/5232222.html}, + journal = {Bioinformatics}, + language = {en}, + number = {14} +} + +@article{shiIdentifyingCellSubpopulations2017, + title = {Identifying {{Cell Subpopulations}} and {{Their Genetic Drivers}} from {{Single}}-{{Cell RNA}}-{{Seq Data Using}} a {{Biclustering Approach}}}, + author = {Shi, Funan and Huang, Haiyan}, + year = {2017}, + month = jul, + volume = {24}, + pages = {663--674}, + issn = {1066-5277, 1557-8666}, + doi = {10.1089/cmb.2017.0049}, + abstract = {Single-cell RNA-Seq (scRNA-Seq) has attracted much attention recently because it allows unprecedented resolution into cellular activity; the technology, therefore, has been widely applied in studying cell heterogeneity such as the heterogeneity among embryonic cells at varied developmental stages or cells of different cancer types or subtypes. A pertinent question in such analyses is to identify cell subpopulations as well as their associated genetic drivers. Consequently, a multitude of approaches have been developed for clustering or biclustering analysis of scRNA-Seq data. In this article, we present a fast and simple iterative biclustering approach called ``BiSNNWalk'' based on the existing SNN-Cliq algorithm. One of BiSNN-Walk's differentiating features is that it returns a ranked list of clusters, which may serve as an indicator of a cluster's reliability. Another important feature is that BiSNN-Walk ranks genes in a gene cluster according to their level of affiliation to the associated cell cluster, making the result more biologically interpretable. We also introduce an entropy-based measure for choosing a highly clusterable similarity matrix as our starting point among a wide selection to facilitate the efficient operation of our algorithm. 
We applied BiSNN-Walk to three large scRNA-Seq studies, where we demonstrated that BiSNNWalk was able to retain and sometimes improve the cell clustering ability of SNN-Cliq. We were able to obtain biologically sensible gene clusters in terms of GO term enrichment. In addition, we saw that there was significant overlap in top characteristic genes for clusters corresponding to similar cell states, further demonstrating the fidelity of our gene clusters.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Shi and Huang - 2017 - Identifying Cell Subpopulations and Their Genetic .pdf;/Users/laurent/Zotero/storage/3PKLPHI5/Shi and Huang - 2017 - Identifying Cell Subpopulations and Their Genetic .pdf;/Users/laurent/Zotero/storage/E2P3SPCH/Shi and Huang - 2017 - Identifying Cell Subpopulations and Their Genetic .pdf;/Users/laurent/Zotero/storage/LSLA75MB/Shi and Huang - 2017 - Identifying Cell Subpopulations and Their Genetic .pdf}, + journal = {Journal of Computational Biology}, + language = {en}, + number = {7} +} + +@article{shiSparseIsoNovelBayesian2018, + title = {{{SparseIso}}: A Novel {{Bayesian}} Approach to Identify Alternatively Spliced Isoforms from {{RNA}}-Seq Data}, + shorttitle = {{{SparseIso}}}, + author = {Shi, Xu and Wang, Xiao and Wang, Tian-Li and {Hilakivi-Clarke}, Leena and Clarke, Robert and Xuan, Jianhua}, + year = {2018}, + month = jan, + volume = {34}, + pages = {56--63}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx557}, + abstract = {Motivation: Recent advances in high-throughput RNA sequencing (RNA-seq) technologies have made it possible to reconstruct the full transcriptome of various types of cells. It is important to accurately assemble transcripts or identify isoforms for an improved understanding of molecular mechanisms in biological systems.}, + file = {/Users/laurent/Documents/bibliography/to_read/Shi et al. 
- 2018 - SparseIso a novel Bayesian approach to identify a.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {1} +} + +@article{shiVariableSelectionValidation2019, + title = {Variable Selection and Validation in Multivariate Modelling}, + author = {Shi, Lin and Westerhuis, Johan A. and Ros{\'e}n, Johan and Landberg, Rikard and Brunius, Carl}, + year = {2019}, + month = mar, + volume = {35}, + pages = {972--980}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty710}, + abstract = {AbstractMotivation. Validation of variable selection and predictive performance is crucial in construction of robust multivariate models that generalize well,}, + file = {/Users/laurent/Zotero/storage/G7VA89FP/Shi et al. - 2019 - Variable selection and validation in multivariate .pdf;/Users/laurent/Zotero/storage/UGD54JPY/5085367.html}, + journal = {Bioinformatics}, + language = {en}, + number = {6} +} + +@article{silvermanNaughtAllZeros2018, + title = {Naught All Zeros in Sequence Count Data Are the Same}, + author = {Silverman, Justin D and Roche, Kimberly and Mukherjee, Sayan and David, Lawrence A}, + year = {2018}, + month = nov, + doi = {10.1101/477794}, + abstract = {Due to the advent and utility of high-throughput sequencing, modern biomedical research abounds with multivariate count data. Yet such sequence count data is often extremely sparse; that is, much of the data is zero values. Such zero values are well known to cause problems for statistical analyses. In this work we provide a systematic description of different processes that can give rise to zero values as well as the types of methods for addressing zeros in sequence count studies. Importantly, we systematically review how various models perform on each type of zero generating process. Our results demonstrate that zero-inflated models can have substantial biases in both simulated and real data settings. 
Additionally, we find that zeros due to biological absences can, for many applications, be approximated as originating from under sampling. Beyond these results, this work provides a paired categorization scheme for models and zero generating processes to facilitate discussions and future research into the analysis of sequence count data.}, + file = {/Users/laurent/Zotero/storage/XTADDI89/Silverman et al. - 2018 - Naught all zeros in sequence count data are the sa.pdf}, + language = {en} +} + +@article{simaoBUSCOAssessingGenome2015, + title = {{{BUSCO}}: Assessing Genome Assembly and Annotation Completeness with Single-Copy Orthologs}, + shorttitle = {{{BUSCO}}}, + author = {Sim{\~a}o, Felipe A. and Waterhouse, Robert M. and Ioannidis, Panagiotis and Kriventseva, Evgenia V. and Zdobnov, Evgeny M.}, + year = {2015}, + month = oct, + volume = {31}, + pages = {3210--3212}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btv351}, + abstract = {Motivation: Genomics has revolutionized biological research, but quality assessment of the resulting assembled sequences is complicated and remains mostly limited to technical measures like N50. Results: We propose a measure for quantitative assessment of genome assembly and annotation completeness based on evolutionarily informed expectations of gene content. We implemented the assessment procedure in open-source software, with sets of Benchmarking Universal SingleCopy Orthologs, named BUSCO.}, + journal = {Bioinformatics}, + language = {en}, + number = {19} +} + +@article{singerSCIFSinglecellMutation2018, + title = {{{SCI$\Phi$}}: {{Single}}-Cell Mutation Identification via Phylogenetic Inference}, + shorttitle = {{{SCI$\Phi$}}}, + author = {Singer, Jochen and Kuipers, Jack and Jahn, Katharina and Beerenwinkel, Niko}, + year = {2018}, + month = mar, + doi = {10.1101/290908}, + abstract = {Understanding the evolution of cancer is important for the development of appropriate cancer therapies. 
The task is challenging because tumors evolve as heterogeneous cell populations with an unknown number of genetically distinct subclones of varying frequencies. Conventional approaches based on bulk sequencing are limited in addressing this challenge as clones cannot be observed directly. Single-cell sequencing holds the promise of resolving the heterogeneity of tumors; however, it has its own challenges including elevated error rates, allelic dropout, and uneven coverage. Here, we develop a new approach to mutation detection in individual tumor cells by leveraging the evolutionary relationship among cells. Our method, called SCI{$\Phi$}, jointly calls mutations in individual cells and estimates the tumor phylogeny among these cells. Employing a Markov Chain Monte Carlo scheme we robustly account for the various sources of noise in single-cell sequencing data. Our approach enables us to reliably call mutations in each single cell even in experiments with high dropout rates and missing data. We show that SCI{$\Phi$} outperforms existing methods on simulated data and applied it to different real-world datasets, namely a whole exome breast cancer as well as a panel acute lymphoblastic leukemia dataset.}, + file = {/Users/laurent/Documents/bibliography/scDNAseq/Singer et al. - 2018 - SCIΦ Single-cell mutation identification via phyl.pdf;/Users/laurent/Documents/bibliography/scDNAseq/Singer et al. 
- 2018 - SCIΦ Single-cell mutation identification via phyl.pdf}, + language = {en} +} + +@book{SINGLECELLMETHODS2019, + title = {{{SINGLE CELL METHODS}}: Sequencing and Proteomics.}, + shorttitle = {{{SINGLE CELL METHODS}}}, + year = {2019}, + publisher = {{HUMANA}}, + address = {{S.l.}}, + file = {/Users/laurent/Zotero/storage/TIP4F3UL/2019 - SINGLE CELL METHODS sequencing and proteomics..pdf;/Users/laurent/Zotero/storage/WLS38PTH/2019 - SINGLE CELL METHODS sequencing and proteomics..pdf}, + isbn = {978-1-4939-9239-3}, + language = {en}, + note = {OCLC: 1083031072} +} + +@misc{SingleCellTranscriptional, + title = {The Single Cell Transcriptional Landscape of Mammalian Organogenesis}, + file = {/Users/laurent/Zotero/storage/KD7FWLBE/PMC6434952.html}, + howpublished = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6434952/} +} + +@article{sinhaDropClustEfficientClustering2018, + title = {{{dropClust}}: Efficient Clustering of Ultra-Large {{scRNA}}-Seq Data}, + shorttitle = {{{dropClust}}}, + author = {Sinha, Debajyoti and Kumar, Akhilesh and Kumar, Himanshu and Bandyopadhyay, Sanghamitra and Sengupta, Debarka}, + year = {2018}, + month = jun, + volume = {46}, + pages = {e36}, + issn = {1362-4962}, + doi = {10.1093/nar/gky007}, + abstract = {Droplet based single cell transcriptomics has recently enabled parallel screening of tens of thousands of single cells. Clustering methods that scale for such high dimensional data without compromising accuracy are scarce. We exploit Locality Sensitive Hashing, an approximate nearest neighbour search technique to develop a de novo clustering algorithm for large-scale single cell data. On a number of real datasets, dropClust outperformed the existing best practice methods in terms of execution time, clustering accuracy and detectability of minor cell sub-types.}, + file = {/Users/laurent/Zotero/storage/69KV8XQ7/Sinha et al. 
- 2018 - dropClust efficient clustering of ultra-large scR.pdf}, + journal = {Nucleic Acids Research}, + keywords = {Algorithms,Cells; Cultured,Cluster Analysis,Computational Biology,Gene Expression Profiling,HEK293 Cells,Humans,Jurkat Cells,Leukocytes; Mononuclear,Megakaryocyte Progenitor Cells,Reproducibility of Results,RNA; Small Cytoplasmic,Sequence Analysis; RNA,Single-Cell Analysis}, + language = {eng}, + number = {6}, + pmcid = {PMC5888655}, + pmid = {29361178} +} + +@book{skaugGeneralizedLinearMixed2016, + title = {Generalized {{Linear Mixed Models}} Using '{{AD Model Builder}}'}, + author = {Skaug, Hans and Fournier, Dave and Bolker, Ben and Magnusson, Arni and Nielsen, Anders}, + year = {2016}, + month = jan +} + +@misc{smitRepeatMaskerOpen42013, + title = {{{RepeatMasker Open}}-4.0}, + author = {Smit, A. F. A. and Hubley, R. and Green, P.}, + year = {2013} +} + +@misc{smitRepeatMaskerOpen42015, + title = {{{RepeatMasker Open}}-4.0}, + author = {Smit, A. F. A. and Hubley, R. and Green, P.}, + year = {2015} +} + +@article{soldatovSpatiotemporalStructureCell2019, + title = {Spatiotemporal Structure of Cell Fate Decisions in Murine Neural Crest}, + author = {Soldatov, Ruslan and Kaucka, Marketa and Kastriti, Maria Eleni and Petersen, Julian and Chontorotzea, Tatiana and Englmaier, Lukas and Akkuratova, Natalia and Yang, Yunshi and H{\"a}ring, Martin and Dyachuk, Viacheslav and Bock, Christoph and Farlik, Matthias and Piacentino, Michael L. and Boismoreau, Franck and Hilscher, Markus M. and Yokota, Chika and Qian, Xiaoyan and Nilsson, Mats and Bronner, Marianne E. and Croci, Laura and Hsiao, Wen-Yu and Guertin, David A. and Brunet, Jean-Francois and Consalez, Gian Giacomo and Ernfors, Patrik and Fried, Kaj and Kharchenko, Peter V. and Adameyko, Igor}, + year = {2019}, + month = jun, + volume = {364}, + pages = {eaas9536}, + issn = {0036-8075, 1095-9203}, + doi = {10.1126/science.aas9536}, + file = {/Users/laurent/Zotero/storage/D7LTFH6Y/Soldatov et al.
- 2019 - Spatiotemporal structure of cell fate decisions in.pdf;/Users/laurent/Zotero/storage/JAQVBZZ7/Soldatov et al. - 2019 - Spatiotemporal structure of cell fate decisions in.pdf;/Users/laurent/Zotero/storage/K3NCBJVR/Soldatov et al. - 2019 - Spatiotemporal structure of cell fate decisions in.pdf;/Users/laurent/Zotero/storage/MXTFZUHZ/Soldatov et al. - 2019 - Spatiotemporal structure of cell fate decisions in.pdf;/Users/laurent/Zotero/storage/RN7J992U/Soldatov et al. - 2019 - Spatiotemporal structure of cell fate decisions in.pdf}, + journal = {Science}, + language = {en}, + number = {6444} +} + +@article{sonesonBiasRobustnessScalability2017, + title = {Bias, {{Robustness And Scalability In Differential Expression Analysis Of Single}}-{{Cell RNA}}-{{Seq Data}}}, + author = {Soneson, Charlotte and Robinson, Mark D.}, + year = {2017}, + month = may, + doi = {10.1101/143289}, + abstract = {Background: As single-cell RNA-seq (scRNA-seq) is becoming increasingly common, the amount of publicly available data grows rapidly, generating a useful resource for computational method development and extension of published results. Although processed data matrices are typically made available in public repositories, the procedure to obtain these varies widely between data sets, which may complicate reuse and cross-data set comparison. Moreover, while many statistical methods for performing differential expression analysis of scRNA-seq data are becoming available, their relative merits and the performance compared to methods developed for bulk RNA-seq data are not sufficiently well understood. +Results: We present conquer, a collection of consistently processed, analysis-ready public single-cell RNA-seq data sets. Each data set has count and transcripts per million (TPM) estimates for genes and transcripts, as well as quality control and exploratory analysis reports. 
We use a subset of the data sets available in conquer to perform an extensive evaluation of the performance and characteristics of statistical methods for differential gene expression analysis, evaluating a total of 30 statistical approaches on both experimental and simulated scRNA-seq data. +Conclusions: Considerable differences are found between the methods in terms of the number and characteristics of the genes that are called differentially expressed. Pre-filtering of lowly expressed genes can have important effects on the results, particularly for some of the methods originally developed for analysis of bulk RNA-seq data. Generally, however, methods developed for bulk RNA-seq analysis do not perform notably worse than those developed specifically for scRNA-seq.}, + file = {/Users/laurent/Documents/bibliography/to_read/Soneson and Robinson - 2017 - Bias, Robustness And Scalability In Differential E.pdf}, + language = {en} +} + +@article{sonesonBiasRobustnessScalability2018, + title = {Bias, Robustness and Scalability in Single-Cell Differential Expression Analysis}, + author = {Soneson, Charlotte and Robinson, Mark D}, + year = {2018}, + month = feb, + volume = {15}, + pages = {255--261}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/nmeth.4612}, + file = {/Users/laurent/Documents/bibliography/to_read/Soneson and Robinson - 2018 - Bias, robustness and scalability in single-cell di.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {4} +} + +@article{songSingleCellAlternativeSplicing2017, + title = {Single-{{Cell Alternative Splicing Analysis}} with {{Expedition Reveals Splicing Dynamics}} during {{Neuron Differentiation}}}, + author = {Song, Yan and Botvinnik, Olga B. and Lovci, Michael T. and Kakaradov, Boyko and Liu, Patrick and Xu, Jia L. 
and Yeo, Gene W.}, + year = {2017}, + month = jul, + volume = {67}, + pages = {148--161.e5}, + issn = {1097-2765}, + doi = {10.1016/j.molcel.2017.06.003}, + abstract = {Alternative splicing (AS) generates isoform diversity for cellular identity and homeostasis in multicellular life. Although AS variation has been observed among single cells, little is known about the biological or evolutionary significance of such variation. We developed Expedition, a computational framework consisting of outrigger, a de novo splice graph transversal algorithm to detect AS; anchor, a Bayesian approach to assign modalities; and bonvoyage, a visualization tool using non-negative matrix factorization to display modality changes. Applying Expedition to single pluripotent stem cells undergoing neuronal differentiation, we discover that up to 20\% of AS exons exhibit bimodality. Bimodal exons are flanked by more conserved intronic sequences harboring distinct cis-regulatory motifs, constitute much of cell-type-specific splicing, are highly dynamic during cellular transitions, preserve reading frame, and reveal intricacy of cell states invisible to conventional gene expression analysis. Systematic AS characterization in single cells redefines our understanding of AS complexity in cell biology.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Song et al. - 2017 - Single-Cell Alternative Splicing Analysis with Exp 2.pdf;/Users/laurent/Documents/bibliography/scRNASeq/Song et al. - 2017 - Single-Cell Alternative Splicing Analysis with Exp.pdf;/Users/laurent/Zotero/storage/6CVPTMPI/Song et al. - 2017 - Single-Cell Alternative Splicing Analysis with Exp.pdf;/Users/laurent/Zotero/storage/DH9D8JJZ/Song et al. - 2017 - Single-Cell Alternative Splicing Analysis with Exp.pdf;/Users/laurent/Zotero/storage/KPKZ2X4A/Song et al. - 2017 - Single-Cell Alternative Splicing Analysis with Exp.pdf;/Users/laurent/Zotero/storage/NS7ELJVK/Song et al.
- 2017 - Single-Cell Alternative Splicing Analysis with Exp.pdf;/Users/laurent/Zotero/storage/UK9SLYFI/Song et al. - 2017 - Single-Cell Alternative Splicing Analysis with Exp.pdf;/Users/laurent/Zotero/storage/VZYTQJME/Song et al. - 2017 - Single-Cell Alternative Splicing Analysis with Exp.pdf}, + journal = {Molecular Cell}, + language = {en}, + number = {1} +} + +@article{sountoulidisSCRINSHOTSpatialMethod2020, + title = {{{SCRINSHOT}}, a Spatial Method for Single-Cell Resolution Mapping of Cell States in Tissue Sections}, + author = {Sountoulidis, Alexandros and Liontos, Andreas and Nguyen, Hong Phuong and Firsova, Alexandra B. and Fysikopoulos, Athanasios and Qian, Xiaoyan and Seeger, Werner and Sundstr{\"o}m, Erik and Nilsson, Mats and Samakovlis, Christos}, + year = {2020}, + month = feb, + pages = {2020.02.07.938571}, + doi = {10.1101/2020.02.07.938571}, + abstract = {Changes in cell identities and positions underlie tissue development and disease progression. Although, single-cell mRNA sequencing (scRNA-Seq) methods rapidly generate extensive lists of cell-states, spatially resolved single-cell mapping presents a challenging task. We developed SCRINSHOT (Single Cell Resolution IN Situ Hybridization On Tissues), a sensitive, multiplex RNA mapping approach. Direct hybridization of padlock probes on mRNA is followed by circularization with SplintR ligase and rolling circle amplification (RCA) of the hybridized padlock probes. Sequential detection of RCA-products using fluorophore-labeled oligonucleotides profiles thousands of cells in tissue sections. We evaluated SCRINSHOT specificity and sensitivity on murine and human organs. SCRINSHOT quantification of marker gene expression shows high correlation with published scRNA-Seq data over a broad range of gene expression levels. We demonstrate the utility of SCRINSHOT by mapping the locations of abundant and rare cell types along the murine airways. 
The amenability, multiplexity and quantitative qualities of SCRINSHOT facilitate single cell mRNA profiling of cell-state alterations in tissues under a variety of native and experimental conditions.}, + copyright = {\textcopyright{} 2020, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/}, + file = {/Users/laurent/Zotero/storage/KCCQEQUF/Sountoulidis et al. - 2020 - SCRINSHOT, a spatial method for single-cell resolu.pdf;/Users/laurent/Zotero/storage/AWA3EZGS/2020.02.07.html}, + journal = {bioRxiv}, + language = {en} +} + +@article{spechtLEAPConstructingGene2016, + title = {{{LEAP}}: Constructing Gene Co-Expression Networks for Single-Cell {{RNA}}-Sequencing Data Using Pseudotime Ordering}, + shorttitle = {{{LEAP}}}, + author = {Specht, Alicia T. and Li, Jun}, + year = {2016}, + month = dec, + pages = {btw729}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btw729}, + abstract = {Summary: To construct gene co-expression networks based on single-cell RNA-Sequencing data, we present an algorithm called LEAP, which utilizes the estimated pseudotime of the cells to find gene co-expression that involves time delay.}, + file = {/Users/laurent/Documents/bibliography/networks/Specht and Li - 2016 - LEAP constructing gene co-expression networks for.pdf}, + journal = {Bioinformatics}, + language = {en} +} + +@article{spillBinlessNormalizationHiC2019, + title = {Binless Normalization of {{Hi}}-{{C}} Data Provides Significant Interaction and Difference Detection Independent of Resolution}, + author = {Spill, Yannick G. 
and Castillo, David and Vidal, Enrique and {Marti-Renom}, Marc A.}, + year = {2019}, + month = apr, + volume = {10}, + pages = {1--10}, + issn = {2041-1723}, + doi = {10.1038/s41467-019-09907-2}, + abstract = {Analysis of Hi-C datasets is limited by the current existing methods for data normalization, with detection of features such as TADs and chromatin loops being inconsistent amongst different approaches. Here the authors develop Binless, a method that allows for reproducible normalization of Hi-C data independent of its resolution and compare how Binless performs in comparison with other methods.}, + copyright = {2019 The Author(s)}, + file = {/Users/laurent/Zotero/storage/CDKL7ASG/Spill et al. - 2019 - Binless normalization of Hi-C data provides signif.pdf;/Users/laurent/Zotero/storage/M59B76ZC/s41467-019-09907-2.html}, + journal = {Nature Communications}, + language = {en}, + number = {1} +} + +@article{stansfieldHiCcompareRpackageJoint2018, + title = {{{HiCcompare}}: An {{R}}-Package for Joint Normalization and Comparison of {{HI}}-{{C}} Datasets}, + shorttitle = {{{HiCcompare}}}, + author = {Stansfield, John C. and Cresswell, Kellen G. and Vladimirov, Vladimir I. and Dozmorov, Mikhail G.}, + year = {2018}, + month = jul, + volume = {19}, + pages = {279}, + issn = {1471-2105}, + doi = {10.1186/s12859-018-2288-x}, + abstract = {Changes in spatial chromatin interactions are now emerging as a unifying mechanism orchestrating the regulation of gene expression. Hi-C sequencing technology allows insight into chromatin interactions on a genome-wide scale. However, Hi-C data contains many DNA sequence- and technology-driven biases. These biases prevent effective comparison of chromatin interactions aimed at identifying genomic regions differentially interacting between, e.g., disease-normal states or different cell types. Several methods have been developed for normalizing individual Hi-C datasets. 
However, they fail to account for biases between two or more Hi-C datasets, hindering comparative analysis of chromatin interactions.}, + file = {/Users/laurent/Zotero/storage/QLGCDFT3/Stansfield et al. - 2018 - HiCcompare an R-package for joint normalization a.pdf;/Users/laurent/Zotero/storage/JW54HYVB/s12859-018-2288-x.html}, + journal = {BMC Bioinformatics}, + number = {1} +} + +@article{stansfieldHiCcompareRpackageJoint2018a, + title = {{{HiCcompare}}: An {{R}}-Package for Joint Normalization and Comparison of {{HI}}-{{C}} Datasets}, + shorttitle = {{{HiCcompare}}}, + author = {Stansfield, John C. and Cresswell, Kellen G. and Vladimirov, Vladimir I. and Dozmorov, Mikhail G.}, + year = {2018}, + month = dec, + volume = {19}, + pages = {1--10}, + issn = {1471-2105}, + doi = {10.1186/s12859-018-2288-x}, + abstract = {Changes in spatial chromatin interactions are now emerging as a unifying mechanism orchestrating the regulation of gene expression. Hi-C sequencing technology allows insight into chromatin interactions on a genome-wide scale. However, Hi-C data contains many DNA sequence- and technology-driven biases. These biases prevent effective comparison of chromatin interactions aimed at identifying genomic regions differentially interacting between, e.g., disease-normal states or different cell types. Several methods have been developed for normalizing individual Hi-C datasets. However, they fail to account for biases between two or more Hi-C datasets, hindering comparative analysis of chromatin interactions. We developed a simple and effective method, HiCcompare, for the joint normalization and differential analysis of multiple Hi-C datasets. The method introduces a distance-centric analysis and visualization of the differences between two Hi-C datasets on a single plot that allows for a data-driven normalization of biases using locally weighted linear regression (loess). 
HiCcompare outperforms methods for normalizing individual Hi-C datasets and methods for differential analysis (diffHiC, FIND) in detecting a priori known chromatin interaction differences while preserving the detection of genomic structures, such as A/B compartments. HiCcompare is able to remove between-dataset bias present in Hi-C matrices. It also provides a user-friendly tool to allow the scientific community to perform direct comparisons between the growing number of pre-processed Hi-C datasets available at online repositories. HiCcompare is freely available as a Bioconductor R package https://bioconductor.org/packages/HiCcompare/ .}, + copyright = {2018 The Author(s).}, + file = {/Users/laurent/Zotero/storage/QTS2359N/Stansfield et al. - 2018 - HiCcompare an R-package for joint normalization a.pdf;/Users/laurent/Zotero/storage/YBH9WN7S/s12859-018-2288-x.html}, + journal = {BMC Bioinformatics}, + language = {en}, + number = {1} +} + +@article{stansfieldMultiHiCcompareJointNormalization2019, + title = {{{multiHiCcompare}}: Joint Normalization and Comparative Analysis of Complex {{Hi}}-{{C}} Experiments}, + shorttitle = {{{multiHiCcompare}}}, + author = {Stansfield, John C. and Cresswell, Kellen G. and Dozmorov, Mikhail G.}, + year = {2019}, + month = sep, + volume = {35}, + pages = {2916--2923}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz048}, + abstract = {Motivation: With the development of chromatin conformation capture technology and its high-throughput derivative Hi-C sequencing, studies of the three-}, + file = {/Users/laurent/Zotero/storage/KFJ67896/Stansfield et al. 
- 2019 - multiHiCcompare joint normalization and comparati.pdf;/Users/laurent/Zotero/storage/E6E7QPT9/5298730.html}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{stansfieldMultiHiCcompareJointNormalization2019a, + title = {{{multiHiCcompare}}: Joint Normalization and Comparative Analysis of Complex {{Hi}}-{{C}} Experiments}, + shorttitle = {{{multiHiCcompare}}}, + author = {Stansfield, John C. and Cresswell, Kellen G. and Dozmorov, Mikhail G.}, + year = {2019}, + month = sep, + volume = {35}, + pages = {2916--2923}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz048}, + abstract = {Motivation: With the development of chromatin conformation capture technology and its high-throughput derivative Hi-C sequencing, studies of the three-}, + file = {/Users/laurent/Zotero/storage/CVHPVR2K/login.html}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@book{STARUltrafastUniversal, + title = {{{STAR}}: Ultrafast Universal {{RNA}}-Seq Aligner. - {{PubMed}} - {{NCBI}}} +} + +@misc{StatisticalMethodsAlternative, + title = {Statistical {{Methods}} for {{Alternative Splicing Using RNA Sequencing}} - {{ProQuest}}}, + file = {/Users/laurent/Zotero/storage/ISZ8RH5K/1.html}, + howpublished = {https://search.proquest.com/openview/dbb14dddc8909edf4dfa2ed7bceb4b63/1?pq-origsite=gscholar\&cbl=18750\&diss=y}, + language = {en} +} + +@article{stavrovskayaStereoGeneRapidEstimation2017, + title = {{{StereoGene}}: Rapid Estimation of Genome-Wide Correlation of Continuous or Interval Feature Data}, + shorttitle = {{{StereoGene}}}, + author = {Stavrovskaya, Elena D. and Niranjan, Tejasvi and Fertig, Elana J. and Wheelan, Sarah J. and Favorov, Alexander V. 
and Mironov, Andrey A.}, + year = {2017}, + month = oct, + volume = {33}, + pages = {3158--3165}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx379}, + abstract = {Motivation: Genomics features with similar genome-wide distributions are generally hypothesized to be functionally related, for example, colocalization of histones and transcription start sites indicate chromatin regulation of transcription factor activity. Therefore, statistical algorithms to perform spatial, genome-wide correlation among genomic features are required.}, + file = {/Users/laurent/Documents/bibliography/to_read/Stavrovskaya et al. - 2017 - StereoGene rapid estimation of genome-wide correl.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {20} +} + +@article{steinhauserComprehensiveComparisonTools2016, + title = {A Comprehensive Comparison of Tools for Differential {{ChIP}}-Seq Analysis}, + author = {Steinhauser, Sebastian and Kurzawa, Nils and Eils, Roland and Herrmann, Carl}, + year = {2016}, + month = jan, + pages = {bbv110}, + issn = {1467-5463, 1477-4054}, + doi = {10.1093/bib/bbv110}, + abstract = {ChIP-seq has become a widely adopted genomic assay in recent years to determine binding sites for transcription factors or enrichments for specific histone modifications. Beside detection of enriched or bound regions, an important question is to determine differences between conditions. While this is a common analysis for gene expression, for which a large number of computational approaches have been validated, the same question for ChIP-seq is particularly challenging owing to the complexity of ChIP-seq data in terms of noisiness and variability. Many different tools have been developed and published in recent years. However, a comprehensive comparison and review of these tools is still missing. Here, we have reviewed 14 tools, which have been developed to determine differential enrichment between two conditions. 
They differ in their algorithmic setups, and also in the range of applicability. Hence, we have benchmarked these tools on real data sets for transcription factors and histone modifications, as well as on simulated data sets to quantitatively evaluate their performance. Overall, there is a great variety in the type of signal detected by these tools with a surprisingly low level of agreement. Depending on the type of analysis performed, the choice of method will crucially impact the outcome.}, + file = {/Users/laurent/Documents/bibliography/bioinfo/documentation/Steinhauser et al. - 2016 - A comprehensive comparison of tools for differenti.pdf;/Users/laurent/Documents/bibliography/ChipSeq/Steinhauser et al. - 2016 - A comprehensive comparison of tools for differenti.pdf}, + journal = {Briefings in Bioinformatics}, + language = {en} +} + +@article{stephensFalseDiscoveryRates2016, + title = {False Discovery Rates: A New Deal}, + shorttitle = {False Discovery Rates}, + author = {Stephens, Matthew}, + year = {2016}, + month = oct, + pages = {kxw041}, + issn = {1465-4644, 1468-4357}, + doi = {10.1093/biostatistics/kxw041}, + abstract = {We introduce a new Empirical Bayes approach for large-scale hypothesis testing, including estimating false discovery rates (FDRs), and effect sizes. This approach has two key differences from existing approaches to FDR analysis. First, it assumes that the distribution of the actual (unobserved) effects is unimodal, with a mode at 0. This ``unimodal assumption'' (UA), although natural in many contexts, is not usually incorporated into standard FDR analysis, and we demonstrate how incorporating it brings many benefits. Specifically, the UA facilitates efficient and robust computation\textemdash{}estimating the unimodal distribution involves solving a simple convex optimization problem\textemdash{}and enables more accurate inferences provided that it holds. 
Second, the method takes as its input two numbers for each test (an effect size estimate and corresponding standard error), rather than the one number usually used (p value or z score). When available, using two numbers instead of one helps account for variation in measurement precision across tests. It also facilitates estimation of effects, and unlike standard FDR methods, our approach provides interval estimates (credible regions) for each effect in addition to measures of significance. To provide a bridge between interval estimates and significance measures, we introduce the term ``local false sign rate'' to refer to the probability of getting the sign of an effect wrong and argue that it is a superior measure of significance than the local FDR because it is both more generally applicable and can be more robustly estimated. Our methods are implemented in an R package ashr available from http://github.com/stephens999/ashr.}, + file = {/Users/laurent/Documents/bibliography/stats/Stephens - 2016 - False discovery rates a new deal.pdf}, + journal = {Biostatistics}, + language = {en} +} + +@book{storaStress2010, + title = {{Le stress}}, + author = {Stora, Jean-Benjamin}, + year = {2010}, + volume = {8e {\'e}d.}, + publisher = {{Presses Universitaires de France}}, + address = {{Paris cedex 14}}, + abstract = {Le stress est-il le nouveau \guillemotleft{} Malaise dans la civilisation \guillemotright{} ? La fortune s{\'e}mantique de cette notion, que l'on ne rencontre pas dans la langue fran{\c c}aise avant le XXe si{\`e}cle, ne peut en tout cas laisser indiff{\'e}rent un chercheur, tant le stress poss{\`e}de cette rare sp{\'e}cificit{\'e} de f{\'e}conder presque toutes les sciences. Quelles sont les sources de ce nouveau mal du si{\`e}cle ? Dans quelle mesure est-il li{\'e} {\`a} la vie professionnelle des individus ? Peut-on {\'e}laborer une th{\'e}orie g{\'e}n{\'e}rale du stress ? 
Quels moyens permettent de le pr{\'e}venir et de le soigner ?}, + isbn = {978-2-13-058272-4}, + language = {FR}, + series = {{Que sais-je ?}} +} + +@article{stovnerPyRangesEfficientComparison2020, + title = {{{PyRanges}}: Efficient Comparison of Genomic Intervals in {{Python}}}, + shorttitle = {{{PyRanges}}}, + author = {Stovner, Endre Bakken and S{\ae}trom, P{\aa}l}, + year = {2020}, + month = feb, + volume = {36}, + pages = {918--919}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz615}, + abstract = {Summary: Complex genomic analyses often use sequences of simple set operations like intersection, overlap and nearest on genomic intervals. These opera}, + file = {/Users/laurent/Zotero/storage/PXDG2347/Stovner and Sætrom - 2020 - PyRanges efficient comparison of genomic interval.pdf;/Users/laurent/Zotero/storage/2UVYKEAG/5543103.html}, + journal = {Bioinformatics}, + language = {en}, + number = {3} +} + +@article{strazarScOrangeToolHandson2019, + title = {{{scOrange}}\textemdash{}a Tool for Hands-on Training of Concepts from Single-Cell Data Analytics}, + author = {Stra{\v z}ar, Martin and {\v Z}agar, Lan and Koko{\v s}ar, Jaka and Tanko, Vesna and Erjavec, Ale{\v s} and Poli{\v c}ar, Pavlin G. and Stari{\v c}, An{\v z}e and Dem{\v s}ar, Janez and Shaulsky, Gad and Menon, Vilas and Lemire, Andrew and Parikh, Anup and Zupan, Bla{\v z}}, + year = {2019}, + month = jul, + volume = {35}, + pages = {i4--i12}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz348}, + abstract = {Motivation: Single-cell RNA sequencing allows us to simultaneously profile the transcriptomes of thousands of cells and to indulge in exploring cell di}, + file = {/Users/laurent/Zotero/storage/5S3HWQJP/Stražar et al. 
- 2019 - scOrange—a tool for hands-on training of concepts .pdf;/Users/laurent/Zotero/storage/2W7C5TJP/5529249.html}, + journal = {Bioinformatics}, + language = {en}, + number = {14} +} + +@book{StressBrainAdaptation, + title = {Stress and the Brain: From Adaptation to Disease \textbar{} {{Nature Reviews Neuroscience}}} +} + +@book{STRESSHEALTHPsychological, + title = {{{STRESS AND HEALTH}}: {{Psychological}}, {{Behavioral}}, and {{Biological Determinants}}} +} + +@book{StressJeanBenjaminStora, + title = {Le Stress - {{Jean}}-{{Benjamin Stora}} \textbar{} {{Cairn}}.Info} +} + +@article{stubbingtonSinglecellTranscriptomicsExplore2017, + title = {Single-Cell Transcriptomics to Explore the Immune System in Health and Disease}, + author = {Stubbington, Michael J. T. and {Rozenblatt-Rosen}, Orit and Regev, Aviv and Teichmann, Sarah A.}, + year = {2017}, + month = oct, + volume = {358}, + pages = {58--63}, + issn = {0036-8075, 1095-9203}, + doi = {10.1126/science.aan6828}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Stubbington et al. - 2017 - Single-cell transcriptomics to explore the immune .pdf;/Users/laurent/Zotero/storage/4PIUXUIP/Stubbington et al. - 2017 - Single-cell transcriptomics to explore the immune .pdf;/Users/laurent/Zotero/storage/8MRNVVZ2/Stubbington et al. - 2017 - Single-cell transcriptomics to explore the immune .pdf;/Users/laurent/Zotero/storage/UIHI89RD/Stubbington et al. 
- 2017 - Single-cell transcriptomics to explore the immune .pdf}, + journal = {Science}, + language = {en}, + number = {6359} +} + +@article{sulaimanovInferringGeneExpression2019, + title = {Inferring Gene Expression Networks with Hubs Using a Degree Weighted {{Lasso}} Approach}, + author = {Sulaimanov, Nurgazy and Kumar, Sunil and Burdet, Fr{\'e}d{\'e}ric and Ibberson, Mark and Pagni, Marco and Koeppl, Heinz}, + year = {2019}, + month = mar, + volume = {35}, + pages = {987--994}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty716}, + abstract = {Motivation: Genome-scale gene networks contain regulatory genes called hubs that have many interaction partners. These genes usually play an essential}, + file = {/Users/laurent/Zotero/storage/XNHUAHE9/Sulaimanov et al. - 2019 - Inferring gene expression networks with hubs using.pdf;/Users/laurent/Zotero/storage/VXCHLD2G/5085370.html}, + journal = {Bioinformatics}, + language = {en}, + number = {6} +} + +@article{sunAccuracyRobustnessScalability2019, + title = {Accuracy, Robustness and Scalability of Dimensionality Reduction Methods for Single-Cell {{RNA}}-Seq Analysis}, + author = {Sun, Shiquan and Zhu, Jiaqiang and Ma, Ying and Zhou, Xiang}, + year = {2019}, + month = dec, + volume = {20}, + pages = {1--21}, + issn = {1474-760X}, + doi = {10.1186/s13059-019-1898-6}, + abstract = {Dimensionality reduction is an indispensable analytic component for many areas of single-cell RNA sequencing (scRNA-seq) data analysis. Proper dimensionality reduction can allow for effective noise removal and facilitate many downstream analyses that include cell clustering and lineage reconstruction. Unfortunately, despite the critical importance of dimensionality reduction in scRNA-seq analysis and the vast number of dimensionality reduction methods developed for scRNA-seq studies, few comprehensive comparison studies have been performed to evaluate the effectiveness of different dimensionality reduction methods in scRNA-seq. 
We aim to fill this critical knowledge gap by providing a comparative evaluation of a variety of commonly used dimensionality reduction methods for scRNA-seq studies. Specifically, we compare 18 different dimensionality reduction methods on 30 publicly available scRNA-seq datasets that cover a range of sequencing techniques and sample sizes. We evaluate the performance of different dimensionality reduction methods for neighborhood preserving in terms of their ability to recover features of the original expression matrix, and for cell clustering and lineage reconstruction in terms of their accuracy and robustness. We also evaluate the computational scalability of different dimensionality reduction methods by recording their computational cost. Based on the comprehensive evaluation results, we provide important guidelines for choosing dimensionality reduction methods for scRNA-seq data analysis. We also provide all analysis scripts used in the present study at www.xzlab.org/reproduce.html.}, + copyright = {2019 The Author(s).}, + file = {/Users/laurent/Zotero/storage/UFUPTP7P/Sun et al. - 2019 - Accuracy, robustness and scalability of dimensiona.pdf;/Users/laurent/Zotero/storage/XEJ8957B/s13059-019-1898-6.html}, + journal = {Genome Biology}, + language = {en}, + number = {1} +} + +@article{sunBayesianMixtureModel2019, + title = {A {{Bayesian}} Mixture Model for Clustering Droplet-Based Single-Cell Transcriptomic Data from Population Studies}, + author = {Sun, Zhe and Chen, Li and Xin, Hongyi and Jiang, Yale and Huang, Qianhui and Cillo, Anthony R. and Tabib, Tracy and Kolls, Jay K. and Bruno, Tullia C. and Lafyatis, Robert and Vignali, Dario A. A. and Chen, Kong and Ding, Ying and Hu, Ming and Chen, Wei}, + year = {2019}, + month = dec, + volume = {10}, + issn = {2041-1723}, + doi = {10.1038/s41467-019-09639-3}, + file = {/Users/laurent/Zotero/storage/39VKYADH/Sun et al. 
- 2019 - A Bayesian mixture model for clustering droplet-ba.pdf;/Users/laurent/Zotero/storage/EBM27WXJ/Sun et al. - 2019 - A Bayesian mixture model for clustering droplet-ba.pdf}, + journal = {Nature Communications}, + language = {en}, + number = {1} +} + +@article{sunDIMMSCDirichletMixture2018, + title = {{{DIMM}}-{{SC}}: A {{Dirichlet}} Mixture Model for Clustering Droplet-Based Single Cell Transcriptomic Data}, + shorttitle = {{{DIMM}}-{{SC}}}, + author = {Sun, Zhe and Wang, Ting and Deng, Ke and Wang, Xiao-Feng and Lafyatis, Robert and Ding, Ying and Hu, Ming and Chen, Wei}, + year = {2018}, + month = jan, + volume = {34}, + pages = {139--146}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx490}, + abstract = {Motivation: Single cell transcriptome sequencing (scRNA-Seq) has become a revolutionary tool to study cellular and molecular processes at single cell resolution. Among existing technologies, the recently developed droplet-based platform enables efficient parallel processing of thousands of single cells with direct counting of transcript copies using Unique Molecular Identifier (UMI). Despite the technology advances, statistical methods and computational tools are still lacking for analyzing droplet-based scRNA-Seq data. Particularly, model-based approaches for clustering large-scale single cell transcriptomic data are still under-explored.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Sun et al. - 2018 - DIMM-SC a Dirichlet mixture model for clustering .pdf;/Users/laurent/Documents/bibliography/to_read/Sun et al. - 2018 - DIMM-SC a Dirichlet mixture model for clustering .pdf;/Users/laurent/Zotero/storage/6P77THFC/Sun et al. - 2018 - DIMM-SC a Dirichlet mixture model for clustering .pdf;/Users/laurent/Zotero/storage/SEA3HEZA/Sun et al. - 2018 - DIMM-SC a Dirichlet mixture model for clustering .pdf;/Users/laurent/Zotero/storage/UTFYKULG/Sun et al. 
- 2018 - DIMM-SC a Dirichlet mixture model for clustering .pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {1} +} + +@article{sunerClusteringMethodsSinglecell2019, + title = {Clustering Methods for Single-Cell {{RNA}}-Sequencing Expression Data: Performance Evaluation with Varying Sample Sizes and Cell Compositions}, + shorttitle = {Clustering Methods for Single-Cell {{RNA}}-Sequencing Expression Data}, + author = {Suner, Asl\i{}}, + year = {2019}, + volume = {18}, + issn = {1544-6115}, + doi = {10.1515/sagmb-2019-0004}, + abstract = {A number of specialized clustering methods have been developed so far for the accurate analysis of single-cell RNA-sequencing (scRNA-seq) expression data, and several reports have been published documenting the performance measures of these clustering methods under different conditions. However, to date, there are no available studies regarding the systematic evaluation of the performance measures of the clustering methods taking into consideration the sample size and cell composition of a given scRNA-seq dataset. Herein, a comprehensive performance evaluation study of 11 selected scRNA-seq clustering methods was performed using synthetic datasets with known sample sizes and number of subpopulations, as well as varying levels of transcriptome complexity. The results indicate that the overall performance of the clustering methods under study are highly dependent on the sample size and complexity of the scRNA-seq dataset. In most of the cases, better clustering performances were obtained as the number of cells in a given expression dataset was increased. 
The findings of this study also highlight the importance of sample size for the successful detection of rare cell subpopulations with an appropriate clustering tool.}, + journal = {Statistical Applications in Genetics and Molecular Biology}, + keywords = {clustering,performance evaluation,RNA sequencing,single cell}, + number = {5} +} + +@article{sunFastEfficientCountbased2019, + title = {A Fast and Efficient Count-Based Matrix Factorization Method for Detecting Cell Types from Single-Cell {{RNAseq}} Data}, + author = {Sun, Shiquan and Chen, Yabo and Liu, Yang and Shang, Xuequn}, + year = {2019}, + month = apr, + volume = {13}, + issn = {1752-0509}, + doi = {10.1186/s12918-019-0699-6}, + abstract = {Background: Single-cell RNA sequencing (scRNAseq) data always involves various unwanted variables, which would be able to mask the true signal to identify cell-types. More efficient way of dealing with this issue is to extract low dimension information from high dimensional gene expression data to represent cell-type structure. In the past two years, several powerful matrix factorization tools were developed for scRNAseq data, such as NMF, ZIFA, pCMF and ZINB-WaVE. But the existing approaches either are unable to directly model the raw count of scRNAseq data or are really time-consuming when handling a large number of cells (e.g. n {$>$}500). +Results: In this paper, we developed a fast and efficient count-based matrix factorization method (single-cell negative binomial matrix factorization, scNBMF) based on the TensorFlow framework to infer the low dimensional structure of cell types. To make our method scalable, we conducted a series of experiments on three public scRNAseq data sets, brain, embryonic stem, and pancreatic islet. The experimental results show that scNBMF is more powerful to detect cell types and 10 - 100 folds faster than the scRNAseq bespoke tools. 
+Conclusions: In this paper, we proposed a fast and efficient count-based matrix factorization method, scNBMF, which is more powerful for detecting cell type purposes. A series of experiments were performed on three public scRNAseq data sets. The results show that scNBMF is a more powerful tool in large-scale scRNAseq data analysis. scNBMF was implemented in R and Python, and the source code are freely available at https://github.com/sqsun.}, + file = {/Users/laurent/Zotero/storage/9SRT24RL/Sun et al. - 2019 - A fast and efficient count-based matrix factorizat.pdf;/Users/laurent/Zotero/storage/GA72T3ZX/Sun et al. - 2019 - A fast and efficient count-based matrix factorizat.pdf;/Users/laurent/Zotero/storage/IYZHPT6Y/Sun et al. - 2019 - A fast and efficient count-based matrix factorizat.pdf;/Users/laurent/Zotero/storage/W2VLQS62/Sun et al. - 2019 - A fast and efficient count-based matrix factorizat.pdf}, + journal = {BMC Systems Biology}, + language = {en}, + number = {S2} +} + +@article{sunNonlinearDynamicsFluctuations2019, + title = {The Nonlinear Dynamics and Fluctuations of {{mRNA}} Levels in Cell Cycle Coupled Transcription}, + author = {Sun, Qiwen and Jiao, Feng and Lin, Genghong and Yu, Jianshe and Tang, Moxun}, + year = {2019}, + month = apr, + volume = {15}, + pages = {e1007017}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1007017}, + abstract = {Gene transcription is a noisy process, and cell division cycle is an important source of gene transcription noise. In this work, we develop a mathematical approach by coupling transcription kinetics with cell division cycles to delineate how they are combined to regulate transcription output and noise. In view of gene dosage, a cell cycle is divided into an early stage S 1 and a late stage S 2. The analytical forms for the mean and the noise of mRNA numbers are given in each stage. 
The analysis based on these formulas predicts precisely the fold change r* of mRNA numbers from S 1 to S 2 measured in a mouse embryonic stem cell line. When transcription follows similar kinetics in both stages, r* buffers against DNA dosage variation and r* {$\in$} (1, 2). Numerical simulations suggest that increasing cell cycle durations up-regulates transcription with less noise, whereas rapid stage transitions induce highly noisy transcription. A minimization of the transcription noise is observed when transcription homeostasis is attained by varying a single kinetic rate. When the transcription level scales with cellular volume, either by reducing the transcription burst frequency or by increasing the burst size in S 2, the noise shows only a minor variation over a wide range of cell cycle stage durations. The reduction level in the burst frequency is nearly a constant, whereas the increase in the burst size is conceivably sensitive, when responding to a large random variation of the cell cycle durations and the gene duplication time.}, + file = {/Users/laurent/Zotero/storage/HDL92Z9S/Sun et al. - 2019 - The nonlinear dynamics and fluctuations of mRNA le.pdf;/Users/laurent/Zotero/storage/SKSQXXLU/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Cell cycle and cell division,DNA transcription,Embryonic stem cells,G1 phase,Gene duplication,Homeostasis,Messenger RNA,Synthesis phase}, + language = {en}, + number = {4} +} + +@article{sunVarMatchRobustMatching2016, + title = {{{VarMatch}}: Robust Matching of Small Variant Datasets Using Flexible Scoring Schemes}, + author = {Sun, Chen and Medvedev, Paul}, + year = {2016}, + pages = {8}, + abstract = {Motivation: Small variant calling is an important component of many analyses, and, in many instances, it is important to determine the set of variants which appear in multiple callsets. Variant matching is complicated by variants that have multiple equivalent representations. 
Normalization and decomposition algorithms have been proposed, but are not robust to different representation of complex variants. Variant matching is also usually done to maximize the number of matches, as opposed to other optimization criteria.}, + file = {/Users/laurent/Documents/bibliography/to_read/Sun and Medvedev - 2016 - VarMatch robust matching of small variant dataset.pdf}, + language = {en} +} + +@article{suomiROTSPackageReproducibilityoptimized2017, + title = {{{ROTS}}: {{An R}} Package for Reproducibility-Optimized Statistical Testing}, + shorttitle = {{{ROTS}}}, + author = {Suomi, Tomi and Seyednasrollah, Fatemeh and Jaakkola, Maria K. and Faux, Thomas and Elo, Laura L.}, + editor = {Poisot, Timoth{\'e}e}, + year = {2017}, + month = may, + volume = {13}, + pages = {e1005562}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005562}, + abstract = {Differential expression analysis is one of the most common types of analyses performed on various biological data (e.g. RNA-seq or mass spectrometry proteomics). It is the process that detects features, such as genes or proteins, showing statistically significant differences between the sample groups under comparison. A major challenge in the analysis is the choice of an appropriate test statistic, as different statistics have been shown to perform well in different datasets. To this end, the reproducibility-optimized test statistic (ROTS) adjusts a modified t-statistic according to the inherent properties of the data and provides a ranking of the features based on their statistical evidence for differential expression between two groups. ROTS has already been successfully applied in a range of different studies from transcriptomics to proteomics, showing competitive performance against other state-of-the-art methods. To promote its widespread use, we introduce here a Bioconductor R package for performing ROTS analysis conveniently on different types of omics data. 
To illustrate the benefits of ROTS in various applications, we present three case studies, involving proteomics and RNA-seq data from public repositories, including both bulk and single cell data. The package is freely available from Bioconductor (https://www.bioconductor.org/packages/ROTS).}, + file = {/Users/laurent/Documents/bibliography/to_read/Suomi et al. - 2017 - ROTS An R package for reproducibility-optimized s.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {5} +} + +@book{TabletNextGeneration, + title = {Tablet\textemdash{}next Generation Sequence Assembly Visualization} +} + +@article{takedaSimilarityAssessmentSelecting2018, + title = {Beyond Similarity Assessment: Selecting the Optimal Model for Sequence Alignment via the {{Factorized Asymptotic Bayesian}} Algorithm}, + shorttitle = {Beyond Similarity Assessment}, + author = {Takeda, Taikai and Hamada, Michiaki}, + editor = {Hancock, John}, + year = {2018}, + month = feb, + volume = {34}, + pages = {576--584}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx643}, + abstract = {Motivation: Pair Hidden Markov Models (PHMMs) are probabilistic models used for pairwise sequence alignment, a quintessential problem in bioinformatics. PHMMs include three types of hidden states: match, insertion and deletion. Most previous studies have used one or two hidden states for each PHMM state type. 
However, few studies have examined the number of states suitable for representing sequence data or improving alignment accuracy.}, + file = {/Users/laurent/Documents/bibliography/to_read/Takeda and Hamada - 2018 - Beyond similarity assessment selecting the optima.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {4} +} + +@article{talwarAutoImputeAutoencoderBased2018, + title = {{{AutoImpute}}: {{Autoencoder}} Based Imputation of Single-Cell {{RNA}}-Seq Data}, + shorttitle = {{{AutoImpute}}}, + author = {Talwar, Divyanshu and Mongia, Aanchal and Sengupta, Debarka and Majumdar, Angshul}, + year = {2018}, + month = nov, + volume = {8}, + pages = {16329}, + issn = {2045-2322}, + doi = {10.1038/s41598-018-34688-x}, + abstract = {The emergence of single-cell RNA sequencing (scRNA-seq) technologies has enabled us to measure the expression levels of thousands of genes at single-cell resolution. However, insufficient quantities of starting RNA in the individual cells cause significant dropout events, introducing a large number of zero counts in the expression matrix. To circumvent this, we developed an autoencoder-based sparse gene expression matrix imputation method. AutoImpute, which learns the inherent distribution of the input scRNA-seq data and imputes the missing values accordingly with minimal modification to the biologically silent genes. When tested on real scRNA-seq datasets, AutoImpute performed competitively wrt., the existing single-cell imputation methods, on the grounds of expression recovery from subsampled data, cell-clustering accuracy, variance stabilization and cell-type separability.}, + copyright = {2018 The Author(s)}, + file = {/Users/laurent/Zotero/storage/Z65AF9PD/Talwar et al. 
- 2018 - AutoImpute Autoencoder based imputation of single.pdf;/Users/laurent/Zotero/storage/UWXSVBH4/s41598-018-34688-x.html}, + journal = {Scientific Reports}, + language = {En}, + number = {1} +} + +@article{tambaIterativeSureIndependence2017, + title = {Iterative Sure Independence Screening {{EM}}-{{Bayesian LASSO}} Algorithm for Multi-Locus Genome-Wide Association Studies}, + author = {Tamba, Cox Lwaka and Ni, Yuan-Li and Zhang, Yuan-Ming}, + editor = {Komarova, Natalia L.}, + year = {2017}, + month = jan, + volume = {13}, + pages = {e1005357}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005357}, + abstract = {Genome-wide association study (GWAS) entails examining a large number of single nucleotide polymorphisms (SNPs) in a limited sample with hundreds of individuals, implying a variable selection problem in the high dimensional dataset. Although many single-locus GWAS approaches under polygenic background and population structure controls have been widely used, some significant loci fail to be detected. In this study, we used an iterative modified-sure independence screening (ISIS) approach in reducing the number of SNPs to a moderate size. Expectation-Maximization (EM)-Bayesian least absolute shrinkage and selection operator (BLASSO) was used to estimate all the selected SNP effects for true quantitative trait nucleotide (QTN) detection. This method is referred to as ISIS EMBLASSO algorithm. Monte Carlo simulation studies validated the new method, which has the highest empirical power in QTN detection and the highest accuracy in QTN effect estimation, and it is the fastest, as compared with efficient mixed-model association (EMMA), smoothly clipped absolute deviation (SCAD), fixed and random model circulating probability unification (FarmCPU), and multi-locus random-SNP-effect mixed linear model (mrMLM). 
To further demonstrate the new method, six flowering time traits in Arabidopsis thaliana were re-analyzed by four methods (New method, EMMA, FarmCPU, and mrMLM). As a result, the new method identified most previously reported genes. Therefore, the new method is a good alternative for multi-locus GWAS.}, + file = {/Users/laurent/Documents/bibliography/GWAS/Tamba et al. - 2017 - Iterative sure independence screening EM-Bayesian .pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {1} +} + +@article{tanayScalingSinglecellGenomics2017, + title = {Scaling Single-Cell Genomics from Phenomenology to Mechanism}, + author = {Tanay, Amos and Regev, Aviv}, + year = {2017}, + month = jan, + volume = {541}, + pages = {331--338}, + issn = {0028-0836, 1476-4687}, + doi = {10.1038/nature21350}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Tanay and Regev - 2017 - Scaling single-cell genomics from phenomenology to.pdf;/Users/laurent/Zotero/storage/9JL3D8B3/Tanay and Regev - 2017 - Scaling single-cell genomics from phenomenology to.pdf;/Users/laurent/Zotero/storage/ITV7XGNL/Tanay and Regev - 2017 - Scaling single-cell genomics from phenomenology to.pdf;/Users/laurent/Zotero/storage/TW3XZUKS/Tanay and Regev - 2017 - Scaling single-cell genomics from phenomenology to.pdf}, + journal = {Nature}, + language = {en}, + number = {7637} +} + +@article{tanFindingNemoHybrid2018, + title = {Finding {{Nemo}}: Hybrid Assembly with {{Oxford Nanopore}} and {{Illumina}} Reads Greatly Improves the Clownfish ({{Amphiprion}} Ocellaris) Genome Assembly}, + shorttitle = {Finding {{Nemo}}}, + author = {Tan, Mun Hua and Austin, Christopher M. and Hammer, Michael P. and Lee, Yin Peng and Croft, Laurence J. and Gan, Han Ming}, + year = {2018}, + month = mar, + volume = {7}, + doi = {10.1093/gigascience/gix137}, + abstract = {AbstractBackground. 
Some of the most widely recognized coral reef fishes are clownfish or anemonefish, members of the family Pomacentridae (subfamily: Amphipri}, + journal = {GigaScience}, + language = {en}, + number = {3} +} + +@article{tangBayNormBayesianGene2020, + title = {{{bayNorm}}: {{Bayesian}} Gene Expression Recovery, Imputation and Normalization for Single-Cell {{RNA}}-Sequencing Data}, + shorttitle = {{{bayNorm}}}, + author = {Tang, Wenhao and Bertaux, Fran{\c c}ois and Thomas, Philipp and Stefanelli, Claire and Saint, Malika and Marguerat, Samuel and Shahrezaei, Vahid}, + year = {2020}, + month = feb, + volume = {36}, + pages = {1174--1181}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz726}, + abstract = {AbstractMotivation. Normalization of single-cell RNA-sequencing (scRNA-seq) data is a prerequisite to their interpretation. The marked technical variability, h}, + file = {/Users/laurent/Zotero/storage/NTH892XE/Tang et al. - 2020 - bayNorm Bayesian gene expression recovery, imputa.pdf;/Users/laurent/Zotero/storage/G3UV8XQW/5581401.html}, + journal = {Bioinformatics}, + language = {en}, + number = {4} +} + +@article{tangSpikeandslabLassoCox2017, + title = {The Spike-and-Slab Lasso {{Cox}} Model for Survival Prediction and Associated Genes Detection}, + author = {Tang, Zaixiang and Shen, Yueping and Zhang, Xinyan and Yi, Nengjun}, + year = {2017}, + month = sep, + volume = {33}, + pages = {2799--2807}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx300}, + abstract = {Motivation: Large-scale molecular profiling data have offered extraordinary opportunities to improve survival prediction of cancers and other diseases and to detect disease associated genes. However, there are considerable challenges in analyzing large-scale molecular data.}, + file = {/Users/laurent/Documents/bibliography/to_read/Tang et al. 
- 2017 - The spike-and-slab lasso Cox model for survival pr.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {18} +} + +@article{taoLogisticalConstraintsLead2018, + title = {Logistical Constraints Lead to an Intermediate Optimum in Outbreak Response Vaccination}, + author = {Tao, Yun and Shea, Katriona and Ferrari, Matthew}, + year = {2018}, + month = may, + volume = {14}, + pages = {e1006161}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1006161}, + abstract = {Dynamic models in disease ecology have historically evaluated vaccination strategies under the assumption that they are implemented homogeneously in space and time. However, this approach fails to formally account for operational and logistical constraints inherent in the distribution of vaccination to the population at risk. Thus, feedback between the dynamic processes of vaccine distribution and transmission might be overlooked. Here, we present a spatially explicit, stochastic Susceptible-Infected-Recovered-Vaccinated model that highlights the density-dependence and spatial constraints of various diffusive strategies of vaccination during an outbreak. The model integrates an agent-based process of disease spread with a partial differential process of vaccination deployment. We characterize the vaccination response in terms of a diffusion rate that describes the distribution of vaccination to the population at risk from a central location. This generates an explicit trade-off between slow diffusion, which concentrates effort near the central location, and fast diffusion, which spreads a fixed vaccination effort thinly over a large area. We use stochastic simulation to identify the optimum vaccination diffusion rate as a function of population density, interaction scale, transmissibility, and vaccine intensity. 
Our results show that, conditional on a timely response, the optimal strategy for minimizing outbreak size is to distribute vaccination resource at an intermediate rate: fast enough to outpace the epidemic, but slow enough to achieve local herd immunity. If the response is delayed, however, the optimal strategy for minimizing outbreak size changes to a rapidly diffusive distribution of vaccination effort. The latter may also result in significantly larger outbreaks, thus suggesting a benefit of allocating resources to timely outbreak detection and response.}, + file = {/Users/laurent/Zotero/storage/GZITFPFZ/Tao et al. - 2018 - Logistical constraints lead to an intermediate opt.pdf;/Users/laurent/Zotero/storage/EGCC52Q4/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Cholera vaccines,Disease dynamics,Epidemiology,Immunity,Infectious disease control,Population density,Vaccination and immunization,Vaccines}, + language = {en}, + number = {5} +} + +@article{tarasovSambambaFastProcessing2015, + title = {Sambamba: Fast Processing of {{NGS}} Alignment Formats}, + shorttitle = {Sambamba}, + author = {Tarasov, Artem and Vilella, Albert J. and Cuppen, Edwin and Nijman, Isaac J. and Prins, Pjotr}, + year = {2015}, + month = jun, + volume = {31}, + pages = {2032--2034}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btv098}, + abstract = {Summary: Sambamba is a high-performance robust tool and library for working with SAM, BAM and CRAM sequence alignment files; the most common file formats for aligned next generation sequencing data. Sambamba is a faster alternative to samtools that exploits multi-core processing and dramatically reduces processing time. Sambamba is being adopted at sequencing centers, not only because of its speed, but also because of additional functionality, including coverage analysis and powerful filtering capability., Availability and implementation: Sambamba is free and open source software, available under a GPLv2 license. 
Sambamba can be downloaded and installed from http://www.open-bio.org/wiki/Sambamba., Sambamba v0.5.0 was released with doi:10.5281/zenodo.13200., Contact: j.c.p.prins@umcutrecht.nl}, + file = {/Users/laurent/Zotero/storage/DP9B846S/Tarasov et al. - 2015 - Sambamba fast processing of NGS alignment formats.pdf}, + journal = {Bioinformatics}, + number = {12}, + pmcid = {PMC4765878}, + pmid = {25697820} +} + +@article{tarbellHMMRATACHiddenMarkov2018, + title = {{{HMMRATAC}}, the {{Hidden Markov ModeleR}} for {{ATAC}}-Seq}, + author = {Tarbell, Evan D. and Liu, Tao}, + year = {2018}, + month = sep, + pages = {306621}, + doi = {10.1101/306621}, + abstract = {ATAC-seq has been widely adopted to identify accessible chromatin regions across the genome. However, current data analysis still utilizes approaches originally designed for ChIP-seq or DNase-seq, without taking into account the transposase digested DNA fragments that contain additional nucleosome positioning information. We present the first dedicated ATAC-seq analysis tool, a semi-supervised machine learning approach named HMMRATAC. HMMRATAC splits a single ATAC-seq dataset into nucleosome-free and nucleosome-enriched signals, learns the unique chromatin structure around accessible regions, and then predicts accessible regions across the entire genome. We show that HMMRATAC outperforms the popular peak-calling algorithms on published human and mouse ATAC-seq datasets.}, + copyright = {\textcopyright{} 2018, Posted by Cold Spring Harbor Laboratory. 
This pre-print is available under a Creative Commons License (Attribution-NoDerivs 4.0 International), CC BY-ND 4.0, as described at http://creativecommons.org/licenses/by-nd/4.0/}, + file = {/Users/laurent/Zotero/storage/YW7NXLAB/Tarbell and Liu - 2018 - HMMRATAC, the Hidden Markov ModeleR for ATAC-seq.pdf;/Users/laurent/Zotero/storage/KMEJLBCK/306621.html}, + journal = {bioRxiv}, + language = {en} +} + +@article{taschukTenSimpleRules2017, + title = {Ten Simple Rules for Making Research Software More Robust}, + author = {Taschuk, Morgan and Wilson, Greg}, + year = {2017}, + month = apr, + volume = {13}, + pages = {e1005412}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005412}, + abstract = {Software produced for research, published and otherwise, suffers from a number of common problems that make it difficult or impossible to run outside the original institution or even off the primary developer's computer. We present ten simple rules to make such software robust enough to be run by anyone, anywhere, and thereby delight your users and collaborators.}, + file = {/Users/laurent/Documents/bibliography/to_read/Taschuk and Wilson - 2017 - Ten simple rules for making research software more.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {4} +} + +@article{tasicCellTypesBehaving2018, + title = {Cell Types Behaving in Their Natural Habitat}, + author = {Tasic, Bosiljka and Nicovich, Philip R.}, + year = {2018}, + month = nov, + volume = {362}, + pages = {749--750}, + issn = {0036-8075, 1095-9203}, + doi = {10.1126/science.aav4841}, + abstract = {A full understanding of a complex system is difficult, perhaps impossible, to accomplish without an inventory of the components involved. When that system is an organ in an organism, the parts list becomes a census of cell types, including their identity, number, location, and function. On page 792 of this issue, Moffitt et al. 
(1) demonstrate an advanced method for in situ profiling of gene expression within the preoptic region of the intact mouse hypothalamus. Their approach not only yields a census of cell types in this region of the brain, but also assesses which cell types are activated during certain behaviors. This region of the brain is involved in the regulation of homeostasis and social behaviors such as aggression, sex, and parenting. +Defining cell types in situ connects gene expression, anatomy, and function during certain behaviors +Defining cell types in situ connects gene expression, anatomy, and function during certain behaviors}, + copyright = {Copyright \textcopyright{} 2018, American Association for the Advancement of Science. http://www.sciencemag.org/about/science-licenses-journal-article-reuseThis is an article distributed under the terms of the Science Journals Default License.}, + file = {/Users/laurent/Zotero/storage/QI5CG4C6/Tasic and Nicovich - 2018 - Cell types behaving in their natural habitat.pdf;/Users/laurent/Zotero/storage/3IN75K7K/749.html;/Users/laurent/Zotero/storage/JKXV4P3H/749.html}, + journal = {Science}, + language = {en}, + number = {6416}, + pmid = {30442791} +} + +@article{taylor-kingDynamicDistributionDecomposition2020, + title = {Dynamic Distribution Decomposition for Single-Cell Snapshot Time Series Identifies Subpopulations and Trajectories during {{iPSC}} Reprogramming}, + author = {{Taylor-King}, Jake P. and Riseth, Asbj{\o}rn N. and Macnair, Will and Claassen, Manfred}, + year = {2020}, + month = jan, + volume = {16}, + pages = {e1007491}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1007491}, + abstract = {Recent high-dimensional single-cell technologies such as mass cytometry are enabling time series experiments to monitor the temporal evolution of cell state distributions and to identify dynamically important cell states, such as fate decision states in differentiation. 
However, these technologies are destructive, and require analysis approaches that temporally map between cell state distributions across time points. Current approaches to approximate the single-cell time series as a dynamical system suffer from too restrictive assumptions about the type of kinetics, or link together pairs of sequential measurements in a discontinuous fashion. We propose Dynamic Distribution Decomposition (DDD), an operator approximation approach to infer a continuous distribution map between time points. On the basis of single-cell snapshot time series data, DDD approximates the continuous time Perron-Frobenius operator by means of a finite set of basis functions. This procedure can be interpreted as a continuous time Markov chain over a continuum of states. By only assuming a memoryless Markov (autonomous) process, the types of dynamics represented are more general than those represented by other common models, e.g., chemical reaction networks, stochastic differential equations. Furthermore, we can a posteriori check whether the autonomy assumptions are valid by calculation of prediction error\textemdash{}which we show gives a measure of autonomy within the studied system. The continuity and autonomy assumptions ensure that the same dynamical system maps between all time points, not arbitrarily changing at each time point. We demonstrate the ability of DDD to reconstruct dynamically important cell states and their transitions both on synthetic data, as well as on mass cytometry time series of iPSC reprogramming of a fibroblast system. We use DDD to find previously identified subpopulations of cells and to visualise differentiation trajectories. 
Dynamic Distribution Decomposition allows interpretation of high-dimensional snapshot time series data as a low-dimensional Markov process, thereby enabling an interpretable dynamics analysis for a variety of biological processes by means of identifying their dynamically important cell states.}, + file = {/Users/laurent/Zotero/storage/SELWR6G7/Taylor-King et al. - 2020 - Dynamic distribution decomposition for single-cell.pdf;/Users/laurent/Zotero/storage/3DZ3LNCT/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Cell differentiation,Cytometry,Data visualization,Dynamical systems,Eigenvalues,Fibroblasts,Graphs,Probability density}, + language = {en}, + number = {1} +} + +@article{tengBenchmarkRNAseqQuantification2016, + title = {A Benchmark for {{RNA}}-Seq Quantification Pipelines}, + author = {Teng, Mingxiang and Love, Michael I. and Davis, Carrie A. and Djebali, Sarah and Dobin, Alexander and Graveley, Brenton R. and Li, Sheng and Mason, Christopher E. and Olson, Sara and Pervouchine, Dmitri and Sloan, Cricket A. and Wei, Xintao and Zhan, Lijun and Irizarry, Rafael A.}, + year = {2016}, + month = dec, + volume = {17}, + issn = {1474-760X}, + doi = {10.1186/s13059-016-0940-1}, + abstract = {Obtaining RNA-seq measurements involves a complex data analytical process with a large number of competing algorithms as options. There is much debate about which of these methods provides the best approach. Unfortunately, it is currently difficult to evaluate their performance due in part to a lack of sensitive assessment metrics. We present a series of statistical summaries and plots to evaluate the performance in terms of specificity and sensitivity, available as a R/Bioconductor package (http://bioconductor.org/packages/rnaseqcomp). Using two independent datasets, we assessed seven competing pipelines. 
Performance was generally poor, with two methods clearly underperforming and RSEM slightly outperforming the rest.}, + file = {/Users/laurent/Documents/bibliography/RNASeq/Teng et al. - 2016 - A benchmark for RNA-seq quantification pipelines.pdf}, + journal = {Genome Biology}, + language = {en}, + number = {1} +} + +@misc{TenQuickTips, + title = {Ten Quick Tips for Delivering Programming Lessons}, + file = {/Users/laurent/Zotero/storage/4G4VACE6/article.html}, + howpublished = {https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1007433} +} + +@article{theplosbiologystaffeditorsImportanceBeingSecond2018, + title = {The Importance of Being Second}, + author = {{The PLOS Biology Staff Editors}}, + year = {2018}, + month = jan, + volume = {16}, + pages = {e2005203}, + issn = {1545-7885}, + doi = {10.1371/journal.pbio.2005203}, + file = {/Users/laurent/Documents/bibliography/to_read/The PLOS Biology Staff Editors - 2018 - The importance of being second.pdf}, + journal = {PLOS Biology}, + language = {en}, + number = {1} +} + +@article{theploscomputationalbiologystaffCorrectionOrderUncertainty2017, + title = {Correction: {{Order Under Uncertainty}}: {{Robust Differential Expression Analysis Using Probabilistic Models}} for {{Pseudotime Inference}}}, + shorttitle = {Correction}, + author = {{The PLOS Computational Biology Staff}}, + year = {2017}, + month = apr, + volume = {13}, + pages = {e1005477}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005477}, + file = {/Users/laurent/Documents/bibliography/to_read/The PLOS Computational Biology Staff - 2017 - Correction Order Under Uncertainty Robust Differ.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {4} +} + +@article{thomasFeaturesThatDefine2016, + title = {Features That Define the Best {{ChIP}}-Seq Peak Calling Algorithms}, + author = {Thomas, Reuben and Thomas, Sean and Holloway, Alisha K. 
and Pollard, Katherine S.}, + year = {2016}, + month = may, + pages = {bbw035}, + issn = {1467-5463, 1477-4054}, + doi = {10.1093/bib/bbw035}, + abstract = {Chromatin immunoprecipitation followed by sequencing (ChIP-seq) is an important tool for studying gene regulatory proteins, such as transcription factors and histones. Peak calling is one of the first steps in the analysis of these data. Peak calling consists of two sub-problems: identifying candidate peaks and testing candidate peaks for statistical significance. We surveyed 30 methods and identified 12 features of the two sub-problems that distinguish methods from each other. We picked six methods [GEM, MACS2, MUSIC, BCP, Threshold-based method (TM) and ZINBA] that span this feature space and used a combination of 300 simulated ChIP-seq data sets, 3 real data sets and mathematical analyses to identify features of methods that allow some to perform better than the others. We prove that methods that explicitly combine the signals from ChIP and input samples are less powerful than methods that do not. Methods that use windows of different sizes are more powerful than the ones that do not. For statistical testing of candidate peaks, methods that use a Poisson test to rank their candidate peaks are more powerful than those that use a Binomial test. BCP and MACS2 have the best operating characteristics on simulated transcription factor binding data. GEM has the highest fraction of the top 500 peaks containing the binding motif of the immunoprecipitated factor, with 50\% of its peaks within 10 base pairs of a motif. BCP and MUSIC perform best on histone data. These findings provide guidance and rationale for selecting the best peak caller for a given application.}, + file = {/Users/laurent/Documents/bibliography/ChipSeq/Thomas et al. - 2016 - Features that define the best ChIP-seq peak callin.pdf;/Users/laurent/Documents/bibliography/to_read/Thomas et al. 
- 2016 - Features that define the best ChIP-seq peak callin.pdf}, + journal = {Briefings in Bioinformatics}, + language = {en} +} + +@article{thorpeSharedTranscriptionalControl2018, + title = {Shared {{Transcriptional Control}} and {{Disparate Gain}} and {{Loss}} of {{Aphid Parasitism Genes}}}, + author = {Thorpe, Peter and {Escudero-Martinez}, Carmen M. and Cock, Peter J. A. and {Eves-van den Akker}, Sebastian and Bos, Jorunn I. B.}, + year = {2018}, + month = oct, + volume = {10}, + pages = {2716--2733}, + doi = {10.1093/gbe/evy183}, + abstract = {Abstract. Aphids are a diverse group of taxa that contain agronomically important species, which vary in their host range and ability to infest crop plants. Th}, + file = {/Users/laurent/Zotero/storage/Y6WMNCCQ/Thorpe et al. - 2018 - Shared Transcriptional Control and Disparate Gain .pdf;/Users/laurent/Zotero/storage/A5ECPXIW/5079402.html}, + journal = {Genome Biology and Evolution}, + language = {en}, + number = {10} +} + +@article{tianClusteringSinglecellRNAseq2019, + title = {Clustering Single-Cell {{RNA}}-Seq Data with a Model-Based Deep Learning Approach}, + author = {Tian, Tian and Wan, Ji and Song, Qi and Wei, Zhi}, + year = {2019}, + month = apr, + volume = {1}, + pages = {191--198}, + issn = {2522-5839}, + doi = {10.1038/s42256-019-0037-0}, + file = {/Users/laurent/Zotero/storage/PXPXPVG3/Tian et al. - 2019 - Clustering single-cell RNA-seq data with a model-b.pdf;/Users/laurent/Zotero/storage/UY68BSJK/Tian et al. 
- 2019 - Clustering single-cell RNA-seq data with a model-b.pdf}, + journal = {Nature Machine Intelligence}, + language = {en}, + number = {4} +} + +@article{tianScPipeFlexibleBioconductor2018, + title = {{{scPipe}}: A Flexible {{R}}/{{Bioconductor}} Preprocessing Pipeline for Single-Cell {{RNA}}-Sequencing Data}, + shorttitle = {{{scPipe}}}, + author = {Tian, Luyi and Su, Shian and Dong, Xueyi and {Amann-Zalcenstein}, Daniela and Biben, Christine and Seidi, Azadeh and Hilton, Douglas J and Naik, Shalin H. and Ritchie, Matthew E.}, + year = {2018}, + month = mar, + doi = {10.1101/175927}, + abstract = {Single-cell RNA sequencing (scRNA-seq) technology allows researchers to profile the transcriptomes of thousands of cells simultaneously. Protocols that incorporate both designed and random barcodes have greatly increased the throughput of scRNA-seq, but give rise to a more complex data structure. There is a need for new tools that can handle the various barcoding strategies used by different protocols and exploit this information for quality assessment at the sample-level and provide effective visualization of these results in preparation for higher-level analyses.}, + file = {/Users/laurent/Documents/bibliography/to_read/Tian et al. - 2018 - scPipe a flexible RBioconductor preprocessing pi.pdf}, + language = {en} +} + +@article{tianScPipeFlexibleBioconductor2018a, + title = {{{scPipe}}: {{A}} Flexible {{R}}/{{Bioconductor}} Preprocessing Pipeline for Single-Cell {{RNA}}-Sequencing Data}, + shorttitle = {{{scPipe}}}, + author = {Tian, Luyi and Su, Shian and Dong, Xueyi and {Amann-Zalcenstein}, Daniela and Biben, Christine and Seidi, Azadeh and Hilton, Douglas J. and Naik, Shalin H. 
and Ritchie, Matthew E.}, + year = {2018}, + month = aug, + volume = {14}, + pages = {e1006361}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1006361}, + abstract = {Single-cell RNA sequencing (scRNA-seq) technology allows researchers to profile the transcriptomes of thousands of cells simultaneously. Protocols that incorporate both designed and random barcodes have greatly increased the throughput of scRNA-seq, but give rise to a more complex data structure. There is a need for new tools that can handle the various barcoding strategies used by different protocols and exploit this information for quality assessment at the sample-level and provide effective visualization of these results in preparation for higher-level analyses. To this end, we developed scPipe, an R/Bioconductor package that integrates barcode demultiplexing, read alignment, UMI-aware gene-level quantification and quality control of raw sequencing data generated by multiple protocols that include CEL-seq, MARS-seq, Chromium 10X, Drop-seq and Smart-seq. scPipe produces a count matrix that is essential for downstream analysis along with an HTML report that summarises data quality. These results can be used as input for downstream analyses including normalization, visualization and statistical testing. scPipe performs this processing in a few simple R commands, promoting reproducible analysis of single-cell data that is compatible with the emerging suite of open-source scRNA-seq analysis tools available in R/Bioconductor and beyond. The scPipe R package is available for download from https://www.bioconductor.org/packages/scPipe.}, + file = {/Users/laurent/Zotero/storage/3IDFNKAR/Tian et al. 
- 2018 - scPipe A flexible RBioconductor preprocessing pi.pdf;/Users/laurent/Zotero/storage/CAJQHSNA/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Chromium,Data processing,Exon mapping,Gene expression,Preprocessing,Quality control,Sequence alignment,Software tools}, + language = {en}, + number = {8} +} + +@article{tiberiBayesianInferenceStochastic2018, + title = {Bayesian Inference on Stochastic Gene Transcription from Flow Cytometry Data}, + author = {Tiberi, Simone and Walsh, Mark and Cavallaro, Massimo and Hebenstreit, Daniel and Finkenst{\"a}dt, B{\"a}rbel}, + year = {2018}, + month = sep, + volume = {34}, + pages = {i647--i655}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty568}, + abstract = {AbstractMotivation. Transcription in single cells is an inherently stochastic process as mRNA levels vary greatly between cells, even for genetically identical}, + file = {/Users/laurent/Zotero/storage/ILJL7X2C/Tiberi et al. - 2018 - Bayesian inference on stochastic gene transcriptio.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{tiniMultiomicsIntegrationComparison2019, + title = {Multi-Omics Integration\textemdash{}a Comparison of Unsupervised Clustering Methodologies}, + author = {Tini, Giulia and Marchetti, Luca and Priami, Corrado and {Scott-Boyer}, Marie-Pier}, + year = {2019}, + month = jul, + volume = {20}, + pages = {1269--1279}, + issn = {1467-5463}, + doi = {10.1093/bib/bbx167}, + abstract = {Abstract. 
With the recent developments in the field of multi-omics integration, the interest in factors such as data preprocessing, choice of the integration m}, + file = {/Users/laurent/Zotero/storage/U9EF4MBW/4758623.html}, + journal = {Briefings in Bioinformatics}, + language = {en}, + number = {4} +} + +@article{torkamanehDepthFinderToolDetermine2020, + title = {{{DepthFinder}}: A Tool to Determine the Optimal Read Depth for Reduced-Representation Sequencing}, + shorttitle = {{{DepthFinder}}}, + author = {Torkamaneh, Davoud and Laroche, J{\'e}r{\^o}me and Boyle, Brian and Belzile, Fran{\c c}ois}, + year = {2020}, + month = jan, + volume = {36}, + pages = {26--32}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz473}, + abstract = {AbstractMotivation. Identification of DNA sequence variations such as single nucleotide polymorphisms (SNPs) is a fundamental step toward genetic studies. Redu}, + file = {/Users/laurent/Zotero/storage/PIGUYJZC/Torkamaneh et al. - 2020 - DepthFinder a tool to determine the optimal read .pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {1} +} + +@article{torrenteClustCompBioconductorPackage2017, + title = {{{clustComp}}, a Bioconductor Package for the Comparison of Clustering Results}, + author = {Torrente, Aurora and Brazma, Alvis}, + year = {2017}, + month = dec, + volume = {33}, + pages = {4001--4003}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx532}, + abstract = {Summary: clustComp is an open source Bioconductor package that implements different techniques for the comparison of two gene expression clustering results. These include flat versus flat and hierarchical versus flat comparisons. The visualization of the similarities is provided by means of a bipartite graph, whose layout is heuristically optimized. 
Its flexibility allows a suitable visualization for both small and large datasets.}, + file = {/Users/laurent/Documents/bibliography/to_read/Torrente and Brazma - 2017 - clustComp, a bioconductor package for the comparis.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {24} +} + +@article{trapnellDefiningCellTypes, + title = {Defining Cell Types and States with Single-Cell Genomics}, + author = {Trapnell, Cole}, + pages = {9}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Trapnell - Defining cell types and states with single-cell ge.pdf;/Users/laurent/Zotero/storage/CLRM76K3/Trapnell - Defining cell types and states with single-cell ge.pdf;/Users/laurent/Zotero/storage/FENIWNAF/Trapnell - 2015 - Defining cell types and states with single-cell ge.pdf;/Users/laurent/Zotero/storage/VK5HYWUR/Trapnell - 2015 - Defining cell types and states with single-cell ge.pdf}, + language = {en} +} + +@article{tsoucasGiniClust2ClusterawareWeighted2018, + title = {{{GiniClust2}}: A Cluster-Aware, Weighted Ensemble Clustering Method for Cell-Type Detection}, + shorttitle = {{{GiniClust2}}}, + author = {Tsoucas, Daphne and Yuan, Guo-Cheng}, + year = {2018}, + month = may, + volume = {19}, + pages = {58}, + issn = {1474-760X}, + doi = {10.1186/s13059-018-1431-3}, + abstract = {Single-cell analysis is a powerful tool for dissecting the cellular composition within a tissue or organ. However, it remains difficult to detect rare and common cell types at the same time. Here, we present a new computational method, GiniClust2, to overcome this challenge. GiniClust2 combines the strengths of two complementary approaches, using the Gini index and Fano factor, respectively, through a cluster-aware, weighted ensemble clustering technique. GiniClust2 successfully identifies both common and rare cell types in diverse datasets, outperforming existing methods. 
GiniClust2 is scalable to large datasets.}, + file = {/Users/laurent/Zotero/storage/F48A5W39/Tsoucas and Yuan - 2018 - GiniClust2 a cluster-aware, weighted ensemble clu.pdf;/Users/laurent/Zotero/storage/JRQYQ9A9/Tsoucas and Yuan - 2018 - GiniClust2 a cluster-aware, weighted ensemble clu.pdf;/Users/laurent/Zotero/storage/YKMFEPFW/Tsoucas and Yuan - 2018 - GiniClust2 a cluster-aware, weighted ensemble clu.pdf;/Users/laurent/Zotero/storage/IN262XWE/s13059-018-1431-3.html}, + journal = {Genome Biology}, + number = {1} +} + +@article{tsuchiyaSystemIdentificationSignaling2017, + title = {System Identification of Signaling Dependent Gene Expression with Different Time-Scale Data}, + author = {Tsuchiya, Takaho and Fujii, Masashi and Matsuda, Naoki and Kunida, Katsuyuki and Uda, Shinsuke and Kubota, Hiroyuki and Konishi, Katsumi and Kuroda, Shinya}, + editor = {Hoffmann, Alexander}, + year = {2017}, + month = dec, + volume = {13}, + pages = {e1005913}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005913}, + abstract = {Cells decode information of signaling activation at a scale of tens of minutes by downstream gene expression with a scale of hours to days, leading to cell fate decisions such as cell differentiation. However, no system identification method with such different time scales exists. Here we used compressed sensing technology and developed a system identification method using data of different time scales by recovering signals of missing time points. We measured phosphorylation of ERK and CREB, immediate early gene expression products, and mRNAs of decoder genes for neurite elongation in PC12 cell differentiation and performed system identification, revealing the input\textendash{}output relationships between signaling and gene expression with sensitivity such as graded or switch-like response and with time delay and gain, representing signal transfer efficiency. We predicted and validated the identified system using pharmacological perturbation. 
Thus, we provide a versatile method for system identification using data with different time scales.}, + file = {/Users/laurent/Documents/bibliography/to_read/Tsuchiya et al. - 2017 - System identification of signaling dependent gene .pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {12} +} + +@article{tuMAnorm2QuantitativelyComparing2020, + title = {{{MAnorm2}} for Quantitatively Comparing Groups of {{ChIP}}-Seq Samples}, + author = {Tu, Shiqi and Li, Mushan and Tan, Fengxiang and Chen, Haojie and Xu, Jian and Waxman, David J. and Zhang, Yijing and Shao, Zhen}, + year = {2020}, + month = jan, + doi = {10.1101/2020.01.07.896894}, + abstract = {Eukaryotic gene transcription is regulated by a large cohort of chromatin associated proteins, and inferring their differential binding sites between cellular contexts requires a rigorous comparison of the corresponding ChIP-seq data. We present MAnorm2, a new computational tool for quantitatively comparing groups of ChIP-seq samples. MAnorm2 uses a hierarchical strategy for ChIP-seq data normalization and performs differential analysis by assessing within-group variability of ChIP-seq signals under an empirical Bayes framework. In this framework, MAnorm2 considers the abundance of differential ChIP-seq signals between groups of samples and the possibility of different within-group variability between groups. When samples in each group are biological replicates, MAnorm2 can reliably identify differential binding events even between highly similar cellular contexts. Using a number of real ChIP-seq data sets, we observed that MAnorm2 clearly outperformed existing tools for differential ChIP-seq analysis.}, + file = {/Users/laurent/Zotero/storage/H4B3GBEN/Tu et al. 
- 2020 - MAnorm2 for quantitatively comparing groups of ChI.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@article{tungBatchEffectsEffective2017, + title = {Batch Effects and the Effective Design of Single-Cell Gene Expression Studies}, + author = {Tung, Po-Yuan and Blischak, John D. and Hsiao, Chiaowen Joyce and Knowles, David A. and Burnett, Jonathan E. and Pritchard, Jonathan K. and Gilad, Yoav}, + year = {2017}, + month = jan, + volume = {7}, + pages = {39921}, + issn = {2045-2322}, + doi = {10.1038/srep39921}, + file = {/Users/laurent/Documents/bibliography/to_read/Tung et al. - 2017 - Batch effects and the effective design of single-c.pdf}, + journal = {Scientific Reports}, + language = {en} +} + +@article{turnerIntegratingLongrangeConnectivity2018, + title = {Integrating Long-Range Connectivity Information into de {{Bruijn}} Graphs}, + author = {Turner, Isaac and Garimella, Kiran V. and Iqbal, Zamin and McVean, Gil}, + year = {2018}, + month = aug, + volume = {34}, + pages = {2556--2565}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty157}, + abstract = {AbstractMotivation. The de Bruijn graph is a simple and efficient data structure that is used in many areas of sequence analysis including genome assembly, rea}, + file = {/Users/laurent/Zotero/storage/9EJXLV9Q/Turner et al. - 2018 - Integrating long-range connectivity information in.pdf;/Users/laurent/Zotero/storage/6U5AEHNX/4938484.html}, + journal = {Bioinformatics}, + language = {en}, + number = {15} +} + +@article{turowskiGlobalAnalysisTranscriptionally2016, + title = {Global Analysis of Transcriptionally Engaged Yeast {{RNA}} Polymerase {{III}} Reveals Extended {{tRNA}} Transcripts}, + author = {Turowski, Tomasz W. 
and Le{\'s}niewska, Ewa and {Delan-Forino}, Clementine and Sayou, Camille and Boguta, Magdalena and Tollervey, David}, + year = {2016}, + month = jul, + volume = {26}, + pages = {933--944}, + issn = {1088-9051, 1549-5469}, + doi = {10.1101/gr.205492.116}, + file = {/Users/laurent/Documents/bibliography/tRNA/Turowski et al. - 2016 - Global analysis of transcriptionally engaged yeast.pdf}, + journal = {Genome Research}, + language = {en}, + number = {7} +} + +@article{tylerEvaluationOxfordNanopore2018, + title = {Evaluation of {{Oxford Nanopore}}'s {{MinION Sequencing Device}} for {{Microbial Whole Genome Sequencing Applications}}}, + author = {Tyler, Andrea D. and Mataseje, Laura and Urfano, Chantel J. and Schmidt, Lisa and Antonation, Kym S. and Mulvey, Michael R. and Corbett, Cindi R.}, + year = {2018}, + month = jul, + volume = {8}, + issn = {2045-2322}, + doi = {10.1038/s41598-018-29334-5}, + abstract = {The MinION sequencer (Oxford Nanopore Technologies) is a paradigm shifting device allowing rapid, real time long read sequencing of nucleic acids. Yet external benchmarking of this technologies' capabilities has not been extensively reported, nor has thorough evaluation of its utility for field-based analysis with sub-optimal sample types been described. The aim of this study was to evaluate the capability of the MinION sequencer for bacterial genomic and metagenomic applications, with specific emphasis placed on the quality, yield, and accuracy of generated sequence data. Two independent laboratories at the National Microbiology Laboratory (Public Health Agency of Canada), sequenced a set of microbes in replicate, using the currently available flowcells, sequencing chemistries, and software available at the time of the experiment. Overall sequencing yield and quality improved through the course of this set of experiments. Sequencing alignment accuracy was high reaching 97\% for all 2D experiments, though was slightly lower for 1D sequencing (94\%). 
1D sequencing provided much longer sequences than 2D. Both sequencing chemistries performed equally well in constructing genomic assemblies. There was evidence of barcode cross-over using both the native and PCR barcoding methods. Despite the sub-optimal nature of samples sequenced in the field, sequences attributable to B. anthracis the target organism used in this scenario, could none-the-less be detected. Together, this report showcases the rapid advancement in this technology and its utility in the context of genomic sequencing of microbial isolates of importance to public health.}, + journal = {Scientific Reports}, + pmcid = {PMC6053456}, + pmid = {30026559} +} + +@article{tysonMinIONbasedLongreadSequencing2018, + title = {{{MinION}}-Based Long-Read Sequencing and Assembly Extends the {{Caenorhabditis}} elegans Reference Genome}, + author = {Tyson, John R. and O'Neil, Nigel J. and Jain, Miten and Olsen, Hugh E. and Hieter, Philip and Snutch, Terrance P.}, + year = {2018}, + volume = {28}, + pages = {266--274}, + issn = {1549-5469}, + doi = {10.1101/gr.221184.117}, + abstract = {Advances in long-read single molecule sequencing have opened new possibilities for 'benchtop' whole-genome sequencing. The Oxford Nanopore Technologies MinION is a portable device that uses nanopore technology that can directly sequence DNA molecules. MinION single molecule long sequence reads are well suited for de novo assembly of complex genomes as they facilitate the construction of highly contiguous physical genome maps obviating the need for labor-intensive physical genome mapping. Long sequence reads can also be used to delineate complex chromosomal rearrangements, such as those that occur in tumor cells, that can confound analysis using short reads. Here, we assessed MinION long-read-derived sequences for feasibility concerning: (1) the de novo assembly of a large complex genome, and (2) the elucidation of complex rearrangements. 
The genomes of two Caenorhabditis elegans strains, a wild-type strain and a strain containing two complex rearrangements, were sequenced with MinION. Up to 42-fold coverage was obtained from a single flow cell, and the best pooled data assembly produced a highly contiguous wild-type C. elegans genome containing 48 contigs (N50 contig length = 3.99 Mb) covering \textgreater{}99\% of the 100,286,401-base reference genome. Further, the MinION-derived genome assembly expanded the C. elegans reference genome by \textgreater{}2 Mb due to a more accurate determination of repetitive sequence elements and assembled the complete genomes of two co-extracted bacteria. MinION long-read sequence data also facilitated the elucidation of complex rearrangements in a mutagenized strain. The sequence accuracy of the MinION long-read contigs ({$\sim$}98\%) was improved using Illumina-derived sequence data to polish the final genome assembly to 99.8\% nucleotide accuracy when compared to the reference assembly.}, + journal = {Genome Research}, + keywords = {minion}, + language = {eng}, + number = {2}, + pmcid = {PMC5793790}, + pmid = {29273626} +} + +@article{ursuGenomeDISCOConcordanceScore2018, + title = {{{GenomeDISCO}}: A Concordance Score for Chromosome Conformation Capture Experiments Using Random Walks on Contact Map Graphs}, + shorttitle = {{{GenomeDISCO}}}, + author = {Ursu, Oana and Boley, Nathan and Taranova, Maryna and Wang, Y. X. Rachel and Yardimci, Galip Gurkan and Stafford Noble, William and Kundaje, Anshul}, + year = {2018}, + month = aug, + volume = {34}, + pages = {2701--2707}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty164}, + abstract = {AbstractMotivation. The three-dimensional organization of chromatin plays a critical role in gene regulation and disease. High-throughput chromosome conformati}, + file = {/Users/laurent/Zotero/storage/LE6V992S/Ursu et al. 
- 2018 - GenomeDISCO a concordance score for chromosome co.pdf;/Users/laurent/Zotero/storage/DZJG4JPW/4938489.html}, + journal = {Bioinformatics}, + language = {en}, + number = {16} +} + +@misc{UsingGeneExpression, + title = {Using {{Gene Expression Noise}} to {{Understand Gene Regulation}}}, + howpublished = {https://science.sciencemag.org/content/336/6078/183/tab-pdf} +} + +@article{vallejosNormalizingSinglecellRNA2017, + title = {Normalizing Single-Cell {{RNA}} Sequencing Data: Challenges and Opportunities}, + shorttitle = {Normalizing Single-Cell {{RNA}} Sequencing Data}, + author = {Vallejos, Catalina A and Risso, Davide and Scialdone, Antonio and Dudoit, Sandrine and Marioni, John C}, + year = {2017}, + month = may, + volume = {14}, + pages = {565--571}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/nmeth.4292}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Vallejos et al. - 2017 - Normalizing single-cell RNA sequencing data chall.pdf;/Users/laurent/Zotero/storage/23ADKDWA/Vallejos et al. - 2017 - Normalizing single-cell RNA sequencing data chall.pdf;/Users/laurent/Zotero/storage/48SP49VW/Vallejos et al. - 2017 - Normalizing single-cell RNA sequencing data chall.pdf;/Users/laurent/Zotero/storage/K89MJE6M/Vallejos et al. - 2017 - Normalizing single-cell RNA sequencing data chall.pdf;/Users/laurent/Zotero/storage/Z3VAENSI/Vallejos et al. 
- 2017 - Normalizing single-cell RNA sequencing data chall.pdf;/Users/laurent/Zotero/storage/QAKSKILT/nmeth.html}, + journal = {Nature Methods}, + language = {en}, + number = {6} +} + +@article{vandenbergeObservationWeightsUnlock2018, + title = {Observation Weights to Unlock Bulk {{RNA}}-Seq Tools for Zero Inflation and Single-Cell Applications}, + author = {{Van den Berge}, Koen and Perraudeau, Fanny and Soneson, Charlotte and Love, Michael I and Risso, Davide and Vert, Jean-Philippe and Robinson, Mark D and Dudoit, Sandrine and Clement, Lieven}, + year = {2018}, + month = jan, + doi = {10.1101/250126}, + abstract = {Dropout events in single-cell transcriptome sequencing (scRNA-seq) cause many transcripts to go undetected and induce an excess of zero read counts, leading to power issues in differential expression (DE) analysis. This has triggered the development of bespoke scRNA-seq DE methods to cope with zero inflation. Recent evaluations, however, have shown that dedicated scRNA-seq tools provide no advantage compared to traditional bulk RNA-seq tools. We introduce a weighting strategy, based on a zero-inflated negative binomial (ZINB) model, that identifies excess zero counts and generates gene and cell-specific weights to unlock bulk RNA-seq DE pipelines for zero-inflated data, boosting performance for scRNA-seq.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Van den Berge et al. - 2018 - Observation weights to unlock bulk RNA-seq tools f.pdf;/Users/laurent/Zotero/storage/H4ZJYBRJ/Van den Berge et al. - 2018 - Observation weights to unlock bulk RNA-seq tools f.pdf;/Users/laurent/Zotero/storage/LJ7QM3MC/Van den Berge et al. - 2018 - Observation weights to unlock bulk RNA-seq tools f.pdf;/Users/laurent/Zotero/storage/XXWQD2GE/Van den Berge et al. 
- 2018 - Observation weights to unlock bulk RNA-seq tools f.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@article{vandenbergeObservationWeightsUnlock2018d, + title = {Observation Weights Unlock Bulk {{RNA}}-Seq Tools for Zero Inflation and Single-Cell Applications}, + author = {{Van den Berge}, Koen and Perraudeau, Fanny and Soneson, Charlotte and Love, Michael I. and Risso, Davide and Vert, Jean-Philippe and Robinson, Mark D. and Dudoit, Sandrine and Clement, Lieven}, + year = {2018}, + month = feb, + volume = {19}, + pages = {24}, + issn = {1474-760X}, + doi = {10.1186/s13059-018-1406-4}, + abstract = {Dropout events in single-cell RNA sequencing (scRNA-seq) cause many transcripts to go undetected and induce an excess of zero read counts, leading to power issues in differential expression (DE) analysis. This has triggered the development of bespoke scRNA-seq DE methods to cope with zero inflation. Recent evaluations, however, have shown that dedicated scRNA-seq tools provide no advantage compared to traditional bulk RNA-seq tools. We introduce a weighting strategy, based on a zero-inflated negative binomial model, that identifies excess zero counts and generates gene- and cell-specific weights to unlock bulk RNA-seq DE pipelines for zero-inflated data, boosting performance for scRNA-seq.}, + file = {/Users/laurent/Zotero/storage/MGQL7JQ4/Van den Berge et al. - 2018 - Observation weights unlock bulk RNA-seq tools for .pdf;/Users/laurent/Zotero/storage/ISMF5E9I/s13059-018-1406-4.html}, + journal = {Genome Biology}, + number = {1} +} + +@article{vandenbonSingleCellHaystackFindingSurprising2019, + title = {{{singleCellHaystack}}: {{Finding}} Surprising Genes in Single-Cell Transcriptome Data}, + shorttitle = {{{singleCellHaystack}}}, + author = {Vandenbon, Alexis and Diez, Diego}, + year = {2019}, + month = apr, + doi = {10.1101/557967}, + abstract = {Summary: Single-cell sequencing data is often visualized in 2-dimensional plots, including t-SNE plots. 
However, it is not straightforward to extract biological knowledge, such as differentially expressed genes, from these plots. Here we introduce singleCellHaystack, a methodology that addresses this problem. singleCellHaystack uses Kullback-Leibler Divergence to find genes that are expressed in subsets of cells that are non-randomly positioned on a 2D plot. We illustrate the usage of singleCellHaystack through applications on several singlecell datasets. singleCellHaystack is implemented as an R package, and includes additional functions for clustering and visualization of genes with interesting expression patterns.}, + file = {/Users/laurent/Zotero/storage/633CIZLZ/Vandenbon and Diez - 2019 - singleCellHaystack Finding surprising genes in si.pdf;/Users/laurent/Zotero/storage/LZIPQ69B/Vandenbon and Diez - 2019 - singleCellHaystack Finding surprising genes in si.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@article{vandijkMAGICDiffusionbasedImputation2017, + title = {{{MAGIC}}: {{A}} Diffusion-Based Imputation Method Reveals Gene-Gene Interactions in Single-Cell {{RNA}}-Sequencing Data}, + shorttitle = {{{MAGIC}}}, + author = {{van Dijk}, David and Nainys, Juozas and Sharma, Roshan and Kathail, Pooja and Carr, Ambrose J and Moon, Kevin R and Mazutis, Linas and Wolf, Guy and Krishnaswamy, Smita and Pe'er, Dana}, + year = {2017}, + month = feb, + doi = {10.1101/111591}, + abstract = {Single-cell RNA-sequencing is fast becoming a major technology that is revolutionizing biological discovery in fields such as development, immunology and cancer. The ability to simultaneously measure thousands of genes at single cell resolution allows, among other prospects, for the possibility of learning gene regulatory networks at large scales. However, scRNA-seq technologies suffer from many sources of significant technical noise, the most prominent of which is `dropout' due to inefficient mRNA capture. 
This results in data that has a high degree of sparsity, with typically only \textasciitilde{}10\% non-zero values. To address this, we developed MAGIC (Markov Affinity-based Graph Imputation of Cells), a method for imputing missing values, and restoring the structure of the data. After MAGIC, we find that two- and three-dimensional gene interactions are restored and that MAGIC is able to impute complex and non-linear shapes of interactions. MAGIC also retains cluster structure, enhances cluster-specific gene interactions and restores trajectories, as demonstrated in mouse retinal bipolar cells, hematopoiesis, and our newly generated epithelial-to-mesenchymal transition dataset.}, + file = {/Users/laurent/Zotero/storage/JZYS4446/van Dijk et al. - 2017 - MAGIC A diffusion-based imputation method reveals.pdf;/Users/laurent/Zotero/storage/KNMR46W4/van Dijk et al. - 2017 - MAGIC A diffusion-based imputation method reveals.pdf;/Users/laurent/Zotero/storage/MH4D5R4F/van Dijk et al. - 2017 - MAGIC A diffusion-based imputation method reveals.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@article{vandijkRecoveringGeneInteractions2018, + title = {Recovering Gene Interactions from Single-Cell Data Using Data Diffusion}, + author = {{van Dijk}, David and Sharma, Roshan and Nainys, Juozas and Yim, Kristina and Kathail, Pooja and Carr, Ambrose and Burdziak, Cassandra and Moon, Kevin R. and Chaffer, Christine L. and Pattabiraman, Diwakar and Bierie, Brian and Mazutis, Linas and Wolf, Guy and Krishnaswamy, Smita and Pe'er, Dana}, + year = {2018}, + month = jul, + volume = {174}, + pages = {716--729.e27}, + issn = {0092-8674}, + doi = {10.1016/j.cell.2018.05.061}, + abstract = {Single-cell RNA-sequencing technologies suffer from many sources of technical noise, including under-sampling of mRNA molecules, often termed `dropout', which can severely obscure important gene-gene relationships. 
To address this, we developed MAGIC (Markov Affinity-based Graph Imputation of Cells), a method that shares information across similar cells, via data diffusion, to denoise the cell count matrix and fill in missing transcripts. We validate MAGIC on several biological systems and find it effective at recovering gene-gene relationships and additional structures. MAGIC reveals a phenotypic continuum, with the majority of cells residing in intermediate states that display stem-like signatures and uncovers known and previously uncharacterized regulatory interactions, demonstrating that our approach can successfully uncover regulatory relations without perturbations., One Sentence Summary: Graph diffusion-based imputation method recovers missing transcripts in scRNA-seq data, yielding insight into the epithelial-to-mesenchymal transition., Abstract highlights:, 1. MAGIC restores noisy and sparse single-cell data using diffusion geometry., 2. Corrected data is amenable to myriad downstream analyses., 3. MAGIC enables archetypal analysis and inference of gene interactions., 4. Transcription factor targets can be predicted without perturbation after MAGIC. In brief - A new algorithm overcomes limitations of data loss in single cell sequencing experiments,}, + file = {/Users/laurent/Zotero/storage/EP7Y8BR8/van Dijk et al. - 2018 - Recovering gene interactions from single-cell data.pdf}, + journal = {Cell}, + number = {3}, + pmcid = {PMC6771278}, + pmid = {29961576} +} + +@book{venablesModernAppliedStatistics2002, + title = {Modern {{Applied Statistics}} with {{S}}}, + author = {Venables, W. N. and Ripley, B. 
D.}, + year = {2002}, + edition = {Fourth}, + publisher = {{Springer}}, + address = {{New York}}, + keywords = {Data processing,Mathematical statistics,S (Computer system),Statistics} +} + +@article{viaTenSimpleRules2011, + title = {Ten {{Simple Rules}} for {{Developing}} a {{Short Bioinformatics Training Course}}}, + author = {Via, Allegra and De Las Rivas, Javier and Attwood, Teresa K. and Landsman, David and Brazas, Michelle D. and Leunissen, Jack A. M. and Tramontano, Anna and Schneider, Maria Victoria}, + year = {2011}, + month = oct, + volume = {7}, + pages = {e1002245}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1002245}, + file = {/Users/laurent/Documents/bibliography/bioinfo/Via et al. - 2011 - Ten Simple Rules for Developing a Short Bioinforma.pdf}, + journal = {PLoS Computational Biology}, + language = {en}, + number = {10} +} + +@article{vicensTenSimpleRules2007, + title = {Ten {{Simple Rules}} for a {{Successful Collaboration}}}, + author = {Vicens, Quentin and Bourne, Philip E.}, + year = {2007}, + volume = {3}, + pages = {e44}, + issn = {1553-734X, 1553-7358}, + doi = {10.1371/journal.pcbi.0030044}, + file = {/Users/laurent/Documents/bibliography/bioinfo/Vicens and Bourne - 2007 - Ten Simple Rules for a Successful Collaboration.pdf}, + journal = {PLoS Computational Biology}, + language = {en}, + number = {3} +} + +@article{vitting-seerupIsoformSwitchAnalyzeRAnalysisChanges2019, + title = {{{IsoformSwitchAnalyzeR}}: Analysis of Changes in Genome-Wide Patterns of Alternative Splicing and Its Functional Consequences}, + shorttitle = {{{IsoformSwitchAnalyzeR}}}, + author = {{Vitting-Seerup}, Kristoffer and Sandelin, Albin}, + year = {2019}, + month = nov, + volume = {35}, + pages = {4469--4471}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz247}, + abstract = {AbstractSummary. Alternative splicing is an important mechanism involved in health and disease. 
Recent work highlights the importance of investigating genome-w}, + file = {/Users/laurent/Zotero/storage/Y795422K/Vitting-Seerup and Sandelin - 2019 - IsoformSwitchAnalyzeR analysis of changes in geno.pdf;/Users/laurent/Zotero/storage/7RS3BSKP/5466456.html}, + journal = {Bioinformatics}, + language = {en}, + number = {21} +} + +@article{vonluxburgTutorialSpectralClustering2007, + title = {A Tutorial on Spectral Clustering}, + author = {{von Luxburg}, Ulrike}, + year = {2007}, + month = dec, + volume = {17}, + pages = {395--416}, + issn = {0960-3174, 1573-1375}, + doi = {10.1007/s11222-007-9033-z}, + abstract = {In recent years, spectral clustering has become one of the most popular modern clustering algorithms. It is simple to implement, can be solved efficiently by standard linear algebra software, and very often outperforms traditional clustering algorithms such as the k-means algorithm. On the first glance spectral clustering appears slightly mysterious, and it is not obvious to see why it works at all and what it really does. The goal of this tutorial is to give some intuition on those questions. We describe different graph Laplacians and their basic properties, present the most common spectral clustering algorithms, and derive those algorithms from scratch by several different approaches. Advantages and disadvantages of the different spectral clustering algorithms are discussed.}, + file = {/Users/laurent/Documents/bibliography/stats/von Luxburg - 2007 - A tutorial on spectral clustering.pdf}, + journal = {Statistics and Computing}, + language = {en}, + number = {4} +} + +@inproceedings{vrahatisVisualizingHighDimensionalSingleCell2019, + title = {Visualizing {{High}}-{{Dimensional Single}}-{{Cell RNA}}-Seq {{Data}} via {{Random Projections}} and {{Geodesic Distances}}}, + booktitle = {2019 {{IEEE Conference}} on {{Computational Intelligence}} in {{Bioinformatics}} and {{Computational Biology}} ({{CIBCB}})}, + author = {Vrahatis, Aristidis G. 
and Tasoulis, Sotiris K. and Dimitrakopoulos, Georgios N. and Plagianakos, Vassilis P.}, + year = {2019}, + month = jul, + pages = {1--6}, + doi = {10.1109/CIBCB.2019.8791482}, + abstract = {The recent advent in Next Generation Sequencing has created a huge data source which offers a great potential for elucidating complex disease mechanisms and biological processes. A recent technology is the single-cell RNA sequencing, which allows transcriptomics measurements in individual cells, having promising results. However, such studies measure the entire genome for thousands of cells, creating datasets with extremely high dimensionality and complexity. Following this perspective, we propose a dimensionality reduction approach, called RGt-SNE, which visualizes single-cell RNA-seq data in two dimensions. Initially, RGt-SNE defines a cell-cell distance matrix based on Random Projections and Geodesic Distances. The first is used to define the pairwise cells distances in a low dimensional projected space avoiding the difficulties that exist in data with ultra-high dimensionality. The latter is used to better define the large pairwise cells distances. Subsequently, the t-SNE method is applied in the customized distance matrix for two dimensional visualization. RGt-SNE was evaluated in two real experimental single-cell RNA-seq data against three well-known methods, such as t-SNE, Multidimensional scaling, and ISOMAP. 
Outcomes provide the superiority of RGt-SNE suggesting it as a reliable tool for single-cell RNA-seq data analysis and visualization.}, + file = {/Users/laurent/Zotero/storage/CIV4YK22/8791482.html}, + keywords = {biology computing,cell-cell distance matrix,data analysis,data source,data visualisation,data visualization,Data visualization,Dimensionality reduction,dimensionality reduction approach,diseases,Gene expression,genetics,genomics,geodesic distances,High-dimensional data,molecular biophysics,multidimensional scaling,pairwise cells distances,Principal component analysis,Reliability,RGt-SNE,RNA,single-cell RNA sequencing,single-cell RNA-seq,single-cell RNA-seq data,Tools,ultra-high dimensionality,Visualization} +} + +@article{vuBetaPoissonModelSinglecell2016, + title = {Beta-{{Poisson}} Model for Single-Cell {{RNA}}-Seq Data Analyses}, + author = {Vu, Trung Nghia and Wills, Quin F. and Kalari, Krishna R. and Niu, Nifang and Wang, Liewei and Rantalainen, Mattias and Pawitan, Yudi}, + year = {2016}, + month = jul, + volume = {32}, + pages = {2128--2135}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btw202}, + abstract = {Abstract. Motivation: Single-cell RNA-sequencing technology allows detection of gene expression at the single-cell level. One typical feature of the data is a}, + file = {/Users/laurent/Zotero/storage/FVGYHNJJ/Vu et al. - 2016 - Beta-Poisson model for single-cell RNA-seq data an.pdf;/Users/laurent/Zotero/storage/XXXS93AF/Vu et al. - 2016 - Beta-Poisson model for single-cell RNA-seq data an.pdf;/Users/laurent/Zotero/storage/ZGXQPKRD/Vu et al. - 2016 - Beta-Poisson model for single-cell RNA-seq data an.pdf;/Users/laurent/Zotero/storage/ABJRM3AX/2288270.html}, + journal = {Bioinformatics}, + language = {en}, + number = {14} +} + +@article{vuIsoformlevelGeneExpression2018, + title = {Isoform-Level Gene Expression Patterns in Single-Cell {{RNA}}-Sequencing Data}, + author = {Vu, Trung Nghia and Wills, Quin F. and Kalari, Krishna R. 
and Niu, Nifang and Wang, Liewei and Pawitan, Yudi and Rantalainen, Mattias}, + year = {2018}, + month = jul, + volume = {34}, + pages = {2392--2400}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty100}, + abstract = {AbstractMotivation. RNA sequencing of single cells enables characterization of transcriptional heterogeneity in seemingly homogeneous cell populations. Single-}, + file = {/Users/laurent/Zotero/storage/FWDUYMTR/Vu et al. - 2018 - Isoform-level gene expression patterns in single-c.pdf;/Users/laurent/Zotero/storage/U569A8XD/4911530.html}, + journal = {Bioinformatics}, + language = {en}, + number = {14} +} + +@article{vuongLikelihoodRatioTests1989, + title = {Likelihood {{Ratio Tests}} for {{Model Selection}} and {{Non}}-{{Nested Hypotheses}}}, + author = {Vuong, Quang H.}, + year = {1989}, + volume = {57}, + pages = {307--333}, + issn = {0012-9682}, + doi = {10.2307/1912557}, + abstract = {[In this paper, we develop a classical approach to model selection. Using the Kullback-Leibler Information Criterion to measure the closeness of a model to the truth, we propose simple likelihood-ratio based statistics for testing the null hypothesis that the competing models are equally close to the true data generating process against the alternative hypothesis that one model is closer. The tests are directional and are derived successively for the cases where the competing models are non-nested, overlapping, or nested and whether both, one, or neither is misspecified. As a prerequisite, we fully characterize the asymptotic distribution of the likelihood ratio statistic under the most general conditions. We show that it is a weighted sum of chi-square distribution or a normal distribution depending on whether the distributions in the competing models closest to the truth are observationally identical. 
We also propose a test of this latter condition.]}, + file = {/Users/laurent/Zotero/storage/7YDN2PQP/Vuong - 1989 - Likelihood Ratio Tests for Model Selection and Non.pdf}, + journal = {Econometrica}, + number = {2} +} + +@article{wagnerGeneticScreeningEnters2017, + title = {Genetic Screening Enters the Single-Cell Era}, + author = {Wagner, Daniel E and Klein, Allon M}, + year = {2017}, + month = mar, + volume = {14}, + pages = {237--238}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/nmeth.4196}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Wagner and Klein - 2017 - Genetic screening enters the single-cell era.pdf;/Users/laurent/Zotero/storage/9KTG7RLK/Wagner and Klein - 2017 - Genetic screening enters the single-cell era.pdf;/Users/laurent/Zotero/storage/JX4HTZIW/Wagner and Klein - 2017 - Genetic screening enters the single-cell era.pdf;/Users/laurent/Zotero/storage/XX5KYXHU/Wagner and Klein - 2017 - Genetic screening enters the single-cell era.pdf}, + journal = {Nature Methods}, + language = {en}, + number = {3} +} + +@article{wangBARTTranscriptionFactor2018, + title = {{{BART}}: A Transcription Factor Prediction Tool with Query Gene Sets or Epigenomic Profiles}, + shorttitle = {{{BART}}}, + author = {Wang, Zhenjia and Civelek, Mete and Miller, Clint and Sheffield, Nathan and Guertin, Michael J. and Zang, Chongzhi}, + year = {2018}, + month = mar, + pages = {280982}, + doi = {10.1101/280982}, + abstract = {Identification of functional transcription factors that regulate a given gene set is an important problem in gene regulation studies. Conventional approaches for identifying transcription factors, such as DNA sequence motif analysis, are unable to predict functional binding of specific factors and not sensitive to detect factors binding at distal enhancers. 
Here we present Binding Analysis for Regulation of Transcription (BART), a novel computational method and software package for predicting functional transcription factors that regulate a query gene set or associate with a query genomic profile, based on more than 6,000 existing ChIP-seq datasets for over 400 factors in human or mouse. This method demonstrates the advantage of utilizing publicly available data for functional genomics research.}, + copyright = {\textcopyright{} 2018, Posted by Cold Spring Harbor Laboratory. The copyright holder for this pre-print is the author. All rights reserved. The material may not be redistributed, re-used or adapted without the author's permission.}, + file = {/Users/laurent/Zotero/storage/PI8PYVFS/Wang et al. - 2018 - BART a transcription factor prediction tool with .pdf;/Users/laurent/Zotero/storage/43SZQGTE/280982.html}, + journal = {bioRxiv}, + language = {en} +} + +@article{wangBAUMImprovingGenome2018, + title = {{{BAUM}}: Improving Genome Assembly by Adaptive Unique Mapping and Local Overlap-Layout-Consensus Approach}, + shorttitle = {{{BAUM}}}, + author = {Wang, Anqi and Wang, Zhanyu and Li, Zheng and Li, Lei M.}, + year = {2018}, + month = jun, + volume = {34}, + pages = {2019--2028}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty020}, + abstract = {AbstractMotivation. It is highly desirable to assemble genomes of high continuity and consistency at low cost. The current bottleneck of draft genome continuit}, + file = {/Users/laurent/Zotero/storage/E63KR6QK/Wang et al. 
- 2018 - BAUM improving genome assembly by adaptive unique.pdf;/Users/laurent/Zotero/storage/525U74TS/4810438.html}, + journal = {Bioinformatics}, + language = {en}, + number = {12} +} + +@article{wangConditionalGenerativeAdversarial2018, + title = {Conditional Generative Adversarial Network for Gene Expression Inference}, + author = {Wang, Xiaoqian and Ghasedi Dizaji, Kamran and Huang, Heng}, + year = {2018}, + month = sep, + volume = {34}, + pages = {i603-i611}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty563}, + abstract = {AbstractMotivation. The rapid progress of gene expression profiling has facilitated the prosperity of recent biological studies in various fields, where gene e}, + file = {/Users/laurent/Zotero/storage/C6EGKMGV/Wang et al. - 2018 - Conditional generative adversarial network for gen.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{wangDifferentialGeneNetwork2017, + title = {Differential Gene Network Analysis from Single Cell {{RNA}}-Seq}, + author = {Wang, Yikai and Wu, Hao and Yu, Tianwei}, + year = {2017}, + month = jun, + volume = {44}, + pages = {331--334}, + issn = {16738527}, + doi = {10.1016/j.jgg.2017.03.001}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Wang et al. - 2017 - Differential gene network analysis from single cel.pdf;/Users/laurent/Zotero/storage/CM8FBGSC/Wang et al. - 2017 - Differential gene network analysis from single cel.pdf;/Users/laurent/Zotero/storage/I3RITJ4J/Wang et al. - 2017 - Differential gene network analysis from single cel.pdf;/Users/laurent/Zotero/storage/JVSG9HEB/Wang et al. 
- 2017 - Differential gene network analysis from single cel.pdf}, + journal = {Journal of Genetics and Genomics}, + language = {en}, + number = {6} +} + +@article{wangEfficientAccurateCausal2017, + title = {Efficient and Accurate Causal Inference with Hidden Confounders from Genome-Transcriptome Variation Data}, + author = {Wang, Lingfei and Michoel, Tom}, + editor = {Listgarten, Jennifer}, + year = {2017}, + month = aug, + volume = {13}, + pages = {e1005703}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005703}, + abstract = {Mapping gene expression as a quantitative trait using whole genome-sequencing and transcriptome analysis allows to discover the functional consequences of genetic variation. We developed a novel method and ultra-fast software Findr for highly accurate causal inference between gene expression traits using cis-regulatory DNA variations as causal anchors, which improves current methods by taking into consideration hidden confounders and weak regulations. Findr outperformed existing methods on the DREAM5 Systems Genetics challenge and on the prediction of microRNA and transcription factor targets in human lymphoblastoid cells, while being nearly a million times faster. 
Findr is publicly available at https://github.com/lingfeiwang/findr.}, + file = {/Users/laurent/Documents/bibliography/eQTL/Wang and Michoel - 2017 - Efficient and accurate causal inference with hidde.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {8} +} + +@article{wangGeneExpressionDistribution2018, + title = {Gene Expression Distribution Deconvolution in Single-Cell {{RNA}} Sequencing}, + author = {Wang, Jingshu and Huang, Mo and Torre, Eduardo and Dueck, Hannah and Shaffer, Sydney and Murray, John and Raj, Arjun and Li, Mingyao and Zhang, Nancy R.}, + year = {2018}, + month = jul, + volume = {115}, + pages = {E6437-E6446}, + issn = {0027-8424, 1091-6490}, + doi = {10.1073/pnas.1721085115}, + abstract = {Single-cell RNA sequencing (scRNA-seq) enables the quantification of each gene's expression distribution across cells, thus allowing the assessment of the dispersion, nonzero fraction, and other aspects of its distribution beyond the mean. These statistical characterizations of the gene expression distribution are critical for understanding expression variation and for selecting marker genes for population heterogeneity. However, scRNA-seq data are noisy, with each cell typically sequenced at low coverage, thus making it difficult to infer properties of the gene expression distribution from raw counts. Based on a reexamination of nine public datasets, we propose a simple technical noise model for scRNA-seq data with unique molecular identifiers (UMI). We develop deconvolution of single-cell expression distribution (DESCEND), a method that deconvolves the true cross-cell gene expression distribution from observed scRNA-seq counts, leading to improved estimates of properties of the distribution such as dispersion and nonzero fraction. DESCEND can adjust for cell-level covariates such as cell size, cell cycle, and batch effects. 
DESCEND's noise model and estimation accuracy are further evaluated through comparisons to RNA FISH data, through data splitting and simulations and through its effectiveness in removing known batch effects. We demonstrate how DESCEND can clarify and improve downstream analyses such as finding differentially expressed genes, identifying cell types, and selecting differentiation markers.}, + copyright = {Copyright \textcopyright{} 2018 the Author(s). Published by PNAS.. https://creativecommons.org/licenses/by-nc-nd/4.0/This open access article is distributed under Creative Commons Attribution-NonCommercial-NoDerivatives License 4.0 (CC BY-NC-ND).}, + file = {/Users/laurent/Zotero/storage/6XE7CABX/Wang et al. - 2018 - Gene expression distribution deconvolution in sing.pdf;/Users/laurent/Zotero/storage/ZUK93Z28/E6437.html}, + journal = {Proceedings of the National Academy of Sciences}, + keywords = {differential expression,Gini coefficient,highly variable genes,RNA sequencing,single-cell transcriptomics}, + language = {en}, + number = {28}, + pmid = {29946020} +} + +@article{wangGeneralizedCorrelationMeasure2018, + title = {Generalized Correlation Measure Using Count Statistics for Gene Expression Data with Ordered Samples}, + author = {Wang, Y X Rachel and Liu, Ke and Theusch, Elizabeth and Rotter, Jerome I and Medina, Marisa W and Waterman, Michael S and Huang, Haiyan}, + editor = {Stegle, Oliver}, + year = {2018}, + month = feb, + volume = {34}, + pages = {617--624}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx641}, + abstract = {Motivation: Capturing association patterns in gene expression levels under different conditions or time points is important for inferring gene regulatory interactions. In practice, temporal changes in gene expression may result in complex association patterns that require more sophisticated detection methods than simple correlation measures. 
For instance, the effect of regulation may lead to time-lagged associations and interactions local to a subset of samples. Furthermore, expression profiles of interest may not be aligned or directly comparable (e.g. gene expression profiles from two species).}, + file = {/Users/laurent/Documents/bibliography/to_read/Wang et al. - 2018 - Generalized correlation measure using count statis.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {4} +} + +@article{wangHighdimensionalLinkageAnalysis2017, + title = {A High-Dimensional Linkage Analysis Model for Characterizing Crossover Interference}, + author = {Wang, Jing and Sun, Lidan and Jiang, Libo and Sang, Mengmeng and Ye, Meixia and Cheng, Tangran and Zhang, Qixiang and Wu, Rongling}, + year = {2017}, + month = may, + volume = {18}, + pages = {382--393}, + issn = {1467-5463, 1477-4054}, + doi = {10.1093/bib/bbw033}, + abstract = {Linkage analysis has played an important role in understanding genome structure and evolution. However, two-point linkage analysis widely used for genetic map construction can rarely chart a detailed picture of genome organization because it fails to identify the dependence of crossovers distributed along the length of a chromosome, a phenomenon known as crossover interference. Multi-point analysis, proven to be more advantageous in gene ordering and genetic distance estimation for dominant markers than two-point analysis, is equipped with a capacity to discern and quantify crossover interference. Here, we review a statistical model for four-point analysis, which, beyond three-point analysis, can characterize crossover interference that takes place not only between two adjacent chromosomal intervals, but also over multiple successive intervals. 
This procedure provides an analytical tool to elucidate the detailed landscape of crossover interference over the genome and further infer the evolution of genome structure and organization.}, + file = {/Users/laurent/Documents/bibliography/to_read/Wang et al. - 2017 - A high-dimensional linkage analysis model for char.pdf}, + journal = {Briefings in Bioinformatics}, + language = {en}, + number = {3} +} + +@article{wangTaggingSNPsetSelection2017, + title = {Tagging {{SNP}}-Set Selection with Maximum Information Based on Linkage Disequilibrium Structure in Genome-Wide Association Studies}, + author = {Wang, Shudong and He, Sicheng and Yuan, Fayou and Zhu, Xinjie}, + year = {2017}, + month = jul, + volume = {33}, + pages = {2078--2081}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx151}, + abstract = {Motivation: Effective tagging single-nucleotide polymorphism (SNP)-set selection is crucial to SNP-set analysis in genome-wide association studies (GWAS). Most of the existing tagging SNP-set selection methods cannot make full use of the information hidden in common or rare variants associated diseases. It is noticed that some SNPs have overlapping genetic information owing to linkage disequilibrium (LD) structure between SNPs. Therefore, when testing the association between SNPs and disease susceptibility, it is sufficient to elect the representative SNPs (called tag SNP-set or tagSNP-set) with maximum information.}, + file = {/Users/laurent/Documents/bibliography/SNP/Wang et al. 
- 2017 - Tagging SNP-set selection with maximum information.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {14} +} + +@article{wangTopologicalMethodsVisualization2019, + title = {Topological {{Methods}} for {{Visualization}} and {{Analysis}} of {{High Dimensional Single}}-{{Cell RNA Sequencing Data}}}, + author = {Wang, Tongxin and Johnson, Travis and Zhang, Jie and Huang, Kun}, + year = {2019}, + pages = {12}, + file = {/Users/laurent/Zotero/storage/9JWE8BPU/Wang et al. - 2019 - Topological Methods for Visualization and Analysis.pdf}, + journal = {Pacific Symposium on Biocomputing}, + language = {en} +} + +@article{wangVisualizationAnalysisSinglecell2017, + title = {Visualization and Analysis of Single-Cell {{RNA}}-Seq Data by Kernel-Based Similarity Learning}, + author = {Wang, Bo and Zhu, Junjie and Pierson, Emma and Ramazzotti, Daniele and Batzoglou, Serafim}, + year = {2017}, + month = apr, + volume = {14}, + pages = {414--416}, + issn = {1548-7091, 1548-7105}, + doi = {10.1038/nmeth.4207}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Wang et al. - 2017 - Visualization and analysis of single-cell RNA-seq 2.pdf;/Users/laurent/Documents/bibliography/scRNASeq/Wang et al. - 2017 - Visualization and analysis of single-cell RNA-seq .pdf;/Users/laurent/Zotero/storage/5S6GAI9D/Wang et al. - 2017 - Visualization and analysis of single-cell RNA-seq .pdf;/Users/laurent/Zotero/storage/DURQMTGA/Wang et al. - 2017 - Visualization and analysis of single-cell RNA-seq .pdf;/Users/laurent/Zotero/storage/HX3SB4ST/Wang et al. - 2017 - Visualization and analysis of single-cell RNA-seq .pdf;/Users/laurent/Zotero/storage/KNYR2ER8/Wang et al. - 2017 - Visualization and analysis of single-cell RNA-seq .pdf;/Users/laurent/Zotero/storage/LPEMJ2E4/Wang et al. - 2017 - Visualization and analysis of single-cell RNA-seq .pdf;/Users/laurent/Zotero/storage/PAAE5TQU/Wang et al. 
- 2017 - Visualization and analysis of single-cell RNA-seq .pdf;/Users/laurent/Zotero/storage/X32SUCK4/Wang et al. - 2017 - Visualization and analysis of single-cell RNA-seq .pdf;/Users/laurent/Zotero/storage/WXHQEQ25/nmeth.html}, + journal = {Nature Methods}, + language = {en}, + number = {4} +} + +@article{wangZoomFocusAlgorithmZFA2017, + title = {A {{Zoom}}-{{Focus}} Algorithm ({{ZFA}}) to Locate the Optimal Testing Region for Rare Variant Association Tests}, + author = {Wang, Maggie Haitian and Weng, Haoyi and Sun, Rui and Lee, Jack and Wu, William Ka Kei and Chong, Ka Chun and Zee, Benny Chung-Ying}, + year = {2017}, + month = aug, + volume = {33}, + pages = {2330--2336}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx130}, + abstract = {Motivation: Increasing amounts of whole exome or genome sequencing data present the challenge of analysing rare variants with extremely small minor allele frequencies. Various statistical tests have been proposed, which are specifically configured to increase power for rare variants by conducting the test within a certain bin, such as a gene or a pathway. However, a gene may contain from several to thousands of markers, and not all of them are related to the phenotype. Combining functional and non-functional variants in an arbitrary genomic region could impair the testing power. Results: We propose a Zoom-Focus algorithm (ZFA) to locate the optimal testing region within a given genomic region. It can be applied as a wrapper function in existing rare variant association tests to increase testing power. The algorithm consists of two steps. In the first step, Zooming, a given genomic region is partitioned by an order of two, and the best partition is located. In the second step, Focusing, the boundaries of the zoomed region are refined. Simulation studies showed that ZFA substantially increased the statistical power of rare variants' tests, including the SKAT, SKAT-O, burden test and the W-test. 
The algorithm was applied on real exome sequencing data of hypertensive disorder, and identified biologically relevant genetic markers to metabolic disorders that were undetectable by a gene-based method. The proposed algorithm is an efficient and powerful tool to enhance the power of association study for whole exome or genome sequencing data.}, + file = {/Users/laurent/Documents/bibliography/SNP/Wang et al. - 2017 - A Zoom-Focus algorithm (ZFA) to locate the optimal.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {15} +} + +@article{wanLTMGNovelStatistical2019, + title = {{{LTMG}}: A Novel Statistical Modeling of Transcriptional Expression States in Single-Cell {{RNA}}-{{Seq}} Data}, + shorttitle = {{{LTMG}}}, + author = {Wan, Changlin and Chang, Wennan and Zhang, Yu and Shah, Fenil and Lu, Xiaoyu and Zang, Yong and Zhang, Anru and Cao, Sha and Fishel, Melissa L. and Ma, Qin and Zhang, Chi}, + year = {2019}, + month = oct, + volume = {47}, + pages = {e111-e111}, + issn = {0305-1048}, + doi = {10.1093/nar/gkz655}, + abstract = {Abstract. A key challenge in modeling single-cell RNA-seq data is to capture the diversity of gene expression states regulated by different transcriptional reg}, + file = {/Users/laurent/Zotero/storage/5G2PYC8H/Wan et al. 
- 2019 - LTMG a novel statistical modeling of transcriptio.pdf;/Users/laurent/Zotero/storage/IIJ5LWKA/5542876.html}, + journal = {Nucleic Acids Research}, + language = {en}, + number = {18} +} + +@article{wanSHARPHyperfastAccurate2020, + title = {{{SHARP}}: Hyper-Fast and Accurate Processing of Single-Cell {{RNA}}-Seq Data via Ensemble Random Projection}, + shorttitle = {{{SHARP}}}, + author = {Wan, Shibiao and Kim, Junil and Won, Kyoung Jae}, + year = {2020}, + month = jan, + pages = {gr.254557.119}, + issn = {1088-9051, 1549-5469}, + doi = {10.1101/gr.254557.119}, + abstract = {To process large-scale single-cell RNA-sequencing (scRNA-seq) data effectively without excessive distortion during dimension reduction, we present SHARP, an ensemble random projection-based algorithm which is scalable to clustering 10 million cells. Comprehensive benchmarking tests on 17 public scRNA-seq datasets demonstrate that SHARP outperforms existing methods in terms of speed and accuracy. Particularly, for large-size datasets ({$>$}40,000 cells), SHARP runs faster than other competitors while maintaining high clustering accuracy and robustness. 
To the best of our knowledge, SHARP is the only R-based tool that is scalable to clustering scRNA-seq data with 10 million cells.}, + file = {/Users/laurent/Zotero/storage/9WLPG4UY/gr.254557.119.html}, + journal = {Genome Research}, + language = {en}, + pmid = {31992615} +} + +@article{wanSHARPHyperfastAccurate2020a, + title = {{{SHARP}}: Hyper-Fast and Accurate Processing of Single-Cell {{RNA}}-Seq Data via Ensemble Random Projection}, + shorttitle = {{{SHARP}}}, + author = {Wan, Shibiao and Kim, Junil and Won, Kyoung Jae}, + year = {2020}, + month = jan, + pages = {gr.254557.119}, + issn = {1088-9051, 1549-5469}, + doi = {10.1101/gr.254557.119}, + abstract = {To process large-scale single-cell RNA-sequencing (scRNA-seq) data effectively without excessive distortion during dimension reduction, we present SHARP, an ensemble random projection-based algorithm which is scalable to clustering 10 million cells. Comprehensive benchmarking tests on 17 public scRNA-seq datasets demonstrate that SHARP outperforms existing methods in terms of speed and accuracy. Particularly, for large-size datasets ({$>$}40,000 cells), SHARP runs faster than other competitors while maintaining high clustering accuracy and robustness. To the best of our knowledge, SHARP is the only R-based tool that is scalable to clustering scRNA-seq data with 10 million cells.}, + file = {/Users/laurent/Zotero/storage/2TTKB6XK/login.html}, + journal = {Genome Research}, + language = {en}, + pmid = {31992615} +} + +@article{warwickvesztrocyPrioritisingCandidateGenes2018, + title = {Prioritising Candidate Genes Causing {{QTL}} Using Hierarchical Orthologous Groups}, + author = {Warwick Vesztrocy, Alex and Dessimoz, Christophe and Redestig, Henning}, + year = {2018}, + month = sep, + volume = {34}, + pages = {i612-i619}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty615}, + abstract = {AbstractMotivation. 
A key goal in plant biotechnology applications is the identification of genes associated to particular phenotypic traits (for example: yiel}, + file = {/Users/laurent/Zotero/storage/CUY6YF4N/Warwick Vesztrocy et al. - 2018 - Prioritising candidate genes causing QTL using hie.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{wayBayesianDeepLearning2018, + title = {Bayesian Deep Learning for Single-Cell Analysis}, + author = {Way, Gregory P. and Greene, Casey S.}, + year = {2018}, + month = dec, + volume = {15}, + pages = {1009}, + issn = {1548-7105}, + doi = {10.1038/s41592-018-0230-9}, + abstract = {A recent approach for single-cell RNA-sequencing data uses Bayesian deep learning to correct technical artifacts and enable accurate and multifaceted downstream analyses.}, + copyright = {2018 Springer Nature America, Inc.}, + file = {/Users/laurent/Zotero/storage/X8PMFNNY/Way and Greene - 2018 - Bayesian deep learning for single-cell analysis.pdf;/Users/laurent/Zotero/storage/M858GJBL/s41592-018-0230-9.html}, + journal = {Nature Methods}, + language = {En}, + number = {12} +} + +@article{weberPhysicochemicalAminoAcid2018, + title = {Physicochemical Amino Acid Properties Better Describe Substitution Rates in Large Populations}, + author = {Weber, Claudia C. and Whelan, Simon}, + year = {2018}, + month = aug, + pages = {378893}, + doi = {10.1101/378893}, + abstract = {Substitutions between chemically distant amino acids are known to occur less frequently than those between more similar amino acids. This knowledge, however, is not reflected in most codon substitution models, which treat all non-synonymous changes as if they were equivalent in terms of impact on the protein. A variety of methods for integrating chemical distances into models have been proposed, with a common approach being to divide substitutions into radical or conservative categories. 
Nevertheless, it remains unclear whether the resulting models describe sequence evolution better than their simpler counterparts. We propose a parametric codon model that distinguishes between radical and conservative substitutions, allowing us to assess if radical substitutions are preferentially removed by selection. Applying our new model to a range of phylogenomic data, we find differentiating between radical and conservative substitutions provides significantly better fit for large populations, but see no equivalent improvement for smaller populations. Comparing codon- and amino acid models using these same data shows that alignments from large populations tend to select phylogenetic models containing information about amino acid exchangeabilities, whereas the structure of the genetic code is more important for smaller populations. Our results suggest selection against radical substitutions is, on average, more pronounced in large populations than smaller ones. The reduced observable effect of selection in smaller populations may be due to stronger genetic drift making it more challenging to detect preferences. Our results imply an important connection between the life history of a phylogenetic group and the model that best describes its evolution.}, + copyright = {\textcopyright{} 2018, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/}, + file = {/Users/laurent/Zotero/storage/S8XPM5N7/Weber and Whelan - 2018 - Physicochemical amino acid properties better descr.pdf;/Users/laurent/Zotero/storage/P5UX6UE4/378893.html}, + journal = {bioRxiv}, + language = {en} +} + +@article{weckCorrectionCopyNumber2018, + title = {Correction of Copy Number Induced False Positives in {{CRISPR}} Screens}, + author = {de Weck, Antoine and Golji, Javad and Jones, Michael D. and Korn, Joshua M. and Billy, Eric and McDonald, III, E. 
Robert and Schmelzle, Tobias and Bitter, Hans and Kauffmann, Audrey}, + year = {2018}, + month = jul, + volume = {14}, + pages = {e1006279}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1006279}, + abstract = {Cell autonomous cancer dependencies are now routinely identified using CRISPR loss-of-function viability screens. However, a bias exists that makes it difficult to assess the true essentiality of genes located in amplicons, since the entire amplified region can exhibit lethal scores. These false-positive hits can either be discarded from further analysis, which in cancer models can represent a significant number of hits, or methods can be developed to rescue the true-positives within amplified regions. We propose two methods to rescue true positive hits in amplified regions by correcting for this copy number artefact. The Local Drop Out (LDO) method uses the relative lethality scores within genomic regions to assess true essentiality and does not require additional orthogonal data (e.g. copy number value). LDO is meant to be used in screens covering a dense region of the genome (e.g. a whole chromosome or the whole genome). The General Additive Model (GAM) method models the screening data as a function of the known copy number values and removes the systematic effect from the measured lethality. GAM does not require the same density as LDO, but does require prior knowledge of the copy number values. Both methods have been developed with single sample experiments in mind so that the correction can be applied even in smaller screens. Here we demonstrate the efficacy of both methods at removing the copy number effect and rescuing hits from some of the amplified regions. We estimate a 70\textendash{}80\% decrease of false positive hits with either method in regions of high copy number compared to no correction.}, + file = {/Users/laurent/Zotero/storage/T22UH6VC/Weck et al. 
- 2018 - Correction of copy number induced false positives .pdf;/Users/laurent/Zotero/storage/U2QHCVDU/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Cancer screening,CRISPR,Decision trees,Gene amplification,Genetic screens,Genomic library screening,Library screening,Screening guidelines}, + language = {en}, + number = {7} +} + +@article{weichselbaumFuentoFunctionalEnrichment2017, + title = {Fuento: Functional Enrichment for Bioinformatics}, + shorttitle = {Fuento}, + author = {Weichselbaum, David and Zagrovic, Bojan and Polyansky, Anton A.}, + year = {2017}, + month = aug, + volume = {33}, + pages = {2604--2606}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx179}, + abstract = {Summary: The currently available functional enrichment software focuses mostly on gene expression analysis, whereby server- and graphical-user-interface-based tools with specific scope dominate the field. Here we present an efficient, user-friendly, multifunctional commandline-based functional enrichment tool (fu-en-to), tailored for the bioinformatics researcher.}, + file = {/Users/laurent/Documents/bibliography/annotation/Weichselbaum et al. - 2017 - Fuento functional enrichment for bioinformatics.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {16} +} + +@article{weiGeneralizedAssociationTest2017, + title = {A Generalized Association Test Based on {{U}} Statistics}, + author = {Wei, Changshuai and Lu, Qing}, + year = {2017}, + month = jul, + volume = {33}, + pages = {1963--1971}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx103}, + abstract = {Motivation: Second generation sequencing technologies are being increasingly used for genetic association studies, where the main research interest is to identify sets of genetic variants that contribute to various phenotypes. The phenotype can be univariate disease status, multivariate responses and even high-dimensional outcomes. 
Considering the genotype and phenotype as two complex objects, this also poses a general statistical problem of testing association between complex objects.}, + file = {/Users/laurent/Documents/bibliography/to_read/Wei and Lu - 2017 - A generalized association test based on U statisti.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {13} +} + +@article{weinrebFundamentalLimitsDynamic2017, + title = {Fundamental Limits on Dynamic Inference from Single Cell Snapshots}, + author = {Weinreb, Caleb and Wolock, Samuel and Tusi, Betsabeh K. and Socolovsky, Merav and Klein, Allon M.}, + year = {2017}, + month = aug, + doi = {10.1101/170118}, + abstract = {Single cell profiling methods are powerful tools for dissecting the molecular states of cells, but the destructive nature of these methods has made it difficult to measure single cell expression over time. When cell dynamics are asynchronous, they can form a continuous manifold in gene expression space whose structure is thought to encode the trajectory of a typical cell. This insight has spurred a proliferation of methods for single cell trajectory discovery that have successfully ordered cell states and identified differentiation branch-points. However, all attempts to infer dynamics from static snapshots of cell state face a common limitation: for any measured distribution of cells in high dimensional state space, there are multiple dynamics that could give rise to it, and by extension, multiple possibilities for underlying mechanisms of gene regulation. Here, we enumerate from first principles the aspects of gene expression dynamics that cannot be inferred from a static snapshot alone, but nonetheless have a profound influence on temporal ordering and fate probabilities of cells. 
On the basis of these unknowns, we identify assumptions necessary to constrain a unique solution for the dynamics and translate these constraints into a practical algorithmic approach, called Population Balance Analysis (PBA). At its core, PBA invokes a new method based on spectral graph theory for solving a certain class of high dimensional differential equation. We show the strengths and limitations of PBA using simulations and validate its accuracy on single cell profiles of hematopoietic progenitor cells. Altogether, these results provide a rigorous basis for dynamic interpretation of a gene expression continuum, and the pitfalls facing any method of dynamic inference. In doing so they clarify experimental designs to minimize these shortfalls.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Weinreb et al. - 2017 - Fundamental limits on dynamic inference from singl.pdf;/Users/laurent/Zotero/storage/64ZA469U/Weinreb et al. - 2017 - Fundamental limits on dynamic inference from singl.pdf;/Users/laurent/Zotero/storage/HHN5ATNW/Weinreb et al. - 2017 - Fundamental limits on dynamic inference from singl.pdf;/Users/laurent/Zotero/storage/XISFGRK5/Weinreb et al. - 2017 - Fundamental limits on dynamic inference from singl.pdf}, + language = {en} +} + +@article{weinrebSPRINGKineticInterface2018, + title = {{{SPRING}}: A Kinetic Interface for Visualizing High Dimensional Single-Cell Expression Data}, + shorttitle = {{{SPRING}}}, + author = {Weinreb, Caleb and Wolock, Samuel and Klein, Allon M}, + year = {2018}, + month = apr, + volume = {34}, + pages = {1246--1248}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx792}, + abstract = {Motivation: Single-cell gene expression profiling technologies can map the cell states in a tissue or organism. As these technologies become more common, there is a need for computational tools to explore the data they produce. 
In particular, visualizing continuous gene expression topologies can be improved, since current tools tend to fragment gene expression continua or capture only limited features of complex population topologies.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Weinreb et al. - 2018 - SPRING a kinetic interface for visualizing high d.pdf;/Users/laurent/Zotero/storage/96B6GN6C/Weinreb et al. - 2018 - SPRING a kinetic interface for visualizing high d.pdf;/Users/laurent/Zotero/storage/M7CGVZYE/Weinreb et al. - 2018 - SPRING a kinetic interface for visualizing high d.pdf;/Users/laurent/Zotero/storage/SPCUY8RW/Weinreb et al. - 2018 - SPRING a kinetic interface for visualizing high d.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {7} +} + +@article{weiZislandExplorerDetect2016, + title = {Zisland {{Explorer}}: Detect Genomic Islands by Combining Homogeneity and Heterogeneity Properties}, + shorttitle = {Zisland {{Explorer}}}, + author = {Wei, Wen and Gao, Feng and Du, Meng-Ze and Hua, Hong-Li and Wang, Ju and Guo, Feng-Biao}, + year = {2016}, + month = mar, + pages = {bbw019}, + issn = {1467-5463, 1477-4054}, + doi = {10.1093/bib/bbw019}, + abstract = {Genomic islands are genomic fragments of alien origin in bacterial and archaeal genomes, usually involved in symbiosis or pathogenesis. In this work, we described Zisland Explorer, a novel tool to predict genomic islands based on the segmental cumulative GC profile. Zisland Explorer was designed with a novel strategy, as well as a combination of the homogeneity and heterogeneity of genomic sequences. While the sequence homogeneity reflects the composition consistence within each island, the heterogeneity measures the composition bias between an island and the core genome. The performance of Zisland Explorer was evaluated on the data sets of 11 different organisms. 
Our results suggested that the true-positive rate (TPR) of Zisland Explorer was at least 10.3\% higher than that of four other widely used tools. On the other hand, the new tool did not lose overall accuracy with the improvement in the TPR and showed better equilibrium among various evaluation indexes. Also, Zisland Explorer showed better accuracy in the prediction of experimental island data. Overall, the tool provides an alternative solution over other tools, which expands the field of island prediction and offers a supplement to increase the performance of the distinct predicting strategy. We have provided a web service as well as a graphical user interface and open-source code across multiple platforms for Zisland Explorer, which is available at http://cefg.uestc.edu.cn/Zisland\_Explorer/ or http://tubic.tju.edu.cn/Zisland\_Explorer/.}, + file = {/Users/laurent/Documents/bibliography/to_read/Wei et al. - 2016 - Zisland Explorer detect genomic islands by combin.pdf}, + journal = {Briefings in Bioinformatics}, + language = {en} +} + +@article{wenMethodologicalImplementationMixed2018, + title = {Methodological Implementation of Mixed Linear Models in Multi-Locus Genome-Wide Association Studies}, + author = {Wen, Yang-Jun and Zhang, Hanwen and Ni, Yuan-Li and Huang, Bo and Zhang, Jin and Feng, Jian-Ying and Wang, Shi-Bo and Dunwell, Jim M. and Zhang, Yuan-Ming and Wu, Rongling}, + year = {2018}, + month = jul, + volume = {19}, + pages = {700--712}, + issn = {1467-5463}, + doi = {10.1093/bib/bbw145}, + abstract = {Abstract. The mixed linear model has been widely used in genome-wide association studies (GWAS), but its application to multi-locus GWAS analysis has not been}, + file = {/Users/laurent/Zotero/storage/JV2MJWKP/Wen et al. 
- 2018 - Methodological implementation of mixed linear mode.pdf;/Users/laurent/Zotero/storage/TNAYL79K/2965637.html}, + journal = {Briefings in Bioinformatics}, + language = {en}, + number = {4} +} + +@article{wenMethodsHandlingLongitudinal2018, + title = {Methods for Handling Longitudinal Outcome Processes Truncated by Dropout and Death}, + author = {Wen, Lan and Terrera, Graciela Muniz and Seaman, Shaun R.}, + year = {2018}, + month = oct, + volume = {19}, + pages = {407--425}, + issn = {1465-4644}, + doi = {10.1093/biostatistics/kxx045}, + abstract = {SUMMARY. Cohort data are often incomplete because some subjects drop out of the study, and inverse probability weighting (IPW), multiple imputation (MI), and l}, + file = {/Users/laurent/Zotero/storage/4NJ7K5BE/Wen et al. - 2018 - Methods for handling longitudinal outcome processe.pdf;/Users/laurent/Zotero/storage/VJMWF3WW/4237504.html}, + journal = {Biostatistics}, + language = {en}, + number = {4} +} + +@article{wilsonBestPracticesScientific2014, + title = {Best {{Practices}} for {{Scientific Computing}}}, + author = {Wilson, Greg and Aruliah, D. A. and Brown, C. Titus and Chue Hong, Neil P. and Davis, Matt and Guy, Richard T. and Haddock, Steven H. D. and Huff, Kathryn D. and Mitchell, Ian M. and Plumbley, Mark D. and Waugh, Ben and White, Ethan P. and Wilson, Paul}, + editor = {Eisen, Jonathan A.}, + year = {2014}, + month = jan, + volume = {12}, + pages = {e1001745}, + issn = {1545-7885}, + doi = {10.1371/journal.pbio.1001745}, + file = {/Users/laurent/Documents/bibliography/bioinfo/documentation/Wilson et al. - 2014 - Best Practices for Scientific Computing.pdf;/Users/laurent/Documents/bibliography/bioinfo/Wilson et al. 
- 2014 - Best Practices for Scientific Computing.pdf}, + journal = {PLoS Biology}, + language = {en}, + number = {1} +} + +@article{wilsonGoodEnoughPractices2017, + title = {Good Enough Practices in Scientific Computing}, + author = {Wilson, Greg and Bryan, Jennifer and Cranston, Karen and Kitzes, Justin and Nederbragt, Lex and Teal, Tracy K.}, + editor = {Ouellette, Francis}, + year = {2017}, + month = jun, + volume = {13}, + pages = {e1005510}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005510}, + abstract = {We present a set of computing tools and techniques that every researcher can and should adopt. These recommendations synthesize inspiration from our own work, from the experiences of the thousands of people who have taken part in Software Carpentry and Data Carpentry workshops over the past six years, and from a variety of other guides. Unlike some other guides, our recommendations are aimed specifically at people who are new to research computing.}, + file = {/Users/laurent/Documents/bibliography/bioinfo/documentation/Wilson et al. - 2017 - Good enough practices in scientific computing.pdf;/Users/laurent/Documents/bibliography/bioinfo/Wilson et al. - 2017 - Good enough practices in scientific computing.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {6} +} + +@article{wilsonTenQuickTips2019, + title = {Ten Quick Tips for Delivering Programming Lessons}, + author = {Wilson, Greg}, + year = {2019}, + month = oct, + volume = {15}, + pages = {e1007433}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1007433}, + abstract = {Teaching well is a craft like any other, and success often comes from an accumulation of small improvements rather than from any single large change. This paper describes 10 practices you can use when teaching programming (and other subjects). 
All are easy to adopt and have proven their value in institutional classrooms, intensive workshops, and other settings.}, + file = {/Users/laurent/Zotero/storage/NTTTJZ4Y/Wilson - 2019 - Ten quick tips for delivering programming lessons.pdf;/Users/laurent/Zotero/storage/NWFXFSNI/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Computer software,Human learning,Instructors,Language,Learning,Memory recall,Teachers,Workshops}, + language = {en}, + number = {10} +} + +@article{wilsonTenQuickTips2019a, + title = {Ten Quick Tips for Creating an Effective Lesson}, + author = {Wilson, Greg}, + year = {2019}, + month = apr, + volume = {15}, + pages = {e1006915}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1006915}, + abstract = {We present 10 tips for building effective lessons that are grounded in empirical research on pedagogy and cognitive psychology and that we have found to be practically useful in both classroom and free-range settings}, + file = {/Users/laurent/Zotero/storage/V5AZJMXS/Wilson - 2019 - Ten quick tips for creating an effective lesson.pdf;/Users/laurent/Zotero/storage/BHRIFEZY/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Cognitive linguistics,Human learning,Instructors,Language acquisition,Learning,Memory recall,Neurolinguistics,Vision}, + language = {en}, + number = {4} +} + +@article{wolfSCANPYLargescaleSinglecell2018, + title = {{{SCANPY}}: Large-Scale Single-Cell Gene Expression Data Analysis}, + shorttitle = {{{SCANPY}}}, + author = {Wolf, F. Alexander and Angerer, Philipp and Theis, Fabian J.}, + year = {2018}, + month = feb, + volume = {19}, + pages = {15}, + issn = {1474-760X}, + doi = {10.1186/s13059-017-1382-0}, + abstract = {Scanpy is a scalable toolkit for analyzing single-cell gene expression data. It includes methods for preprocessing, visualization, clustering, pseudotime and trajectory inference, differential expression testing, and simulation of gene regulatory networks. 
Its Python-based implementation efficiently deals with data sets of more than one million cells (https://github.com/theislab/Scanpy). Along with Scanpy, we present AnnData, a generic class for handling annotated data matrices (https://github.com/theislab/anndata).}, + file = {/Users/laurent/Zotero/storage/H5BTDND8/Wolf et al. - 2018 - SCANPY large-scale single-cell gene expression da.pdf;/Users/laurent/Zotero/storage/XNS8JNCF/s13059-017-1382-0.html}, + journal = {Genome Biology}, + number = {1} +} + +@article{wrightConvergentRecombinationSuppression2017, + title = {Convergent Recombination Suppression Suggests Role of Sexual Selection in Guppy Sex Chromosome Formation}, + author = {Wright, Alison E. and Darolti, Iulia and Bloch, Natasha I. and Oostra, Vicencio and Sandkam, Ben and Buechel, Severine D. and Kolm, Niclas and Breden, Felix and Vicoso, Beatriz and Mank, Judith E.}, + year = {2017}, + month = jan, + volume = {8}, + pages = {14251}, + issn = {2041-1723}, + doi = {10.1038/ncomms14251}, + file = {/Users/laurent/Documents/bibliography/SNP/Wright et al. - 2017 - Convergent recombination suppression suggests role.pdf}, + journal = {Nature Communications}, + language = {en} +} + +@article{wuComputationalStrategyAdjust2016, + title = {A Computational Strategy to Adjust for Copy Number in Tumor {{Hi}}-{{C}} Data}, + author = {Wu, Hua-Jun and Michor, Franziska}, + year = {2016}, + month = dec, + volume = {32}, + pages = {3695--3701}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btw540}, + abstract = {Motivation: The Hi-C technology was designed to decode the three-dimensional conformation of the genome. Despite progress towards more and more accurate contact maps, several systematic biases have been demonstrated to affect the resulting data matrix. Here we report a new source of bias that can arise in tumor Hi-C data, which is related to the copy number of genomic DNA. 
To address this bias, we designed a chromosome-adjusted iterative correction method called caICB. Our caICB correction method leads to significant improvements when compared to the original iterative correction in terms of eliminating copy number bias.}, + file = {/Users/laurent/Documents/bibliography/bioinfo/documentation/Wu and Michor - 2016 - A computational strategy to adjust for copy number.pdf;/Users/laurent/Documents/bibliography/Hi-C/Wu and Michor - 2016 - A computational strategy to adjust for copy number.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {24} +} + +@article{wuEstimatingErrorModels2017, + title = {Estimating Error Models for Whole Genome Sequencing Using Mixtures of {{Dirichlet}}-Multinomial Distributions}, + author = {Wu, Steven H. and Schwartz, Rachel S. and Winter, David J. and Conrad, Donald F. and Cartwright, Reed A.}, + year = {2017}, + month = aug, + volume = {33}, + pages = {2322--2329}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx133}, + abstract = {Motivation: Accurate identification of genotypes is an essential part of the analysis of genomic data, including in identification of sequence polymorphisms, linking mutations with disease and determining mutation rates. Biological and technical processes that adversely affect genotyping include copy-number-variation, paralogous sequences, library preparation, sequencing error and reference-mapping biases, among others.}, + file = {/Users/laurent/Documents/bibliography/SNP/Wu et al. 
- 2017 - Estimating error models for whole genome sequencin.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {15} +} + +@article{wuTwophaseDifferentialExpression2018, + title = {Two-Phase Differential Expression Analysis for Single Cell {{RNA}}-Seq}, + author = {Wu, Zhijin and Zhang, Yi and Stitzel, Michael L and Wu, Hao}, + editor = {Berger, Bonnie}, + year = {2018}, + month = oct, + volume = {34}, + pages = {3340--3348}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/bty329}, + abstract = {Motivation: Single-cell RNA-sequencing (scRNA-seq) has brought the study of the transcriptome to higher resolution and makes it possible for scientists to provide answers with more clarity to the question of `differential expression'. However, most computational methods still stick with the old mentality of viewing differential expression as a simple `up or down' phenomenon. We advocate that we should fully embrace the features of single cell data, which allows us to observe binary (from Off to On) as well as continuous (the amount of expression) regulations.}, + file = {/Users/laurent/Zotero/storage/GW4UTJUU/Wu et al. - 2018 - Two-phase differential expression analysis for sin.pdf;/Users/laurent/Zotero/storage/GXQPG93M/Wu et al. - 2018 - Two-phase differential expression analysis for sin.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {19} +} + +@article{wuVisualizingSinglecellRNAseq2018, + title = {Visualizing Single-Cell {{RNA}}-Seq Datasets with {{Similarity Weighted Nonnegative Embedding}}}, + author = {Wu, Yan and Tamayo, Pablo and Zhang, Kun}, + year = {2018}, + month = apr, + doi = {10.1101/276261}, + abstract = {High throughput single-cell RNA-seq (scRNA-seq) has enabled the discovery of novel cell types, the identification of trajectories during development, and the characterization of responses to genetic perturbations. 
The most popular visualization method for scRNA-seq is t-Stochastic Neighbor embedding (t-SNE), which accurately captures the local structure of datasets, but often distorts global structure, such as distances between clusters. We developed a method for visualization and interpretation of scRNA-seq datasets, Similarity Weighted Nonnegative Embedding (SWNE), which captures both the global and local structure of the data, and enables relevant biological information to be embedded directly onto the visualization. SWNE uses nonnegative matrix factorization (NMF) to decompose the gene expression matrix into biologically relevant factors, embeds both the cells and the factors in a two dimensional visualization, and uses a similarity matrix to ensure that cells which are close in the original gene expression space are also close in the visualization. The embedded biological factors can be interpreted via their gene loadings, while SWNE can also embed genes onto the visualization directly, further enhancing biological interpretation. We demonstrate SWNE's ability to visualize and facilitate interpretation of hematopoietic progenitors and neuronal cells from the human visual cortex and cerebellum. The SWNE R package and the scripts used for this paper can be found at: https://github.com/yanwu2014/swne.}, + file = {/Users/laurent/Documents/bibliography/to_read/Wu et al. - 2018 - Visualizing single-cell RNA-seq datasets with Simi.pdf}, + language = {en} +} + +@article{xiaoAccuratePowerfulMethod2019, + title = {An Accurate and Powerful Method for Copy Number Variation Detection}, + author = {Xiao, Feifei and Luo, Xizhi and Hao, Ning and Niu, Yue S. and Xiao, Xiangjun and Cai, Guoshuai and Amos, Christopher I. and Zhang, Heping}, + year = {2019}, + month = sep, + volume = {35}, + pages = {2891--2898}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty1041}, + abstract = {Motivation:
Integration of multiple genetic sources for copy number variation detection (CNV) is a powerful approach to improve the identification of v}, + file = {/Users/laurent/Zotero/storage/PPYE4435/5288773.html}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{xieDynamicTransActingFactor2013, + title = {Dynamic Trans-{{Acting Factor Colocalization}} in {{Human Cells}}}, + author = {Xie, Dan and Boyle, Alan P. and Wu, Linfeng and Zhai, Jie and Kawli, Trupti and Snyder, Michael}, + year = {2013}, + month = oct, + volume = {155}, + pages = {713--724}, + issn = {0092-8674, 1097-4172}, + doi = {10.1016/j.cell.2013.09.043}, + abstract = {Different \emph{trans}-acting factors (TFs) collaborate and act in concert at distinct loci to perform accurate regulation of their target genes. To date, the cobinding of TF pairs has been investigated in a limited context both in terms of the number of factors within a cell type and across cell types and the extent of combinatorial colocalizations. Here, we use an approach to analyze TF colocalization within a cell type and across multiple cell lines at an unprecedented level. We extend this approach with large-scale mass spectrometry analysis of immunoprecipitations of 50 TFs. Our combined approach reveals large numbers of interesting TF-TF associations. We observe extensive change in TF colocalizations both within a cell type exposed to different conditions and across multiple cell types. We show distinct functional annotations and properties of different TF cobinding patterns and provide insights into the complex regulatory landscape of the cell.}, + file = {/Users/laurent/Zotero/storage/A4SL5Q82/Xie et al.
- 2013 - Dynamic trans-Acting Factor Colocalization in Huma.pdf;/Users/laurent/Zotero/storage/DETDVP8Z/S0092-8674(13)01217-8.html}, + journal = {Cell}, + language = {en}, + number = {3}, + pmid = {24243024} +} + +@article{xieItTimeApply2018, + title = {It Is Time to Apply Biclustering: A Comprehensive Review of Biclustering Applications in Biological and Biomedical Data}, + shorttitle = {It Is Time to Apply Biclustering}, + author = {Xie, Juan and Ma, Anjun and Fennell, Anne and Ma, Qin and Zhao, Jing}, + year = {2018}, + month = feb, + issn = {1467-5463, 1477-4054}, + doi = {10.1093/bib/bby014}, + abstract = {Biclustering is a powerful data mining technique that allows clustering of rows and columns, simultaneously, in a matrix-format data set. It was first applied to gene expression data in 2000, aiming to identify co-expressed genes under a subset of all the conditions/samples. During the past 17 years, tens of biclustering algorithms and tools have been developed to enhance the ability to make sense out of large data sets generated in the wake of high-throughput omics technologies. These algorithms and tools have been applied to a wide variety of data types, including but not limited to, genomes, transcriptomes, exomes, epigenomes, phenomes and pharmacogenomes. However, there is still a considerable gap between biclustering methodology development and comprehensive data interpretation, mainly because of the lack of knowledge for the selection of appropriate biclustering tools and further supporting computational techniques in specific studies. Here, we first deliver a brief introduction to the existing biclustering algorithms and tools in public domain, and then systematically summarize the basic applications of biclustering for biological data and more advanced applications of biclustering for biomedical data.
This review will assist researchers to effectively analyze their big data and generate valuable biological knowledge and novel insights with higher efficiency.}, + file = {/Users/laurent/Documents/bibliography/to_read/Xie et al. - 2018 - It is time to apply biclustering a comprehensive .pdf}, + journal = {Briefings in Bioinformatics}, + language = {en} +} + +@article{xieQUBIC2NovelRobust, + title = {{{QUBIC2}}: A Novel and Robust Biclustering Algorithm for Analyses and Interpretation of Large-Scale {{RNA}}-{{Seq}} Data}, + shorttitle = {{{QUBIC2}}}, + author = {Xie, Juan and Ma, Anjun and Zhang, Yu and Liu, Bingqiang and Cao, Sha and Wang, Cankun and Xu, Jennifer and Zhang, Chi and Ma, Qin}, + doi = {10.1093/bioinformatics/btz692}, + abstract = {AbstractMotivation. The biclustering of large-scale gene expression data holds promising potential for detecting condition-specific functional gene modules (i.}, + file = {/Users/laurent/Zotero/storage/AGZHXWKQ/Xie et al. - QUBIC2 a novel and robust biclustering algorithm .pdf;/Users/laurent/Zotero/storage/FLRM8SK9/5567116.html}, + journal = {Bioinformatics}, + language = {en} +} + +@article{xieQUBIC2NovelRobust2020, + title = {{{QUBIC2}}: A Novel and Robust Biclustering Algorithm for Analyses and Interpretation of Large-Scale {{RNA}}-{{Seq}} Data}, + shorttitle = {{{QUBIC2}}}, + author = {Xie, Juan and Ma, Anjun and Zhang, Yu and Liu, Bingqiang and Cao, Sha and Wang, Cankun and Xu, Jennifer and Zhang, Chi and Ma, Qin}, + year = {2020}, + month = feb, + volume = {36}, + pages = {1143--1149}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz692}, + abstract = {AbstractMotivation. The biclustering of large-scale gene expression data holds promising potential for detecting condition-specific functional gene modules (i.}, + file = {/Users/laurent/Zotero/storage/E2E7AXQJ/Xie et al. 
- 2020 - QUBIC2 a novel and robust biclustering algorithm .pdf;/Users/laurent/Zotero/storage/5HZG8YM2/5567116.html}, + journal = {Bioinformatics}, + language = {en}, + number = {4} +} + +@article{xiongSCALEMethodSinglecell2019, + title = {{{SCALE}} Method for Single-Cell {{ATAC}}-Seq Analysis via Latent Feature Extraction}, + author = {Xiong, Lei and Xu, Kui and Tian, Kang and Shao, Yanqiu and Tang, Lei and Gao, Ge and Zhang, Michael and Jiang, Tao and Zhang, Qiangfeng Cliff}, + year = {2019}, + month = oct, + volume = {10}, + pages = {1--10}, + issn = {2041-1723}, + doi = {10.1038/s41467-019-12630-7}, + abstract = {Single-cell ATAC-seq data is challenging to analyse for reasons such as high dimensionality and sparsity. Here, the authors develop SCALE, a deep learning method that leverages latent feature extraction for various tasks of scATACseq data analysis.}, + copyright = {2019 The Author(s)}, + file = {/Users/laurent/Zotero/storage/Z5HQSHHH/Xiong et al. - 2019 - SCALE method for single-cell ATAC-seq analysis via.pdf;/Users/laurent/Zotero/storage/PKRG4JZW/s41467-019-12630-7.html}, + journal = {Nature Communications}, + language = {en}, + number = {1} +} + +@article{xuDeepConvolutionalNeural2017, + title = {A Deep Convolutional Neural Network for Classification of Red Blood Cells in Sickle Cell Anemia}, + author = {Xu, Mengjia and Papageorgiou, Dimitrios P. and Abidi, Sabia Z. and Dao, Ming and Zhao, Hong and Karniadakis, George Em}, + editor = {Nie, Qing}, + year = {2017}, + month = oct, + volume = {13}, + pages = {e1005746}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1005746}, + file = {/Users/laurent/Documents/bibliography/to_read/Xu et al. 
- 2017 - A deep convolutional neural network for classifica.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {10} +} + +@article{xuGenotypefreeDemultiplexingPooled2019, + title = {Genotype-Free Demultiplexing of Pooled Single-Cell {{RNA}}-Seq}, + author = {Xu, Jun and Falconer, Caitlin and Coin, Lachlan}, + year = {2019}, + month = mar, + doi = {10.1101/570614}, + abstract = {A variety of experimental and computational methods have been developed to demultiplex samples from different individuals mixed in a single-cell RNA sequencing (scRNA-seq) experiment. However, these methods all require extra information that is either added to samples (such as sample barcode) or measured from samples prior to mixing (such as genome-wide genotypes). We introduce an alternative approach, in which genetic differences between mixed samples are inferred directly from scRNA-seq data without extra information and these differences are used to assign single cells to samples. Our method also identifies a minimal set of presence/absence genotypes which can be used to map samples to their biological source, or to track samples between experiments. We tested our tool ``scSplit'' on different real and simulated datasets and achieved high true positive rate ({$>$} 90\%) in mapping cells back to their original source. Doublets can also be sensitively detected as an independent group. Our method is ideally suited to samples for which external genome-wide genotype data cannot be obtained (for example for non-model organisms), or for which it is impossible to obtain unmixed samples directly, such as mixtures of genetically distinct tumour cells, or mixed infections.}, + file = {/Users/laurent/Zotero/storage/RGB65LSR/Xu et al. - 2019 - Genotype-free demultiplexing of pooled single-cell.pdf;/Users/laurent/Zotero/storage/XMMBN4WN/Xu et al.
- 2019 - Genotype-free demultiplexing of pooled single-cell.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@techreport{xuSinglecellRNAseqImputation2020, + title = {Single-Cell {{RNA}}-Seq {{Imputation}} Using {{Generative Adversarial Networks}}}, + author = {Xu, Yungang and Zhang, Zhigang and You, Lei and Liu, Jiajia and Fan, Zhiwei and Zhou, Xiaobo}, + year = {2020}, + month = jan, + institution = {{Bioinformatics}}, + doi = {10.1101/2020.01.20.913384}, + abstract = {Abstract + Single-cell RNA-seq (scRNA-seq) enables the characterization of transcriptomic profiles at the single-cell resolution with increasingly high throughput. However, it suffers from many sources of technical noises, including insufficient mRNA molecules that lead to excess false zero values, often termed dropouts. Computational approaches have been proposed to recover the biologically meaningful expression by borrowing information from similar cells in the observed dataset. However, these methods suffer oversmoothing and removal of natural cell-to-cell stochasticity in gene expression. Here, we propose the generative adversarial networks for scRNA-seq imputation (scIGANs), which uses generated realistic rather than observed cells to avoid these limitations and the powerless for rare cells. Evaluations based on a variety of simulated and real scRNA-seq datasets demonstrate that scIGANs is effective for dropout imputation and enhancing various downstream analysis. ScIGANs is also scalable and robust to small datasets that have few genes with low expression and/or cell-to-cell variance.}, + file = {/Users/laurent/Zotero/storage/DXKVC9TF/Xu et al. 
- 2020 - Single-cell RNA-seq Imputation using Generative Ad.pdf}, + language = {en}, + type = {Preprint} +} + +@article{xuSmCounter2AccurateLowfrequency2019, + title = {{{smCounter2}}: An Accurate Low-Frequency Variant Caller for Targeted Sequencing Data with Unique Molecular Identifiers}, + shorttitle = {{{smCounter2}}}, + author = {Xu, Chang and Gu, Xiujing and Padmanabhan, Raghavendra and Wu, Zhong and Peng, Quan and DiCarlo, John and Wang, Yexun}, + editor = {Birol, Inanc}, + year = {2019}, + month = apr, + volume = {35}, + pages = {1299--1309}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/bty790}, + abstract = {Motivation: Low-frequency DNA mutations are often confounded with technical artifacts from sample preparation and sequencing. With unique molecular identifiers (UMIs), most of the sequencing errors can be corrected. However, errors before UMI tagging, such as DNA polymerase errors during end repair and the first PCR cycle, cannot be corrected with single-strand UMIs and impose fundamental limits to UMI-based variant calling.}, + file = {/Users/laurent/Zotero/storage/255VKEYB/Xu et al. - 2019 - smCounter2 an accurate low-frequency variant call.pdf;/Users/laurent/Zotero/storage/7TRAP46M/Xu et al. - 2019 - smCounter2 an accurate low-frequency variant call.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {8} +} + +@article{yangHiCRepAssessingReproducibility2017, + title = {{{HiCRep}}: Assessing the Reproducibility of {{Hi}}-{{C}} Data Using a Stratum-Adjusted Correlation Coefficient}, + shorttitle = {{{HiCRep}}}, + author = {Yang, Tao and Zhang, Feipeng and Yard{\i}mc{\i}, Galip G{\"u}rkan and Song, Fan and Hardison, Ross C. and Noble, William Stafford and Yue, Feng and Li, Qunhua}, + year = {2017}, + month = nov, + volume = {27}, + pages = {1939--1949}, + issn = {1088-9051, 1549-5469}, + doi = {10.1101/gr.220640.117}, + abstract = {Hi-C is a powerful technology for studying genome-wide chromatin interactions.
However, current methods for assessing Hi-C data reproducibility can produce misleading results because they ignore spatial features in Hi-C data, such as domain structure and distance dependence. We present HiCRep, a framework for assessing the reproducibility of Hi-C data that systematically accounts for these features. In particular, we introduce a novel similarity measure, the stratum adjusted correlation coefficient (SCC), for quantifying the similarity between Hi-C interaction matrices. Not only does it provide a statistically sound and reliable evaluation of reproducibility, SCC can also be used to quantify differences between Hi-C contact matrices and to determine the optimal sequencing depth for a desired resolution. The measure consistently shows higher accuracy than existing approaches in distinguishing subtle differences in reproducibility and depicting interrelationships of cell lineages. The proposed measure is straightforward to interpret and easy to compute, making it well-suited for providing standardized, interpretable, automatable, and scalable quality control. The freely available R package HiCRep implements our approach.}, + file = {/Users/laurent/Zotero/storage/D8IQB82P/Yang et al. 
- 2017 - HiCRep assessing the reproducibility of Hi-C data.pdf;/Users/laurent/Zotero/storage/IW3II7IQ/1939.html}, + journal = {Genome Research}, + language = {en}, + number = {11}, + pmid = {28855260} +} + +@article{yangInteractionLafutidineBinding2016, + title = {Interaction of Lafutidine in Binding to Human Serum Albumin in Gastric Ulcer Therapy: {{STD}}-{{NMR}}, {{WaterLOGSY}}-{{NMR}}, {{NMR}} Relaxation Times, {{Tr}}-{{NOESY}}, Molecule Docking, and Spectroscopic Studies}, + shorttitle = {Interaction of Lafutidine in Binding to Human Serum Albumin in Gastric Ulcer Therapy}, + author = {Yang, Hongqin and Huang, Yanmei and He, Jiawei and Li, Shanshan and Tang, Bin and Li, Hui}, + year = {2016}, + volume = {606}, + pages = {81--89}, + issn = {1096-0384}, + doi = {10.1016/j.abb.2016.07.016}, + abstract = {In this study, lafutidine (LAF) was used as a model compound to investigate the binding mechanism between antiulcer drugs and human serum albumin (HSA) through various techniques, including STD-NMR, WaterLOGSY-NMR, $^{1}$H NMR relaxation times, tr-NOESY, molecule docking calculation, FT-IR spectroscopy, and CD spectroscopy. The analyses of STD-NMR, which derived relative STD (\%) intensities, and WaterLOGSY-NMR, determined that LAF bound to HSA. In particular, the pyridyl group of LAF was in close contact with HSA binding pocket, whereas furyl group had a secondary binding. Competitive STD-NMR and WaterLOGSY-NMR experiments, with warfarin and ibuprofen as site-selective probes, indicated that LAF preferentially bound to site II in the hydrophobic subdomains IIIA of HSA. The bound conformation of LAF at the HSA binding site was further elucidated by transferred NOE effect (tr-NOESY) experiment. Relaxation experiments provided quantitative information about the relationship between the affinity and structure of LAF. 
The molecule docking simulations conducted with AutoDock and the restraints derived from STD results led to three-dimensional models that were consistent with the NMR spectroscopic data. The presence of hydrophobic forces and hydrogen interactions was also determined. Additionally, FT-IR and CD spectroscopies showed that LAF induced secondary structure changes of HSA.}, + journal = {Archives of Biochemistry and Biophysics}, + keywords = {Acetamides,Binding Sites,Circular Dichroism,Conformational changes,Fourier Transform Infrared,Human serum albumin,Humans,Lafutidine,Magnetic Resonance Spectroscopy,Molecular Docking Simulation,Molecule docking,Piperidines,Protein Binding,Protein Domains,Protein Structure,Protons,Pyridines,Relaxation experiments and tr-NOESY,Secondary,Serum Albumin,Spectrophotometry,Spectroscopy,STD and WaterLOGSY-NMR,Stomach Ulcer,Warfarin}, + language = {eng}, + pmid = {27457418} +} + +@article{yangMISCMissingImputation2018, + title = {{{MISC}}: Missing Imputation for Single-Cell {{RNA}} Sequencing Data}, + shorttitle = {{{MISC}}}, + author = {Yang, Mary Qu and Weissman, Sherman M. and Yang, William and Zhang, Jialing and Canaann, Allon and Guan, Renchu}, + year = {2018}, + month = dec, + volume = {12}, + issn = {1752-0509}, + doi = {10.1186/s12918-018-0638-y}, + abstract = {Background: Single-cell RNA sequencing (scRNA-seq) technology provides an effective way to study cell heterogeneity. However, due to the low capture efficiency and stochastic gene expression, scRNA-seq data often contains a high percentage of missing values. It has been showed that the missing rate can reach approximately 30\% even after noise reduction. To accurately recover missing values in scRNA-seq data, we need to know where the missing data is; how much data is missing; and what are the values of these data. +Methods: To solve these three problems, we propose a novel model with a hybrid machine learning method, namely, missing imputation for single-cell RNA-seq (MISC). 
To solve the first problem, we transformed it to a binary classification problem on the RNA-seq expression matrix. Then, for the second problem, we searched for the intersection of the classification results, zero-inflated model and false negative model results. Finally, we used the regression model to recover the data in the missing elements. +Results: We compared the raw data without imputation, the mean-smooth neighbor cell trajectory, MISC on chronic myeloid leukemia data (CML), the primary somatosensory cortex and the hippocampal CA1 region of mouse brain cells. On the CML data, MISC discovered a trajectory branch from the CP-CML to the BC-CML, which provides direct evidence of evolution from CP to BC stem cells. On the mouse brain data, MISC clearly divides the pyramidal CA1 into different branches, and it is direct evidence of pyramidal CA1 in the subpopulations. In the meantime, with MISC, the oligodendrocyte cells became an independent group with an apparent boundary. +Conclusions: Our results showed that the MISC model improved the cell type classification and could be instrumental to study cellular heterogeneity. Overall, MISC is a robust missing data imputation model for single-cell RNA-seq data.}, + file = {/Users/laurent/Zotero/storage/N9BDB9PD/Yang et al. - 2018 - MISC missing imputation for single-cell RNA seque.pdf;/Users/laurent/Zotero/storage/THMYLTB5/Yang et al. 
- 2018 - MISC missing imputation for single-cell RNA seque.pdf}, + journal = {BMC Systems Biology}, + language = {en}, + number = {S7} +} + +@article{yangSAFEclusteringSinglecellAggregated2019, + title = {{{SAFE}}-Clustering: {{Single}}-Cell {{Aggregated}} (from {{Ensemble}}) Clustering for Single-Cell {{RNA}}-Seq Data}, + shorttitle = {{{SAFE}}-Clustering}, + author = {Yang, Yuchen and Huh, Ruth and Culpepper, Houston W and Lin, Yuan and Love, Michael I and Li, Yun}, + editor = {Birol, Inanc}, + year = {2019}, + month = apr, + volume = {35}, + pages = {1269--1277}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/bty793}, + abstract = {Motivation: Accurately clustering cell types from a mass of heterogeneous cells is a crucial first step for the analysis of single-cell RNA-seq (scRNA-Seq) data. Although several methods have been recently developed, they utilize different characteristics of data and yield varying results in terms of both the number of clusters and actual cluster assignments.}, + file = {/Users/laurent/Zotero/storage/LS6NXKBB/Yang et al. - 2019 - SAFE-clustering Single-cell Aggregated (from Ense.pdf;/Users/laurent/Zotero/storage/TUSBBFPQ/Yang et al. - 2019 - SAFE-clustering Single-cell Aggregated (from Ense.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {8} +} + +@article{yangSMNNBatchEffect2019, + title = {{{SMNN}}: {{Batch Effect Correction}} for {{Single}}-Cell {{RNA}}-Seq Data via {{Supervised Mutual Nearest Neighbor Detection}}}, + shorttitle = {{{SMNN}}}, + author = {Yang, Yuchen and Li, Gang and Qian, Huijun and Wilhelmsen, Kirk C. and Shen, Yin and Li, Yun}, + year = {2019}, + month = jun, + pages = {672261}, + doi = {10.1101/672261}, + abstract = {Motivation: An ever-increasing deluge of single-cell RNA-sequencing (scRNA-seq) data has been generated, often involving different time points, laboratories or sequencing protocols. 
Batch effect correction has been recognized to be indispensable when integrating scRNA-seq data from multiple batches. A recent study proposed an effective correction method based on mutual nearest neighbors (MNN) across batches. However, MNN is unsupervised in that it ignores cluster label information of single cells, which can further improve effectiveness of batch effect correction, particularly under realistic scenarios where true biological differences are not orthogonal to batch effect. Results: In this work, we propose SMNN for batch effect correction of scRNA-seq data via supervised mutual nearest neighbor detection. SMNN either takes cluster/cell-type label information as input or infers cell types using scRNA-seq clustering in the absence of such information. It then detects mutual nearest neighbors within matched cell types and corrects batch effect accordingly. Compared to MNN, SMNN provides improved merging within the corresponding cell types across batches and retains more cell type-specific features after correction. Availability and implementation: SMNN is implemented in R, and freely available at https://yunliweb.its.unc.edu/SMNN/ and https://github.com/yycunc/SMNNcorrect. Contact: yunli@med.unc.edu}, + copyright = {\textcopyright{} 2019, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution-NonCommercial-NoDerivs 4.0 International), CC BY-NC-ND 4.0, as described at http://creativecommons.org/licenses/by-nc-nd/4.0/}, + file = {/Users/laurent/Zotero/storage/PRIDQKW8/Yang et al. 
- 2019 - SMNN Batch Effect Correction for Single-cell RNA-.pdf;/Users/laurent/Zotero/storage/B3A6K4JX/672261v1.html}, + journal = {bioRxiv}, + language = {en} +} + +@article{yanPathwaySplicePackageUnbiased2018, + title = {{{PathwaySplice}}: An {{R}} Package for Unbiased Pathway Analysis of Alternative Splicing in {{RNA}}-{{Seq}} Data}, + shorttitle = {{{PathwaySplice}}}, + author = {Yan, Aimin and Ban, Yuguang and Gao, Zhen and Chen, Xi and Wang, Lily}, + year = {2018}, + month = sep, + volume = {34}, + pages = {3220--3222}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty317}, + abstract = {AbstractSummary. Pathway analysis of alternative splicing would be biased without accounting for the different number of exons or junctions associated with eac}, + file = {/Users/laurent/Zotero/storage/CWEG3GPP/Yan et al. - 2018 - PathwaySplice an R package for unbiased pathway a.pdf;/Users/laurent/Zotero/storage/NZIQ2P4E/4983063.html}, + journal = {Bioinformatics}, + language = {en}, + number = {18} +} + +@article{yeDBG2OLCEfficientAssembly2016, + title = {{{DBG2OLC}}: {{Efficient Assembly}} of {{Large Genomes Using Long Erroneous Reads}} of the {{Third Generation Sequencing Technologies}}}, + shorttitle = {{{DBG2OLC}}}, + author = {Ye, Chengxi and Hill, Christopher M. and Wu, Shigang and Ruan, Jue and Ma, Zhanshan (Sam)}, + year = {2016}, + month = aug, + volume = {6}, + pages = {31900}, + issn = {2045-2322}, + doi = {10.1038/srep31900}, + abstract = {The highly anticipated transition from next generation sequencing (NGS) to third generation sequencing (3GS) has been difficult primarily due to high error rates and excessive sequencing cost. The high error rates make the assembly of long erroneous reads of large genomes challenging because existing software solutions are often overwhelmed by error correction tasks. Here we report a hybrid assembly approach that simultaneously utilizes NGS and 3GS data to address both issues. 
We gain advantages from three general and basic design principles: (i) Compact representation of the long reads leads to efficient alignments. (ii) Base-level errors can be skipped; structural errors need to be detected and corrected. (iii) Structurally correct 3GS reads are assembled and polished. In our implementation, preassembled NGS contigs are used to derive the compact representation of the long reads, motivating an algorithmic conversion from a de Bruijn graph to an overlap graph, the two major assembly paradigms. Moreover, since NGS and 3GS data can compensate for each other, our hybrid assembly approach reduces both of their sequencing requirements. Experiments show that our software is able to assemble mammalian-sized genomes orders of magnitude more quickly than existing methods without consuming a lot of memory, while saving about half of the sequencing cost.}, + copyright = {2016 Nature Publishing Group}, + file = {/Users/laurent/Zotero/storage/IRAQWC55/Ye et al. - 2016 - DBG2OLC Efficient Assembly of Large Genomes Using.pdf}, + journal = {Scientific Reports}, + keywords = {hybrid assembly}, + language = {en} +} + +@article{yeScHinterImputingDropout, + title = {{{scHinter}}: Imputing Dropout Events for Single-Cell {{RNA}}-Seq Data with Limited Sample Size}, + shorttitle = {{{scHinter}}}, + author = {Ye, Pengchao and Ye, Wenbin and Ye, Congting and Li, Shuchao and Ye, Lishan and Ji, Guoli and Wu, Xiaohui}, + doi = {10.1093/bioinformatics/btz627}, + abstract = {AbstractMotivation. Single-cell RNA-sequencing (scRNA-seq) is fast and becoming a powerful technique for studying dynamic gene regulation at unprecedented reso}, + file = {/Users/laurent/Zotero/storage/BJ9Q75UQ/Ye et al. 
- scHinter imputing dropout events for single-cell .pdf;/Users/laurent/Zotero/storage/2AYNDP6I/5544928.html}, + journal = {Bioinformatics}, + language = {en} +} + +@article{yeScNPFIntegrativeFramework2019, + title = {{{scNPF}}: An Integrative Framework Assisted by Network Propagation and Network Fusion for Preprocessing of Single-Cell {{RNA}}-Seq Data}, + shorttitle = {{{scNPF}}}, + author = {Ye, Wenbin and Ji, Guoli and Ye, Pengchao and Long, Yuqi and Xiao, Xuesong and Li, Shuchao and Su, Yaru and Wu, Xiaohui}, + year = {2019}, + month = dec, + volume = {20}, + issn = {1471-2164}, + doi = {10.1186/s12864-019-5747-5}, + abstract = {Background: Single-cell RNA-sequencing (scRNA-seq) is fast becoming a powerful tool for profiling genome-scale transcriptomes of individual cells and capturing transcriptome-wide cell-to-cell variability. However, scRNA-seq technologies suffer from high levels of technical noise and variability, hindering reliable quantification of lowly and moderately expressed genes. Since most downstream analyses on scRNA-seq, such as cell type clustering and differential expression analysis, rely on the gene-cell expression matrix, preprocessing of scRNA-seq data is a critical preliminary step in the analysis of scRNA-seq data. +Results: We presented scNPF, an integrative scRNA-seq preprocessing framework assisted by network propagation and network fusion, for recovering gene expression loss, correcting gene expression measurements, and learning similarities between cells. scNPF leverages the context-specific topology inherent in the given data and the priori knowledge derived from publicly available molecular gene-gene interaction networks to augment gene-gene relationships in a data driven manner. We have demonstrated the great potential of scNPF in scRNA-seq preprocessing for accurately recovering gene expression values and learning cell similarity networks. 
Comprehensive evaluation of scNPF across a wide spectrum of scRNA-seq data sets showed that scNPF achieved comparable or higher performance than the competing approaches according to various metrics of internal validation and clustering accuracy. We have made scNPF an easy-to-use R package, which can be used as a versatile preprocessing plug-in for most existing scRNA-seq analysis pipelines or tools. +Conclusions: scNPF is a universal tool for preprocessing of scRNA-seq data, which jointly incorporates the global topology of priori interaction networks and the context-specific information encapsulated in the scRNA-seq data to capture both shared and complementary knowledge from diverse data sources. scNPF could be used to recover gene signatures and learn cell-to-cell similarities from emerging scRNA-seq data to facilitate downstream analyses such as dimension reduction, cell type clustering, and visualization.}, + file = {/Users/laurent/Zotero/storage/C4JN6FLV/Ye et al. - 2019 - scNPF an integrative framework assisted by networ.pdf;/Users/laurent/Zotero/storage/T5P7G3K2/Ye et al. - 2019 - scNPF an integrative framework assisted by networ.pdf}, + journal = {BMC Genomics}, + language = {en}, + number = {1} +} + +@article{yiDetectingHiddenBatch2018, + title = {Detecting Hidden Batch Factors through Data-Adaptive Adjustment for Biological Effects}, + author = {Yi, Haidong and Raman, Ayush T and Zhang, Han and Allen, Genevera I and Liu, Zhandong}, + year = {2018}, + month = apr, + volume = {34}, + pages = {1141--1147}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx635}, + abstract = {Motivation: Batch effects are one of the major source of technical variations that affect the measurements in high-throughput studies such as RNA sequencing. It has been well established that batch effects can be caused by different experimental platforms, laboratory conditions, different sources of samples and personnel differences. 
These differences can confound the outcomes of interest and lead to spurious results. A critical input for batch correction algorithms is the knowledge of batch factors, which in many cases are unknown or inaccurate. Hence, the primary motivation of our paper is to detect hidden batch factors that can be used in standard techniques to accurately capture the relationship between gene expression and other modeled variables of interest.}, + file = {/Users/laurent/Documents/bibliography/to_read/Yi et al. - 2018 - Detecting hidden batch factors through data-adapti.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {7} +} + +@article{yipEvaluationToolsHighly2018, + title = {Evaluation of Tools for Highly Variable Gene Discovery from Single-Cell {{RNA}}-Seq Data}, + author = {Yip, Shun H and Sham, Pak Chung and Wang, Junwen}, + year = {2018}, + month = feb, + issn = {1467-5463, 1477-4054}, + doi = {10.1093/bib/bby011}, + abstract = {Traditional RNA sequencing (RNA-seq) allows the detection of gene expression variations between two or more cell populations through differentially expressed gene (DEG) analysis. However, genes that contribute to cell-to-cell differences are not discoverable with RNA-seq because RNA-seq samples are obtained from a mixture of cells. Single-cell RNA-seq (scRNA-seq) allows the detection of gene expression in each cell. With scRNA-seq, highly variable gene (HVG) discovery allows the detection of genes that contribute strongly to cell-to-cell variation within a homogeneous cell population, such as a population of embryonic stem cells. This analysis is implemented in many software packages. In this study, we compare seven HVG methods from six software packages, including BASiCS, Brennecke, scLVM, scran, scVEGs and Seurat. Our results demonstrate that reproducibility in HVG analysis requires a larger sample size than DEG analysis. 
Discrepancies between methods and potential issues in these tools are discussed and recommendations are made.}, + file = {/Users/laurent/Documents/bibliography/to_read/Yip et al. - 2018 - Evaluation of tools for highly variable gene disco.pdf}, + journal = {Briefings in Bioinformatics}, + language = {en} +} + +@article{yousefiStochasticityReplicationForks2019, + title = {Stochasticity of Replication Forks' Speeds Plays a Key Role in the Dynamics of {{DNA}} Replication}, + author = {Yousefi, Razie and Rowicka, Maga}, + year = {2019}, + month = dec, + volume = {15}, + pages = {e1007519}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1007519}, + abstract = {Eukaryotic DNA replication is elaborately orchestrated to duplicate the genome timely and faithfully. Replication initiates at multiple origins from which replication forks emanate and travel bi-directionally. The complex spatio-temporal regulation of DNA replication remains incompletely understood. To study it, computational models of DNA replication have been developed in S. cerevisiae. However, in spite of the experimental evidence of forks' speed stochasticity, all models assumed that forks' speeds are the same. Here, we present the first model of DNA replication assuming that speeds vary stochastically between forks. Utilizing data from both wild-type and hydroxyurea-treated yeast cells, we show that our model is more accurate than models assuming constant forks' speed and reconstructs dynamics of DNA replication faithfully starting both from population-wide data and data reflecting fork movement in individual cells. Completion of replication in a timely manner is a challenge due to its stochasticity; we propose an empirically derived modification to replication speed based on the distance to the approaching fork, which promotes timely completion of replication. In summary, our work discovers a key role that stochasticity of the forks' speed plays in the dynamics of DNA replication. 
We show that without including stochasticity of forks' speed it is not possible to accurately reconstruct movement of individual replication forks, measured by DNA combing.}, + file = {/Users/laurent/Zotero/storage/PILL4H63/Yousefi and Rowicka - 2019 - Stochasticity of replication forks’ speeds plays a.pdf;/Users/laurent/Zotero/storage/ZQEPSSQP/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Biochemical simulations,DNA replication,Genome complexity,Normal distribution,Probability distribution,Saccharomyces cerevisiae,Simulation and modeling,Synthesis phase}, + language = {en}, + number = {12} +} + +@article{yuCellSortSupportVector2016, + title = {{{CellSort}}: {{A}} Support Vector Machine Tool for Optimizing Fluorescence-Activated Cell Sorting and Reducing Experimental Effort}, + shorttitle = {{{CellSort}}}, + author = {Yu, Jessica S. and Pertusi, Dante A. and Adeniran, Adebola V. and Tyo, Keith E. J.}, + year = {2016}, + month = dec, + pages = {btw710}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btw710}, + abstract = {Motivation: High throughput screening by fluorescence activated cell sorting (FACS) is a common task in protein engineering and directed evolution. It can also be a rate-limiting step if high false positive or negative rates necessitate multiple rounds of enrichment. Current FACS software requires the user to define sorting gates by intuition and is practically limited to two dimensions. In cases when multiple rounds of enrichment are required, the software cannot forecast the enrichment effort required.}, + file = {/Users/laurent/Documents/bibliography/FACS/Yu et al. 
- 2016 - CellSort A support vector machine tool for optimi.pdf}, + journal = {Bioinformatics}, + language = {en} +} + +@article{zeileisRegressionModelsCount2008, + title = {Regression {{Models}} for {{Count Data}} in {{R}}}, + author = {Zeileis, Achim and Kleiber, Christian and Jackman, Simon}, + year = {2008}, + volume = {27}, + file = {/Users/laurent/Zotero/storage/MQI52K52/Zeileis et al. - 2008 - Regression Models for Count Data in R.pdf;/Users/laurent/Zotero/storage/WHPPVQEH/v027i08.html}, + journal = {Journal of Statistical Software}, + number = {8} +} + +@article{zhangComparisonComputationalMethods2018, + title = {Comparison of Computational Methods for Imputing Single-Cell {{RNA}}-Sequencing Data}, + author = {Zhang, Lihua and Zhang, Shihua}, + year = {2018}, + pages = {1--1}, + issn = {1545-5963, 1557-9964, 2374-0043}, + doi = {10.1109/TCBB.2018.2848633}, + abstract = {Single-cell RNA-sequencing (scRNA-seq) is a recent breakthrough technology, which paves the way for measuring RNA levels at single cell resolution to study precise biological functions. One of the main challenges when analyzing scRNA-seq data is the presence of zeros or dropout events, which may mislead downstream analyses. To compensate the dropout effect, several methods have been developed to impute gene expression since the first Bayesian-based method being proposed in 2016. However, these methods have shown very diverse characteristics in terms of model hypothesis and imputation performance. Thus, large-scale comparison and evaluation of these methods is urgently needed now. To this end, we compared eight imputation methods, evaluated their power in recovering original real data, and performed broad analyses to explore their effects on clustering cell types, detecting differentially expressed genes, and reconstructing lineage trajectories in the context of both simulated and real data. 
Simulated datasets and case studies highlight that there are no one method performs the best in all the situations. Some defects of these methods such as scalability, robustness and unavailability in some situations need to be addressed in future studies.}, + file = {/Users/laurent/Zotero/storage/IBAGLMDI/Zhang and Zhang - 2018 - Comparison of computational methods for imputing s.pdf;/Users/laurent/Zotero/storage/RNJFAUST/Zhang and Zhang - 2018 - Comparison of computational methods for imputing s.pdf}, + journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics}, + language = {en} +} + +@article{zhangDiffNetFDRDifferentialNetwork2019, + title = {{{DiffNetFDR}}: Differential Network Analysis with False Discovery Rate Control}, + shorttitle = {{{DiffNetFDR}}}, + author = {Zhang, Xiao-Fei and {Ou-Yang}, Le and Yang, Shuo and Hu, Xiaohua and Yan, Hong}, + year = {2019}, + month = sep, + volume = {35}, + pages = {3184--3186}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz051}, + abstract = {AbstractSummary. To identify biological network rewiring under different conditions, we develop a user-friendly R package, named DiffNetFDR, to implement two m}, + file = {/Users/laurent/Zotero/storage/TGUF9AST/Zhang et al. 
- 2019 - DiffNetFDR differential network analysis with fal.pdf;/Users/laurent/Zotero/storage/SQ3PSP9Y/5299999.html}, + journal = {Bioinformatics}, + language = {en}, + number = {17} +} + +@article{zhangEnImputeImputingDropout2019, + title = {{{EnImpute}}: Imputing Dropout Events in Single Cell {{RNA}} Sequencing Data via Ensemble Learning}, + shorttitle = {{{EnImpute}}}, + author = {Zhang, Xiao-Fei and {Ou-Yang}, Le and Yang, Shuo and Zhao, Xing-Ming and Hu, Xiaohua and Yan, Hong}, + editor = {Berger, Bonnie}, + year = {2019}, + month = may, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btz435}, + abstract = {Summary: Imputation of dropout events that may mislead downstream analyses is a key step in analyzing single-cell RNA-sequencing (scRNA-seq) data. We develop EnImpute, an R package that introduces an ensemble learning method for imputing dropout events in scRNA-seq data. EnImpute combines the results obtained from multiple imputation methods to generate a more accurate result. A Shiny application is developed to provide easier implementation and visualization. Experiment results show that EnImpute outperforms the individual state-of-the-art methods in almost all situations. EnImpute is useful for correcting the noisy scRNA-seq data before performing downstream analysis.}, + file = {/Users/laurent/Zotero/storage/S2GC93FV/Zhang et al. - 2019 - EnImpute imputing dropout events in single cell R.pdf;/Users/laurent/Zotero/storage/VQY7KV25/Zhang et al. - 2019 - EnImpute imputing dropout events in single cell R.pdf}, + journal = {Bioinformatics}, + language = {en} +} + +@article{zhangGreedyAlgorithmAligning2000, + title = {A Greedy Algorithm for Aligning {{DNA}} Sequences}, + author = {Zhang, Z. and Schwartz, S. and Wagner, L. 
and Miller, W.}, + year = {2000}, + month = apr, + volume = {7}, + pages = {203--214}, + issn = {1066-5277}, + doi = {10.1089/10665270050081478}, + abstract = {For aligning DNA sequences that differ only by sequencing errors, or by equivalent errors from other sources, a greedy algorithm can be much faster than traditional dynamic programming approaches and yet produce an alignment that is guaranteed to be theoretically optimal. We introduce a new greedy alignment algorithm with particularly good performance and show that it computes the same alignment as does a certain dynamic programming algorithm, while executing over 10 times faster on appropriate data. An implementation of this algorithm is currently used in a program that assembles the UniGene database at the National Center for Biotechnology Information.}, + journal = {Journal of Computational Biology: A Journal of Computational Molecular Cell Biology}, + keywords = {Algorithms,Biometry,Databases,DNA,Factual,Sequence Alignment,Sequence Analysis,Software}, + language = {eng}, + number = {1-2}, + pmid = {10890397} +} + +@article{zhangIncorporatingPriorInformation2017, + title = {Incorporating Prior Information into Differential Network Analysis Using Non-Paranormal Graphical Models}, + author = {Zhang, Xiao-Fei and {Ou-Yang}, Le and Yan, Hong}, + year = {2017}, + month = aug, + volume = {33}, + pages = {2436--2445}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx208}, + abstract = {Motivation: Understanding how gene regulatory networks change under different cellular states is important for revealing insights into network dynamics. Gaussian graphical models, which assume that the data follow a joint normal distribution, have been used recently to infer differential networks. However, the distributions of the omics data are non-normal in general. 
Furthermore, although much biological knowledge (or prior information) has been accumulated, most existing methods ignore the valuable prior information. Therefore, new statistical methods are needed to relax the normality assumption and make full use of prior information.}, + file = {/Users/laurent/Documents/bibliography/networks/Zhang et al. - 2017 - Incorporating prior information into differential .pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {16} +} + +@inproceedings{zhangMeasuringReproducibilityHighThroughput2013, + title = {Measuring {{Reproducibility}} of {{High}}-{{Throughput Deep}}-{{Sequencing Experiments Based}} on {{Self}}-Adaptive {{Mixture Copula}}}, + booktitle = {Advances in {{Knowledge Discovery}} and {{Data Mining}}}, + author = {Zhang, Qian and Zhang, Junping and Xue, Chenghai}, + editor = {Pei, Jian and Tseng, Vincent S. and Cao, Longbing and Motoda, Hiroshi and Xu, Guandong}, + year = {2013}, + pages = {301--313}, + publisher = {{Springer Berlin Heidelberg}}, + abstract = {Measurement of the statistical reproducibility between biological experiment replicates is vital first step of the entire series of bioinformatics analysis for mining meaningful biological discovery from mega-data. To distinguish the real biological relevant signals from artificial signals, irreproducible discovery rate (IDR) employing Copula, which can separate dependence structure and marginal distribution from data, has been put forth. However, IDR employed a Gaussian Copula which may cause underestimation of risk and limit the robustness of the method. To address the issue, we propose a Self-adaptive Mixture Copula (SaMiC) to measure the reproducibility of experiment replicates from high-throughput deep-sequencing data. Simple and easy to implement, the proposed SaMiC method can self-adaptively tune its coefficients so that the measurement of reproducibility is more effective for general distributions. 
Experiments in simulated and real data indicate that compared with IDR, the SaMiC method can better estimate reproducibility between replicate samples.}, + file = {/Users/laurent/Zotero/storage/JQJICA64/Zhang et al. - 2013 - Measuring Reproducibility of High-Throughput Deep-.pdf}, + isbn = {978-3-642-37453-1}, + keywords = {Copula Model,Dependence Structure,Gaussian Copula,Marginal Distribution,Tail Dependence}, + language = {en}, + series = {Lecture {{Notes}} in {{Computer Science}}} +} + +@article{zhangModelbasedAnalysisChIPSeq2008, + title = {Model-Based {{Analysis}} of {{ChIP}}-{{Seq}} ({{MACS}})}, + author = {Zhang, Yong and Liu, Tao and Meyer, Clifford A and Eeckhoute, J{\'e}r{\^o}me and Johnson, David S and Bernstein, Bradley E and Nusbaum, Chad and Myers, Richard M and Brown, Myles and Li, Wei and Liu, X Shirley}, + year = {2008}, + volume = {9}, + pages = {R137}, + issn = {1465-6906}, + doi = {10.1186/gb-2008-9-9-r137}, + abstract = {MACS performs model-based analysis of ChIP-Seq data generated by short read sequencers., We present Model-based Analysis of ChIP-Seq data, MACS, which analyzes data generated by short read sequencers such as Solexa's Genome Analyzer. MACS empirically models the shift size of ChIP-Seq tags, and uses it to improve the spatial resolution of predicted binding sites. MACS also uses a dynamic Poisson distribution to effectively capture local biases in the genome, allowing for more robust predictions. MACS compares favorably to existing ChIP-Seq peak-finding algorithms, and is freely available.}, + file = {/Users/laurent/Zotero/storage/RQQUACGH/Zhang et al. - 2008 - Model-based Analysis of ChIP-Seq (MACS).pdf}, + journal = {Genome Biology}, + number = {9}, + pmcid = {PMC2592715}, + pmid = {18798982} +} + +@article{zhangMultitaskClusteringApproach2018, + title = {A Multitask Clustering Approach for Single-Cell {{RNA}}-Seq Analysis in {{Recessive Dystrophic Epidermolysis Bullosa}}}, + author = {Zhang, Huanan and Lee, Catherine A. A. 
and Li, Zhuliu and Garbe, John R. and Eide, Cindy R. and Petegrosso, Raphael and Kuang, Rui and Tolar, Jakub}, + editor = {Khan, Aly}, + year = {2018}, + month = apr, + volume = {14}, + pages = {e1006053}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1006053}, + abstract = {Single-cell RNA sequencing (scRNA-seq) has been widely applied to discover new cell types by detecting sub-populations in a heterogeneous group of cells. Since scRNA-seq experiments have lower read coverage/tag counts and introduce more technical biases compared to bulk RNA-seq experiments, the limited number of sampled cells combined with the experimental biases and other dataset specific variations presents a challenge to crossdataset analysis and discovery of relevant biological variations across multiple cell populations. In this paper, we introduce a method of variance-driven multitask clustering of singlecell RNA-seq data (scVDMC) that utilizes multiple single-cell populations from biological replicates or different samples. scVDMC clusters single cells in multiple scRNA-seq experiments of similar cell types and markers but varying expression patterns such that the scRNA-seq data are better integrated than typical pooled analyses which only increase the sample size. By controlling the variance among the cell clusters within each dataset and across all the datasets, scVDMC detects cell sub-populations in each individual experiment with shared cell-type markers but varying cluster centers among all the experiments. Applied to two real scRNA-seq datasets with several replicates and one large-scale dropletbased dataset on three patient samples, scVDMC more accurately detected cell populations and known cell markers than pooled clustering and other recently proposed scRNA-seq clustering methods. In the case study applied to in-house Recessive Dystrophic Epidermolysis Bullosa (RDEB) scRNA-seq data, scVDMC revealed several new cell types and unknown markers validated by flow cytometry. 
MATLAB/Octave code available at https://github.com/kuanglab/scVDMC.}, + file = {/Users/laurent/Documents/bibliography/to_read/Zhang et al. - 2018 - A multitask clustering approach for single-cell RN.pdf}, + journal = {PLOS Computational Biology}, + language = {en}, + number = {4} +} + +@article{zhangProbabilisticCelltypeAssignment2019, + title = {Probabilistic Cell-Type Assignment of Single-Cell {{RNA}}-Seq for Tumor Microenvironment Profiling}, + author = {Zhang, Allen W. and O'Flanagan, Ciara and Chavez, Elizabeth A. and Lim, Jamie L. P. and Ceglia, Nicholas and McPherson, Andrew and Wiens, Matt and Walters, Pascale and Chan, Tim and Hewitson, Brittany and Lai, Daniel and Mottok, Anja and Sarkozy, Clementine and Chong, Lauren and Aoki, Tomohiro and Wang, Xuehai and Weng, Andrew P. and McAlpine, Jessica N. and Aparicio, Samuel and Steidl, Christian and Campbell, Kieran R. and Shah, Sohrab P.}, + year = {2019}, + month = oct, + volume = {16}, + pages = {1007--1015}, + issn = {1548-7105}, + doi = {10.1038/s41592-019-0529-1}, + abstract = {CellAssign uses a probabilistic model to assign single cells to a given cell type defined by known marker genes, enabling automated annotation of cell types present in a tumor microenvironment.}, + copyright = {2019 The Author(s), under exclusive licence to Springer Nature America, Inc.}, + file = {/Users/laurent/Zotero/storage/ZVXPRHUF/Zhang et al. 
- 2019 - Probabilistic cell-type assignment of single-cell .pdf;/Users/laurent/Zotero/storage/EAU78SA6/s41592-019-0529-1.html}, + journal = {Nature Methods}, + language = {en}, + number = {10} +} + +@article{zhangProperJointAnalysis2018, + title = {Proper Joint Analysis of Summary Association Statistics Requires the Adjustment of Heterogeneity in {{SNP}} Coverage Pattern}, + author = {Zhang, Han and Wheeler, William and Song, Lei and Yu, Kai}, + year = {2018}, + month = nov, + volume = {19}, + pages = {1337--1343}, + issn = {1467-5463}, + doi = {10.1093/bib/bbx072}, + abstract = {Abstract. As meta-analysis results published by consortia of genome-wide association studies (GWASs) become increasingly available, many association summary st}, + file = {/Users/laurent/Zotero/storage/6SIA4S68/Zhang et al. - 2018 - Proper joint analysis of summary association stati.pdf;/Users/laurent/Zotero/storage/KPKSNKQI/3933281.html}, + journal = {Briefings in Bioinformatics}, + language = {en}, + number = {6} +} + +@article{zhangSILGGMExtensivePackage2018, + title = {{{SILGGM}}: {{An}} Extensive {{R}} Package for Efficient Statistical Inference in Large-Scale Gene Networks}, + shorttitle = {{{SILGGM}}}, + author = {Zhang, Rong and Ren, Zhao and Chen, Wei}, + year = {2018}, + month = aug, + volume = {14}, + pages = {e1006369}, + issn = {1553-7358}, + doi = {10.1371/journal.pcbi.1006369}, + abstract = {Gene co-expression network analysis is extremely useful in interpreting a complex biological process. The recent droplet-based single-cell technology is able to generate much larger gene expression data routinely with thousands of samples and tens of thousands of genes. To analyze such a large-scale gene-gene network, remarkable progress has been made in rigorous statistical inference of high-dimensional Gaussian graphical model (GGM). 
These approaches provide a formal confidence interval or a p-value rather than only a single point estimator for conditional dependence of a gene pair and are more desirable for identifying reliable gene networks. To promote their widespread use, we herein introduce an extensive and efficient R package named SILGGM (Statistical Inference of Large-scale Gaussian Graphical Model) that includes four main approaches in statistical inference of high-dimensional GGM. Unlike the existing tools, SILGGM provides statistically efficient inference on both individual gene pair and whole-scale gene pairs. It has a novel and consistent false discovery rate (FDR) procedure in all four methodologies. Based on the user-friendly design, it provides outputs compatible with multiple platforms for interactive network visualization. Furthermore, comparisons in simulation illustrate that SILGGM can accelerate the existing MATLAB implementation to several orders of magnitudes and further improve the speed of the already very efficient R package FastGGM. Testing results from the simulated data confirm the validity of all the approaches in SILGGM even in a very large-scale setting with the number of variables or genes to a ten thousand level. We have also applied our package to a novel single-cell RNA-seq data set with pan T cells. The results show that the approaches in SILGGM significantly outperform the conventional ones in a biological sense. The package is freely available via CRAN at https://cran.r-project.org/package=SILGGM.}, + file = {/Users/laurent/Zotero/storage/G9YEKRYX/Zhang et al. - 2018 - SILGGM An extensive R package for efficient stati.pdf;/Users/laurent/Zotero/storage/GP8ZAZXS/Zhang et al. 
- 2018 - SILGGM An extensive R package for efficient stati.pdf;/Users/laurent/Zotero/storage/2ZD48CRT/article.html;/Users/laurent/Zotero/storage/PASDMJRG/article.html}, + journal = {PLOS Computational Biology}, + keywords = {Gene expression,Gene regulatory networks,Genetic networks,Network analysis,Simulation and modeling,Statistical inference,T cells,Test statistics}, + language = {en}, + number = {8} +} + +@article{zhangTopographerRevealsDynamic2018, + title = {Topographer {{Reveals Dynamic Mechanisms}} of {{Cell Fate Decisions}} from {{Single}}-{{Cell Transcriptomic Data}}}, + author = {Zhang, Jiajun and Zhou, Tianshou and Nie, Qing}, + year = {2018}, + month = mar, + doi = {10.1101/251207}, + abstract = {While single-cell measurement technologies provide an unprecedented opportunity to dissect developmental processes, revealing the mechanisms of cell fate decisions from single-cell RNA-seq data is challenging due to both cellular heterogeneity and transcriptional noise in the data. Here we developed Topographer, a bioinformatic pipeline, to construct an intuitive (i.e., every cell is equipped with both potential and pseudotime) developmental landscape, reveal stochastic dynamics of cell types, and infer both dynamic connections of marker gene networks and dynamic characteristics of transcriptional bursting kinetics across development. Applying this method to primary human myoblasts, we not only identified three known cell types but also estimated both their fate probabilities and transition probabilities among them. We found that the percent of the genes expressed in a bursty manner is significantly higher at the branch point than before or after branch, and there are apparent changes in both gene-gene and cell-cell correlations before and after branch. 
In general, single-cell transcriptome data with Topographer can well reveal the stochastic mechanisms of cell fate decisions from three different levels: cell lineage (macroscopic), gene network (mesoscopic) and gene expression (microscopic).}, + file = {/Users/laurent/Documents/bibliography/to_read/Zhang et al. - 2018 - Topographer Reveals Dynamic Mechanisms of Cell Fat.pdf}, + language = {en} +} + +@article{zhangZIAQQuantileRegression, + title = {{{ZIAQ}}: {{A}} Quantile Regression Method for Differential Expression Analysis of Single-Cell {{RNA}}-Seq Data}, + shorttitle = {{{ZIAQ}}}, + author = {Zhang, Wenfei and Wei, Ying and Zhang, Donghui and Xu, Ethan Y.}, + doi = {10.1093/bioinformatics/btaa098}, + abstract = {Motivation: Single-cell RNA sequencing (scRNA-seq) has enabled the simultaneous transcriptomic profiling of individual cells under different biological}, + file = {/Users/laurent/Zotero/storage/M42ZNF5Q/5735412.html}, + journal = {Bioinformatics}, + language = {en} +} + +@article{zhaoMultipleTestingWhen, + title = {Multiple {{Testing When Many}} P-Values Are {{Uniformly Conservative}}}, + author = {Zhao, Qingyuan and Small, Dylan S and Su, Weijie}, + pages = {12}, + abstract = {When individual p-values are conservative under the null, usual multiple testing methods may lose power substantially. We propose to reduce the total number of tests by conditioning: p-values less than a chosen threshold 0 {$<$} {$\tau$} {$<$} 1 are kept and divided by {$\tau$} , then a usual multiple testing procedure is applied. This method controls the multiple testing error if the conservative p-values are also uniformly conservative, meaning the conditional distribution (p/{$\tau$} ) | p {$\leq$} {$\tau$} is stochastically larger than the uniform distribution on (0, 1) for any {$\tau$} where p is the conservative p-value. We show uniform conservativeness hold for one-sided tests in a one-dimensional exponential family (e.g. 
testing for qualitative interaction) as well as testing |{$\mathrm{\mu}$}| {$\leq$} {$\eta$} using a statistic X {$\sim$} N({$\mathrm{\mu}$}, 1) (e.g. testing for practical importance with threshold {$\eta$}). Our theoretical and numerical results suggest the proposed tests gain significant power when many p-values are uniformly conservative and lose little power when no p-value is uniformly conservative.}, + file = {/Users/laurent/Documents/bibliography/stats/ZHAO et al. - Multiple Testing When Many p-values are Uniformly .pdf}, + language = {en} +} + +@article{zhaoRiboProPProbabilisticRibosome2019, + title = {{{RiboProP}}: A Probabilistic Ribosome Positioning Algorithm for Ribosome Profiling}, + shorttitle = {{{RiboProP}}}, + author = {Zhao, Dengke and Baez, William D. and Fredrick, Kurt and Bundschuh, Ralf}, + year = {2019}, + month = may, + volume = {35}, + pages = {1486--1493}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty854}, + abstract = {AbstractMotivation. Ribosome profiling has been widely used to study translation in a genome-wide fashion. It requires deep sequencing of ribosome protected mR}, + file = {/Users/laurent/Zotero/storage/F7EXEC4F/Zhao et al. - 2019 - RiboProP a probabilistic ribosome positioning alg.pdf;/Users/laurent/Zotero/storage/FFDYYMLL/5126237.html}, + journal = {Bioinformatics}, + language = {en}, + number = {9} +} + +@article{zhengSENSESiameseNeural2019, + title = {{{SENSE}}: {{Siamese}} Neural Network for Sequence Embedding and Alignment-Free Comparison}, + shorttitle = {{{SENSE}}}, + author = {Zheng, Wei and Yang, Le and Genco, Robert J. and {Wactawski-Wende}, Jean and Buck, Michael and Sun, Yijun}, + year = {2019}, + month = jun, + volume = {35}, + pages = {1820--1828}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/bty887}, + abstract = {AbstractMotivation. Sequence analysis is arguably a foundation of modern biology. 
Classic approaches to sequence analysis are based on sequence alignment, whic}, + file = {/Users/laurent/Zotero/storage/ZRCFAMLN/5140215.html}, + journal = {Bioinformatics}, + language = {en}, + number = {11} +} + +@article{zhengSeqArrayStorageefficientHighperformance2017, + title = {{{SeqArray}}\textemdash{}a Storage-Efficient High-Performance Data Format for {{WGS}} Variant Calls}, + author = {Zheng, Xiuwen and Gogarten, Stephanie M. and Lawrence, Michael and Stilp, Adrienne and Conomos, Matthew P. and Weir, Bruce S. and Laurie, Cathy and Levine, David}, + year = {2017}, + month = aug, + volume = {33}, + pages = {2251--2257}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx145}, + abstract = {Motivation: Whole-genome sequencing (WGS) data is being generated at an unprecedented rate. Analysis of WGS data requires a flexible data format to store the different types of DNA variation. Variant call format (VCF) is a general text-based format developed to store variant genotypes and their annotations. However, VCF files are large and data retrieval is relatively slow. Here we introduce a new WGS variant data format implemented in the R/Bioconductor package ``SeqArray'' for storing variant calls in an arrayoriented manner which provides the same capabilities as VCF, but with multiple high compression options and data access using high-performance parallel computing.}, + file = {/Users/laurent/Documents/bibliography/to_read/Zheng et al. 
- 2017 - SeqArray—a storage-efficient high-performance data.pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {15} +} + +@article{zhengSinNLRRRobustSubspace2019, + title = {{{SinNLRR}}: A Robust Subspace Clustering Method for Cell Type Detection by Non-Negative and Low-Rank Representation}, + shorttitle = {{{SinNLRR}}}, + author = {Zheng, Ruiqing and Li, Min and Liang, Zhenlan and Wu, Fang-Xiang and Pan, Yi and Wang, Jianxin}, + editor = {Kelso, Janet}, + year = {2019}, + month = mar, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btz139}, + abstract = {Motivation: The development of single-cell RNA-sequencing (scRNA-seq) provides a new perspective to study biological problems at the single-cell level. One of the key issues in scRNA-seq analysis is to resolve the heterogeneity and diversity of cells, which is to cluster the cells into several groups. However, many existing clustering methods are designed to analyze bulk RNA-seq data, it is urgent to develop the new scRNA-seq clustering methods. Moreover, the high noise in scRNAseq data also brings a lot of challenges to computational methods.}, + file = {/Users/laurent/Zotero/storage/8ZFFAMNW/Zheng et al. - 2019 - SinNLRR a robust subspace clustering method for c.pdf;/Users/laurent/Zotero/storage/DTTE3H6M/Zheng et al. 
- 2019 - SinNLRR a robust subspace clustering method for c.pdf}, + journal = {Bioinformatics}, + language = {en} +} + +@article{zhouClassifyingNextgenerationSequencing2018, + title = {Classifying Next-Generation Sequencing Data Using a Zero-Inflated {{Poisson}} Model}, + author = {Zhou, Yan and Wan, Xiang and Zhang, Baoxue and Tong, Tiejun}, + year = {2018}, + month = apr, + volume = {34}, + pages = {1329--1335}, + issn = {1367-4803, 1460-2059}, + doi = {10.1093/bioinformatics/btx768}, + abstract = {Motivation: With the development of high-throughput techniques, RNA-sequencing (RNA-seq) is becoming increasingly popular as an alternative for gene expression analysis, such as RNAs profiling and classification. Identifying which type of diseases a new patient belongs to with RNA-seq data has been recognized as a vital problem in medical research. As RNA-seq data are discrete, statistical methods developed for classifying microarray data cannot be readily applied for RNAseq data classification. Witten proposed a Poisson linear discriminant analysis (PLDA) to classify the RNA-seq data in 2011. Note, however, that the count datasets are frequently characterized by excess zeros in real RNA-seq or microRNA sequence data (i.e. when the sequence depth is not enough or small RNAs with the length of 18\textendash{}30 nucleotides). Therefore, it is desired to develop a new model to analyze RNA-seq data with an excess of zeros.}, + file = {/Users/laurent/Documents/bibliography/to_read/Zhou et al. - 2018 - Classifying next-generation sequencing data using .pdf}, + journal = {Bioinformatics}, + language = {en}, + number = {8} +} + +@article{zhuNonparametricExpressionAnalysis2019, + title = {Nonparametric Expression Analysis Using Inferential Replicate Counts}, + author = {Zhu, Anqi and Srivastava, Avi and Ibrahim, Joseph G. 
and Patro, Rob and Love, Michael I.}, + year = {2019}, + month = feb, + doi = {10.1101/561084}, + abstract = {A primary challenge in the analysis of RNA-seq data is to identify differentially expressed genes or transcripts while controlling for technical biases present in the observations. Ideally, a statistical testing procedure should incorporate information about the inherent uncertainty of the abundance estimates, whether at the gene or transcript level, that arise from quantification of abundance. Most popular methods for RNA-seq differential expression analysis fit a parametric model to the counts or scaled counts for each gene or transcript, and a subset of methods can incorporate information about the uncertainty of the counts. Previous work has shown that nonparametric models for RNA-seq differential expression may in some cases have better control of the false discovery rate, and adapt well to new data types without requiring reformulation of a parametric model. Existing nonparametric models do not take into account the inferential uncertainty of the observations, leading to an inflated false discovery rate, in particular at the transcript level. Here we propose a nonparametric model for differential expression analysis using inferential replicate counts, extending the existing SAMseq method to account for inferential uncertainty, batch effects, and sample pairing. We compare our method, ``SAMseq With Inferential Samples Helps'', or Swish, with popular differential expression analysis methods. Swish has improved control of the false discovery rate, in particular for transcripts with high inferential uncertainty. We apply Swish to a singlecell RNA-seq dataset, assessing sensitivity to recover DE genes between sub-populations of cells, and compare its performance to the Wilcoxon rank sum test.}, + file = {/Users/laurent/Zotero/storage/QTJMF6HI/Zhu et al. 
- 2019 - Nonparametric expression analysis using inferentia.pdf}, + journal = {bioRxiv}, + language = {en} +} + +@article{ziegenhainComparativeAnalysisSingleCell2017, + title = {Comparative {{Analysis}} of {{Single}}-{{Cell RNA Sequencing Methods}}}, + author = {Ziegenhain, Christoph and Vieth, Beate and Parekh, Swati and Reinius, Bj{\"o}rn and {Guillaumet-Adkins}, Amy and Smets, Martha and Leonhardt, Heinrich and Heyn, Holger and Hellmann, Ines and Enard, Wolfgang}, + year = {2017}, + month = feb, + volume = {65}, + pages = {631--643.e4}, + issn = {1097-2765}, + doi = {10.1016/j.molcel.2017.01.023}, + abstract = {Single-cell RNA sequencing (scRNA-seq) offers new possibilities to address biological and medical questions. However, systematic comparisons of the performance of diverse scRNA-seq protocols are lacking. We generated data from 583 mouse embryonic stem cells to evaluate six prominent scRNA-seq methods: CEL-seq2, Drop-seq, MARS-seq, SCRB-seq, Smart-seq, and Smart-seq2. While Smart-seq2 detected the most genes per cell and across cells, CEL-seq2, Drop-seq, MARS-seq, and SCRB-seq quantified mRNA levels with less amplification noise due to the use of unique molecular identifiers (UMIs). Power simulations at different sequencing depths showed that Drop-seq is more cost-efficient for transcriptome quantification of large numbers of cells, while MARS-seq, SCRB-seq, and Smart-seq2 are more efficient when analyzing fewer cells. Our quantitative comparison offers the basis for an informed choice among six prominent scRNA-seq methods, and it provides a framework for benchmarking further improvements of scRNA-seq protocols.}, + file = {/Users/laurent/Documents/bibliography/scRNASeq/Ziegenhain et al. - 2017 - Comparative Analysis of Single-Cell RNA Sequencing.pdf;/Users/laurent/Zotero/storage/2UTJ4SMU/Ziegenhain et al. - 2017 - Comparative Analysis of Single-Cell RNA Sequencing.pdf;/Users/laurent/Zotero/storage/9KEER2TC/Ziegenhain et al. 
- 2017 - Comparative Analysis of Single-Cell RNA Sequencing.pdf;/Users/laurent/Zotero/storage/TGY3858J/Ziegenhain et al. - 2017 - Comparative Analysis of Single-Cell RNA Sequencing.pdf}, + journal = {Molecular Cell}, + language = {en}, + number = {4} +} + +@article{ziminHybridAssemblyLarge2017, + title = {Hybrid Assembly of the Large and Highly Repetitive Genome of {{Aegilops}} Tauschii, a Progenitor of Bread Wheat, with the {{MaSuRCA}} Mega-Reads Algorithm}, + author = {Zimin, Aleksey V. and Puiu, Daniela and Luo, Ming-Cheng and Zhu, Tingting and Koren, Sergey and Mar{\c c}ais, Guillaume and Yorke, James A. and Dvo{\v r}{\'a}k, Jan and Salzberg, Steven L.}, + year = {2017}, + volume = {27}, + pages = {787--792}, + issn = {1549-5469}, + doi = {10.1101/gr.213405.116}, + abstract = {Long sequencing reads generated by single-molecule sequencing technology offer the possibility of dramatically improving the contiguity of genome assemblies. The biggest challenge today is that long reads have relatively high error rates, currently around 15\%. The high error rates make it difficult to use this data alone, particularly with highly repetitive plant genomes. Errors in the raw data can lead to insertion or deletion errors (indels) in the consensus genome sequence, which in turn create significant problems for downstream analysis; for example, a single indel may shift the reading frame and incorrectly truncate a protein sequence. Here, we describe an algorithm that solves the high error rate problem by combining long, high-error reads with shorter but much more accurate Illumina sequencing reads, whose error rates average {$<$}1\%. Our hybrid assembly algorithm combines these two types of reads to construct mega-reads, which are both long and accurate, and then assembles the mega-reads using the CABOG assembler, which was designed for long reads. 
We apply this technique to a large data set of Illumina and PacBio sequences from the species Aegilops tauschii, a large and extremely repetitive plant genome that has resisted previous attempts at assembly. We show that the resulting assembled contigs are far larger than in any previous assembly, with an N50 contig size of 486,807 nucleotides. We compare the contigs to independently produced optical maps to evaluate their large-scale accuracy, and to a set of high-quality bacterial artificial chromosome (BAC)-based assemblies to evaluate base-level accuracy.}, + journal = {Genome Research}, + keywords = {Contig Mapping,DNA,Genome,Genome Size,Genomics,Nucleic Acid,Plant,Poaceae,Repetitive Sequences,Sequence Analysis,Software}, + language = {eng}, + number = {5}, + pmcid = {PMC5411773}, + pmid = {28130360} +} + +@misc{zotero-2259, + file = {/Users/laurent/Zotero/storage/IP8S27HE/scholar_url.html}, + howpublished = {http://scholar.google.fr/scholar\_url?url=http://www.techscience.com/CMES/v122n1/38250/pdf\&hl=en\&sa=X\&d=13540209967190386776\&scisig=AAGBfm0CPvqOoAtX3vg-PSORjQ0IzVAlHQ\&nossl=1\&oi=scholaralrt} +} + +@article{zytnickiMmquantHowCount2017, + title = {Mmquant: How to Count Multi-Mapping Reads?}, + shorttitle = {Mmquant}, + author = {Zytnicki, Matthias}, + year = {2017}, + month = sep, + volume = {18}, + pages = {411}, + issn = {1471-2105}, + doi = {10.1186/s12859-017-1816-4}, + abstract = {RNA-Seq is currently used routinely, and it provides accurate information on gene transcription. However, the method cannot accurately estimate duplicated genes expression. Several strategies have been previously used (drop duplicated genes, distribute uniformly the reads, or estimate expression), but all of them provide biased results.}, + file = {/Users/laurent/Zotero/storage/V2Q7Z7KI/Zytnicki - 2017 - mmquant how to count multi-mapping reads.pdf;/Users/laurent/Zotero/storage/MK2HRTS9/s12859-017-1816-4.html}, + journal = {BMC Bioinformatics}, + number = {1} +} + +