% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article record 127522 (see the `url` field for the repository page).
% NOTE(review): the exported author list tagged every name with `$^*$`
% (presumably an affiliation marker -- confirm against the repository record).
% The markers are dropped here so each name parses as a plain "Last, First"
% name; page range, title punctuation and fused abstract sentences are
% normalised as well.
@ARTICLE{Sill:127522,
      author       = {Sill, M. and Saadati, M. and Benner, A.},
      title        = {Applying stability selection to consistently estimate
                      sparse principal components in high-dimensional molecular
                      data},
      journal      = {Bioinformatics},
      volume       = {31},
      number       = {16},
      issn         = {1460-2059},
      address      = {Oxford},
      publisher    = {Oxford Univ. Press},
      reportid     = {DKFZ-2017-03545},
      pages        = {2683--2690},
      year         = {2015},
      abstract     = {Principal component analysis (PCA) is a basic tool often
                      used in bioinformatics for visualization and dimension
                      reduction. However, it is known that PCA may not
                      consistently estimate the true direction of maximal
                      variability in high-dimensional, low sample size settings,
                      which are typical for molecular data. Assuming that the
                      underlying signal is sparse, i.e. that only a fraction of
                      features contribute to a principal component (PC), this
                      estimation consistency can be retained. Most existing sparse
                      PCA methods use L1-penalization, i.e. the lasso, to perform
                      feature selection. But, the lasso is known to lack variable
                      selection consistency in high dimensions and therefore a
                      subsequent interpretation of selected features can give
                      misleading results. We present S4VDPCA, a sparse PCA method
                      that incorporates a subsampling approach, namely stability
                      selection. S4VDPCA can consistently select the truly
                      relevant variables contributing to a sparse PC while also
                      consistently estimate the direction of maximal variability.
                      The performance of the S4VDPCA is assessed in a simulation
                      study and compared to other PCA approaches, as well as to a
                      hypothetical oracle PCA that knows the truly relevant
                      features in advance and thus finds optimal, unbiased sparse
                      PCs. S4VDPCA is computationally efficient and performs best
                      in simulations regarding parameter estimation consistency
                      and feature selection consistency. Furthermore, S4VDPCA is
                      applied to a publicly available gene expression data set of
                      medulloblastoma brain tumors. Features contributing to the
                      first two estimated sparse PCs represent genes significantly
                      over-represented in pathways typically deregulated between
                      molecular subgroups of medulloblastoma. Software is
                      available at https://github.com/mwsill/s4vdpca. Contact:
                      m.sill@dkfz.de. Supplementary data are available at
                      Bioinformatics online.},
      cin          = {C060},
      ddc          = {004},
      cid          = {I:(DE-He78)C060-20160331},
      pnm          = {313 - Cancer risk factors and prevention (POF3-313)},
      pid          = {G:(DE-HGF)POF3-313},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:25861969},
      pmc          = {pmc:PMC4528629},
      doi          = {10.1093/bioinformatics/btv197},
      url          = {https://inrepo02.dkfz.de/record/127522},
}