% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article record (BMC bioinformatics 17(1), article no. 327, 2016);
% the repository landing page is given in the url field.
% Cleanup notes:
%   - The export carried a superscript asterisk marker on "A. Benner" inside
%     the author field; it was removed because BibTeX would parse it as part
%     of the name. Presumably it flagged an affiliation -- TODO confirm.
%   - Surname "Schlenl" corrected to "Schlenk" to match the published
%     article -- verify against the DOI record.
%   - Trailing period removed from the title (the style supplies punctuation).
%   - Missing spaces after sentence ends restored in the abstract.
@article{Hieke:128776,
      author       = {Hieke, S. and Benner, A. and Schlenk, R. F. and
                      Schumacher, M. and Bullinger, L. and Binder, H.},
      title        = {Integrating multiple molecular sources into a clinical
                      risk prediction signature by extracting complementary
                      information},
      journal      = {BMC bioinformatics},
      volume       = {17},
      number       = {1},
      issn         = {1471-2105},
      address      = {London},
      publisher    = {BioMed Central},
      reportid     = {DKFZ-2017-04791},
      pages        = {327},
      year         = {2016},
      abstract     = {High-throughput technology allows for genome-wide
                      measurements at different molecular levels for the same
                      patient, e.g. single nucleotide polymorphisms (SNPs) and
                      gene expression. Correspondingly, it might be beneficial to
                      also integrate complementary information from different
                      molecular levels when building multivariable risk prediction
                      models for a clinical endpoint, such as treatment response
                      or survival. Unfortunately, such a high-dimensional modeling
                      task will often be complicated by a limited overlap of
                      molecular measurements at different levels between patients,
                      i.e. measurements from all molecular levels are available
                      only for a smaller proportion of patients. We propose a
                      sequential strategy for building clinical risk prediction
                      models that integrate genome-wide measurements from two
                      molecular levels in a complementary way. To deal with
                      partial overlap, we develop an imputation approach that
                      allows us to use all available data. This approach is
                      investigated in two acute myeloid leukemia applications
                      combining gene expression with either SNP or DNA methylation
                      data. After obtaining a sparse risk prediction signature
                      e.g. from SNP data, an automatically selected set of
                      prognostic SNPs, by componentwise likelihood-based boosting,
                      imputation is performed for the corresponding linear
                      predictor by a linking model that incorporates e.g. gene
                      expression measurements. The imputed linear predictor is
                      then used for adjustment when building a prognostic
                      signature from the gene expression data. For evaluation, we
                      consider stability, as quantified by inclusion frequencies
                      across resampling data sets. Despite an extremely small
                      overlap in the application example with gene expression and
                      SNPs, several genes are seen to be more stably identified
                      when taking the (imputed) linear predictor from the SNP data
                      into account. In the application with gene expression and
                      DNA methylation, prediction performance with respect to
                      survival also indicates that the proposed approach might
                      work well. We consider imputation of linear predictor values
                      to be a feasible and sensible approach for dealing with
                      partial overlap in complementary integrative analysis of
                      molecular measurements at different levels. More generally,
                      these results indicate that a complementary strategy for
                      integrating different molecular levels can result in more
                      stable risk prediction signatures, potentially providing a
                      more reliable insight into the underlying biology.},
      cin          = {C060},
      ddc          = {004},
      cid          = {I:(DE-He78)C060-20160331},
      pnm          = {313 - Cancer risk factors and prevention (POF3-313)},
      pid          = {G:(DE-HGF)POF3-313},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:27578050},
      pmc          = {pmc:PMC5004308},
      doi          = {10.1186/s12859-016-1183-6},
      url          = {https://inrepo02.dkfz.de/record/128776},
}