% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article by Hieke et al. (2016), BMC Bioinformatics, vol. 17, no. 1,
% article number 327 (the journal uses article numbers, so `pages` is a single
% value rather than a range).
%
% The fields reportid, cin, ddc, cid, pnm, pid, typ, pubmed and pmc are not
% standard BibTeX fields; standard styles ignore them. They appear to be
% bookkeeping identifiers from the repository record given in `url`.
%
% NOTE(review): the exported record tagged "A. Benner" with a math-mode
% superscript asterisk (presumably an affiliation flag -- confirm). It was
% removed because BibTeX would otherwise parse it as part of the surname.
% NOTE(review): the exported record spelled the third author "Schlenl"; this
% was corrected to "Schlenk" -- verify against the publisher page for the DOI.
@article{Hieke:128776,
  author    = {Hieke, S. and Benner, A. and Schlenk, R. F. and
               Schumacher, M. and Bullinger, L. and Binder, H.},
  title     = {Integrating multiple molecular sources into a clinical
               risk prediction signature by extracting complementary
               information},
  journal   = {{BMC} Bioinformatics},
  volume    = {17},
  number    = {1},
  issn      = {1471-2105},
  address   = {London},
  publisher = {BioMed Central},
  reportid  = {DKFZ-2017-04791},
  pages     = {327},
  year      = {2016},
  abstract  = {High-throughput technology allows for genome-wide
               measurements at different molecular levels for the same
               patient, e.g. single nucleotide polymorphisms (SNPs) and
               gene expression. Correspondingly, it might be beneficial to
               also integrate complementary information from different
               molecular levels when building multivariable risk prediction
               models for a clinical endpoint, such as treatment response
               or survival. Unfortunately, such a high-dimensional modeling
               task will often be complicated by a limited overlap of
               molecular measurements at different levels between patients,
               i.e. measurements from all molecular levels are available
               only for a smaller proportion of patients. We propose a
               sequential strategy for building clinical risk prediction
               models that integrate genome-wide measurements from two
               molecular levels in a complementary way. To deal with
               partial overlap, we develop an imputation approach that
               allows us to use all available data. This approach is
               investigated in two acute myeloid leukemia applications
               combining gene expression with either SNP or DNA methylation
               data. After obtaining a sparse risk prediction signature
               e.g. from SNP data, an automatically selected set of
               prognostic SNPs, by componentwise likelihood-based boosting,
               imputation is performed for the corresponding linear
               predictor by a linking model that incorporates e.g. gene
               expression measurements. The imputed linear predictor is
               then used for adjustment when building a prognostic
               signature from the gene expression data. For evaluation, we
               consider stability, as quantified by inclusion frequencies
               across resampling data sets. Despite an extremely small
               overlap in the application example with gene expression and
               SNPs, several genes are seen to be more stably identified
               when taking the (imputed) linear predictor from the SNP data
               into account. In the application with gene expression and
               DNA methylation, prediction performance with respect to
               survival also indicates that the proposed approach might
               work well. We consider imputation of linear predictor values
               to be a feasible and sensible approach for dealing with
               partial overlap in complementary integrative analysis of
               molecular measurements at different levels. More generally,
               these results indicate that a complementary strategy for
               integrating different molecular levels can result in more
               stable risk prediction signatures, potentially providing a
               more reliable insight into the underlying biology.},
  cin       = {C060},
  ddc       = {004},
  cid       = {I:(DE-He78)C060-20160331},
  pnm       = {313 - Cancer risk factors and prevention (POF3-313)},
  pid       = {G:(DE-HGF)POF3-313},
  typ       = {PUB:(DE-HGF)16},
  pubmed    = {pmid:27578050},
  pmc       = {pmc:PMC5004308},
  doi       = {10.1186/s12859-016-1183-6},
  url       = {https://inrepo02.dkfz.de/record/128776},
}