% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Review article in BMC Medicine 21(1), article number 182 (2023); record
% exported from the institutional repository given in the url field.
% NOTE(review): the export tagged "A. Benner" with a superscript asterisk
% (presumably an affiliation marker -- confirm). It was removed from the author
% field so that the name parses and sorts correctly.
% NOTE(review): the collaboration field arrived collapsed to initials; it was
% restored from the topic group named in the abstract -- verify against the
% publisher record.
% Names are stored as "Last, First" so that the two-word surname "De Bin" is
% not split into a given name plus "Bin".
@article{Rahnenfhrer:276080,
  author        = {Rahnenführer, J. and De Bin, R. and Benner, A. and
                   Ambrogi, F. and Lusa, L. and Boulesteix, A.-L. and
                   Migliavacca, E. and Binder, H. and Michiels, S. and
                   Sauerbrei, W. and McShane, L.},
  collaboration = {for topic group ``High-dimensional data'' (TG9) of the
                   {STRATOS} initiative},
  title         = {Statistical analysis of high-dimensional biomedical data:
                   a gentle introduction to analytical goals, common
                   approaches and challenges},
  journal       = {{BMC} Medicine},
  volume        = {21},
  number        = {1},
  issn          = {1741-7015},
  address       = {Heidelberg [u.a.]},
  publisher     = {Springer},
  reportid      = {DKFZ-2023-00986},
  pages         = {182},
  year          = {2023},
  abstract      = {In high-dimensional data (HDD) settings, the number of
                   variables associated with each observation is very large.
                   Prominent examples of HDD in biomedical research include
                   omics data with a large number of variables such as many
                   measurements across the genome, proteome, or metabolome, as
                   well as electronic health records data that have large
                   numbers of variables recorded for each patient. The
                   statistical analysis of such data requires knowledge and
                   experience, sometimes of complex methods adapted to the
                   respective research questions. Advances in statistical
                   methodology and machine learning methods offer new
                   opportunities for innovative analyses of HDD, but at the
                   same time require a deeper understanding of some fundamental
                   statistical concepts. Topic group TG9 'High-dimensional
                   data' of the STRATOS (STRengthening Analytical Thinking for
                   Observational Studies) initiative provides guidance for the
                   analysis of observational studies, addressing particular
                   statistical challenges and opportunities for the analysis of
                   studies involving HDD. In this overview, we discuss key
                   aspects of HDD analysis to provide a gentle introduction for
                   non-statisticians and for classically trained statisticians
                   with little experience specific to HDD. The paper is
                   organized with respect to subtopics that are most relevant
                   for the analysis of HDD, in particular initial data
                   analysis, exploratory data analysis, multiple testing, and
                   prediction. For each subtopic, main analytical goals in HDD
                   settings are outlined. For each of these goals, basic
                   explanations for some commonly used analysis methods are
                   provided. Situations are identified where traditional
                   statistical methods cannot, or should not, be used in the
                   HDD setting, or where adequate analytic tools are still
                   lacking. Many key references are provided. This review aims
                   to provide a solid statistical foundation for researchers,
                   including statisticians and non-statisticians, who are new
                   to research with HDD or simply want to better evaluate and
                   understand the results of HDD analyses.},
  subtyp        = {Review Article},
  keywords      = {Humans / Goals / Biomedical Research / Research Design /
                   Analytical goals (Other) / Clustering (Other) / Exploratory
                   data analysis (Other) / High-dimensional data (Other) /
                   Initial data analysis (Other) / Multiple testing (Other) /
                   Omics data (Other) / Prediction (Other) / STRATOS initiative
                   (Other)},
  cin           = {C060},
  ddc           = {610},
  cid           = {I:(DE-He78)C060-20160331},
  pnm           = {313 - Krebsrisikofaktoren und Prävention (POF4-313)},
  pid           = {G:(DE-HGF)POF4-313},
  typ           = {PUB:(DE-HGF)16},
  pubmed        = {pmid:37189125},
  pmc           = {pmc:PMC10186672},
  doi           = {10.1186/s12916-023-02858-y},
  url           = {https://inrepo02.dkfz.de/record/276080},
}