% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article by Pestarino et al. (2025) on validating omics prediction
% models, published ahead of print in Cancer Epidemiology, Biomarkers and
% Prevention. The citation key is unchanged so existing \cite commands resolve.
%
% Cleanup notes for maintainers:
% - Author names use the unambiguous "Last, First" form. The "$^*$" marker
%   that the export attached to Turzanski-Fortner was removed because BibTeX
%   parsed it as part of the surname.
%   NOTE(review): presumably an affiliation marker from the repository
%   export -- confirm whether it needs to be recorded elsewhere.
% - The export stored the placeholder "nn" in both volume and pages. Those
%   fields are omitted until real values exist; the ahead-of-print status
%   stays in the note field.
% - The title no longer ends with a period (styles add punctuation) and only
%   the acronym RNA is brace-protected.
% - The journal name uses the text-mode ampersand escape instead of math mode.
% - Missing spaces between abstract sentences were restored.
% - reportid, cin, ddc, cid, pnm, pid, typ and pubmed are not standard BibTeX
%   fields; their values are kept verbatim from the export.
@article{Pestarino:303479,
  author    = {Pestarino, L. and Turzanski-Fortner, R. and Nøst, T. H. and
               Fotopoulos, I. and Urbarova, I. and Røe, O. D. and
               Langseth, H. and Rounge, T. B.},
  title     = {Recommendations for validating omics prediction models:
               Insights from a lung cancer {RNA} biomarker study},
  journal   = {Cancer Epidemiology, Biomarkers \& Prevention},
  year      = {2025},
  note      = {epub},
  issn      = {1055-9965},
  address   = {Philadelphia, Pa.},
  publisher = {AACR},
  doi       = {10.1158/1055-9965.EPI-25-0787},
  url       = {https://inrepo02.dkfz.de/record/303479},
  pubmed    = {pmid:40794097},
  reportid  = {DKFZ-2025-01676},
  abstract  = {External validation of predictive models in medical
               research is crucial to ensure their generalizability and
               applicability across diverse populations. However,
               validation often reveals discrepancies in model performance
               due to cohort differences, sample collection and storage,
               overfitting, and inconsistencies in data handling. This
               study investigates the challenges encountered during
               external validation of predictive models for early lung
               cancer detection using small RNA biomarkers, tying these
               challenges to specific validation outcomes and deriving
               recommendations. Predictive models based on the XGBoost
               algorithm, developed from serum samples in the JanusRNA
               cohort, were externally validated in two independent
               Norwegian cohorts: HUNT and NOWAC. These cohorts differed in
               sample types, RNA abundance, library preparation protocols,
               and lung cancer histological classification. Strategies to
               harmonize data processing and address these discrepancies
               were employed to ensure a robust validation
               process. Validation revealed significant challenges due to
               cohort heterogeneity. Median AUC values ranged from 0.50 to
               0.66 in validation cohorts, compared to 0.62--0.76 in the
               original models. Models performed worse in the female-only
               NOWAC cohort, where plasma was used, highlighting the impact
               of sample type and cohort characteristics on predictive
               accuracy. Based on the challenges encountered during
               validation, we propose seven recommendations to guide robust
               external validation of omics-based predictive models
               including harmonizing data processing across cohorts,
               re-evaluating overfitting, and critically assessing model
               performance for clinical applications. By highlighting
               practical issues in model validation and providing
               recommendations, this study supports more reliable and
               clinically applicable biomarker-based prediction models,
               ultimately aiding cancer screening and prevention efforts.},
  cin       = {C180},
  ddc       = {610},
  cid       = {I:(DE-He78)C180-20160331},
  pnm       = {313 - Krebsrisikofaktoren und Prävention (POF4-313)},
  pid       = {G:(DE-HGF)POF4-313},
  typ       = {PUB:(DE-HGF)16},
}