% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article record 303479 (see the url field); note = epub.
% volume/pages are intentionally absent: the export only carried the
% placeholder "nn" for both. Add the real values once they are assigned.
% Author names use the unambiguous "Last, First" form; a trailing
% superscript-asterisk marker on one exported name (presumably an
% affiliation flag -- confirm) was dropped so the surname parses cleanly.
@article{Pestarino:303479,
      author       = {Pestarino, L. and Turzanski-Fortner, R. and Nøst, T. H.
                      and Fotopoulos, I. and Urbarova, I. and Røe, O. D. and
                      Langseth, H. and Rounge, T. B.},
      title        = {Recommendations for validating omics prediction models:
                      {Insights} from a lung cancer {RNA} biomarker study},
      journal      = {Cancer Epidemiology, Biomarkers \& Prevention},
      issn         = {1055-9965},
      address      = {Philadelphia, Pa.},
      publisher    = {AACR},
      reportid     = {DKFZ-2025-01676},
      year         = {2025},
      note         = {epub},
      abstract     = {External validation of predictive models in medical
                      research is crucial to ensure their generalizability and
                      applicability across diverse populations. However,
                      validation often reveals discrepancies in model performance
                      due to cohort differences, sample collection and storage,
                      overfitting, and inconsistencies in data handling. This
                      study investigates the challenges encountered during
                      external validation of predictive models for early lung
                      cancer detection using small RNA biomarkers, tying these
                      challenges to specific validation outcomes and deriving
                      recommendations. Predictive models based on the XGBoost
                      algorithm, developed from serum samples in the JanusRNA
                      cohort, were externally validated in two independent
                      Norwegian cohorts: HUNT and NOWAC. These cohorts differed in
                      sample types, RNA abundance, library preparation protocols,
                      and lung cancer histological classification. Strategies to
                      harmonize data processing and address these discrepancies
                      were employed to ensure a robust validation
                      process. Validation revealed significant challenges due to
                      cohort heterogeneity. Median AUC values ranged from 0.50 to
                      0.66 in validation cohorts, compared to 0.62--0.76 in the
                      original models. Models performed worse in the female-only
                      NOWAC cohort, where plasma was used, highlighting the impact
                      of sample type and cohort characteristics on predictive
                      accuracy. Based on the challenges encountered during
                      validation, we propose seven recommendations to guide robust
                      external validation of omics-based predictive models
                      including harmonizing data processing across cohorts,
                      re-evaluating overfitting, and critically assessing model
                      performance for clinical applications. By highlighting
                      practical issues in model validation and providing
                      recommendations, this study supports more reliable and
                      clinically applicable biomarker-based prediction models,
                      ultimately aiding cancer screening and prevention efforts.},
      cin          = {C180},
      ddc          = {610},
      cid          = {I:(DE-He78)C180-20160331},
      pnm          = {313 - Krebsrisikofaktoren und Prävention (POF4-313)},
      pid          = {G:(DE-HGF)POF4-313},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:40794097},
      doi          = {10.1158/1055-9965.EPI-25-0787},
      url          = {https://inrepo02.dkfz.de/record/303479},
}