% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Legha et al. (2025), Journal of Clinical Epidemiology -- record 307383 of the
% DKFZ repository (see the url field).  The note field says "epub"; the export
% carried the placeholder "nn" for both volume and pages (presumably not yet
% assigned), so those two fields are omitted here until real values are known.
% The export also tagged E. Christodoulou with an asterisk (presumably marking
% the institute-affiliated author); the marker was removed from the author
% field because BibTeX would otherwise treat it as part of the surname.
@article{Legha:307383,
      author       = {Legha, A. and Ensor, J. and Whittle, R. and Archer, L.
                      and Van Calster, B. and Christodoulou, E.
                      and Snell, K. I. E. and Sadatsafavi, M.
                      and Collins, G. S. and Riley, R. D.},
      title        = {Sequential sample size calculations and learning curves
                      safeguard the robust development of a clinical prediction
                      model for individuals},
      journal      = {Journal of Clinical Epidemiology},
      issn         = {0895-4356},
      address      = {Amsterdam},
      publisher    = {Elsevier Science},
      reportid     = {DKFZ-2025-03027},
      year         = {2025},
      note         = {epub},
      abstract     = {When recruiting participants to a new study developing a
                      clinical prediction model (CPM), sample size calculations
                      are typically conducted before data collection based on
                      sensible assumptions. This leads to a fixed sample size, but
                      if the assumptions are inaccurate, the actual sample size
                      required to develop a reliable model may be higher or even
                      lower. To safeguard against this, adaptive sample size
                      approaches have been proposed, based on sequential
                      evaluation of (changes in) a model's predictive
                      performance. To illustrate and extend sequential sample size
                      calculations for CPM development by (i) proposing stopping
                      rules for prospective data collection based on minimising
                      uncertainty (instability) and misclassification of
                      individual-level predictions, and (ii) showcasing how it
                      safeguards against inaccurate fixed sample size
                      calculations. Using the sequential approach repeats the
                      pre-defined model development strategy every time a chosen
                      number (e.g., 100) of participants are recruited and
                      adequately followed up. At each stage, CPM performance is
                      evaluated using bootstrapping, leading to prediction and
                      classification stability statistics and plots, alongside
                      optimism-adjusted measures of calibration and
                      discrimination. Learning curves display the trend of results
                      against sample size and recruitment is stopped when a chosen
                      stopping rule is met. Our approach is illustrated for model
                      development of acute kidney injury using (penalised)
                      logistic regression CPMs. Prior to recruitment based on
                      perceived sensible assumptions, the fixed sample size
                      calculation suggests recruiting 342 patients to minimise
                      overfitting; however, during data collection the sequential
                      approach reveals that a much larger sample size of 1100 is
                      required to minimise overfitting (targeting a
                      bootstrap-corrected calibration slope $\geq$0.9). If the
                      stopping rule criteria also target small uncertainty and
                      misclassification probability of individual predictions, the
                      sequential approach suggests an even larger sample size of
                      about n=1800. For CPM development studies involving
                      prospective data collection, a sequential sample size
                      approach allows users to dynamically monitor
                      individual-level prediction and classification instability.
                      This helps determine when enough participants have been
                      recruited and safeguards against using inaccurate
                      assumptions in a sample size calculation prior to data
                      recruitment. Engagement with patients and other stakeholders
                      is crucial to identify sensible context-specific stopping
                      rules for robust individual predictions.},
      keywords     = {Clinical Prediction Models (Other) / Instability (Other) /
                      Learning Curves (Other) / Model Development (Other) / Sample
                      Size (Other) / Sequential (Other) / Uncertainty (Other)},
      cin          = {E130},
      ddc          = {610},
      cid          = {I:(DE-He78)E130-20160331},
      pnm          = {315 - Bildgebung und Radioonkologie (POF4-315)},
      pid          = {G:(DE-HGF)POF4-315},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:41423140},
      doi          = {10.1016/j.jclinepi.2025.112117},
      url          = {https://inrepo02.dkfz.de/record/307383},
}