% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article, currently an electronic publication ahead of print (see the
% note field). Maintenance notes for this entry:
%  - The repository export carried placeholder values "nn" for volume and
%    pages; they are omitted here until the real values are assigned.
%  - The export tagged the author E. Christodoulou with a superscript asterisk
%    (presumably an institutional-affiliation marker -- TODO confirm). The
%    marker is removed from the author field because it corrupts name parsing.
%  - Authors use the "Last, First" form so that the compound surname
%    "Van Calster" is not split into a given name and a surname.
@article{Legha:307383,
  author    = {Legha, A. and Ensor, J. and Whittle, R. and Archer, L. and
               Van Calster, B. and Christodoulou, E. and Snell, K. I. E. and
               Sadatsafavi, M. and Collins, G. S. and Riley, R. D.},
  title     = {Sequential sample size calculations and learning curves
               safeguard the robust development of a clinical prediction
               model for individuals},
  journal   = {Journal of Clinical Epidemiology},
  issn      = {0895-4356},
  address   = {Amsterdam [u.a.]},
  publisher = {Elsevier Science},
  reportid  = {DKFZ-2025-03027},
  year      = {2025},
  note      = {epub},
  abstract  = {When recruiting participants to a new study developing a
               clinical prediction model (CPM), sample size calculations
               are typically conducted before data collection based on
               sensible assumptions. This leads to a fixed sample size, but
               if the assumptions are inaccurate, the actual sample size
               required to develop a reliable model may be higher or even
               lower. To safeguard against this, adaptive sample size
               approaches have been proposed, based on sequential
               evaluation of (changes in) a model's predictive
               performance. To illustrate and extend sequential sample size
               calculations for CPM development by (i) proposing stopping
               rules for prospective data collection based on minimising
               uncertainty (instability) and misclassification of
               individual-level predictions, and (ii) showcasing how it
               safeguards against inaccurate fixed sample size
               calculations. Using the sequential approach repeats the
               pre-defined model development strategy every time a chosen
               number (e.g., 100) of participants are recruited and
               adequately followed up. At each stage, CPM performance is
               evaluated using bootstrapping, leading to prediction and
               classification stability statistics and plots, alongside
               optimism-adjusted measures of calibration and
               discrimination. Learning curves display the trend of results
               against sample size and recruitment is stopped when a chosen
               stopping rule is met. Our approach is illustrated for model
               development of acute kidney injury using (penalised)
               logistic regression CPMs. Prior to recruitment based on
               perceived sensible assumptions, the fixed sample size
               calculation suggests recruiting 342 patients to minimise
               overfitting; however, during data collection the sequential
               approach reveals that a much larger sample size of 1100 is
               required to minimise overfitting (targeting a
               bootstrap-corrected calibration slope ≥0.9). If the
               stopping rule criteria also target small uncertainty and
               misclassification probability of individual predictions, the
               sequential approach suggests an even larger sample size of
               about n=1800. For CPM development studies involving
               prospective data collection, a sequential sample size
               approach allows users to dynamically monitor
               individual-level prediction and classification instability.
               This helps determine when enough participants have been
               recruited and safeguards against using inaccurate
               assumptions in a sample size calculation prior to data
               recruitment. Engagement with patients and other stakeholders
               is crucial to identify sensible context-specific stopping
               rules for robust individual predictions.},
  keywords  = {Clinical Prediction Models (Other) / Instability (Other) /
               Learning Curves (Other) / Model Development (Other) / Sample
               Size (Other) / Sequential (Other) / Uncertainty (Other)},
  cin       = {E130},
  ddc       = {610},
  cid       = {I:(DE-He78)E130-20160331},
  pnm       = {315 - Bildgebung und Radioonkologie (POF4-315)},
  pid       = {G:(DE-HGF)POF4-315},
  typ       = {PUB:(DE-HGF)16},
  pubmed    = {pmid:41423140},
  doi       = {10.1016/j.jclinepi.2025.112117},
  url       = {https://inrepo02.dkfz.de/record/307383},
}