% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@article{Rabe:301473,
  author        = {Rabe, M. and Meliadò, E. F. and Marschner, S. N. and
                   Belka, C. and Corradini, S. and
                   van den Berg, C. A. T. and Landry, G. and Kurz, C.},
  internal-note = {The source record marks S. N. Marschner and C. Belka
                   with an asterisk in the author list; the marker was
                   moved here because markup inside the author field is
                   parsed as part of the surname. The meaning of the
                   asterisk is not stated in this record.},
  title         = {Patient-specific uncertainty calibration of deep
                   learning-based autosegmentation networks for adaptive
                   {MRI}-guided lung radiotherapy},
  journal       = {Physics in Medicine and Biology},
  volume        = {70},
  number        = {10},
  issn          = {0031-9155},
  address       = {Bristol},
  publisher     = {IOP Publ.},
  reportid      = {DKFZ-2025-01015},
  pages         = {105018},
  year          = {2025},
  abstract      = {Objective. Uncertainty assessment of deep learning
                   autosegmentation (DLAS) models can support contour
                   corrections in adaptive radiotherapy (ART), e.g. by
                   utilizing Monte Carlo Dropout (MCD) uncertainty maps.
                   However, poorly calibrated uncertainties at the patient
                   level often render these clinically nonviable. We evaluated
                   population-based and patient-specific DLAS accuracy and
                   uncertainty calibration and propose a patient-specific
                   post-training uncertainty calibration method for DLAS in
                   ART. Approach. The study included 122 lung cancer patients
                   treated with a low-field MR-linac (80/19/23
                   training/validation/test cases). Ten single-label 3D-U-Net
                   population-based baseline models (BM) were trained with
                   dropout using planning MRIs (pMRIs) and contours for nine
                   organs-at-risk (OARs) and gross tumor volumes (GTVs).
                   Patient-specific models (PS) were created by fine-tuning BMs
                   with each test patient's pMRI. Model uncertainty was
                   assessed with MCD, averaged into probability maps.
                   Uncertainty calibration was evaluated with reliability
                   diagrams and expected calibration error (ECE). A proposed
                   post-training calibration method rescaled MCD probabilities
                   for fraction images in BM (calBM) and PS (calPS) after
                   fitting reliability diagrams from pMRIs. All models were
                   evaluated on fraction images using Dice similarity
                   coefficient (DSC), 95th percentile Hausdorff distance (HD95)
                   and ECE. Metrics were compared among models for all OARs
                   combined (n = 163), and the GTV (n = 23), using Friedman and
                   posthoc-Nemenyi tests (α = 0.05). Main results. For the
                   OARs, patient-specific fine-tuning significantly (p < 0.001)
                   increased median DSC from 0.78 (BM) to 0.86 (PS) and reduced
                   HD95 from 14 mm (BM) to 6.0 mm (PS). Uncertainty calibration
                   achieved substantial reductions in ECE, from 0.25 (BM) to
                   0.091 (calBM) and 0.22 (PS) to 0.11 (calPS) (p < 0.001),
                   without significantly affecting DSC or HD95 (p > 0.05). For
                   the GTV, BM performance was poor (DSC = 0.05) but
                   significantly (p < 0.001) improved with PS training (DSC =
                   0.75) while uncertainty calibration reduced ECE from 0.22
                   (PS) to 0.15 (calPS) (p = 0.45). Significance. Post-training
                   uncertainty calibration yields geometrically accurate DLAS
                   models with well-calibrated uncertainty estimates, crucial
                   for ART applications.},
  keywords      = {Humans / Deep Learning / Uncertainty / Radiotherapy,
                   Image-Guided: methods / Calibration / Lung Neoplasms:
                   radiotherapy / Lung Neoplasms: diagnostic imaging / Magnetic
                   Resonance Imaging / Organs at Risk: radiation effects /
                   Radiotherapy Planning, Computer-Assisted: methods / MR-linac
                   (Other) / Monte Carlo dropout (Other) / adaptive
                   radiotherapy (Other) / autosegmentation (Other) / deep
                   learning (Other) / epistemic uncertainty (Other) /
                   uncertainty calibration (Other)},
  cin           = {MU01},
  ddc           = {530},
  cid           = {I:(DE-He78)MU01-20160331},
  pnm           = {899 - ohne Topic (POF4-899)},
  pid           = {G:(DE-HGF)POF4-899},
  typ           = {PUB:(DE-HGF)16},
  pubmed        = {pmid:40340988},
  doi           = {10.1088/1361-6560/add640},
  url           = {https://inrepo02.dkfz.de/record/301473},
}