% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@article{Dexl:307499,
  author        = {Dexl, J. and Gatidis, S. and Früh, M. and Jeblick, K. and
                   Mittermeier, A. and Stüber, A. T. and Schachtner, B. and
                   Topalis, J. and Fabritius, M. P. and Gu, S. and
                   Murugesan, G. K. and VanOss, J. and Ye, J. and He, J. and
                   Alloula, A. and Papież, B. W. and Mesbah, Z. and
                   Modzelewski, R. and Hadlich, M. and Marinov, Z. and
                   Stiefelhagen, R. and Isensee, F. and Maier-Hein, K. H. and
                   Galdran, A. and Nikolaou, K. and la Fougère, C. and
                   Kim, M. and Kallenberg, N. and Kleesiek, J. and
                   Herrmann, K. and Werner, R. and Ingrisch, M. and
                   Cyran, C. C. and Küstner, T.},
  title         = {{AutoPET} Challenge on Fully Automated Lesion Segmentation
                   in Oncologic {PET/CT} Imaging, Part 2: Domain
                   Generalization},
  journal       = {Journal of Nuclear Medicine},
  issn          = {0097-9058},
  address       = {New York, NY},
  publisher     = {Soc.},
  reportid      = {DKFZ-2026-00004},
  year          = {2025},
  note          = {epub},
  internal-note = {Review notes (field is ignored by bibliography styles).
                   (1) The source record appended a superscript asterisk to
                   the author names F. Isensee, K. H. Maier-Hein, C. la
                   Fougère, J. Kleesiek; the markers were removed from the
                   author field because BibTeX parses them as part of the
                   surname. The meaning of the asterisk is not stated in the
                   record - TODO confirm. (2) The source record carried the
                   placeholder value "nn" for both volume and pages; these
                   fields were dropped and should be added once the final
                   volume and page numbers are known. (3) The publisher
                   value "Soc." looks truncated - TODO confirm the full
                   name.},
  abstract      = {This article reports the results of the second iteration of
                   the autoPET challenge on automated lesion segmentation in
                   whole-body PET/CT, held in conjunction with the 26th
                   International Conference on Medical Image Computing and
                   Computer Assisted Intervention in 2023. In contrast to the
                   first autoPET challenge, which served as a proof of concept,
                   this study investigates whether machine learning-based
                   segmentation models trained on data from a single source can
                   maintain performance across clinically relevant variations
                   in PET/CT data, reflecting the demands of real-world
                   deployment. Methods: A comprehensive biomedical segmentation
                   challenge on PET/CT domain generalization was designed and
                   conducted. Participants were tasked to train machine
                   learning models on annotated whole-body 18F-FDG data (n =
                   1,014). These models were then evaluated on a test set of
                   200 samples from 5 clinically relevant domains, including
                   variations in institutions, pathologies, and populations and
                   a different tracer. Performance was measured in terms of
                   average dice similarity coefficient, average false-positive
                   volume, and average false-negative volume. The
                   best-performing teams were awarded in 3 categories.
                   Furthermore, a detailed analysis was conducted after the
                   challenge, examining results across domains and unique
                   instances, along with a ranking analysis. Results:
                   Generalization from a single-source domain remains a
                   significant challenge. Seventeen international teams
                   successfully participated in the challenge. The
                   best-performing team reached an average dice similarity
                   coefficient of 0.5038, a mean false-positive volume of
                   87.8388 mL, and a mean false-negative volume of 8.4154 mL on
                   the test set. nnU-Net was the most commonly used framework,
                   with most participants using a 3-dimensional U-Net. Despite
                   competitive in-domain results, out-of-domain performance
                   deteriorated substantially, particularly on pediatric and
                   prostate-specific membrane antigen data. Detailed error
                   analysis revealed frequent false-positives due to
                   physiologic uptake and decreased sensitivity in detecting
                   small or low-uptake lesions. A majority-vote ensemble
                   offered minimal performance gains, whereas an oracle
                   ensemble indicates hypothetical gains. Ranking analysis
                   showed no single team consistently outperformed all others
                   across ranking schemes. Conclusion: The second autoPET
                   challenge provides a comprehensive evaluation of the current
                   state of automated PET/CT tumor segmentation, highlighting
                   both progress and persistent challenges of single-source
                   domain generalization and the need for diverse public
                   datasets to enhance algorithm robustness.},
  keywords      = {PET/CT (Other) / biomedical image analysis challenge
                   (Other) / deep learning (Other) / domain generalization
                   (Other) / oncology (Other) / segmentation (Other)},
  cin           = {E230 / TU01 / ED01},
  ddc           = {610},
  cid           = {I:(DE-He78)E230-20160331 / I:(DE-He78)TU01-20160331 /
                   I:(DE-He78)ED01-20160331},
  pnm           = {315 - Bildgebung und Radioonkologie (POF4-315)},
  pid           = {G:(DE-HGF)POF4-315},
  typ           = {PUB:(DE-HGF)16},
  pubmed        = {pmid:41469162},
  doi           = {10.2967/jnumed.125.270260},
  url           = {https://inrepo02.dkfz.de/record/307499},
}