% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Entry Reinke:282494 -- arXiv preprint 2302.01790 (year 2023).
%
% Maintenance notes (text outside an entry is ignored by BibTeX/Biber):
%  * Typed as misc: there is no journal, which the article type requires,
%    and howpublished is only honoured by misc/booklet in the standard styles.
%  * Author names use the unambiguous "von Last, First" form so that particle
%    surnames (van Ginneken, van Smeden, Van Calster) are split correctly.
%  * The exported author list carried a superscript-asterisk math marker glued
%    to some surnames. Those markers were removed from the author field; the
%    marked names are kept in the non-standard (style-ignored) field
%    starredauthors. NOTE(review): the marker presumably flags authors
%    affiliated with the exporting institution -- confirm before relying on it.
%  * doi holds the bare DOI (no resolver prefix); styles add the resolver.
%  * The former free-text note (arXiv id, class cs.CV, version v2, DOI URL) is
%    now carried by eprint / archivePrefix / primaryClass / version / doi.
%  * Given names are initials only because the export provided nothing more.
@misc{Reinke:282494,
  author         = {Reinke, A. and Tizabi, M. D. and Baumgartner, M. and
                    Eisenmann, M. and Heckmann-Nötzel, D. and Kavur, A. E. and
                    Rädsch, T. and Sudre, C. H. and Acion, L. and
                    Antonelli, M. and Arbel, T. and Bakas, S. and Benis, A. and
                    Blaschko, M. and Büttner, F. and Cardoso, M. J. and
                    Cheplygina, V. and Chen, J. and Christodoulou, E. and
                    Cimini, B. A. and Collins, G. S. and Farahani, K. and
                    Ferrer, L. and Galdran, A. and van Ginneken, B. and
                    Glocker, B. and Godau, P. and Haase, R. and
                    Hashimoto, D. A. and Hoffman, M. M. and Huisman, M. and
                    Isensee, F. and Jannin, P. and Kahn, C. E. and
                    Kainmueller, D. and Kainz, B. and Karargyris, A. and
                    Karthikesalingam, A. and Kenngott, H. and Kleesiek, J. and
                    Kofler, F. and Kooi, T. and Kopp-Schneider, A. and
                    Kozubek, M. and Kreshuk, A. and Kurc, T. and
                    Landman, B. A. and Litjens, G. and Madani, A. and
                    Maier-Hein, K. and Martel, A. L. and Mattson, P. and
                    Meijering, E. and Menze, B. and Moons, K. G. M. and
                    Müller, H. and Nichyporuk, B. and Nickel, F. and
                    Petersen, J. and Rafelski, S. M. and Rajpoot, N. and
                    Reyes, M. and Riegler, M. A. and Rieke, N. and
                    Saez-Rodriguez, J. and Sánchez, C. I. and Shetty, S. and
                    van Smeden, M. and Summers, R. M. and Taha, A. A. and
                    Tiulpin, A. and Tsaftaris, S. A. and Van Calster, B. and
                    Varoquaux, G. and Wiesenfarth, M. and Yaniv, Z. R. and
                    Jäger, P. F. and Maier-Hein, L.},
  starredauthors = {Reinke, A. and Tizabi, M. D. and Baumgartner, M. and
                    Eisenmann, M. and Heckmann-Nötzel, D. and Kavur, A. E. and
                    Rädsch, T. and Büttner, F. and Christodoulou, E. and
                    Godau, P. and Isensee, F. and Kleesiek, J. and
                    Kopp-Schneider, A. and Maier-Hein, K. and
                    Saez-Rodriguez, J. and Wiesenfarth, M. and Jäger, P. F. and
                    Maier-Hein, L.},
  title          = {Understanding metric-related pitfalls in image analysis
                    validation},
  howpublished   = {arXiv:2302.01790},
  publisher      = {arXiv},
  year           = {2023},
  eprint         = {2302.01790},
  archivePrefix  = {arXiv},
  primaryClass   = {cs.CV},
  version        = {2},
  doi            = {10.48550/arXiv.2302.01790},
  url            = {https://inrepo02.dkfz.de/record/282494},
  abstract       = {Validation metrics are key for the reliable tracking of
                    scientific progress and for bridging the current chasm
                    between artificial intelligence (AI) research and its
                    translation into practice. However, increasing evidence
                    shows that particularly in image analysis, metrics are often
                    chosen inadequately in relation to the underlying research
                    problem. This could be attributed to a lack of accessibility
                    of metric-related knowledge: While taking into account the
                    individual strengths, weaknesses, and limitations of
                    validation metrics is a critical prerequisite to making
                    educated choices, the relevant knowledge is currently
                    scattered and poorly accessible to individual researchers.
                    Based on a multi-stage Delphi process conducted by a
                    multidisciplinary expert consortium as well as extensive
                    community feedback, the present work provides the first
                    reliable and comprehensive common point of access to
                    information on pitfalls related to validation metrics in
                    image analysis. Focusing on biomedical image analysis but
                    with the potential of transfer to other fields, the
                    addressed pitfalls generalize across application domains and
                    are categorized according to a newly created,
                    domain-agnostic taxonomy. To facilitate comprehension,
                    illustrations and specific examples accompany each pitfall.
                    As a structured body of information accessible to
                    researchers of all levels of expertise, this work enhances
                    global comprehension of a key topic in image analysis
                    validation.},
  keywords       = {Computer Vision and Pattern Recognition (cs.CV) (Other) /
                    FOS: Computer and information sciences (Other)},
  reportid       = {DKFZ-2023-01783, arXiv:2302.01790},
  cin            = {E130 / C060 / E230 / E290},
  cid            = {I:(DE-He78)E130-20160331 / I:(DE-He78)C060-20160331 /
                    I:(DE-He78)E230-20160331 / I:(DE-He78)E290-20160331},
  pnm            = {315 - Bildgebung und Radioonkologie (POF4-315)},
  pid            = {G:(DE-HGF)POF4-315},
  typ            = {PUB:(DE-HGF)25},
  SLACcitation   = {$\%\%CITATION$ = $arXiv:2302.01790;\%\%$},
}