Understanding metric-related pitfalls in image analysis validation

Reinke, Annika; Sudre, Carole H.; Antonelli, Michela; Karargyris, Alexandros; Kainmueller, Dagmar; Kenngott, Hannes; Tsaftaris, Sotirios A.; Tiulpin, Aleksei; Blaschko, Matthew; Galdran, Adrian; Rädsch, Tim; Glocker, Ben; Petersen, Jens; Taha, Abdel A.; Menze, Bjoern; Riegler, Michael A.; Tizabi, Minu D.; Sánchez, Clara I.; Ferrer, Luciana; Rajpoot, Nasir; Varoquaux, Gaël; Karthikesalingam, Alan; Martel, Anne L.; Litjens, Geert; Kreshuk, Anna; Acion, Laura; Büttner, Florian; Hoffman, Michael M.; Madani, Amin; Hashimoto, Daniel A.; Bakas, Spyridon; Kainz, Bernhard; Maier-Hein, Lena; Baumgartner, Michael; van Smeden, Maarten; Chen, Jianxu; Meijering, Erik; Kurc, Tahsin; Eisenmann, Matthias; Benis, Arriel; Kleesiek, Jens; Landman, Bennett A.; Farahani, Keyvan; Nichyporuk, Brennan; Cimini, Beth A.; Summers, Ronald M.; Kozubek, Michal; Saez-Rodriguez, Julio; Kahn, Charles E.; Christodoulou, Evangelia; Jäger, Paul F.; Arbel, Tal; Godau, Patrick; Wiesenfarth, Manuel; Jannin, Pierre; Kofler, Florian; Müller, Henning; Rieke, Nicola; Mattson, Peter; Isensee, Fabian; Heckmann-Nötzel, Doreen; Huisman, Merel; Collins, Gary S.; Nickel, Felix; Kavur, A. Emre; Van Calster, Ben; Moons, Karel G. M.; Rafelski, Susanne M.; van Ginneken, Bram; Cardoso, M. Jorge; Maier-Hein, Klaus; Kooi, Thijs; Shetty, Shravya; Cheplygina, Veronika; Yaniv, Ziv R.; Reyes, Mauricio; Kopp-Schneider, Annette; Haase, Robert

doi:10.48550/arXiv.2302.01790

% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% arXiv preprint record (no journal), hence the misc entry type.
% Author names are stored as "Last, First" with full given names so that
% particles and compound surnames (van Ginneken, van Smeden, Van Calster,
% Maier-Hein, ...) are parsed unambiguously; the author order is unchanged.
% The doi field holds the bare DOI without any resolver prefix.
@misc{Reinke:282494,
      author       = {Reinke, Annika and Tizabi, Minu D. and Baumgartner, Michael
                      and Eisenmann, Matthias and Heckmann-Nötzel, Doreen and
                      Kavur, A. Emre and Rädsch, Tim and Sudre, Carole H. and
                      Acion, Laura and Antonelli, Michela and Arbel, Tal and
                      Bakas, Spyridon and Benis, Arriel and Blaschko, Matthew and
                      Büttner, Florian and Cardoso, M. Jorge and Cheplygina,
                      Veronika and Chen, Jianxu and Christodoulou, Evangelia and
                      Cimini, Beth A. and Collins, Gary S. and Farahani, Keyvan
                      and Ferrer, Luciana and Galdran, Adrian and van Ginneken,
                      Bram and Glocker, Ben and Godau, Patrick and Haase, Robert
                      and Hashimoto, Daniel A. and Hoffman, Michael M. and
                      Huisman, Merel and Isensee, Fabian and Jannin, Pierre and
                      Kahn, Charles E. and Kainmueller, Dagmar and Kainz,
                      Bernhard and Karargyris, Alexandros and Karthikesalingam,
                      Alan and Kenngott, Hannes and Kleesiek, Jens and Kofler,
                      Florian and Kooi, Thijs and Kopp-Schneider, Annette and
                      Kozubek, Michal and Kreshuk, Anna and Kurc, Tahsin and
                      Landman, Bennett A. and Litjens, Geert and Madani, Amin and
                      Maier-Hein, Klaus and Martel, Anne L. and Mattson, Peter
                      and Meijering, Erik and Menze, Bjoern and Moons, Karel G.
                      M. and Müller, Henning and Nichyporuk, Brennan and Nickel,
                      Felix and Petersen, Jens and Rafelski, Susanne M. and
                      Rajpoot, Nasir and Reyes, Mauricio and Riegler, Michael A.
                      and Rieke, Nicola and Saez-Rodriguez, Julio and Sánchez,
                      Clara I. and Shetty, Shravya and van Smeden, Maarten and
                      Summers, Ronald M. and Taha, Abdel A. and Tiulpin, Aleksei
                      and Tsaftaris, Sotirios A. and Van Calster, Ben and
                      Varoquaux, Gaël and Wiesenfarth, Manuel and Yaniv, Ziv R.
                      and Jäger, Paul F. and Maier-Hein, Lena},
      title        = {Understanding metric-related pitfalls in image analysis
                      validation},
      publisher    = {arXiv},
      reportid     = {DKFZ-2023-01783, arXiv:2302.01790},
      year         = {2023},
      note         = {arXiv:2302.01790 [cs.CV] (or arXiv:2302.01790v2 [cs.CV] for
                      this version) https://doi.org/10.48550/arXiv.2302.01790},
      abstract     = {Validation metrics are key for the reliable tracking of
                      scientific progress and for bridging the current chasm
                      between artificial intelligence (AI) research and its
                      translation into practice. However, increasing evidence
                      shows that particularly in image analysis, metrics are often
                      chosen inadequately in relation to the underlying research
                      problem. This could be attributed to a lack of accessibility
                      of metric-related knowledge: While taking into account the
                      individual strengths, weaknesses, and limitations of
                      validation metrics is a critical prerequisite to making
                      educated choices, the relevant knowledge is currently
                      scattered and poorly accessible to individual researchers.
                      Based on a multi-stage Delphi process conducted by a
                      multidisciplinary expert consortium as well as extensive
                      community feedback, the present work provides the first
                      reliable and comprehensive common point of access to
                      information on pitfalls related to validation metrics in
                      image analysis. Focusing on biomedical image analysis but
                      with the potential of transfer to other fields, the
                      addressed pitfalls generalize across application domains and
                      are categorized according to a newly created,
                      domain-agnostic taxonomy. To facilitate comprehension,
                      illustrations and specific examples accompany each pitfall.
                      As a structured body of information accessible to
                      researchers of all levels of expertise, this work enhances
                      global comprehension of a key topic in image analysis
                      validation.},
      keywords     = {Computer Vision and Pattern Recognition (cs.CV) (Other) /
                      FOS: Computer and information sciences (Other)},
      cin          = {E130 / C060 / E230 / E290},
      cid          = {I:(DE-He78)E130-20160331 / I:(DE-He78)C060-20160331 /
                      I:(DE-He78)E230-20160331 / I:(DE-He78)E290-20160331},
      pnm          = {315 - Bildgebung und Radioonkologie (POF4-315)},
      pid          = {G:(DE-HGF)POF4-315},
      typ          = {PUB:(DE-HGF)25},
      eprint       = {2302.01790},
      howpublished = {arXiv:2302.01790},
      archivePrefix = {arXiv},
      primaryClass = {cs.CV},
      SLACcitation = {$\%\%CITATION$ = $arXiv:2302.01790;\%\%$},
      doi          = {10.48550/arXiv.2302.01790},
      url          = {https://inrepo02.dkfz.de/record/282494},
}

guest :: login DKFZ
		Search		Submit		Personalize Your alerts Your baskets Your searches		Help