% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article record for "Beyond rankings: Learning (more) from algorithm
% validation" (Medical Image Analysis, vol. 86, 2023).
%
% Maintenance notes:
%  - Author names are stored as "Last, First" with no markup, so BibTeX/Biber
%    parse, sort and abbreviate them correctly. The export originally appended
%    a superscript star to some names; those names are kept in the
%    style-ignored "starred" field. NOTE(review): the star presumably marks
%    authors affiliated with the exporting institution -- confirm.
%  - The hash characters in "note" are escaped because styles that print the
%    note would otherwise pass a bare macro-parameter character to LaTeX.
%  - reportid, starred, cin, ddc, cid, pnm, pid, typ and pubmed are not
%    standard BibTeX fields; standard styles ignore them.
@article{Ross:274460,
      author       = {Ross, T. and
                      Bruno, P. and
                      Reinke, A. and
                      Wiesenfarth, M. and
                      Koeppel, L. and
                      Full, P. M. and
                      Pekdemir, B. and
                      Godau, P. and
                      Trofimova, D. and
                      Isensee, F. and
                      Adler, T. J. and
                      Tran, T. N. and
                      Moccia, S. and
                      Calimeri, F. and
                      Müller-Stich, B. P. and
                      Kopp-Schneider, A. and
                      Maier-Hein, L.},
      starred      = {Ross, T. and Bruno, P. and Reinke, A. and
                      Wiesenfarth, M. and Full, P. M. and Pekdemir, B. and
                      Godau, P. and Trofimova, D. and Isensee, F. and
                      Adler, T. J. and Tran, T. N. and Kopp-Schneider, A. and
                      Maier-Hein, L.},
      title        = {Beyond rankings: Learning (more) from algorithm
                      validation},
      journal      = {Medical Image Analysis},
      volume       = {86},
      issn         = {1361-8415},
      address      = {Amsterdam [u.a.]},
      publisher    = {Elsevier Science},
      reportid     = {DKFZ-2023-00605},
      pages        = {102765},
      year         = {2023},
      note         = {\#EA:E130\#LA:E130\#},
      abstract     = {Challenges have become the state-of-the-art approach to
                      benchmark image analysis algorithms in a comparative manner.
                      While the validation on identical data sets was a great step
                      forward, results analysis is often restricted to pure
                      ranking tables, leaving relevant questions unanswered.
                      Specifically, little effort has been put into the systematic
                      investigation on what characterizes images in which
                      state-of-the-art algorithms fail. To address this gap in the
                      literature, we (1) present a statistical framework for
                      learning from challenges and (2) instantiate it for the
                      specific task of instrument instance segmentation in
                      laparoscopic videos. Our framework relies on the semantic
                      meta data annotation of images, which serves as foundation
                      for a General Linear Mixed Models (GLMM) analysis. Based on
                      51,542 meta data annotations performed on 2,728 images, we
                      applied our approach to the results of the Robust Medical
                      Instrument Segmentation Challenge (ROBUST-MIS) challenge
                      2019 and revealed underexposure, motion and occlusion of
                      instruments as well as the presence of smoke or other
                      objects in the background as major sources of algorithm
                      failure. Our subsequent method development, tailored to the
                      specific remaining issues, yielded a deep learning model
                      with state-of-the-art overall performance and specific
                      strengths in the processing of images in which previous
                      methods tended to fail. Due to the objectivity and generic
                      applicability of our approach, it could become a valuable
                      tool for validation in the field of medical image analysis
                      and beyond.},
      keywords     = {Artificial intelligence (Other) / Biomedical image analysis
                      challenges (Other) / Deep learning (Other) / Endoscopic
                      vision (Other) / Generalized linear mixed models (Other) /
                      Grand challenges (Other) / Image characteristics driven
                      algorithm development (Other) / Instrument segmentation
                      (Other) / Minimally invasive surgery (Other) / Surgical data
                      science (Other)},
      cin          = {E130 / C060 / E230},
      ddc          = {610},
      cid          = {I:(DE-He78)E130-20160331 / I:(DE-He78)C060-20160331 /
                      I:(DE-He78)E230-20160331},
      pnm          = {315 - Bildgebung und Radioonkologie (POF4-315)},
      pid          = {G:(DE-HGF)POF4-315},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:36965252},
      doi          = {10.1016/j.media.2023.102765},
      url          = {https://inrepo02.dkfz.de/record/274460},
}