% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article record (repository export), citation key Godau:299507.
% Cleanup notes:
%  - Author names are stored in "Last, First" form. The export's math-mode
%    asterisk superscripts were removed from the names because BibTeX parses
%    them as part of the surname; the marked authors are listed in the
%    non-standard, non-printing field "authormarked" (presumably the marker
%    flags institute-affiliated authors -- TODO confirm).
%  - The hash signs in "note" are escaped so that styles which print the note
%    do not trigger a LaTeX macro-parameter error.
%  - The title carries no hard-coded trailing period; the style supplies it.
@ARTICLE{Godau:299507,
      author       = {Godau, P. and Kalinowski, P. and Christodoulou, E. and
                      Reinke, A. and Tizabi, M. and Ferrer, L. and Jäger, P.
                      and Maier-Hein, L.},
      authormarked = {Godau, P. and Kalinowski, P. and Christodoulou, E. and
                      Reinke, A. and Tizabi, M. and Jäger, P. and
                      Maier-Hein, L.},
      title        = {Navigating prevalence shifts in image analysis algorithm
                      deployment},
      journal      = {Medical Image Analysis},
      volume       = {102},
      issn         = {1361-8415},
      address      = {Amsterdam [u.a.]},
      publisher    = {Elsevier Science},
      reportid     = {DKFZ-2025-00465},
      pages        = {103504},
      year         = {2025},
      note         = {\#EA:E130\#LA:E130\#},
      abstract     = {Domain gaps are significant obstacles to the clinical
                      implementation of machine learning (ML) solutions for
                      medical image analysis. Although current research emphasizes
                      new training methods and network architectures, the specific
                      impact of prevalence shifts on algorithms in real-world
                      applications is often overlooked. Differences in class
                      frequencies between development and deployment data are
                      crucial, particularly for the widespread adoption of
                      artificial intelligence (AI), as disease prevalence can vary
                      greatly across different times and locations. Our
                      contribution is threefold. Based on a diverse set of 30
                      medical classification tasks (1) we demonstrate that lack of
                      prevalence shift handling can have severe consequences on
                      the quality of calibration, decision threshold, and
                      performance assessment. Furthermore, (2) we show that
                      prevalences can be accurately and reliably estimated in a
                      data-driven manner. Finally, (3) we propose a new workflow
                      for prevalence-aware image classification that uses
                      estimated deployment prevalences to adjust a trained
                      classifier to a new environment, without requiring
                      additional annotated deployment data. Comprehensive
                      experiments indicate that our proposed approach could
                      contribute to generating better classifier decisions and
                      more reliable performance estimates compared to current
                      practice.},
      keywords     = {Class imbalance (Other) / Domain gap (Other) /
                      Generalization (Other) / Medical image classification
                      (Other) / Prevalence shift (Other)},
      cin          = {E130 / HD01 / E290},
      ddc          = {610},
      cid          = {I:(DE-He78)E130-20160331 / I:(DE-He78)HD01-20160331 /
                      I:(DE-He78)E290-20160331},
      pnm          = {315 - Bildgebung und Radioonkologie (POF4-315)},
      pid          = {G:(DE-HGF)POF4-315},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:40020420},
      doi          = {10.1016/j.media.2025.103504},
      url          = {https://inrepo02.dkfz.de/record/299507},
}