% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article record. Author names use the unambiguous "Last, First" form
% so that the compound surname "Ghaffari Laleh" is parsed as one surname.
% Fields such as reportid, cin, ddc, cid, pnm, pid, typ, pubmed and
% internalnote are not part of the standard BibTeX field set; standard styles
% ignore unknown fields.
@article{GhaffariLaleh:180070,
      author       = {Ghaffari Laleh, N. and Muti, H. S. and Loeffler, C. M. L.
                      and Echle, A. and Saldanha, O. L. and Mahmood, F. and
                      Lu, M. Y. and Trautwein, C. and Langer, R. and
                      Dislich, B. and Buelow, R. D. and Grabsch, H. I. and
                      Brenner, H. and Chang-Claude, J. and Alwers, E. and
                      Brinker, T. J. and Khader, F. and Truhn, D. and
                      Gaisa, N. T. and Boor, P. and Hoffmeister, M. and
                      Schulz, V. and Kather, J. N.},
      title        = {Benchmarking weakly-supervised deep learning pipelines
                      for whole slide classification in computational pathology},
      journal      = {Medical Image Analysis},
      volume       = {79},
      issn         = {1361-8415},
      address      = {Amsterdam [u.a.]},
      publisher    = {Elsevier Science},
      reportid     = {DKFZ-2022-01069},
      pages        = {102474},
      year         = {2022},
      abstract     = {Artificial intelligence (AI) can extract visual information
                      from histopathological slides and yield biological insight
                      and clinical biomarkers. Whole slide images are cut into
                      thousands of tiles and classification problems are often
                      weakly-supervised: the ground truth is only known for the
                      slide, not for every single tile. In classical
                      weakly-supervised analysis pipelines, all tiles inherit the
                      slide label while in multiple-instance learning (MIL), only
                      bags of tiles inherit the label. However, it is still
                      unclear how these widely used but markedly different
                      approaches perform relative to each other. We implemented
                      and systematically compared six methods in six clinically
                      relevant end-to-end prediction tasks using data from N=2980
                      patients for training with rigorous external validation. We
                      tested three classical weakly-supervised approaches with
                      convolutional neural networks and vision transformers (ViT)
                      and three MIL-based approaches with and without an
                      additional attention module. Our results empirically
                      demonstrate that histological tumor subtyping of renal cell
                      carcinoma is an easy task in which all approaches achieve an
                      area under the receiver operating curve (AUROC) of above
                      0.9. In contrast, we report significant performance
                      differences for clinically relevant tasks of mutation
                      prediction in colorectal, gastric, and bladder cancer. In
                      these mutation prediction tasks, classical weakly-supervised
                      workflows outperformed MIL-based weakly-supervised methods
                      for mutation prediction, which is surprising given their
                      simplicity. This shows that new end-to-end image analysis
                      pipelines in computational pathology should be compared to
                      classical weakly-supervised methods. Also, these findings
                      motivate the development of new methods which combine the
                      elegant assumptions of MIL with the empirically observed
                      higher performance of classical weakly-supervised
                      approaches. We make all source codes publicly available at
                      https://github.com/KatherLab/HIA, allowing easy application
                      of all methods to any similar task.},
      keywords     = {Artificial intelligence (Other) / Computational pathology
                      (Other) / Convolutional neural networks (Other) /
                      Multiple-Instance Learning (Other) / Vision transformers
                      (Other) / Weakly-supervised deep learning (Other)},
      cin          = {C070 / C120 / HD01 / C020 / C140},
      ddc          = {610},
      cid          = {I:(DE-He78)C070-20160331 / I:(DE-He78)C120-20160331 /
                      I:(DE-He78)HD01-20160331 / I:(DE-He78)C020-20160331 /
                      I:(DE-He78)C140-20160331},
      pnm          = {313 - Krebsrisikofaktoren und Prävention (POF4-313)},
      pid          = {G:(DE-HGF)POF4-313},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:35588568},
      doi          = {10.1016/j.media.2022.102474},
      url          = {https://inrepo02.dkfz.de/record/180070},
      internalnote = {The original export marked the following authors with a
                      superscript asterisk in the author field: H. Brenner,
                      J. Chang-Claude, E. Alwers, T. J. Brinker,
                      M. Hoffmeister. The marker was removed from the author
                      field because it is parsed as part of the surname.},
}