% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Hilgers et al. (2024), Histopathology 84(7): journal article record.
% reportid, cin, ddc, cid, pnm, pid, typ and pubmed are not standard BibTeX
% fields; standard styles ignore fields they do not know.
% Author names use the unambiguous "Last, First" form so that compound
% surnames are not split by the name parser.
% The month field carries the issue date that was previously repeated,
% together with volume/number/pages, in a free-text note.
@article{Hilgers:288710,
      author       = {Hilgers, L. and Ghaffari Laleh, N. and West, N. P. and
                      Westwood, A. and Hewitt, K. J. and Quirke, P. and
                      Grabsch, H. I. and Carrero, Z. I. and Matthaei, E. and
                      Loeffler, C. M. L. and Brinker, T. J. and Yuan, T. and
                      Brenner, H. and Brobeil, A. and Hoffmeister, M. and
                      Kather, J. N.},
      title        = {Automated curation of large-scale cancer histopathology
                      image datasets using deep learning},
      journal      = {Histopathology},
      volume       = {84},
      number       = {7},
      issn         = {0309-0167},
      address      = {Oxford [u.a.]},
      publisher    = {Wiley-Blackwell},
      reportid     = {DKFZ-2024-00429},
      pages        = {1139--1153},
      year         = {2024},
      month        = jun,
      internal-note = {The source record marked T. J. Brinker, T. Yuan, H.
                      Brenner and M. Hoffmeister with a superscript asterisk;
                      the meaning of the marker is not stated in the record.},
      abstract     = {Artificial intelligence (AI) has numerous applications in
                      pathology, supporting diagnosis and prognostication in
                      cancer. However, most AI models are trained on highly
                      selected data, typically one tissue slide per patient. In
                      reality, especially for large surgical resection specimens,
                      dozens of slides can be available for each patient. Manually
                      sorting and labelling whole-slide images (WSIs) is a very
                      time-consuming process, hindering the direct application of
                      AI on the collected tissue samples from large cohorts. In
                      this study we addressed this issue by developing a
                      deep-learning (DL)-based method for automatic curation of
                      large pathology datasets with several slides per patient. We
                      collected multiple large multicentric datasets of colorectal
                      cancer histopathological slides from the United Kingdom
                      (FOXTROT, N = 21,384 slides; CR07, N = 7985 slides) and
                      Germany (DACHS, N = 3606 slides). These datasets contained
                      multiple types of tissue slides, including bowel resection
                      specimens, endoscopic biopsies, lymph node resections,
                      immunohistochemistry-stained slides, and tissue microarrays.
                      We developed, trained, and tested a deep convolutional
                      neural network model to predict the type of slide from the
                      slide overview (thumbnail) image. The primary statistical
                      endpoint was the macro-averaged area under the receiver
                      operating curve (AUROCs) for detection of the type of
                      slide. In the primary dataset (FOXTROT), with an AUROC of
                      0.995 [95\% confidence interval [CI]: 0.994-0.996] the
                      algorithm achieved a high classification performance and was
                      able to accurately predict the type of slide from the
                      thumbnail image alone. In the two external test cohorts
                      (CR07, DACHS) AUROCs of 0.982 [95\% CI: 0.979-0.985] and
                      0.875 [95\% CI: 0.864-0.887] were observed, which
                      indicates the generalizability of the trained model on
                      unseen datasets. With a confidence threshold of 0.95, the
                      model reached an accuracy of 94.6\% (7331 classified
                      cases) in CR07 and 85.1\% (2752 classified cases) for the
                      DACHS cohort. Our findings show that using the low-resolution
                      thumbnail image is sufficient to accurately classify the
                      type of slide in digital pathology. This can support
                      researchers to make the vast resource of existing pathology
                      archives accessible to modern AI models with only minimal
                      manual annotations.},
      keywords     = {colorectal cancer (Other) / deep learning (Other) / digital
                      pathology (Other) / quality control (Other)},
      cin          = {C140 / C070 / C120 / HD01},
      ddc          = {610},
      cid          = {I:(DE-He78)C140-20160331 / I:(DE-He78)C070-20160331 /
                      I:(DE-He78)C120-20160331 / I:(DE-He78)HD01-20160331},
      pnm          = {313 - Krebsrisikofaktoren und Prävention (POF4-313)},
      pid          = {G:(DE-HGF)POF4-313},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:38409878},
      doi          = {10.1111/his.15159},
      url          = {https://inrepo02.dkfz.de/record/288710},
}