% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article record: Hilgers et al., Histopathology 84(7), 2024
% (DOI 10.1111/his.15159). The fields reportid, cin, ddc, cid, pnm, pid, typ
% and pubmed are not standard BibTeX fields; they are kept verbatim from the
% repository export.
% NOTE(review): the export marked Brinker, Yuan, Brenner and Hoffmeister with
% a superscript asterisk inside the author field. The markers are omitted here
% so that the surnames parse cleanly; their meaning is not stated in this file.
@article{Hilgers:288710,
  author    = {Hilgers, L. and Ghaffari Laleh, N. and West, N. P. and
               Westwood, A. and Hewitt, K. J. and Quirke, P. and
               Grabsch, H. I. and Carrero, Z. I. and Matthaei, E. and
               Loeffler, C. M. L. and Brinker, T. J. and Yuan, T. and
               Brenner, H. and Brobeil, A. and Hoffmeister, M. and
               Kather, J. N.},
  title     = {Automated curation of large-scale cancer histopathology
               image datasets using deep learning},
  journal   = {Histopathology},
  volume    = {84},
  number    = {7},
  issn      = {0309-0167},
  address   = {Oxford [u.a.]},
  publisher = {Wiley-Blackwell},
  reportid  = {DKFZ-2024-00429},
  pages     = {1139--1153},
  year      = {2024},
  month     = jun,
  abstract  = {Artificial intelligence (AI) has numerous applications in
               pathology, supporting diagnosis and prognostication in
               cancer. However, most AI models are trained on highly
               selected data, typically one tissue slide per patient. In
               reality, especially for large surgical resection specimens,
               dozens of slides can be available for each patient. Manually
               sorting and labelling whole-slide images (WSIs) is a very
               time-consuming process, hindering the direct application of
               AI on the collected tissue samples from large cohorts. In
               this study we addressed this issue by developing a
               deep-learning (DL)-based method for automatic curation of
               large pathology datasets with several slides per patient. We
               collected multiple large multicentric datasets of colorectal
               cancer histopathological slides from the United Kingdom
               (FOXTROT, N = 21,384 slides; CR07, N = 7985 slides) and
               Germany (DACHS, N = 3606 slides). These datasets contained
               multiple types of tissue slides, including bowel resection
               specimens, endoscopic biopsies, lymph node resections,
               immunohistochemistry-stained slides, and tissue microarrays.
               We developed, trained, and tested a deep convolutional
               neural network model to predict the type of slide from the
               slide overview (thumbnail) image. The primary statistical
               endpoint was the macro-averaged area under the receiver
               operating curve (AUROCs) for detection of the type of
               slide. In the primary dataset (FOXTROT), with an AUROC of
               0.995 [95\% confidence interval [CI]: 0.994-0.996] the
               algorithm achieved a high classification performance and was
               able to accurately predict the type of slide from the
               thumbnail image alone. In the two external test cohorts
               (CR07, DACHS) AUROCs of 0.982 [95\% CI: 0.979-0.985] and
               0.875 [95\% CI: 0.864-0.887] were observed, which
               indicates the generalizability of the trained model on
               unseen datasets. With a confidence threshold of 0.95, the
               model reached an accuracy of 94.6\% (7331 classified
               cases) in CR07 and 85.1\% (2752 classified cases) for the
               DACHS cohort. Our findings show that using the low-resolution
               thumbnail image is sufficient to accurately classify the
               type of slide in digital pathology. This can support
               researchers to make the vast resource of existing pathology
               archives accessible to modern AI models with only minimal
               manual annotations.},
  keywords  = {colorectal cancer (Other) / deep learning (Other) / digital
               pathology (Other) / quality control (Other)},
  cin       = {C140 / C070 / C120 / HD01},
  ddc       = {610},
  cid       = {I:(DE-He78)C140-20160331 / I:(DE-He78)C070-20160331 /
               I:(DE-He78)C120-20160331 / I:(DE-He78)HD01-20160331},
  pnm       = {313 - Krebsrisikofaktoren und Prävention (POF4-313)},
  pid       = {G:(DE-HGF)POF4-313},
  typ       = {PUB:(DE-HGF)16},
  pubmed    = {pmid:38409878},
  doi       = {10.1111/his.15159},
  url       = {https://inrepo02.dkfz.de/record/288710},
}