% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@ARTICLE{Tran:301312,
      author       = {M. Tran and S. Wagner and W. Weichert and C. Matek and
M. Boxberg and T. Peng},
      title        = {{N}avigating {T}hrough {W}hole {S}lide {I}mages {W}ith
                      {H}ierarchy, {M}ulti-{O}bject, and {M}ulti-{S}cale {D}ata},
      journal      = {IEEE Transactions on Medical Imaging},
volume = {44},
number = {5},
issn = {0278-0062},
address = {New York, NY},
      publisher    = {Institute of Electrical and Electronics Engineers},
reportid = {DKFZ-2025-00984},
      pages        = {2002--2015},
year = {2025},
abstract = {Building deep learning models that can rapidly segment
whole slide images (WSIs) using only a handful of training
samples remains an open challenge in computational
                      pathology. The difficulty lies in the histological images
                      themselves: many morphological structures within a slide are
                      closely related and similar in appearance, making them hard
                      to tell apart. However, a skilled
pathologist can quickly identify the relevant phenotypes.
Through years of training, they have learned to organize
visual features into a hierarchical taxonomy (e.g.,
identifying carcinoma versus healthy tissue, or
distinguishing regions within a tumor as cancer cells, the
microenvironment, …). Thus, each region is associated with
multiple labels representing different tissue types.
Pathologists typically deal with this by analyzing the
specimen at multiple scales and comparing visual features
between different magnifications. Inspired by this
multi-scale diagnostic workflow, we introduce the Navigator,
a vision model that navigates through WSIs like a domain
expert: it searches for the region of interest at a low
scale, zooms in gradually, and localizes ever finer
microanatomical classes. As a result, the Navigator can
detect coarse-grained patterns at lower resolution and
fine-grained features at higher resolution. In addition, to
deal with sparsely annotated samples, we train the Navigator
with a novel semi-supervised framework called S5CL v2. The
proposed model improves the F1 score by up to $8\%$ on
various datasets including our challenging new
TCGA-COAD-30CLS and Erlangen cohorts.},
keywords = {Humans / Deep Learning / Image Interpretation,
Computer-Assisted: methods / Algorithms},
cin = {MU01},
ddc = {620},
cid = {I:(DE-He78)MU01-20160331},
pnm = {899 - ohne Topic (POF4-899)},
pid = {G:(DE-HGF)POF4-899},
typ = {PUB:(DE-HGF)16},
pubmed = {pmid:40031287},
doi = {10.1109/TMI.2025.3532728},
url = {https://inrepo02.dkfz.de/record/301312},
}
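
% The abstract above describes a coarse-to-fine navigation loop: classify a
% region at low magnification, zoom into areas flagged as relevant, and refine
% the labels at higher magnification. A minimal Python sketch of that idea
% follows (illustration only, not the authors' Navigator; `navigate`,
% `classify`, and the quadtree splitting are hypothetical stand-ins):
%
%   import numpy as np
%
%   def navigate(slide, classify, min_size=64, thresh=0.5):
%       """Quadtree-style coarse-to-fine descent: score each region with a
%       (stand-in) classifier and only recurse into quadrants it flags as
%       relevant, mimicking a pathologist zooming into a region of interest."""
%       rois = []
%
%       def visit(r, c, size):
%           patch = slide[r:r + size, c:c + size]
%           if classify(patch) < thresh:
%               return                      # irrelevant at this scale: prune
%           if size <= min_size:
%               rois.append((r, c, size))   # finest scale reached: keep ROI
%               return
%           half = size // 2                # "zoom in": split into quadrants
%           for dr in (0, half):
%               for dc in (0, half):
%                   visit(r + dr, c + dc, half)
%
%       visit(0, 0, slide.shape[0])
%       return rois
%
%   # Toy usage: a bright square stands in for tumor tissue; mean intensity
%   # stands in for a learned relevance score.
%   slide = np.zeros((512, 512))
%   slide[128:256, 256:384] = 1.0
%   print(navigate(slide, classify=lambda p: p.mean(), thresh=0.05))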