% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
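%
% For example, with biblatex and biber (a minimal sketch; the file name
% refs.bib is an assumption for illustration, not prescribed by this record):
%
%   \documentclass{article}
%   \usepackage[backend=biber]{biblatex}
%   \addbibresource{refs.bib}
%   \begin{document}
%   See the Navigator model~\cite{Tran:301312}.
%   \printbibliography
%   \end{document}
%
% Compile with: pdflatex main && biber main && pdflatex main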

@ARTICLE{Tran:301312,
      author       = {M. Tran and S. Wagner and W. Weichert and C. Matek and
                       M. Boxberg and T. Peng},
      title        = {{N}avigating {T}hrough {W}hole {S}lide {I}mages {W}ith
                      {H}ierarchy, {M}ulti-{O}bject, and {M}ulti-{S}cale {D}ata},
      journal      = {IEEE Transactions on Medical Imaging},
      volume       = {44},
      number       = {5},
      issn         = {0278-0062},
      address      = {New York, NY},
      publisher    = {Institute of Electrical and Electronics Engineers},
      reportid     = {DKFZ-2025-00984},
      pages        = {2002 - 2015},
      year         = {2025},
      abstract     = {Building deep learning models that can rapidly segment
                      whole slide images (WSIs) using only a handful of training
                      samples remains an open challenge in computational
                      pathology. The difficulty lies in the histological images
                      themselves: many morphological structures within a slide are
                      closely related and very similar in appearance, making it
                      difficult to distinguish between them. However, a skilled
                      pathologist can quickly identify the relevant phenotypes.
                      Through years of training, they have learned to organize
                      visual features into a hierarchical taxonomy (e.g.,
                      identifying carcinoma versus healthy tissue, or
                      distinguishing regions within a tumor as cancer cells, the
                      microenvironment, …). Thus, each region is associated with
                      multiple labels representing different tissue types.
                      Pathologists typically deal with this by analyzing the
                      specimen at multiple scales and comparing visual features
                      between different magnifications. Inspired by this
                      multi-scale diagnostic workflow, we introduce the Navigator,
                      a vision model that navigates through WSIs like a domain
                      expert: it searches for the region of interest at a low
                      scale, zooms in gradually, and localizes ever finer
                      microanatomical classes. As a result, the Navigator can
                      detect coarse-grained patterns at lower resolution and
                      fine-grained features at higher resolution. In addition, to
                      deal with sparsely annotated samples, we train the Navigator
                      with a novel semi-supervised framework called S5CL v2. The
                      proposed model improves the F1 score by up to $8\%$ on
                      various datasets including our challenging new
                      TCGA-COAD-30CLS and Erlangen cohorts.},
      keywords     = {Humans / Deep Learning / Image Interpretation,
                      Computer-Assisted: methods / Algorithms},
      cin          = {MU01},
      ddc          = {620},
      cid          = {I:(DE-He78)MU01-20160331},
      pnm          = {899 - ohne Topic (POF4-899)},
      pid          = {G:(DE-HGF)POF4-899},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:40031287},
      doi          = {10.1109/TMI.2025.3532728},
      url          = {https://inrepo02.dkfz.de/record/301312},
}
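
% Legacy route with the classic two-pass toolchain named in the header
% (again a sketch assuming refs.bib; bibtex8 accepts 8-bit input where
% BibTeX 0.99 does not):
%
%   \documentclass{article}
%   \begin{document}
%   \cite{Tran:301312}
%   \bibliographystyle{plain}
%   \bibliography{refs}
%   \end{document}
%
% Compile with: pdflatex main && bibtex8 main && pdflatex main && pdflatex main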