% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@ARTICLE{Kalweit:302802,
author = {G. Kalweit and A. Klett and P. Silvestrini and J. Rahnfeld
and M. Naouar and Y. Vogt and D. Infante and R. Berger and
J. Duque-Afonso and T. N. Hartmann and M. Follo and E.
Bodurova-Spassova and M. Lübbert$^*$ and R. Mertelsmann and
J. Boedecker and E. Ullrich$^*$ and M. Kalweit},
title = {{L}everaging a foundation model zoo for cell similarity
search in oncological microscopy across devices.},
journal = {Frontiers in Oncology},
volume = {15},
issn = {2234-943X},
address = {Lausanne},
publisher = {Frontiers Media},
reportid = {DKFZ-2025-01342},
pages = {1480384},
year = {2025},
abstract = {Cellular imaging analysis using the traditional
retrospective approach is extremely time-consuming and
labor-intensive. Although AI-based solutions are available,
these approaches rely heavily on supervised learning
techniques that require high-quality, large labeled datasets
from the same microscope to be reliable. In addition,
primary patient samples are often heterogeneous cell
populations and need to be stained to distinguish the
cellular subsets. The resulting imaging data is analyzed and
labeled manually by experts. Therefore, a method to
distinguish cell populations across imaging devices without
the need for staining and extensive manual labeling would
help immensely to gain real-time insights into cell
population dynamics. This especially holds true for
recognizing specific cell types and states in response to
treatments. We aim to develop an unsupervised approach using
general vision foundation models trained on diverse and
extensive imaging datasets to extract rich visual features
for cell analysis across devices, including both stained and
unstained live cells. Our method, Entropy-guided Weighted
Combinational FAISS (EWC-FAISS), uses these models purely in
an inference-only mode without task-specific retraining on
the cellular data. Combining the generated embeddings in an
efficient and adaptive k-nearest neighbor search allows for
automated, cross-device identification of cell types and
states, providing a strong basis for AI-assisted cancer
therapy. We utilized two publicly available datasets. The WBC
dataset includes 14,424 images of stained white blood cell
samples from patients with acute myeloid and lymphoid
leukemia, as well as those without leukemic pathology. The
LISC dataset comprises 257 images of white blood cell
samples from healthy individuals. We generated four in-house
datasets using the JIMT-1 breast cancer cell line as well
as the Jurkat and K562 leukemic cell lines. These
datasets were acquired using the Nanolive 3D Cell
Explorer-fluo (CX-A) holotomographic microscope and the
BioTek Lionheart FX automated brightfield microscope. The
images from the in-house datasets were manually annotated
using Roboflow software. To generate the embeddings, we used
and optimized a concatenated combination of SAM, DINO,
ConvNeXT, SWIN, CLIP and ViTMAE. The combined embeddings
were used as input for the adaptive k-nearest neighbor
search, building an approximate Hierarchical Navigable Small
World FAISS index. We compared EWC-FAISS to fully
fine-tuned ViT classifiers with DINO and SWIN backbones,
a ConvNeXT architecture, as well as to NMTune as a
lightweight domain-adaptation method with frozen
backbone. EWC-FAISS performed competitively with the
baselines on the original datasets in terms of macro
accuracy, that is, the unweighted mean of the per-class
accuracies, treating all classes equally regardless of
class size. EWC-FAISS ranked second for the WBC
dataset (macro accuracy: 97.6 ± 0.2), first for cell state
classification from Nanolive (macro accuracy: 90 ± 0), and
performed comparably for cell type classification from
Lionheart (macro accuracy: 87 ± 0). For the transfer to
out-of-distribution (OOD) datasets, which the model had not
seen during training, EWC-FAISS consistently outperformed
all baselines. For the LISC dataset, EWC-FAISS
achieved a macro accuracy of 78.5 ± 0.3, compared to DINO
FT's 17 ± 1, SWIN FT's 44 ± 14, ConvNeXT FT's 45 ± 9, and
NMTune's 52 ± 10. For the cell state classification from
Lionheart, EWC-FAISS had a macro accuracy of 86 ± 1, while
DINO FT, SWIN FT, and ConvNeXT FT achieved 65 ± 11, 68 ±
16, and 81 ± 1, respectively, and NMTune 81 ± 7. For the
transfer of cell type classification from Nanolive,
EWC-FAISS attained a macro accuracy of 85 ± 0, compared to
DINO FT's 24.5 ± 0.9, SWIN FT's 57 ± 6, ConvNeXT FT's 54
± 4, and NMTune's 63 ± 4. Additionally, building EWC-FAISS
after embedding generation was significantly faster than
training DINO FT (∼ 6 minutes compared to > 10 hours).
Lastly, EWC-FAISS performed comparably in distinguishing
cancerous cell lines from peripheral blood mononuclear cells
with a mean accuracy of 80 ± 5, compared to CellMixer with
a mean accuracy of 79.7. We present a novel approach to
classify various cell lines and primary cells by identity
and state using images acquired across imaging platforms
that vary in resolution, magnification, and image quality.
Despite these differences, we were able to show that our
efficient, adaptive k-nearest neighbor search pipeline can
be applied to a large image dataset containing different
cell types and can effectively differentiate between the
cells and their states, such as live, apoptotic, or
necrotic. This opens up several applications, particularly
in distinguishing cell populations in patient samples or in
monitoring therapy.},
keywords = {artificial intelligence (Other) / cell imaging (Other) /
deep learning (Other) / foundation models (Other) / nearest
neighbor search (Other)},
cin = {FR01 / FM01},
ddc = {610},
cid = {I:(DE-He78)FR01-20160331 / I:(DE-He78)FM01-20160331},
pnm = {899 - ohne Topic (POF4-899)},
pid = {G:(DE-HGF)POF4-899},
typ = {PUB:(DE-HGF)16},
pubmed = {pmid:40606969},
pmc = {pmc:PMC12213826},
doi = {10.3389/fonc.2025.1480384},
url = {https://inrepo02.dkfz.de/record/302802},
}
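%
% The record above describes the EWC-FAISS pipeline: concatenating
% embeddings from several vision foundation models and running an
% adaptive k-nearest neighbor search over an approximate Hierarchical
% Navigable Small World (HNSW) FAISS index, evaluated by macro
% accuracy. Below is a minimal Python sketch of that idea, kept as
% BibTeX comments so this file stays valid. It assumes precomputed
% per-model embedding matrices and integer labels in 0..C-1; the
% helper names (entropy_weight, build_ewc_index, knn_predict) and the
% specific entropy-based weighting rule are illustrative assumptions,
% not the authors' exact implementation.
%
%   import numpy as np
%   import faiss  # pip install faiss-cpu
%
%   def entropy_weight(emb, labels, k=10):
%       """Weight one model's embedding space by neighborhood purity:
%       lower label entropy among the k nearest neighbors -> higher
%       weight. An assumed stand-in for the paper's entropy guidance."""
%       x = np.ascontiguousarray(emb, dtype=np.float32)
%       faiss.normalize_L2(x)                  # cosine via inner product
%       index = faiss.IndexFlatIP(x.shape[1])
%       index.add(x)
%       _, nn = index.search(x, k + 1)         # first hit is the query itself
%       n_classes = int(labels.max()) + 1
%       ents = []
%       for row in nn[:, 1:]:
%           p = np.bincount(labels[row], minlength=n_classes) / k
%           p = p[p > 0]
%           ents.append(-(p * np.log(p)).sum())
%       return 1.0 - np.mean(ents) / np.log(n_classes)   # in [0, 1]
%
%   def build_ewc_index(model_embs, labels, hnsw_m=32):
%       """Concatenate weighted, L2-normalized per-model embeddings and
%       build an approximate HNSW FAISS index over the reference set."""
%       parts, weights = [], []
%       for emb in model_embs:                 # one matrix per model
%           x = np.ascontiguousarray(emb, dtype=np.float32)
%           faiss.normalize_L2(x)
%           w = entropy_weight(x, labels)
%           parts.append(w * x)
%           weights.append(w)
%       combined = np.hstack(parts)
%       index = faiss.IndexHNSWFlat(combined.shape[1], hnsw_m)
%       index.add(combined)
%       return index, weights
%
%   def knn_predict(index, weights, query_embs, ref_labels, k=10):
%       """Majority vote among the k nearest reference cells."""
%       parts = []
%       for w, emb in zip(weights, query_embs):
%           x = np.ascontiguousarray(emb, dtype=np.float32)
%           faiss.normalize_L2(x)
%           parts.append(w * x)
%       _, nn = index.search(np.hstack(parts), k)
%       return np.array([np.bincount(ref_labels[row]).argmax() for row in nn])
%
%   def macro_accuracy(y_true, y_pred):
%       """Unweighted mean of per-class accuracies, as defined in the
%       abstract."""
%       classes = np.unique(y_true)
%       return np.mean([(y_pred[y_true == c] == c).mean() for c in classes])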