% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

@ARTICLE{Clusmann:298420,
      author       = {J. Clusmann and D. Ferber and I. C. Wiest and C. V.
                      Schneider and T. J. Brinker and S. Foersch and D. Truhn
                      and J. N. Kather},
      title        = {{P}rompt injection attacks on vision language models in
                      oncology},
      journal      = {Nature Communications},
      volume       = {16},
      number       = {1},
      issn         = {2041-1723},
      address      = {London},
      publisher    = {Springer Nature},
      reportid     = {DKFZ-2025-00276},
      pages        = {1239},
      year         = {2025},
      abstract     = {Vision-language artificial intelligence models (VLMs)
                      possess medical knowledge and can be employed in healthcare
                      in numerous ways, including as image interpreters, virtual
                      scribes, and general decision support systems. However,
                      here, we demonstrate that current VLMs applied to medical
                      tasks exhibit a fundamental security flaw: they can be
                      compromised by prompt injection attacks. Such attacks can
                      be used to elicit harmful output simply by interacting
                      with the VLM, without any access to its parameters. We
                      perform a quantitative study to evaluate the
                      vulnerability of four state-of-the-art VLMs to these
                      attacks: Claude-3 Opus,
                      Claude-3.5 Sonnet, Reka Core, and GPT-4o. Using a set of N =
                      594 attacks, we show that all of these models are
                      susceptible. Specifically, we show that embedding sub-visual
                      prompts in manifold medical imaging data can cause the model
                      to provide harmful output, and that these prompts are
                      non-obvious to human observers. Thus, our study
                      demonstrates a key vulnerability in medical VLMs that
                      should be mitigated before widespread clinical adoption.},
      keywords     = {Humans / Artificial Intelligence / Medical Oncology:
                      methods / Algorithms},
      cin          = {C140},
      ddc          = {500},
      cid          = {I:(DE-He78)C140-20160331},
      pnm          = {313 - Krebsrisikofaktoren und Prävention (POF4-313)},
      pid          = {G:(DE-HGF)POF4-313},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:39890777},
      pmc          = {pmc:PMC11785991},
      doi          = {10.1038/s41467-024-55631-x},
      url          = {https://inrepo02.dkfz.de/record/298420},
}