Development and validation of an autonomous artificial intelligence agent for clinical decision-making in oncology.

Ferber, Dyke; El Nahhas, Omar S M; Clusmann, Jan; Wölflein, Georg; Schultz, Nikolaus; Jäger, Dirk; Leßmann, Marie-Elisabeth; Wiest, Isabella C; Tschochohei, Maximilian; Salto-Tellez, Manuel; Kather, Jakob Nikolas; Lammert, Jacqueline; Truhn, Daniel; Foersch, Sebastian

doi:10.1038/s43018-025-00991-6

% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article record from the DKFZ publication repository (record 301910).
% Author names are stored as "Last, First" with full given names so that
% multi-word surnames such as "El Nahhas" are parsed as a single surname.
% NOTE(review): the repository export marked J. Lammert with a superscript
% asterisk inside the author field; presumably an affiliation or contribution
% marker -- it is kept out of the name field, confirm whether it must be
% recorded elsewhere.
@article{Ferber:301910,
      author       = {Ferber, Dyke and El Nahhas, Omar S. M. and
                      Wölflein, Georg and Wiest, Isabella C. and
                      Clusmann, Jan and Leßmann, Marie-Elisabeth and
                      Foersch, Sebastian and Lammert, Jacqueline and
                      Tschochohei, Maximilian and Jäger, Dirk and
                      Salto-Tellez, Manuel and Schultz, Nikolaus and
                      Truhn, Daniel and Kather, Jakob Nikolas},
      title        = {Development and validation of an autonomous artificial
                      intelligence agent for clinical decision-making in
                      oncology},
      journal      = {Nature Cancer},
      volume       = {6},
      number       = {8},
      issn         = {2662-1347},
      address      = {London},
      publisher    = {Nature Research},
      reportid     = {DKFZ-2025-01180},
      pages        = {1337--1349},
      year         = {2025},
      month        = aug,
      abstract     = {Clinical decision-making in oncology is complex, requiring
                      the integration of multimodal data and multidomain
                      expertise. We developed and evaluated an autonomous clinical
                      artificial intelligence (AI) agent leveraging GPT-4 with
                      multimodal precision oncology tools to support personalized
                      clinical decision-making. The system incorporates vision
                      transformers for detecting microsatellite instability and
                      KRAS and BRAF mutations from histopathology slides, MedSAM
                      for radiological image segmentation and web-based search
                      tools such as OncoKB, PubMed and Google. Evaluated on 20
                      realistic multimodal patient cases, the AI agent
                      autonomously used appropriate tools with 87.5\% accuracy,
                      reached correct clinical conclusions in 91.0\% of cases
                      and accurately cited relevant oncology guidelines 75.5\%
                      of the time. Compared to GPT-4 alone, the integrated AI
                      agent drastically improved decision-making accuracy from
                      30.3\% to 87.2\%. These findings demonstrate that
                      integrating language models with precision oncology and
                      search tools substantially enhances clinical accuracy,
                      establishing a robust foundation for deploying AI-driven
                      personalized oncology support systems.},
      cin          = {MU01},
      ddc          = {610},
      cid          = {I:(DE-He78)MU01-20160331},
      pnm          = {899 - ohne Topic (POF4-899)},
      pid          = {G:(DE-HGF)POF4-899},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:40481323},
      doi          = {10.1038/s43018-025-00991-6},
      url          = {https://inrepo02.dkfz.de/record/301910},
}

guest :: login DKFZ
		Search		Submit		Personalize Your alerts Your baskets Your searches		Help