% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@ARTICLE{Carl:298963,
      author       = {Carl$^*$, N. and Haggenmüller$^*$, S. and Wies$^*$, C.
                      and Nguyen, L. and Winterstein$^*$, J. T. and
                      Hetz$^*$, M. J. and Mangold, M. H. and Hartung, F. O. and
                      Grüne, B. and Holland-Letz$^*$, T. and Michel, M. S. and
                      Brinker$^*$, T. and Wessels, F.},
      title        = {Evaluating interactions of patients with large language
                      models for medical information},
      journal      = {{BJU} International},
      volume       = {135},
      number       = {6},
      issn         = {1464-4096},
      address      = {Oxford},
      publisher    = {Wiley-Blackwell},
      reportid     = {DKFZ-2025-00392},
      pages        = {1010--1017},
      year         = {2025},
      note         = {\#EA:C140\#LA:C140\# / 2025 Jun;135(6):1010--1017},
      abstract     = {To explore the interaction of real-world patients with a
                      chatbot in a clinical setting, investigating key aspects of
                      medical information provided by large language models
                      (LLMs). The study enrolled 300 patients seeking urological
                      counselling between February and July 2024. First,
                      participants voluntarily conversed with a Generative
                      Pre-trained Transformer 4 (GPT-4) powered chatbot to ask
                      questions related to their medical situation. In the
                      following survey, patients rated the perceived utility,
                      completeness, and understandability of the information
                      provided during the simulated conversation as well as
                      user-friendliness. Finally, patients were asked which, in
                      their experience, best answered their questions: LLMs,
                      urologists, or search engines. A total of 292 patients
                      completed the study. The majority of patients perceived the
                      chatbot as providing useful, complete, and understandable
                      information, as well as being user-friendly. However, the
                      ability of human urologists to answer medical questions in
                      an understandable way was rated higher than of LLMs.
                      Interestingly, $53\%$ of participants rated the
                      question-answering ability of LLMs higher than search
                      engines. Age was not associated with preferences.
                      Limitations include social desirability and sampling
                      biases. This study highlights the potential of LLMs to
                      enhance patient education and communication in clinical
                      settings, with patients valuing their user-friendliness and
                      comprehensiveness for medical information. By addressing
                      preliminary questions, LLMs could potentially relieve time
                      constraints on healthcare providers, enabling medical
                      personnel to focus on complex inquiries and patient care.},
      keywords     = {artificial intelligence (Other) / clinical trial (Other) /
                      implementation science (Other) / large language models
                      (Other) / patient interaction (Other)},
      cin          = {C140 / C060},
      ddc          = {610},
      cid          = {I:(DE-He78)C140-20160331 / I:(DE-He78)C060-20160331},
      pnm          = {313 - Krebsrisikofaktoren und Prävention (POF4-313)},
      pid          = {G:(DE-HGF)POF4-313},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:39967059},
      doi          = {10.1111/bju.16676},
      url          = {https://inrepo02.dkfz.de/record/298963},
}