% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@article{Schnorr:300319,
  author        = {Schnorr, I. and Andreas, S. and Schumann, L. and Hahn, S.
                   and Vehreschild, J. J. and Maier, D.},
  title         = {{ATCodeR}: a dictionary-based {R-tool} to standardize
                   medication free-text},
  journal       = {Scientific Reports},
  volume        = {15},
  number        = {1},
  issn          = {2045-2322},
  address       = {[London]},
  publisher     = {Springer Nature},
  reportid      = {DKFZ-2025-00765},
  pages         = {12252},
  year          = {2025},
  abstract      = {Over the past decades, oncology treatment paradigms have
                   developed significantly. Yet, the often unstructured nature
                   of substance-related documentation in medical records
                   presents a time-consuming challenge for analyzing treatment
                   patterns and outcomes. To advance oncological research
                   further, clinical data science must offer solutions that
                   facilitate research and analysis with real-world data (RWD).
                   The present contribution introduces a user-friendly R-tool
                   designed to transform free-text medication entries into the
                   structured Anatomical Therapeutic Chemical (ATC)
                   Classification System by applying a dictionary-based
                   approach. The resulting output is a structured data frame
                   containing columns for antineoplastic medication, other
                   medications, and supplementary information. For accuracy
                   validation, 561 data entries from an evaluation data set
                   were reviewed, consisting of 935 tokens. 88.5\% of these
                   tokens were successfully transformed into their respective
                   ATC codes. Additional information was extracted from 129
                   data entries (23\%), while 23 entries (4.1\%) presented
                   no usable information. All tokens underwent a manual review;
                   8.9\% (84 tokens) failed transformations. This approach
                   improves the standardization and analysis of systemic
                   anti-cancer treatment data in German-speaking regions by
                   optimizing efficiency while maintaining relevant accuracy.},
  keywords      = {Humans / Antineoplastic Agents: therapeutic use /
                   Antineoplastic Agents: classification / Neoplasms: drug
                   therapy / Software / ATC code (Other) / Dictionary (Other) /
                   Language processing (Other) / Medication dictionary (Other)
                   / R-tool (Other) / Standardizing free-text (Other) /
                   Substance dictionary (Other) / Antineoplastic Agents (NLM
                   Chemicals)},
  cin           = {FM01},
  ddc           = {600},
  cid           = {I:(DE-He78)FM01-20160331},
  pnm           = {899 - ohne Topic (POF4-899)},
  pid           = {G:(DE-HGF)POF4-899},
  typ           = {PUB:(DE-HGF)16},
  pubmed        = {pmid:40211013},
  doi           = {10.1038/s41598-025-97150-9},
  url           = {https://inrepo02.dkfz.de/record/300319},
  internal-note = {The source export tagged S. Andreas and D. Maier with a
                   superscript asterisk inside the author field; presumably an
                   affiliation marker (to be confirmed). The markers were
                   removed from the author field so that the surnames parse,
                   sort and abbreviate correctly.},
}