% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article (Scientific Reports, vol. 15, 2025) describing ATCodeR.
% Cleanup notes:
%   - The exported author list tagged "S. Andreas" and "D. Maier" with a
%     "$^*$" marker (presumably flagging institute-affiliated authors --
%     confirm against the repository record). The marker was removed from the
%     author field because BibTeX parses it as part of the surname.
%   - Given names are kept as initials; full names are not available here.
%   - "pages" holds the article number, not a page range.
@ARTICLE{Schnorr:300319,
      author       = {Schnorr, I. and Andreas, S. and Schumann, L. and Hahn, S.
                      and Vehreschild, J. J. and Maier, D.},
      title        = {{ATCodeR}: a dictionary-based {R}-tool to standardize
                      medication free-text},
      journal      = {Scientific Reports},
      volume       = {15},
      number       = {1},
      issn         = {2045-2322},
      address      = {London},
      publisher    = {Springer Nature},
      reportid     = {DKFZ-2025-00765},
      pages        = {12252},
      year         = {2025},
      abstract     = {Over the past decades, oncology treatment paradigms have
                      developed significantly. Yet, the often unstructured nature
                      of substance-related documentation in medical records
                      presents a time-consuming challenge for analyzing treatment
                      patterns and outcomes. To advance oncological research
                      further, clinical data science must offer solutions that
                      facilitate research and analysis with real-world data (RWD).
                      The present contribution introduces a user-friendly R-tool
                      designed to transform free-text medication entries into the
                      structured Anatomical Therapeutic Chemical (ATC)
                      Classification System by applying a dictionary-based
                      approach. The resulting output is a structured data frame
                      containing columns for antineoplastic medication, other
                      medications, and supplementary information. For accuracy
                      validation, 561 data entries from an evaluation data set
                      were reviewed, consisting of 935 tokens. 88.5\% of these
                      tokens were successfully transformed into their respective
                      ATC codes. Additional information was extracted from 129
                      data entries (23\%), while 23 entries (4.1\%) presented
                      no usable information. All tokens underwent a manual review;
                      8.9\% (84 tokens) failed transformations. This approach
                      improves the standardization and analysis of systemic
                      anti-cancer treatment data in German-speaking regions by
                      optimizing efficiency while maintaining relevant accuracy.},
      keywords     = {Humans / Antineoplastic Agents: therapeutic use /
                      Antineoplastic Agents: classification / Neoplasms: drug
                      therapy / Software / ATC code (Other) / Dictionary (Other) /
                      Language processing (Other) / Medication dictionary (Other)
                      / R-tool (Other) / Standardizing free-text (Other) /
                      Substance dictionary (Other) / Antineoplastic Agents (NLM
                      Chemicals)},
      cin          = {FM01},
      ddc          = {600},
      cid          = {I:(DE-He78)FM01-20160331},
      pnm          = {899 - ohne Topic (POF4-899)},
      pid          = {G:(DE-HGF)POF4-899},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:40211013},
      doi          = {10.1038/s41598-025-97150-9},
      url          = {https://inrepo02.dkfz.de/record/300319},
}