% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article record for citation key Aldraimli:169807.
% Author names use the unambiguous "Last, First" form so that capitalised
% particles ("De Santis", "De Ruysscher") stay part of the surname.
% NOTE(review): the original record carried a "$^*$" marker after
% "J. Chang-Claude"; it was dropped because it became part of the parsed
% surname. Presumably an affiliation flag added by the exporting repository;
% confirm, and record it in a dedicated field if the information is needed.
@article{Aldraimli:169807,
  author        = {Aldraimli, M. and Soria, D. and Grishchuck, D. and
                   Ingram, S. and Lyon, R. and Mistry, A. and Oliveira, J. and
                   Samuel, R. and Shelley, L. E. A. and Osman, S. and
                   Dwek, M. V. and Azria, D. and Chang-Claude, J. and
                   Gutiérrez-Enríquez, S. and De Santis, M. C. and
                   Rosenstein, B. S. and De Ruysscher, D. and Sperk, E. and
                   Symonds, R. P. and Stobart, H. and Vega, A. and
                   Veldeman, L. and Webb, A. and Talbot, C. J. and
                   West, C. M. and Rattay, T. and Chaussalet, T. J.},
  collaboration = {R. consortium},
  title         = {A data science approach for early-stage prediction of
                   {Patient's} susceptibility to acute side effects of advanced
                   radiotherapy},
  journal       = {Computers in Biology and Medicine},
  volume        = {135},
  issn          = {0010-4825},
  address       = {Amsterdam [u.a.]},
  publisher     = {Elsevier Science},
  reportid      = {DKFZ-2021-01561},
  pages         = {104624},
  year          = {2021},
  abstract      = {The prediction by classification of side effects incidence
                   in a given medical treatment is a common challenge in
                   medical research. Machine Learning (ML) methods are widely
                   used in the areas of risk prediction and classification. The
                   primary objective of such algorithms is to use several
                   features to predict dichotomous responses (e.g., disease
                   positive/negative). Similar to statistical inference
                   modelling, ML modelling is subject to the class imbalance
                   problem and is affected by the majority class, increasing
                   the false-negative rate. In this study, seventy-nine ML
                   models were built and evaluated to classify approximately
                   2000 participants from 26 hospitals in eight different
                   countries into two groups of radiotherapy (RT) side effects
                   incidence based on recorded observations from the
                   international study of RT related toxicity 'REQUITE'. We
                   also examined the effect of sampling techniques and
                   cost-sensitive learning methods on the models when dealing
                   with class imbalance. The combinations of such techniques
                   used had a significant impact on the classification. They
                   resulted in an improvement in incidence status prediction by
                   shifting classifiers' attention to the minority group. The
                   best classification model for RT acute toxicity prediction
                   was identified based on domain experts' success criteria.
                   The Area Under Receiver Operator Characteristic curve of the
                   models tested with an isolated dataset ranged from 0.50 to
                   0.77. The scale of improved results is promising and will
                   guide further development of models to predict RT acute
                   toxicities. One model was optimised and found to be
                   beneficial to identify patients who are at risk of
                   developing acute RT early-stage toxicities as a result of
                   undergoing breast RT ensuring relevant treatment
                   interventions can be appropriately targeted. The design of
                   the approach presented in this paper resulted in producing a
                   preclinical-valid prediction model. The study was developed
                   by a multi-disciplinary collaboration of data scientists,
                   medical physicists, oncologists and surgeons in the UK
                   Radiotherapy Machine Learning Network.},
  keywords      = {Classification (Other) / Desquamation (Other) / Early
                   toxicities (Other) / Imbalanced learning (Other) / Machine
                   learning (Other) / Meta-learning (Other) / REQUITE (Other) /
                   Radiotherapy (Other) / SMOTE (Other)},
  cin           = {C020},
  ddc           = {570},
  cid           = {I:(DE-He78)C020-20160331},
  pnm           = {313 - Krebsrisikofaktoren und Prävention (POF4-313)},
  pid           = {G:(DE-HGF)POF4-313},
  typ           = {PUB:(DE-HGF)16},
  pubmed        = {pmid:34247131},
  doi           = {10.1016/j.compbiomed.2021.104624},
  url           = {https://inrepo02.dkfz.de/record/169807},
}