% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article, J. Magn. Reson. Imaging 61(2), 2025.
% Author names are stored without inline markers; the asterisk flags from the
% repository export are kept as a biblatex data annotation in "author+an"
% (author positions 1-5 and 11-14), which classic BibTeX styles ignore.
% NOTE(review): the meaning of the asterisk is not stated in this file.
@ARTICLE{Wennmann:290170,
  author    = {Wennmann, M. and Rotkopf, L. T. and Bauer, F. and
               Hielscher, T. and Kächele, J. and Mai, E. K. and
               Weinhold, N. and Raab, M.-S. and Goldschmidt, H. and
               Weber, T. F. and Schlemmer, H.-P. and Delorme, S. and
               Maier-Hein, K. and Neher, P.},
  author+an = {1=starred; 2=starred; 3=starred; 4=starred; 5=starred;
               11=starred; 12=starred; 13=starred; 14=starred},
  title     = {Reproducible Radiomics Features from
               Multi-{MRI}-Scanner Test-Retest-Study: Influence
               on Performance and Generalizability of Models},
  journal   = {Journal of Magnetic Resonance Imaging},
  volume    = {61},
  number    = {2},
  issn      = {1053-1807},
  address   = {New York, NY},
  publisher = {Wiley-Liss},
  reportid  = {DKFZ-2024-01006},
  pages     = {676--686},
  year      = {2025},
  month     = feb,
  note      = {EA:E010\#LA:E230\# / Volume 61, Issue 2, February 2025,
               Pages 676--686},
  abstract  = {Radiomics models trained on data from one center typically
               show a decline of performance when applied to data from
               external centers, hindering their introduction into
               large-scale clinical practice. Current expert
               recommendations suggest to use only reproducible radiomics
               features isolated by multiscanner test-retest experiments,
               which might help to overcome the problem of limited
               generalizability to external data. To evaluate the
               influence of using only a subset of robust radiomics
               features, defined in a prior in vivo multi-MRI-scanner
               test-retest-study, on the performance and generalizability
               of radiomics models. Retrospective. Patients with
               monoclonal plasma cell disorders. Training set (117 MRIs
               from center 1); internal test set (42 MRIs from center 1);
               external test set (143 MRIs from center 2-8). 1.5T and
               3.0T; T1-weighted turbo spin echo. The task for the
               radiomics models was to predict plasma cell infiltration,
               determined by bone marrow biopsy, noninvasively from MRI.
               Radiomics machine learning models, including linear
               regressor, support vector regressor (SVR), and random
               forest regressor (RFR), were trained on data from center 1,
               using either all radiomics features, or using only
               reproducible radiomics features. Models were tested on an
               internal (center 1) and a multicentric external data set
               (center 2-8). Pearson correlation coefficient r and mean
               absolute error (MAE) between predicted and actual plasma
               cell infiltration. Fisher's z-transformation, Wilcoxon
               signed-rank test, Wilcoxon rank-sum test; significance
               level P < 0.05. When using only reproducible features
               compared with all features, the performance of the SVR on
               the external test set significantly improved (r = 0.43 vs.
               r = 0.18 and MAE = 22.6 vs. MAE = 28.2). For the RFR, the
               performance on the external test set deteriorated when
               using only reproducible instead of all radiomics features
               (r = 0.33 vs. r = 0.44, P = 0.29 and MAE = 21.9 vs. MAE =
               20.5, P = 0.10). Using only reproducible radiomics features
               improves the external performance of some, but not all
               machine learning models, and did not automatically lead to
               an improvement of the external performance of the overall
               best radiomics model. Stage 2.},
  keywords  = {feature selection (Other) / generalizability (Other) /
               machine learning (Other) / multicenter (Other) / radiomics
               (Other) / reproducibility (Other)},
  cin       = {E010 / C060 / E230 / HD01},
  ddc       = {610},
  cid       = {I:(DE-He78)E010-20160331 / I:(DE-He78)C060-20160331 /
               I:(DE-He78)E230-20160331 / I:(DE-He78)HD01-20160331},
  pnm       = {315 - Bildgebung und Radioonkologie (POF4-315)},
  pid       = {G:(DE-HGF)POF4-315},
  typ       = {PUB:(DE-HGF)16},
  pubmed    = {pmid:38733369},
  doi       = {10.1002/jmri.29442},
  url       = {https://inrepo02.dkfz.de/record/290170},
}