% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article: Wennmann et al., Journal of Magnetic Resonance Imaging
% 61(2):676--686, 2025 (DOI 10.1002/jmri.29442).
% Cleaned from the repository export:
%   - The "$^*$" markers that followed the names Wennmann, Rotkopf, Bauer,
%     Hielscher, Kächele, Schlemmer, Delorme, Maier-Hein and Neher were
%     removed so the names parse and print cleanly (presumably an affiliation
%     flag of the exporting repository -- confirm if it is still needed).
%   - "#" in the note field is escaped, since a bare "#" is a LaTeX error
%     when the note is typeset.
%   - Page ranges use "--"; only the acronym in the title is brace-protected.
% Non-standard fields (reportid, cin, ddc, cid, pnm, pid, typ, pubmed) are
% kept verbatim; styles silently ignore fields they do not know.
@ARTICLE{Wennmann:290170,
      author       = {Wennmann, M. and Rotkopf, L. T. and Bauer, F. and
                      Hielscher, T. and Kächele, J. and Mai, E. K. and
                      Weinhold, N. and Raab, M.-S. and Goldschmidt, H. and
                      Weber, T. F. and Schlemmer, H.-P. and Delorme, S. and
                      Maier-Hein, K. and Neher, P.},
      title        = {Reproducible Radiomics Features from
                      Multi-{MRI}-Scanner Test-Retest-Study: Influence
                      on Performance and Generalizability of Models},
      journal      = {Journal of Magnetic Resonance Imaging},
      volume       = {61},
      number       = {2},
      issn         = {1053-1807},
      address      = {New York, NY},
      publisher    = {Wiley-Liss},
      reportid     = {DKFZ-2024-01006},
      pages        = {676--686},
      month        = feb,
      year         = {2025},
      note         = {EA:E010\#LA:E230\# / Volume 61, Issue 2, February 2025,
                      Pages 676--686},
      abstract     = {Radiomics models trained on data from one center typically
                      show a decline of performance when applied to data from
                      external centers, hindering their introduction into
                      large-scale clinical practice. Current expert
                      recommendations suggest to use only reproducible radiomics
                      features isolated by multiscanner test-retest experiments,
                      which might help to overcome the problem of limited
                      generalizability to external data. To evaluate the
                      influence of using only a subset of robust radiomics
                      features, defined in a prior in vivo multi-MRI-scanner
                      test-retest-study, on the performance and generalizability
                      of radiomics models. Retrospective. Patients with
                      monoclonal plasma cell disorders. Training set (117 MRIs
                      from center 1); internal test set (42 MRIs from center 1);
                      external test set (143 MRIs from center 2-8). 1.5T and
                      3.0T; T1-weighted turbo spin echo. The task for the
                      radiomics models was to predict plasma cell infiltration,
                      determined by bone marrow biopsy, noninvasively from MRI.
                      Radiomics machine learning models, including linear
                      regressor, support vector regressor (SVR), and random
                      forest regressor (RFR), were trained on data from center
                      1, using either all radiomics features, or using only
                      reproducible radiomics features. Models were tested on an
                      internal (center 1) and a multicentric external data set
                      (center 2-8). Pearson correlation coefficient r and mean
                      absolute error (MAE) between predicted and actual plasma
                      cell infiltration. Fisher's z-transformation, Wilcoxon
                      signed-rank test, Wilcoxon rank-sum test; significance
                      level P < 0.05. When using only reproducible features
                      compared with all features, the performance of the SVR on
                      the external test set significantly improved (r = 0.43 vs.
                      r = 0.18 and MAE = 22.6 vs. MAE = 28.2). For the RFR, the
                      performance on the external test set deteriorated when
                      using only reproducible instead of all radiomics features
                      (r = 0.33 vs. r = 0.44, P = 0.29 and MAE = 21.9 vs. MAE =
                      20.5, P = 0.10). Using only reproducible radiomics
                      features improves the external performance of some, but
                      not all machine learning models, and did not automatically
                      lead to an improvement of the external performance of the
                      overall best radiomics model. Stage 2.},
      keywords     = {feature selection (Other) / generalizability (Other) /
                      machine learning (Other) / multicenter (Other) / radiomics
                      (Other) / reproducibility (Other)},
      cin          = {E010 / C060 / E230 / HD01},
      ddc          = {610},
      cid          = {I:(DE-He78)E010-20160331 / I:(DE-He78)C060-20160331 /
                      I:(DE-He78)E230-20160331 / I:(DE-He78)HD01-20160331},
      pnm          = {315 - Bildgebung und Radioonkologie (POF4-315)},
      pid          = {G:(DE-HGF)POF4-315},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:38733369},
      doi          = {10.1002/jmri.29442},
      url          = {https://inrepo02.dkfz.de/record/290170},
}