% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

@ARTICLE{Netzer:277857,
      author       = {N. Netzer$^*$ and C. Eith$^*$ and O. Bethge and T.
                      Hielscher$^*$ and C. Schwab and A. Stenzinger and R.
                      Gnirs$^*$ and H.-P. Schlemmer$^*$ and K. H. Maier-Hein$^*$
                      and L. Schimmöller and D. Bonekamp$^*$},
      title        = {{A}pplication of a validated prostate {MRI} deep learning
                      system to independent same-vendor multi-institutional data:
                      demonstration of transferability},
      journal      = {European Radiology},
      volume       = {33},
      number       = {11},
      issn         = {0938-7994},
      address      = {Heidelberg},
      publisher    = {Springer},
      reportid     = {DKFZ-2023-01525},
      pages        = {7463--7476},
      year         = {2023},
      note         = {#EA:E010#LA:E010# / 2023 Nov;33(11):7463-7476},
      abstract     = {To evaluate a fully automatic deep learning system to
                      detect and segment clinically significant prostate cancer
                      (csPCa) on same-vendor prostate MRI from two different
                      institutions that did not contribute to training of the
                      system. In this retrospective study, a previously
                      bi-institutionally validated deep learning system (UNETM)
                      was applied to bi-parametric prostate MRI data from one
                      external institution (A), a PI-RADS distribution-matched
                      internal cohort (B), and a csPCa-stratified subset of
                      single-institution external public challenge data (C).
                      csPCa was defined as ISUP Grade Group ≥ 2 determined from
                      combined targeted and extended systematic MRI/transrectal
                      US-fusion biopsy. Performance of UNETM was evaluated by
                      comparing ROC AUC and specificity at typical PI-RADS
                      sensitivity levels. Lesion-level analysis between UNETM
                      segmentations and radiologist-delineated segmentations was
                      performed using the Dice coefficient, free-response
                      receiver operating characteristic (FROC), and weighted
                      alternative FROC (waFROC). The influence of using
                      different diffusion sequences was analyzed in cohort A.
                      In 250/250/140 exams in cohorts A/B/C, differences in ROC
                      AUC were insignificant, with 0.80 (95\% CI: 0.74-0.85) /
                      0.87 (95\% CI: 0.83-0.92) / 0.82 (95\% CI: 0.75-0.89). At
                      sensitivities of 95\% and 90\%, UNETM achieved
                      specificities of 30\%/50\% in A, 44\%/71\% in B, and
                      43\%/49\% in C, respectively. The Dice coefficient between
                      UNETM and radiologist-delineated lesions was 0.36 in A and
                      0.49 in B. The waFROC AUC was 0.67 (95\% CI: 0.60-0.83) in
                      A and 0.70 (95\% CI: 0.64-0.78) in B. UNETM performed
                      marginally better on readout-segmented than on single-shot
                      echo-planar imaging. For same-vendor examinations, deep
                      learning provided comparable discrimination of csPCa and
                      non-csPCa lesions and examinations between the local and
                      two independent external data sets, demonstrating the
                      applicability of the system to institutions not
                      participating in model training. A previously
                      bi-institutionally validated fully automatic deep learning
                      system maintained acceptable exam-level diagnostic
                      performance in two independent external data sets,
                      indicating the potential of deploying AI models without
                      retraining or fine-tuning, and corroborating evidence that
                      AI models extract a substantial amount of transferable
                      domain knowledge about MRI-based prostate cancer
                      assessment. • A previously bi-institutionally validated
                      fully automatic deep learning system maintained acceptable
                      exam-level diagnostic performance in two independent
                      external data sets. • Lesion detection performance and
                      segmentation congruence were similar on the institutional
                      and an external data set, as measured by the weighted
                      alternative FROC AUC and the Dice coefficient. • Although
                      the system generalized to two external institutions
                      without re-training, achieving expected sensitivity and
                      specificity levels with the deep learning system requires
                      probability thresholds to be adjusted, underlining the
                      importance of institution-specific calibration and quality
                      control.},
      keywords     = {Deep learning (Other) / Magnetic resonance imaging (Other)
                      / Prostatic neoplasms (Other) / Validation study (Other)},
      cin          = {E010 / C060 / HD01 / E230},
      ddc          = {610},
      cid          = {I:(DE-He78)E010-20160331 / I:(DE-He78)C060-20160331 /
                      I:(DE-He78)HD01-20160331 / I:(DE-He78)E230-20160331},
      pnm          = {315 - Bildgebung und Radioonkologie (POF4-315)},
      pid          = {G:(DE-HGF)POF4-315},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:37507610},
      doi          = {10.1007/s00330-023-09882-9},
      url          = {https://inrepo02.dkfz.de/record/277857},
}
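
% The lesion-level overlap metric named in the abstract is the Dice
% coefficient. As a reminder (standard textbook definition, not quoted from
% the paper itself), for two binary segmentation masks A and B it reads:
%
%   \mathrm{Dice}(A, B) = \frac{2\,|A \cap B|}{|A| + |B|}
%
% i.e., twice the overlap volume divided by the sum of the two mask volumes,
% ranging from 0 (no overlap) to 1 (identical masks).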
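
% Minimal usage sketch for citing this entry with biblatex/biber (the file
% name "references.bib" and the surrounding preamble are illustrative
% assumptions, not part of the record):
%
%   \usepackage[backend=biber]{biblatex}
%   \addbibresource{references.bib}
%   ...
%   A same-vendor transferability study of a prostate MRI deep learning
%   system is reported in \cite{Netzer:277857}.
%   ...
%   \printbibliography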