% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article (Radiology 293(3), 2019) comparing a U-Net deep learning
% system with clinical PI-RADS assessment at prostate MRI.
% The citation key and the repository-specific fields (reportid, cin, ddc,
% cid, pnm, pid, typ, pubmed) are kept verbatim; standard styles ignore
% fields they do not know.
% NOTE(review): the exported author list carried a superscript-asterisk
% marker on every author except Stenzinger and Hohenfellner -- presumably an
% institutional-affiliation flag; confirm against the repository record
% given in the url field. The markers are not stored in the author field
% because they would become part of the parsed surnames.
@article{Schelb:147218,
      author       = {Schelb, P. and Kohl, S. and Radtke, J. P. and
                      Wiesenfarth, M. and Kickingereder, P. and
                      Bickelhaupt, S. and Kuder, T. A. and Stenzinger, A. and
                      Hohenfellner, M. and Schlemmer, H.-P. and
                      Maier-Hein, K. and Bonekamp, D.},
      title        = {Classification of Cancer at Prostate {MRI}: Deep
                      Learning versus Clinical {PI-RADS} Assessment},
      journal      = {Radiology},
      volume       = {293},
      number       = {3},
      pages        = {607--617},
      year         = {2019},
      month        = dec,
      issn         = {1527-1315},
      address      = {Oak Brook, Ill.},
      publisher    = {Radiological Society of North America},
      reportid     = {DKFZ-2019-02344},
      abstract     = {Background Men suspected of having clinically significant
                      prostate cancer (sPC) increasingly undergo prostate MRI. The
                      potential of deep learning to provide diagnostic support for
                      human interpretation requires further evaluation. Purpose To
                      compare the performance of clinical assessment to a deep
                      learning system optimized for segmentation trained with
                      T2-weighted and diffusion MRI in the task of detection and
                      segmentation of lesions suspicious for sPC. Materials and
                      Methods In this retrospective study, T2-weighted and
                      diffusion prostate MRI sequences from consecutive men
                      examined with a single 3.0-T MRI system between 2015 and
                      2016 were manually segmented. Ground truth was provided by
                      combined targeted and extended systematic MRI-transrectal US
                      fusion biopsy, with sPC defined as International Society of
                      Urological Pathology Gleason grade group greater than or
                      equal to 2. By using split-sample validation, U-Net was
                      internally validated on the training set (80\% of the
                      data) through cross validation and subsequently externally
                      validated on the test set (20\% of the data).
                      U-Net-derived sPC probability maps were calibrated by
                      matching sextant-based cross-validation performance to
                      clinical performance of Prostate Imaging Reporting and Data
                      System (PI-RADS). Performance of PI-RADS and U-Net were
                      compared by using sensitivities, specificities, predictive
                      values, and Dice coefficient. Results A total of 312 men
                      (median age, 64 years; interquartile range [IQR], 58--71
                      years) were evaluated. The training set consisted of 250 men
                      (median age, 64 years; IQR, 58--71 years) and the test set of
                      62 men (median age, 64 years; IQR, 60--69 years). In the test
                      set, PI-RADS cutoffs greater than or equal to 3 versus
                      cutoffs greater than or equal to 4 on a per-patient basis
                      had sensitivity of 96\% (25 of 26) versus 88\% (23 of
                      26) at specificity of 22\% (eight of 36) versus 50\% (18
                      of 36). U-Net at probability thresholds of greater than or
                      equal to 0.22 versus greater than or equal to 0.33 had
                      sensitivity of 96\% (25 of 26) versus 92\% (24 of 26)
                      (both $P > .99$) with specificity of 31\% (11 of 36) versus
                      47\% (17 of 36) (both $P > .99$), not statistically
                      different from PI-RADS. Dice coefficients were 0.89 for
                      prostate and 0.35 for MRI lesion segmentation. In the test
                      set, coincidence of PI-RADS greater than or equal to 4 with
                      U-Net lesions improved the positive predictive value from
                      48\% (28 of 58) to 67\% (24 of 36) for U-Net probability
                      thresholds greater than or equal to 0.33 ($P = .01$), while
                      the negative predictive value remained unchanged (83\% [25
                      of 30] vs 83\% [43 of 52]; $P > .99$). Conclusion U-Net
                      trained with T2-weighted and diffusion MRI achieves similar
                      performance to clinical Prostate Imaging Reporting and Data
                      System assessment. {\textcopyright} RSNA, 2019 Online
                      supplemental material is available for this article. See
                      also the editorial by Padhani and Turkbey in this issue.},
      cin          = {E010 / E230 / C060 / E250 / E020 / L101},
      ddc          = {610},
      cid          = {I:(DE-He78)E010-20160331 / I:(DE-He78)E230-20160331 /
                      I:(DE-He78)C060-20160331 / I:(DE-He78)E250-20160331 /
                      I:(DE-He78)E020-20160331 / I:(DE-He78)L101-20160331},
      pnm          = {315 - Imaging and radiooncology (POF3-315)},
      pid          = {G:(DE-HGF)POF3-315},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:31592731},
      doi          = {10.1148/radiol.2019190938},
      url          = {https://inrepo02.dkfz.de/record/147218},
}