% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article: Schelb et al., RöFo 193(5):559--573, 2021.
% Review notes for this entry:
%  - Author names are stored in "Last, First" form. The export had suffixed
%    the names of Schelb, Tavakoli, Tubtawee, Hielscher, Kuder, Schlemmer and
%    Bonekamp with a math-mode superscript asterisk, which became part of the
%    parsed surname; the marker was removed from the author field.
%    NOTE(review): presumably an institute-affiliation marker -- confirm.
%  - In the title only the acronym and the German nouns are brace-protected
%    (whole words), so a bibliography style may re-case the remaining words.
%  - The hash signs in the note field are escaped so the field can be typeset.
%  - The page range uses the double hyphen (en dash) expected by styles.
%  - reportid, abstract, cin, ddc, cid, pnm, pid, typ and pubmed are not
%    standard BibTeX fields; unknown fields are ignored by standard styles.
@article{Schelb:165963,
      author       = {Schelb, P. and Tavakoli, A. A. and Tubtawee, T. and
                      Hielscher, T. and Radtke, J.-P. and Görtz, M. and
                      Schütz, V. and Kuder, T. A. and Schimmöller, L. and
                      Stenzinger, A. and Hohenfellner, M. and
                      Schlemmer, H.-P. and Bonekamp, D.},
      title        = {Comparison of Prostate {MRI} Lesion Segmentation
                      Agreement Between Multiple Radiologists and a
                      Fully Automatic Deep Learning System. [{Vergleich}
                      der {Kongruenz} von
                      {Prostata-MRT-Läsionssegmentationen} durch mehrere
                      {Radiologen} und ein vollautomatisches
                      {Deep-Learning-System}]},
      journal      = {RöFo},
      volume       = {193},
      number       = {5},
      issn         = {1438-9010},
      address      = {Stuttgart [u.a.]},
      publisher    = {Thieme},
      reportid     = {DKFZ-2020-02512},
      pages        = {559--573},
      year         = {2021},
      note         = {\#EA:E010\#LA:E010\#2021 May;193(5):559-573},
      abstract     = {A recently developed deep learning model (U-Net)
                      approximated the clinical performance of radiologists in the
                      prediction of clinically significant prostate cancer (sPC)
                      from prostate MRI. Here, we compare the agreement between
                      lesion segmentations by U-Net with manual lesion
                      segmentations performed by different radiologists. 165
                      patients with suspicion for sPC underwent targeted and
                      systematic fusion biopsy following 3 Tesla multiparametric
                      MRI (mpMRI). Five sets of segmentations were generated
                      retrospectively: segmentations of clinical lesions,
                      independent segmentations by three radiologists, and fully
                      automated bi-parametric U-Net segmentations. Per-lesion
                      agreement was calculated for each rater by averaging Dice
                      coefficients with all overlapping lesions from other raters.
                      Agreement was compared using descriptive statistics and
                      linear mixed models. The mean Dice coefficient for manual
                      segmentations showed only moderate agreement at 0.48-0.52,
                      reflecting the difficult visual task of determining the
                      outline of otherwise jointly detected lesions. U-net
                      segmentations were significantly smaller than manual
                      segmentations (p < 0.0001) and exhibited a lower mean Dice
                      coefficient of 0.22, which was significantly lower compared
                      to manual segmentations (all p < 0.0001). These differences
                      remained after correction for lesion size and were
                      unaffected between sPC and non-sPC lesions and between
                      peripheral and transition zone lesions. Knowledge of the
                      order of agreement of manual segmentations of different
                      radiologists is important to set the expectation value for
                      artificial intelligence (AI) systems in the task of prostate
                      MRI lesion segmentation. Perfect agreement (Dice coefficient
                      of one) should not be expected for AI. Lower Dice
                      coefficients of U-Net compared to manual segmentations are
                      only partially explained by smaller segmentation sizes and
                      may result from a focus on the lesion core and a small
                      relative lesion center shift. Although it is primarily
                      important that AI detects sPC correctly, the Dice
                      coefficient for overlapping lesions from multiple raters can
                      be used as a secondary measure for segmentation quality in
                      future studies. · Intermediate human Dice coefficients
                      reflect the difficulty of outlining jointly detected
                      lesions.. · Lower Dice coefficients of deep learning
                      motivate further research to approximate human perception..
                      · Comparable predictive performance of deep learning
                      appears independent of Dice agreement.. · Dice agreement
                      independent of significant cancer presence indicates
                      indistinguishability of some benign imaging findings.. ·
                      Improving DWI to T2 registration may improve the observed
                      U-Net Dice coefficients..· Schelb P, Tavakoli AA, Tubtawee
                      T et al. Comparison of Prostate MRI Lesion Segmentation
                      Agreement Between Multiple Radiologists and a Fully
                      Automatic Deep Learning System. Fortschr Röntgenstr 2020;
                      DOI: 10.1055/a-1290-8070.},
      cin          = {E010 / C060 / E020},
      ddc          = {610},
      cid          = {I:(DE-He78)E010-20160331 / I:(DE-He78)C060-20160331 /
                      I:(DE-He78)E020-20160331},
      pnm          = {315 - Bildgebung und Radioonkologie (POF4-315)},
      pid          = {G:(DE-HGF)POF4-315},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:33212541},
      doi          = {10.1055/a-1290-8070},
      url          = {https://inrepo02.dkfz.de/record/165963},
}