% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal-article record for Schelb et al., European Radiology 31(1), 2021.
% Fields outside the standard article set (reportid, cin, ddc, cid, pnm, pid,
% typ, pubmed) are kept with their values; standard styles ignore field names
% they do not know.
% The "#" separators in the note field are written as \# because a bare "#"
% copied into the bibliography output is a LaTeX error.
% NOTE(review): the $^*$ markers after some author names are kept as exported;
% their meaning is not stated in this file -- confirm before stripping them.
@article{Schelb:157380,
  author    = {P. Schelb$^*$ and X. Wang$^*$ and J. P. Radtke$^*$ and M.
               Wiesenfarth$^*$ and P. Kickingereder and A. Stenzinger$^*$
               and M. Hohenfellner and H.-P. Schlemmer$^*$ and K. H.
               Maier-Hein$^*$ and D. Bonekamp$^*$},
  title     = {Simulated clinical deployment of fully automatic deep
               learning for clinical prostate {MRI} assessment},
  journal   = {European Radiology},
  volume    = {31},
  number    = {1},
  pages     = {302--313},
  year      = {2021},
  issn      = {1432-1084},
  address   = {Heidelberg},
  publisher = {Springer},
  reportid  = {DKFZ-2020-01603},
  note      = {2021 Jan;31(1):302-313\#EA:E010\#LA:E010\#},
  abstract  = {To simulate clinical deployment, evaluate performance, and
               establish quality assurance of a deep learning algorithm
               (U-Net) for detection, localization, and segmentation of
               clinically significant prostate cancer (sPC), ISUP grade
               group ≥ 2, using bi-parametric MRI. In 2017, 284
               consecutive men in active surveillance, biopsy-naïve or
               pre-biopsied, received targeted and extended systematic
               MRI/transrectal US-fusion biopsy, after examination on a
               single MRI scanner (3 T). A prospective adjustment scheme
               was evaluated comparing the performance of the Prostate
               Imaging Reporting and Data System (PI-RADS) and U-Net using
               sensitivity, specificity, predictive values, and the Dice
               coefficient. In the 259 eligible men (median 64 [IQR
               61-72] years), PI-RADS had a sensitivity of $98\%$
               $[106/108]/84\%$ [91/108] with a specificity of $17\%$
               $[25/151]/58\%$ [88/151], for thresholds at ≥ 3/≥ 4
               respectively. U-Net using dynamic threshold adjustment had a
               sensitivity of $99\%$ $[107/108]/83\%$ [90/108]
               (p > 0.99/> 0.99) with a specificity of $24\%$
               $[36/151]/55\%$ [83/151] (p > 0.99/> 0.99) for
               probability thresholds d3 and d4 emulating PI-RADS ≥ 3 and
               ≥ 4 decisions respectively, not statistically different
               from PI-RADS. Co-occurrence of a radiological PI-RADS ≥ 4
               examination and U-Net ≥ d3 assessment significantly
               improved the positive predictive value from 59 to $63\%$
               (p = 0.03), on a per-patient basis. U-Net has similar
               performance to PI-RADS in simulated continued clinical use.
               Regular quality assurance should be implemented to ensure
               desired performance. • U-Net maintained similar diagnostic
               performance compared to radiological assessment of PI-RADS
               ≥ 4 when applied in a simulated clinical deployment. •
               Application of our proposed prospective dynamic calibration
               method successfully adjusted U-Net performance within
               acceptable limits of the PI-RADS reference over time, while
               not being limited to PI-RADS as a reference. •
               Simultaneous detection by U-Net and radiological assessment
               significantly improved the positive predictive value on a
               per-patient and per-lesion basis, while the negative
               predictive value remained unchanged.},
  cin       = {E010 / E230 / C060 / HD01},
  ddc       = {610},
  cid       = {I:(DE-He78)E010-20160331 / I:(DE-He78)E230-20160331 /
               I:(DE-He78)C060-20160331 / I:(DE-He78)HD01-20160331},
  pnm       = {315 - Bildgebung und Radioonkologie (POF4-315)},
  pid       = {G:(DE-HGF)POF4-315},
  typ       = {PUB:(DE-HGF)16},
  pubmed    = {pmid:32767102},
  doi       = {10.1007/s00330-020-07086-z},
  url       = {https://inrepo02.dkfz.de/record/157380},
}