% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article, repository record 292112 (see the url field).
% Cleanup notes for maintainers:
%  - The export tagged the following authors with a superscript asterisk
%    written inside the name itself; the markup has been removed from the
%    author field because it becomes part of the parsed surname and breaks
%    sorting, labels and printed output. Tagged authors: Almeida, Norajitra,
%    Lüth, Wald, Weru, Nolden, Jäger, Maier-Hein. Presumably this marks
%    institute-affiliated authors -- TODO confirm with the repository.
%  - Names are given in "von Last, First" form so the surname split is
%    unambiguous. NOTE(review): the first author is entered as surname
%    Almeida with initials S. D., which matches how BibTeX parsed the
%    original string; confirm against the published article.
%  - The hash-delimited repository tag that used to sit in the note field
%    now lives in the non-printing internalnote field, because an unescaped
%    hash character in a printed field stops LaTeX with an error.
@ARTICLE{DAlmeida:292112,
  author       = {Almeida, S. D. and Norajitra, T. and Lüth, C. T. and
                  Wald, T. and Weru, V. and Nolden, M. and Jäger, P. F. and
                  von Stackelberg, O. and Heußel, C. P. and
                  Weinheimer, O. and Biederer, J. and Kauczor, H.-U. and
                  Maier-Hein, K.},
  title        = {How do deep-learning models generalize across
                  populations? {Cross}-ethnicity generalization of {COPD}
                  detection},
  journal      = {Insights into Imaging},
  volume       = {15},
  number       = {1},
  pages        = {198},
  year         = {2024},
  issn         = {1869-4101},
  address      = {Heidelberg},
  publisher    = {Springer},
  reportid     = {DKFZ-2024-01611},
  internalnote = {#EA:E230#LA:E230#},
  abstract     = {To evaluate the performance and potential biases of
                  deep-learning models in detecting chronic obstructive
                  pulmonary disease (COPD) on chest CT scans across different
                  ethnic groups, specifically non-Hispanic White (NHW) and
                  African American (AA) populations. Inspiratory chest CT and
                  clinical data from 7549 Genetic epidemiology of COPD
                  individuals (mean age 62 years old, 56--69 interquartile
                  range), including 5240 NHW and 2309 AA individuals, were
                  retrospectively analyzed. Several factors influencing COPD
                  binary classification performance on different ethnic
                  populations were examined: (1) effects of training
                  population: NHW-only, AA-only, balanced set (half NHW, half
                  AA) and the entire set (NHW + AA all); (2) learning
                  strategy: three supervised learning (SL) vs. three
                  self-supervised learning (SSL) methods. Distribution shifts
                  across ethnicity were further assessed for the
                  top-performing methods. The learning strategy significantly
                  influenced model performance, with SSL methods achieving
                  higher performances compared to SL methods
                  ($p < 0.001$), across all training configurations. Training
                  on balanced datasets containing NHW and AA individuals
                  resulted in improved model performance compared to
                  population-specific datasets. Distribution shifts were
                  found between ethnicities for the same health status,
                  particularly when models were trained on nearest-neighbor
                  contrastive SSL. Training on a balanced dataset resulted in
                  fewer distribution shifts across ethnicity and health
                  status, highlighting its efficacy in reducing biases. Our
                  findings demonstrate that utilizing SSL methods and
                  training on large and balanced datasets can enhance COPD
                  detection model performance and reduce biases across
                  diverse ethnic populations. These findings emphasize the
                  importance of equitable AI-driven healthcare solutions for
                  COPD diagnosis. Self-supervised learning coupled with
                  balanced datasets significantly improves COPD detection
                  model performance, addressing biases across diverse ethnic
                  populations and emphasizing the crucial role of equitable
                  AI-driven healthcare solutions. Self-supervised learning
                  methods outperform supervised learning methods, showing
                  higher AUC values ($p < 0.001$). Balanced datasets with
                  non-Hispanic White and African American individuals improve
                  model performance. Training on diverse datasets enhances
                  COPD detection accuracy. Ethnically diverse datasets reduce
                  bias in COPD detection models. SimCLR models mitigate
                  biases in COPD detection across ethnicities.},
  keywords     = {Artificial intelligence (Other) / Chronic obstructive
                  pulmonary disease (Other) / Computed tomography (Other) /
                  Deep learning (Other) / Ethnicity (Other)},
  cin          = {E230 / E290 / C060},
  ddc          = {610},
  cid          = {I:(DE-He78)E230-20160331 / I:(DE-He78)E290-20160331 /
                  I:(DE-He78)C060-20160331},
  pnm          = {315 - Bildgebung und Radioonkologie (POF4-315)},
  pid          = {G:(DE-HGF)POF4-315},
  typ          = {PUB:(DE-HGF)16},
  pubmed       = {pmid:39112910},
  doi          = {10.1186/s13244-024-01781-x},
  url          = {https://inrepo02.dkfz.de/record/292112},
}