% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article: Felici et al. (2025), Brain Communications 7(3), fcaf187.
% Fields outside the standard BibTeX set (reportid, cin, ddc, cid, pnm, pid,
% typ, pubmed, pmc) are ignored by standard styles; they look like
% repository-export metadata and are kept unchanged (TODO confirm their
% consumers before renaming or removing any of them).
% Author names carry no affiliation markers: typographic markers inside a
% name become part of the parsed surname. The note field presumably already
% records the first/last-author department (EA/LA codes) -- verify.
% The hash signs in the note field are escaped so the field can be typeset.
@article{Felici:301751,
  author    = {Felici, A. and Peduzzi, G. and Pellungrini, R. and
               Campa, D. and Canzian, F.},
  title     = {Regression and machine learning approaches identify
               potential risk factors for glioblastoma multiforme},
  journal   = {Brain Communications},
  volume    = {7},
  number    = {3},
  issn      = {2632-1297},
  address   = {[Oxford]},
  publisher = {Oxford University Press},
  reportid  = {DKFZ-2025-01135},
  pages     = {fcaf187},
  year      = {2025},
  note      = {\#EA:C055\#LA:C055\#},
  abstract  = {Glioblastoma multiforme is a lethal disease, with a 5-year
               survival rate of $<10\%$. The identification of risk factors
               for glioblastoma multiforme is essential for the
               understanding of this disease and could facilitate more
               effective stratification of high-risk individuals. However,
               our current knowledge of glioblastoma multiforme risk
               factors is limited. Given the complexity and heterogeneity
               of the disease, traditional epidemiological approaches may
               be insufficient to study risk factors for glioblastoma
               multiforme. The combination of traditional approaches with
               machine learning models could prove effective in identifying
               relevant factors for glioblastoma multiforme risk. In this
               study, we developed glioblastoma multiforme risk models in
               the UK Biobank cohort using 576 glioblastoma multiforme
               cases and 302 602 controls. First, 369 exposures were tested
               with traditional regression models in a case-control study
               and significant associations were identified. Subsequently,
               significant features were filtered based on their completion
               rate and correlation. The selected exposures were then used
               to develop two machine learning models: a support vector
               machine and a Multi-Layer Perceptron. To address the
               imbalance within the subpopulation, two controls per case
               with full data were selected, resulting in 442 glioblastoma
               multiforme cases and 884 controls being analysed with the
               machine learning models. Relevant factors for glioblastoma
               multiforme risk were identified by explaining the results of
               the two models with Shapley Additive explanations.
               Traditional regression methods identified 38 significant
               associations between environmental exposures and
               glioblastoma multiforme risk under the Bonferroni threshold
               ($P < 1.35 \times 10^{-4}$). Subsequent filtration results in
               the selection of 12 exposures, which were then analysed with
               age, sex and a polygenic score using the two machine
               learning models. Support vector machine and the multi-layer
               perceptron demonstrated a good sensitivity (0.91 and 0.82,
               respectively). In addition to age and genetics, Shapley
               Additive explanations demonstrated significant contributions
               of insulin-like growth factor 1 blood levels and the
               right-hand grip strength on the predictions made by the
               models, with the latter effect potentially being confounded
               by endogenous testosterone levels. The integration of
               machine learning with traditional models has the potential
               to enhance the identification of risk factors for
               glioblastoma multiforme.},
  keywords  = {Glioblastoma multiforme (Other) / IGF1 (Other) /
               epidemiology (Other) / genomics (Other) / machine learning
               (Other)},
  cin       = {C055},
  ddc       = {610},
  cid       = {I:(DE-He78)C055-20160331},
  pnm       = {313 - Krebsrisikofaktoren und Prävention (POF4-313)},
  pid       = {G:(DE-HGF)POF4-313},
  typ       = {PUB:(DE-HGF)16},
  pubmed    = {pmid:40458457},
  pmc       = {pmc:PMC12127608},
  doi       = {10.1093/braincomms/fcaf187},
  url       = {https://inrepo02.dkfz.de/record/301751},
}