% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article record (repository report id DKFZ-2025-01135).
% Cleanup notes for maintainers:
%  - The exported author list carried a "$^*$" marker after the names Felici
%    and Canzian.  The markers were dropped from the author field because
%    BibTeX parses them as part of the surname, which breaks sorting and
%    label generation.  NOTE(review): the markers presumably flag
%    institute-affiliated authors -- confirm against the repository record.
%  - The hash signs in the note field are escaped so the field can be
%    typeset; the rendered text is unchanged.
%  - Abstract: restored the exponent in the Bonferroni threshold, moved the
%    sentence period out of math mode, and split a fused word.
@ARTICLE{Felici:301751,
      author       = {Felici, A. and Peduzzi, G. and Pellungrini, R. and
                      Campa, D. and Canzian, F.},
      title        = {Regression and machine learning approaches identify
                      potential risk factors for glioblastoma multiforme},
      journal      = {Brain Communications},
      volume       = {7},
      number       = {3},
      issn         = {2632-1297},
      address      = {[Oxford]},
      publisher    = {Oxford University Press},
      reportid     = {DKFZ-2025-01135},
      pages        = {fcaf187},
      year         = {2025},
      note         = {\#EA:C055\#LA:C055\#},
      abstract     = {Glioblastoma multiforme is a lethal disease, with a 5-year
                      survival rate of $<10\%$. The identification of risk factors
                      for glioblastoma multiforme is essential for the
                      understanding of this disease and could facilitate more
                      effective stratification of high-risk individuals. However,
                      our current knowledge of glioblastoma multiforme risk
                      factors is limited. Given the complexity and heterogeneity
                      of the disease, traditional epidemiological approaches may
                      be insufficient to study risk factors for glioblastoma
                      multiforme. The combination of traditional approaches with
                      machine learning models could prove effective in identifying
                      relevant factors for glioblastoma multiforme risk. In this
                      study, we developed glioblastoma multiforme risk models in
                      the UK Biobank cohort using 576 glioblastoma multiforme
                      cases and 302 602 controls. First, 369 exposures were tested
                      with traditional regression models in a case-control study
                      and significant associations were identified. Subsequently,
                      significant features were filtered based on their completion
                      rate and correlation. The selected exposures were then used
                      to develop two machine learning models: a support vector
                      machine and a Multi-Layer Perceptron. To address the
                      imbalance within the subpopulation, two controls per case
                      with full data were selected, resulting in 442 glioblastoma
                      multiforme cases and 884 controls being analysed with the
                      machine learning models. Relevant factors for glioblastoma
                      multiforme risk were identified by explaining the results of
                      the two models with Shapley Additive explanations.
                      Traditional regression methods identified 38 significant
                      associations between environmental exposures and
                      glioblastoma multiforme risk under the Bonferroni threshold
                      ($P < 1.35 \times 10^{-4}$). Subsequent filtration results
                      in the selection of 12 exposures, which were then analysed
                      with age, sex and a polygenic score using the two machine
                      learning models. Support vector machine and the multi-layer
                      perceptron demonstrated a good sensitivity (0.91 and 0.82,
                      respectively). In addition to age and genetics, Shapley
                      Additive explanations demonstrated significant contributions
                      of insulin-like growth factor 1 blood levels and the
                      right-hand grip strength on the predictions made by the
                      models, with the latter effect potentially being confounded
                      by endogenous testosterone levels. The integration of
                      machine learning with traditional models has the potential
                      to enhance the identification of risk factors for
                      glioblastoma multiforme.},
      keywords     = {Glioblastoma multiforme (Other) / IGF1 (Other) /
                      epidemiology (Other) / genomics (Other) / machine learning
                      (Other)},
      cin          = {C055},
      ddc          = {610},
      cid          = {I:(DE-He78)C055-20160331},
      pnm          = {313 - Krebsrisikofaktoren und Prävention (POF4-313)},
      pid          = {G:(DE-HGF)POF4-313},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:40458457},
      pmc          = {pmc:PMC12127608},
      doi          = {10.1093/braincomms/fcaf187},
      url          = {https://inrepo02.dkfz.de/record/301751},
}