% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Record for a journal article. The url field points to the repository record
% this entry was presumably exported from (confirm). Standard bibliographic
% fields come first; reportid, cin, ddc, cid, pnm, pid and typ are
% non-standard fields carried over unchanged, which standard styles ignore.
% NOTE(review): the export marked the first author with a superscript asterisk
% inside the author field. It is dropped here because it was being parsed as
% part of the surname; its meaning is not shown in this file, so confirm
% before relying on it.
% NOTE(review): the export carried the placeholder value "nn" for both volume
% and pages. Both optional fields are omitted until real values are known.
% The eprint fields are derived from the arXiv DOI; the doi field holds the
% bare DOI without any resolver or "DOI:" prefix.
@article{Gruber:300314,
  author     = {Gruber, S. and Bach, F.},
  title      = {Optimizing Estimators of Squared Calibration Errors in
                Classification},
  journal    = {Transactions on Machine Learning Research},
  year       = {2025},
  issn       = {2835-8856},
  publisher  = {OpenReview.net},
  address    = {[Amherst, Massachusetts]},
  note       = {epub},
  doi        = {10.48550/arXiv.2410.07014},
  eprint     = {2410.07014},
  eprinttype = {arXiv},
  url        = {https://inrepo02.dkfz.de/record/300314},
  abstract   = {In this work, we propose a mean-squared error-based risk
                that enables the comparison and optimization of estimators
                of squared calibration errors in practical settings.
                Improving the calibration of classifiers is crucial for
                enhancing the trustworthiness and interpretability of
                machine learning models, especially in sensitive
                decision-making scenarios. Although various calibration
                (error) estimators exist in the current literature, there is
                a lack of guidance on selecting the appropriate estimator
                and tuning its hyperparameters. By leveraging the bilinear
                structure of squared calibration errors, we reformulate
                calibration estimation as a regression problem with
                independent and identically distributed (i.i.d.) input
                pairs. This reformulation allows us to quantify the
                performance of different estimators even for the most
                challenging calibration criterion, known as canonical
                calibration. Our approach advocates for a
                training-validation-testing pipeline when estimating a
                calibration error on an evaluation dataset. We demonstrate
                the effectiveness of our pipeline by optimizing existing
                calibration estimators and comparing them with novel kernel
                ridge regression-based estimators on standard image
                classification tasks.},
  keywords   = {Machine Learning (cs.LG) (Other) / Machine Learning
                (stat.ML) (Other) / FOS: Computer and information sciences
                (Other)},
  reportid   = {DKFZ-2025-00760},
  cin        = {FM01},
  ddc        = {004},
  cid        = {I:(DE-He78)FM01-20160331},
  pnm        = {899 - ohne Topic (POF4-899)},
  pid        = {G:(DE-HGF)POF4-899},
  typ        = {PUB:(DE-HGF)16},
}