% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Entry Ramon:296154 -- journal article in mAbs 17(1), 2025 (repository record: see the url field).
% Names use the "Last, First" form; only the case-sensitive word NanoMelt is brace-protected in the title.
@article{Ramon:296154,
  author        = {Ramon, A. and Ni, M. and Predeina, O. and Gaffey, R. and
                   Kunz, P. and Onuoha, S. and Sormanni, P.},
  title         = {Prediction of protein biophysical traits from limited
                   data: a case study on nanobody thermostability through
                   {NanoMelt}},
  journal       = {mAbs},
  volume        = {17},
  number        = {1},
  issn          = {1942-0862},
  address       = {London},
  publisher     = {Taylor \& Francis},
  reportid      = {DKFZ-2025-00082},
  pages         = {2442750},
  year          = {2025},
  abstract      = {In-silico prediction of protein biophysical traits is often
                   hindered by the limited availability of experimental data
                   and their heterogeneity. Training on limited data can lead
                   to overfitting and poor generalizability to sequences
                   distant from those in the training set. Additionally,
                   inadequate use of scarce and disparate data can introduce
                   biases during evaluation, leading to unreliable model
                   performances being reported. Here, we present a
                   comprehensive study exploring various approaches for protein
                   fitness prediction from limited data, leveraging pre-trained
                   embeddings, repeated stratified nested cross-validation, and
                   ensemble learning to ensure an unbiased assessment of the
                   performances. We applied our framework to introduce
                   NanoMelt, a predictor of nanobody thermostability trained
                   with a dataset of 640 measurements of apparent melting
                   temperature, obtained by integrating data from the
                   literature with 129 new measurements from this study. We
                   find that an ensemble model stacking multiple regression
                   using diverse sequence embeddings achieves state-of-the-art
                   accuracy in predicting nanobody thermostability. We further
                   demonstrate NanoMelt's potential to streamline nanobody
                   development by guiding the selection of highly stable
                   nanobodies. We make the curated dataset of nanobody
                   thermostability freely available and NanoMelt accessible as
                   a downloadable software and webserver.},
  keywords      = {Single-Domain Antibodies: chemistry / Single-Domain
                   Antibodies: immunology / Protein Stability / Humans /
                   Software / Computer Simulation / Biological sciences --
                   biophysics and computational biology (Other) / Protein
                   fitness (Other) / antibody design (Other) / antibody
                   engineering (Other) / ensemble model (Other) / machine
                   learning (Other) / nanobody (Other) / semi-supervised
                   learning (Other) / thermostability (Other) / Single-Domain
                   Antibodies (NLM Chemicals)},
  cin           = {B070},
  ddc           = {610},
  cid           = {I:(DE-He78)B070-20160331},
  pnm           = {312 - Funktionelle und strukturelle Genomforschung
                   (POF4-312)},
  pid           = {G:(DE-HGF)POF4-312},
  typ           = {PUB:(DE-HGF)16},
  pubmed        = {pmid:39772905},
  doi           = {10.1080/19420862.2024.2442750},
  url           = {https://inrepo02.dkfz.de/record/296154},
  internal-note = {P. Kunz carried a superscript asterisk marker in the
                   original author field. It was moved here so the name
                   parses and sorts cleanly; the meaning of the marker is
                   not stated in this file (TODO confirm with the source
                   record).},
}