% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article record for the NanoMelt paper (mAbs 17(1), 2025).
% - Author names use the unambiguous "Last, First" form.  The exported record
%   carried a superscript asterisk after "Kunz"; it is not part of the name and
%   was removed.  NOTE(review): presumably an affiliation flag added by the
%   exporting repository -- confirm before relying on it.
% - "pages" holds the article number (it matches the DOI suffix), not a range.
% - reportid, cin, ddc, cid, pnm, pid, typ and pubmed are not standard BibTeX
%   fields; standard styles ignore them.
@ARTICLE{Ramon:296154,
      author       = {Ramon, A. and Ni, M. and Predeina, O. and Gaffey, R. and
                      Kunz, P. and Onuoha, S. and Sormanni, P.},
      title        = {Prediction of protein biophysical traits from limited
                      data: a case study on nanobody thermostability through
                      {NanoMelt}},
      journal      = {mAbs},
      volume       = {17},
      number       = {1},
      issn         = {1942-0862},
      address      = {London},
      publisher    = {Taylor \& Francis},
      reportid     = {DKFZ-2025-00082},
      pages        = {2442750},
      year         = {2025},
      abstract     = {In-silico prediction of protein biophysical traits is often
                      hindered by the limited availability of experimental data
                      and their heterogeneity. Training on limited data can lead
                      to overfitting and poor generalizability to sequences
                      distant from those in the training set. Additionally,
                      inadequate use of scarce and disparate data can introduce
                      biases during evaluation, leading to unreliable model
                      performances being reported. Here, we present a
                      comprehensive study exploring various approaches for protein
                      fitness prediction from limited data, leveraging pre-trained
                      embeddings, repeated stratified nested cross-validation, and
                      ensemble learning to ensure an unbiased assessment of the
                      performances. We applied our framework to introduce
                      NanoMelt, a predictor of nanobody thermostability trained
                      with a dataset of 640 measurements of apparent melting
                      temperature, obtained by integrating data from the
                      literature with 129 new measurements from this study. We
                      find that an ensemble model stacking multiple regression
                      using diverse sequence embeddings achieves state-of-the-art
                      accuracy in predicting nanobody thermostability. We further
                      demonstrate NanoMelt's potential to streamline nanobody
                      development by guiding the selection of highly stable
                      nanobodies. We make the curated dataset of nanobody
                      thermostability freely available and NanoMelt accessible as
                      a downloadable software and webserver.},
      keywords     = {Single-Domain Antibodies: chemistry / Single-Domain
                      Antibodies: immunology / Protein Stability / Humans /
                      Software / Computer Simulation / Biological sciences –
                      biophysics and computational biology (Other) / Protein
                      fitness (Other) / antibody design (Other) / antibody
                      engineering (Other) / ensemble model (Other) / machine
                      learning (Other) / nanobody (Other) / semi-supervised
                      learning (Other) / thermostability (Other) /
                      Single-Domain Antibodies (NLM Chemicals)},
      cin          = {B070},
      ddc          = {610},
      cid          = {I:(DE-He78)B070-20160331},
      pnm          = {312 - Funktionelle und strukturelle Genomforschung
                      (POF4-312)},
      pid          = {G:(DE-HGF)POF4-312},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:39772905},
      doi          = {10.1080/19420862.2024.2442750},
      url          = {https://inrepo02.dkfz.de/record/296154},
}