% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article record Aybey:278729 (Frontiers in Immunology, vol. 14, 2023).
% Review notes:
%  - The exported author list tagged B. Brors with a superscript asterisk in
%    math mode. It was removed from the author field because BibTeX parsed it
%    as part of the surname. The meaning of the marker is not stated in this
%    record.
%  - cin, ddc, cid, pnm, pid, typ, reportid, pubmed and pmc are repository
%    fields, not standard BibTeX fields; styles that do not know them ignore
%    them.
%  - pages holds the article number; the journal does not use page ranges here.
@ARTICLE{Aybey:278729,
  author    = {Aybey, B. and Zhao, S. and Brors, B. and Staub, E.},
  title     = {Immune cell type signature discovery and random forest
               classification for analysis of single cell gene expression
               datasets},
  journal   = {Frontiers in Immunology},
  volume    = {14},
  issn      = {1664-3224},
  address   = {Lausanne},
  publisher = {Frontiers Media},
  reportid  = {DKFZ-2023-01700},
  pages     = {1194745},
  year      = {2023},
  abstract  = {Robust immune cell gene expression signatures are central
               to the analysis of single cell studies. Nearly all known
               sets of immune cell signatures have been derived by making
               use of only single gene expression datasets. Utilizing the
               power of multiple integrated datasets could lead to
               high-quality immune cell signatures which could be used as
               superior inputs to machine learning-based cell type
               classification approaches. We established a novel workflow
               for the discovery of immune cell type signatures based
               primarily on gene-versus-gene expression similarity. It
               leverages multiple datasets, here seven single cell
               expression datasets from six different cancer types and
               resulted in eleven immune cell type-specific gene expression
               signatures. We used these to train random forest classifiers
               for immune cell type assignment for single-cell RNA-seq
               datasets. We obtained similar or better prediction results
               compared to commonly used methods for cell type assignment
               in independent benchmarking datasets. Our gene signature set
               yields higher prediction scores than other published immune
               cell type gene sets in random forest-based cell type
               classification. We further demonstrate how our approach
               helps to avoid bias in downstream statistical analyses by
               re-analysis of a published IFN stimulation experiment. We
               demonstrated the quality of our immune cell signatures and
               their strong performance in a random forest-based cell
               typing approach. We argue that classifying cells based on
               our comparably slim sets of genes accompanied by a random
               forest-based approach not only matches or outperforms widely
               used published approaches. It also facilitates unbiased
               downstream statistical analyses of differential gene
               expression between cell types for significantly more genes
               compared to previous cell classification algorithms.},
  keywords  = {cell clustering (Other) / cell type classification (Other)
               / gene signature discovery (Other) / machine learning
               (Other) / single-cell RNA sequencing (Other) / tumor
               microenvironment (Other)},
  cin       = {B330 / HD01},
  ddc       = {610},
  cid       = {I:(DE-He78)B330-20160331 / I:(DE-He78)HD01-20160331},
  pnm       = {312 - Funktionelle und strukturelle Genomforschung
               (POF4-312)},
  pid       = {G:(DE-HGF)POF4-312},
  typ       = {PUB:(DE-HGF)16},
  pubmed    = {pmid:37609075},
  pmc       = {pmc:PMC10441575},
  doi       = {10.3389/fimmu.2023.1194745},
  url       = {https://inrepo02.dkfz.de/record/278729},
}