% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article record for the RF_Purify paper (BMC Bioinformatics 20(1), 2019).
% Notes for maintainers:
%   - The exported record appended the markup `$^*$` to every author name. It is
%     removed here because BibTeX treats it as part of the surname, which breaks
%     sorting and labels. It was presumably an affiliation marker -- TODO confirm
%     against the source repository if that information is needed.
%   - The tool name in the title is braced so that sentence-casing styles keep
%     its capitalisation, and the underscore is escaped for text mode.
%   - reportid, cin, ddc, cid, pnm, pid, typ, pubmed and pmc are not standard
%     BibTeX fields; standard styles ignore them. They are kept verbatim.
@article{Johann:144575,
  author    = {Johann, P. and Jäger, N. and Pfister, S. M. and Sill, M.},
  title     = {{RF\_Purify}: a novel tool for comprehensive analysis of
               tumor-purity in methylation array data based on random
               forest regression},
  journal   = {{BMC} Bioinformatics},
  volume    = {20},
  number    = {1},
  issn      = {1471-2105},
  address   = {Heidelberg},
  publisher = {Springer},
  reportid  = {DKFZ-2019-02018},
  pages     = {428},
  year      = {2019},
  abstract  = {With the advent of array-based techniques to measure
               methylation levels in primary tumor samples, systematic
               investigations of methylomes have widely been performed on a
               large number of tumor entities. Most of these approaches are
               not based on measuring individual cell methylation but
               rather the bulk tumor sample DNA, which contains a mixture
               of tumor cells, infiltrating immune cells and other stromal
               components. This raises questions about the purity of a
               certain tumor sample, given the varying degrees of stromal
               infiltration in different entities. Previous methods to
               infer tumor purity require or are based on the use of
               matching control samples which are rarely available. Here we
               present a novel, reference free method to quantify tumor
               purity, based on two Random Forest classifiers, which were
               trained on ABSOLUTE as well as ESTIMATE purity values from
               TCGA tumor samples. We subsequently apply this method to a
               previously published, large dataset of brain tumors, proving
               that these models perform well in datasets that have not
               been characterized with respect to tumor purity. Using two
               gold standard methods to infer purity - the ABSOLUTE score
               based on whole genome sequencing data and the ESTIMATE score
               based on gene expression data - we have optimized Random
               Forest classifiers to predict tumor purity in entities that
               were contained in the TCGA project. We validated these
               classifiers using an independent test data set and
               cross-compared it to other methods which have been applied
               to the TCGA datasets (such as ESTIMATE and LUMP). Using
               Illumina methylation array data of brain tumor entities (as
               published in Capper et al. (Nature 555:469-474, 2018)) we
               applied this model to estimate tumor purity and find that
               subgroups of brain tumors display substantial differences in
               tumor purity. Random forest-based tumor purity prediction is
               a well suited tool to extrapolate gold standard measures of
               purity to novel methylation array datasets. In contrast to
               other available methylation based tumor purity estimation
               methods, our classifiers do not need a priori knowledge
               about the tumor entity or matching control tissue to predict
               tumor purity.},
  cin       = {B062 / L101},
  ddc       = {610},
  cid       = {I:(DE-He78)B062-20160331 / I:(DE-He78)L101-20160331},
  pnm       = {312 - Functional and structural genomics (POF3-312)},
  pid       = {G:(DE-HGF)POF3-312},
  typ       = {PUB:(DE-HGF)16},
  pubmed    = {pmid:31419933},
  pmc       = {pmc:PMC6697926},
  doi       = {10.1186/s12859-019-3014-z},
  url       = {https://inrepo02.dkfz.de/record/144575},
}