% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Perrier et al. (2018), Clinical Epigenetics 10(1), article no. 38.
% Repository record 132751 (report id DKFZ-2018-00404). Names are stored in
% "Last, First" form; the fields cin, ddc, cid, pnm, pid, typ, reportid,
% pubmed and pmc are repository metadata and are ignored by standard styles.
@article{Perrier:132751,
  author    = {Perrier, F. and Novoloaca, A. and Ambatipudi, S. and
               Baglietto, L. and Ghantous, A. and Perduca, V. and
               Barrdahl, M. and Harlid, S. and Ong, K. K. and
               Cardona, A. and Polidoro, S. and Nøst, T. H. and
               Overvad, K. and Omichessan, H. and Dollé, M. and
               Bamia, C. and Huerta, J. M. and Vineis, P. and
               Herceg, Z. and Romieu, I. and Ferrari, P.},
  title     = {Identifying and correcting epigenetics measurements for
               systematic sources of variation},
  journal   = {Clinical Epigenetics},
  volume    = {10},
  number    = {1},
  issn      = {1868-7083},
  address   = {[S.l.]},
  publisher = {BioMed Central},
  reportid  = {DKFZ-2018-00404},
  pages     = {38},
  year      = {2018},
  abstract  = {Methylation measures quantified by microarray techniques
               can be affected by systematic variation due to the technical
               processing of samples, which may compromise the accuracy of
               the measurement process and contribute to bias the estimate
               of the association under investigation. The quantification
               of the contribution of the systematic source of variation is
               challenging in datasets characterized by hundreds of
               thousands of features. In this study, we introduce a method
               previously developed for the analysis of metabolomics data
               to evaluate the performance of existing normalizing
               techniques to correct for unwanted variation. Illumina
               Infinium HumanMethylation450K was used to acquire
               methylation levels in over 421,000 CpG sites for 902 study
               participants of a case-control study on breast cancer nested
               within the EPIC cohort. The principal component partial
               R-square (PC-PR2) analysis was used to identify and quantify
               the variability attributable to potential systematic sources
               of variation. Three correcting techniques, namely ComBat,
               surrogate variables analysis (SVA) and a linear regression
               model to compute residuals were applied. The impact of each
               correcting method on the association between smoking status
               and DNA methylation levels was evaluated, and results were
               compared with findings from a large meta-analysis. A sizeable
               proportion of systematic variability due to variables
               expressing 'batch' and 'sample position' within 'chip' was
               identified, with values of the partial $R^2$ statistics equal
               to 9.5 and 11.4\% of total variation, respectively. After
               application of ComBat or the residuals' methods, the
               contribution was 1.3 and 0.2\%, respectively. The SVA
               technique resulted in a reduced variability due to 'batch'
               (1.3\%) and 'sample position' (0.6\%), and in a
               diminished variability attributable to 'chip' within a batch
               (0.9\%). After ComBat or the residuals' corrections, a
               larger number of significant sites (k = 600 and
               k = 427, respectively) were associated to smoking status
               than the SVA correction (k = 96). The three correction
               methods removed systematic variation in DNA methylation
               data, as assessed by the PC-PR2, which lent itself as a
               useful tool to explore variability in large dimension data.
               SVA produced more conservative findings than ComBat in the
               association between smoking and DNA methylation.},
  cin       = {C020},
  ddc       = {610},
  cid       = {I:(DE-He78)C020-20160331},
  pnm       = {313 - Cancer risk factors and prevention (POF3-313)},
  pid       = {G:(DE-HGF)POF3-313},
  typ       = {PUB:(DE-HGF)16},
  pubmed    = {pmid:29588806},
  pmc       = {pmc:PMC5863487},
  doi       = {10.1186/s13148-018-0471-6},
  url       = {https://inrepo02.dkfz.de/record/132751},
}