% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article record: Clinical Epigenetics 10(1), article 38 (2018),
% DOI 10.1186/s13148-018-0471-6.
%
% The fields reportid, cin, ddc, cid, pnm, pid, typ, pubmed and pmc are not
% standard BibTeX fields; standard styles ignore unknown fields. They look like
% bookkeeping identifiers of the exporting repository (see the url field), so
% keep them byte-for-byte when editing this entry.
%
% NOTE(review): the exported author list carried a "$^*$" marker directly after
% "M. Barrdahl". It was dropped because BibTeX parses such a marker as part of
% the surname, which breaks sorting and label generation. Presumably it flagged
% an affiliation; confirm whether that information must be recorded elsewhere.
@ARTICLE{Perrier:132751,
      author       = {Perrier, F. and Novoloaca, A. and Ambatipudi, S. and
                      Baglietto, L. and Ghantous, A. and Perduca, V. and
                      Barrdahl, M. and Harlid, S. and Ong, K. K. and
                      Cardona, A. and Polidoro, S. and Nøst, T. H. and
                      Overvad, K. and Omichessan, H. and Dollé, M. and
                      Bamia, C. and Huerta, J. M. and Vineis, P. and
                      Herceg, Z. and Romieu, I. and Ferrari, P.},
      title        = {Identifying and correcting epigenetics measurements for
                      systematic sources of variation},
      journal      = {Clinical Epigenetics},
      volume       = {10},
      number       = {1},
      issn         = {1868-7083},
      address      = {[S.l.]},
      publisher    = {BioMed Central},
      reportid     = {DKFZ-2018-00404},
      pages        = {38},
      year         = {2018},
      abstract     = {Methylation measures quantified by microarray techniques
                      can be affected by systematic variation due to the technical
                      processing of samples, which may compromise the accuracy of
                      the measurement process and contribute to bias the estimate
                      of the association under investigation. The quantification
                      of the contribution of the systematic source of variation is
                      challenging in datasets characterized by hundreds of
                      thousands of features. In this study, we introduce a method
                      previously developed for the analysis of metabolomics data
                      to evaluate the performance of existing normalizing
                      techniques to correct for unwanted variation. Illumina
                      Infinium HumanMethylation450K was used to acquire
                      methylation levels in over 421,000 CpG sites for 902 study
                      participants of a case-control study on breast cancer nested
                      within the EPIC cohort. The principal component partial
                      R-square (PC-PR2) analysis was used to identify and quantify
                      the variability attributable to potential systematic sources
                      of variation. Three correcting techniques, namely ComBat,
                      surrogate variables analysis (SVA) and a linear regression
                      model to compute residuals were applied. The impact of each
                      correcting method on the association between smoking status
                      and DNA methylation levels was evaluated, and results were
                      compared with findings from a large meta-analysis. A sizeable
                      proportion of systematic variability due to variables
                      expressing 'batch' and 'sample position' within 'chip' was
                      identified, with values of the partial R2 statistics equal
                      to 9.5 and 11.4\% of total variation, respectively. After
                      application of ComBat or the residuals' methods, the
                      contribution was 1.3 and 0.2\%, respectively. The SVA
                      technique resulted in a reduced variability due to 'batch'
                      (1.3\%) and 'sample position' (0.6\%), and in a
                      diminished variability attributable to 'chip' within a batch
                      (0.9\%). After ComBat or the residuals' corrections, a
                      larger number of significant sites (k = 600 and
                      k = 427, respectively) were associated to smoking status
                      than the SVA correction (k = 96). The three correction
                      methods removed systematic variation in DNA methylation
                      data, as assessed by the PC-PR2, which lent itself as a
                      useful tool to explore variability in large dimension data.
                      SVA produced more conservative findings than ComBat in the
                      association between smoking and DNA methylation.},
      cin          = {C020},
      ddc          = {610},
      cid          = {I:(DE-He78)C020-20160331},
      pnm          = {313 - Cancer risk factors and prevention (POF3-313)},
      pid          = {G:(DE-HGF)POF3-313},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:29588806},
      pmc          = {pmc:PMC5863487},
      doi          = {10.1186/s13148-018-0471-6},
      url          = {https://inrepo02.dkfz.de/record/132751},
}