% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article: evaluation and blocking extensions of the Mainzelliste
% record linkage software (J. Transl. Med. 19(1), article no. 33, 2021).
% NOTE(review): the repository export tagged M. Lablans with a "$^*$" marker
% inside the author field (presumably an institutional-affiliation flag --
% confirm). It was removed because BibTeX would parse it as part of the
% surname and break sorting and labels.
% Fields such as reportid, cin, ddc, cid, pnm, pid, typ and pubmed are
% repository metadata; standard styles ignore unknown fields.
@article{Rohde:166796,
      author       = {Rohde, F. and Franke, M. and Sehili, Z. and Lablans, M. and
                      Rahm, E.},
      title        = {Optimization of the {Mainzelliste} software for fast
                      privacy-preserving record linkage},
      journal      = {Journal of Translational Medicine},
      volume       = {19},
      number       = {1},
      issn         = {1479-5876},
      address      = {London},
      publisher    = {BioMed Central},
      reportid     = {DKFZ-2021-00126},
      pages        = {33},
      year         = {2021},
      abstract     = {Data analysis for biomedical research often requires a
                      record linkage step to identify records from multiple data
                      sources referring to the same person. Due to the lack of
                      unique personal identifiers across these sources, record
                      linkage relies on the similarity of personal data such as
                      first and last names or birth dates. However, the exchange
                      of such identifying data with a third party, as is the case
                      in record linkage, is generally subject to strict privacy
                      requirements. This problem is addressed by
                      privacy-preserving record linkage (PPRL) and
                      pseudonymization services. Mainzelliste is an open-source
                      record linkage and pseudonymization service used to carry
                      out PPRL processes in real-world use cases. We evaluate the
                      linkage quality and performance of the linkage process using
                      several real and near-real datasets with different
                      properties w.r.t. size and error-rate of matching records.
                      We conduct a comparison between (plaintext) record linkage
                      and PPRL based on encoded records (Bloom filters).
                      Furthermore, since the Mainzelliste software offers no
                      blocking mechanism, we extend it by phonetic blocking as
                      well as novel blocking schemes based on locality-sensitive
                      hashing (LSH) to improve runtime for both standard and
                      privacy-preserving record linkage. The Mainzelliste achieves
                      high linkage quality for PPRL using field-level Bloom
                      filters due to the use of an error-tolerant matching
                      algorithm that can handle variances in names, in particular
                      missing or transposed name compounds. However, due to the
                      absence of blocking, the runtimes are unacceptable for real
                      use cases with larger datasets. The newly implemented
                      blocking approaches improve runtimes by orders of magnitude
                      while retaining high linkage quality. We conduct the first
                      comprehensive evaluation of the record linkage facilities of
                      the Mainzelliste software and extend it with blocking
                      methods to improve its runtime. We observed a very high
                      linkage quality for both plaintext as well as encoded data
                      even in the presence of errors. The provided blocking
                      methods provide order of magnitude improvements regarding
                      runtime performance thus facilitating the use in research
                      projects with large datasets and many participants.},
      keywords     = {Blocking (Other) / Locality-sensitive hashing (Other) /
                      Mainzelliste (Other) / Privacy-preserving record linkage
                      (Other)},
      cin          = {E260},
      ddc          = {610},
      cid          = {I:(DE-He78)E260-20160331},
      pnm          = {315 - Bildgebung und Radioonkologie (POF4-315)},
      pid          = {G:(DE-HGF)POF4-315},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:33451317},
      doi          = {10.1186/s12967-020-02678-1},
      url          = {https://inrepo02.dkfz.de/record/166796},
}