@inproceedings{7e7efda0774a4166a890682d657436ae,
title = "Cache-oblivious High-performance Similarity Join",
abstract = "A similarity join combines vectors based on a distance condition. Typically, such algorithms apply a filter step (by indexing or sorting) and then refine pairs of candidate vectors. In this paper, we propose to refine the pairs in an order defined by a space-filling curve which dramatically improves data locality. Modern multi-core microprocessors are supported by a deep memory hierarchy including RAM, various levels of cache, and registers. The space-filling curve makes our proposed algorithm cache-oblivious to fully exploit the memory hierarchy and to reach the possible peak performance of a multi-core processor. Our novel space-filling curve called Fast General Form (FGF) Hilbert solves a number of limitations of well-known approaches: it is non-recursive, it is not restricted to traverse squares, and it has a constant time and space complexity. As we demonstrate the easy transformation from conventional into cache-oblivious loops we believe that many algorithms for complex joins and other database operators could be transformed systematically into cache-oblivious SIMD and MIMD parallel algorithms.",
keywords = "ALGORITHM, CURVES, Cache-oblivious, Epsilon Grid Order, Hilbert-curve, INDEX, METRIC-SPACES, NEAREST-NEIGHBOR, SET, Similarity Join, Space-filling curve",
author = "Martin Perdacher and Claudia Plant and Christian B{\"o}hm",
note = "Publisher Copyright: {\textcopyright} 2019 Association for Computing Machinery.; ACM SIGMOD Conference ; Conference date: 30-06-2019 Through 05-07-2019",
year = "2019",
doi = "10.1145/3299869.3319859",
language = "English",
pages = "87--104",
booktitle = "Proceedings of the 2019 International Conference on Management of Data, SIGMOD Conference 2019, Amsterdam, The Netherlands, June 30 - July 5, 2019.",
url = "http://sigmod2019.org/",
}