Skip to content

Commit 42a0663

Browse files
Alfredo Altamiranometa-codesync[bot]
authored andcommitted
Optimize PrefixMap find by walking through small prefixes
Summary: When the bucket's SmallPrefix differs from the query's (or query < 8 bytes), skip the binary search entirely and walk backwards from roughTo using pre-built SmallPrefix values and precomputed masks. This avoids touching fullPrefixes_ string data entirely — we only do uint64 comparisons. * Add SmallPrefix::starts_with_mask() that uses a precomputed mask for the fast-path walk, avoiding runtime bit-counting. * Add bucketInfo_ vector of (bucketIdx, prefixMask) pairs: the bucket index locates the pre-built SmallPrefix in smallPrefixes_, and the mask has 1-bits for the significant prefix bytes (derived from min(prefix.size(), 8 ) at construction). Both are read in the same cache line during the walk. * Populate bucketInfo_ in a separate loop after smallPrefixes_ is fully built, so we don't depend on iterator stability during insertions. * Template findPrefix into findPrefixImpl<enableSmallPrefixLookup> so the constructor can use the slow path (findPrefixImpl<false>) while bucketInfo_ is not yet populated. Benchmark command: ``` buck run fbcode/mode/opt fbcode//mcrouter/lib/fbi/cpp/test/facebook:string_prefix_map_benchmark -- --bm_regex _Lb --bm_mode=adaptive --bm_min_secs=5 --bm_max_secs=30 ``` # x86 Before: ``` ============================================================================ [...]facebook/StringPrefixMapBenchmark.cpp relative time/iter iters/s ============================================================================ PrefixMapInCache_Lb 68.87ns 14.52M PrefixMapNotInCache_Lb 305.95ns 3.27M ``` After: ``` ============================================================================ [...]facebook/StringPrefixMapBenchmark.cpp relative time/iter iters/s ============================================================================ PrefixMapInCache_Lb 54.56ns 18.33M PrefixMapNotInCache_Lb 233.65ns 4.28M ``` - PrefixMapInCache_Lb: 68.87ns → 54.56ns (21% less time/iter) - PrefixMapNotInCache_Lb: 305.95ns → 233.65ns (24% less time/iter) # ARM Before ``` ============================================================================ [...]facebook/StringPrefixMapBenchmark.cpp relative time/iter iters/s ============================================================================ PrefixMapInCache_Lb 93.37ns 10.71M PrefixMapNotInCache_Lb 458.93ns 2.18M ``` After ``` ============================================================================ [...]facebook/StringPrefixMapBenchmark.cpp relative time/iter iters/s ============================================================================ PrefixMapInCache_Lb 76.96ns 12.99M PrefixMapNotInCache_Lb 376.88ns 2.65M ``` - PrefixMapInCache_Lb: 93.37ns → 76.96ns (18% less time/iter) - PrefixMapNotInCache_Lb: 458.93ns → 376.88ns (18% less time/iter) Reviewed By: DenisYaroshevskiy Differential Revision: D97313182 fbshipit-source-id: ba0e9ddd59e4eb9a8df367860260e30cb06d5d5a
1 parent 067ebcd commit 42a0663

3 files changed

Lines changed: 108 additions & 12 deletions

File tree

‎mcrouter/lib/fbi/cpp/LowerBoundPrefixMap.cpp‎

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,12 @@
77

88
#include "mcrouter/lib/fbi/cpp/LowerBoundPrefixMap.h"
99

10+
#include <cassert>
1011
#include <cstring>
1112
#include <ostream>
1213

14+
#include <folly/lang/Bits.h>
15+
1316
namespace facebook::memcache::detail {
1417
namespace {
1518

@@ -46,6 +49,14 @@ I branchlessUpperBound(I f, I l, const T& x, Compare comp) {
4649

4750
} // namespace
4851

52+
FOLLY_ALWAYS_INLINE
53+
bool checkSmallPrefixStartsWith(
54+
SmallPrefix full,
55+
SmallPrefix start,
56+
std::uint64_t lengthMask) {
57+
return (full.data_ & lengthMask) == start.data_;
58+
}
59+
4960
std::ostream& operator<<(std::ostream& os, const SmallPrefix& self) {
5061
std::uint64_t correctOrder = folly::Endian::swap(self.data_);
5162
std::string_view s(reinterpret_cast<const char*>(correctOrder), 8u);
@@ -57,6 +68,7 @@ LowerBoundPrefixMapCommon::LowerBoundPrefixMapCommon(
5768
: fullPrefixes_(std::move(sortedUniquePrefixes)) {
5869
smallPrefixes_.reserve(fullPrefixes_.size() + 1);
5970
previousPrefix_.reserve(fullPrefixes_.size());
71+
smallPrefixExtraInfo_.reserve(fullPrefixes_.size());
6072

6173
// Adding an empty string with no matches.
6274
// This acts as a sentinel so we are always guranteed to find an
@@ -68,11 +80,18 @@ LowerBoundPrefixMapCommon::LowerBoundPrefixMapCommon(
6880

6981
for (std::size_t i = 0; i != fullPrefixes_.size(); ++i) {
7082
const auto& prefix = fullPrefixes_[i];
83+
// Null bytes are not expected. If present it would break the
84+
// findPrefix fast SmallPrefix path (e.g. SmallPrefix "ab\0", query "ab").
85+
assert(prefix.find('\0') == std::string_view::npos);
7186

7287
// Prefixes always come before lexicographically, so if there is one
7388
// we will find it.
7489
previousPrefix_.push_back(findPrefix(prefix));
7590

91+
auto len = std::min<size_t>(prefix.size(), (size_t)8);
92+
auto mask = folly::n_most_significant_bits<std::uint64_t>(len * 8);
93+
smallPrefixExtraInfo_.push_back({SmallPrefix{prefix}, mask});
94+
7695
// Add small prefix ---
7796
// if it is there already - update existing, otherwise insert [i, i+1]
7897
//
@@ -90,26 +109,38 @@ LowerBoundPrefixMapCommon::LowerBoundPrefixMapCommon(
90109

91110
std::uint32_t LowerBoundPrefixMapCommon::findPrefix(
92111
std::string_view query) const noexcept {
112+
SmallPrefix qSmall{query};
93113
#ifdef __aarch64__
94114
auto afterPrefix = branchlessUpperBound(
95115
smallPrefixes_.begin(),
96116
smallPrefixes_.end(),
97-
SmallPrefix{query},
117+
qSmall,
98118
[](const SmallPrefix& v, const auto& elem) { return v < elem.first; });
99119
#else
100120
// Due to a sentinel - guaranteed to not be .begin()
101-
auto afterPrefix = smallPrefixes_.upper_bound(SmallPrefix{query});
121+
auto afterPrefix = smallPrefixes_.upper_bound(qSmall);
102122
#endif
103-
auto [roughFrom, roughTo] = std::prev(afterPrefix)->second;
104-
105-
// Binary search complete strings between rough boundaries.
106-
// NOTE: which array we search - doesn't matter -
107-
// we just want indexes.
108-
auto cur = std::upper_bound(
109-
fullPrefixes_.begin() + roughFrom,
110-
fullPrefixes_.begin() + roughTo,
111-
query) -
112-
fullPrefixes_.begin();
123+
const auto bucketIt = std::prev(afterPrefix);
124+
auto [roughFrom, roughTo] = bucketIt->second;
125+
126+
if (bucketIt->first < qSmall || query.size() < 8) {
127+
std::uint32_t cur = roughTo;
128+
while (cur != 0 &&
129+
!checkSmallPrefixStartsWith(
130+
qSmall,
131+
smallPrefixExtraInfo_[cur - 1].smallPrefix,
132+
smallPrefixExtraInfo_[cur - 1].lengthMask)) {
133+
cur = previousPrefix_[cur - 1];
134+
}
135+
return cur;
136+
}
137+
138+
std::uint32_t cur = static_cast<std::uint32_t>(
139+
std::upper_bound(
140+
fullPrefixes_.begin() + roughFrom,
141+
fullPrefixes_.begin() + roughTo,
142+
query) -
143+
fullPrefixes_.begin());
113144

114145
while (cur != 0 &&
115146
!std_string_view_starts_with(query, fullPrefixes_[cur - 1])) {

‎mcrouter/lib/fbi/cpp/LowerBoundPrefixMap.h‎

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,20 @@ class SmallPrefix {
8181
}
8282

8383
friend std::ostream& operator<<(std::ostream& os, const SmallPrefix& self);
84+
friend bool checkSmallPrefixStartsWith(
85+
SmallPrefix full,
86+
SmallPrefix start,
87+
std::uint64_t lengthMask);
8488

8589
private:
8690
std::uint64_t data_ = 0;
8791
};
8892

93+
struct SmallPrefixExtraInfo {
94+
SmallPrefix smallPrefix;
95+
std::uint64_t lengthMask = 0;
96+
};
97+
8998
// What is the idea
9099
//
91100
// If a string (query) has a prefix in a sorted lexicographically array
@@ -125,6 +134,14 @@ struct LowerBoundPrefixMapCommon {
125134
// ^________|
126135
// This is the index of that prefix, base 1 (0 means absence).
127136
std::vector<std::uint32_t> previousPrefix_;
137+
138+
// Sometimes we get to check SmallPrefixes for starts with instead of full
139+
// prefixes. This is faster but we need some extra information - which we
140+
// store for each full prefix.
141+
// NOTE: in theory this is suboptimal, because we iterate through a list of
142+
// full prefixes and there are less SmallPrefixes but our experiments with
143+
// that so far were unfruitful.
144+
std::vector<SmallPrefixExtraInfo> smallPrefixExtraInfo_;
128145
};
129146

130147
template <typename Storage>

‎mcrouter/lib/fbi/cpp/test/LowerBoundPrefixMapTest.cpp‎

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,5 +210,53 @@ TEST(LowerBoundPrefixMapTest, NullKey) {
210210
ASSERT_EQ(lbMap.findPrefix(std::string_view{}), lbMap.end());
211211
}
212212

213+
TEST(LowerBoundPrefixMapTest, LongPrefixShortQuery) {
214+
// Entries sharing the same 8-byte prefix but with different lengths.
215+
// Queries of exactly 8 bytes must not falsely match longer entries.
216+
std::vector<KeyValue> kv = {
217+
KeyValue{"abcdefgh", 1},
218+
KeyValue{"abcdefghijk", 2},
219+
};
220+
LowerBoundPrefixMap<int> lbMap{kv};
221+
222+
// "abcdefgh" matches exactly
223+
ASSERT_NE(lbMap.end(), lbMap.findPrefix("abcdefgh"));
224+
ASSERT_EQ("abcdefgh", lbMap.findPrefix("abcdefgh")->key());
225+
ASSERT_EQ(1, lbMap.findPrefix("abcdefgh")->value());
226+
227+
// "abcdefghij" — "abcdefgh" is a prefix, not "abcdefghijk"
228+
ASSERT_NE(lbMap.end(), lbMap.findPrefix("abcdefghij"));
229+
ASSERT_EQ("abcdefgh", lbMap.findPrefix("abcdefghij")->key());
230+
231+
// "abcdefghijk" matches the longer entry exactly
232+
ASSERT_NE(lbMap.end(), lbMap.findPrefix("abcdefghijk"));
233+
ASSERT_EQ("abcdefghijk", lbMap.findPrefix("abcdefghijk")->key());
234+
ASSERT_EQ(2, lbMap.findPrefix("abcdefghijk")->value());
235+
236+
// "abcdefghijkz" — "abcdefghijk" is the longest prefix
237+
ASSERT_NE(lbMap.end(), lbMap.findPrefix("abcdefghijkz"));
238+
ASSERT_EQ("abcdefghijk", lbMap.findPrefix("abcdefghijkz")->key());
239+
240+
// "abcdefg" (7 bytes) — no entry is a prefix of this
241+
ASSERT_EQ(lbMap.end(), lbMap.findPrefix("abcdefg"));
242+
}
243+
244+
TEST(LowerBoundPrefixMapTest, SameBucketWalkSkipsSuffix) {
245+
// Entries share the same 8-byte prefix. "abcdefghXXX" is NOT a prefix of
246+
// "abcdefghYYY", but "abcdefgh" IS. The walk must check the suffix (bytes
247+
// 8+) before accepting a match, not stop at the first SmallPrefix match.
248+
std::vector<KeyValue> kv = {
249+
KeyValue{"abcdefgh", 1},
250+
KeyValue{"abcdefghXXX", 2},
251+
};
252+
LowerBoundPrefixMap<int> lbMap{kv};
253+
254+
// Query "abcdefghYYY": "abcdefghXXX" shares the 8-byte prefix but is not
255+
// a prefix. Must walk past it to find "abcdefgh".
256+
ASSERT_NE(lbMap.end(), lbMap.findPrefix("abcdefghYYY"));
257+
ASSERT_EQ("abcdefgh", lbMap.findPrefix("abcdefghYYY")->key());
258+
ASSERT_EQ(1, lbMap.findPrefix("abcdefghYYY")->value());
259+
}
260+
213261
} // namespace
214262
} // namespace facebook::memcache

0 commit comments

Comments
 (0)