Bläddra i källkod

Add hashtable benchmark coverage for integers with low zero bits (#5735)

These present unique challenges for our hashing scheme, so it's useful
to make sure the hash functions we use can handle them.

Some other work on Abseil's hash tables uncovered that this pattern might be
risky and surfaced some possible improvements to reduce the impact here,
but the first step is to start covering this path in the benchmarks.

---------

Co-authored-by: Jon Ross-Perkins <jperkins@google.com>
Chandler Carruth 10 månader sedan
förälder
incheckning
b39c7c93aa

+ 1 - 0
common/BUILD

@@ -476,6 +476,7 @@ cc_library(
         "@abseil-cpp//absl/base:no_destructor",
         "@abseil-cpp//absl/hash",
         "@abseil-cpp//absl/random",
+        "@boost_unordered",
         "@google_benchmark//:benchmark",
         "@llvm-project//llvm:Support",
     ],

+ 17 - 0
common/map_benchmark.cpp

@@ -19,6 +19,7 @@ using RawHashtable::CarbonHashDI;
 using RawHashtable::GetKeysAndHitKeys;
 using RawHashtable::GetKeysAndMissKeys;
 using RawHashtable::HitArgs;
+using RawHashtable::LowZeroBitInt;
 using RawHashtable::ReportTableMetrics;
 using RawHashtable::SizeArgs;
 using RawHashtable::ValueToBool;
@@ -328,6 +329,22 @@ static void BM_MapLookupHit(benchmark::State& state) {
 }
 MAP_BENCHMARK_ONE_OP(BM_MapLookupHit, HitArgs);
 
+// We also do some minimal benchmarking with integers that have a
+// large number of low zero bits shifted into them. These present particular
+// challenges to the hashing strategy Carbon's hash tables use and so they help
+// form stress tests and benchmarks to make sure the hash function quality
+// remains reasonable even under adverse conditions. We can't go past a certain
+// limit here without our hash tables becoming impossibly slow due to complete
+// collapse of the hash functions -- if we ever need to hash integers with more
+// than 32 low zero bits, we'll ask that code to use a custom hash algorithm.
+//
+// We don't benchmark these everywhere as they only provide marginal information
+// beyond the core types, and checking just this operation covers that
+// sufficiently.
+MAP_BENCHMARK_ONE_OP_SIZE(BM_MapLookupHit, HitArgs, LowZeroBitInt<12>, int);
+MAP_BENCHMARK_ONE_OP_SIZE(BM_MapLookupHit, HitArgs, LowZeroBitInt<24>, int);
+MAP_BENCHMARK_ONE_OP_SIZE(BM_MapLookupHit, HitArgs, LowZeroBitInt<32>, int);
+
 // This is an update throughput benchmark in practice. While whether the key was
 // a hit is kept in the critical path, we only use keys that are hits and so
 // expect that to be fully predicted and speculated.

+ 43 - 0
common/raw_hashtable_benchmark_helpers.cpp

@@ -221,6 +221,16 @@ static absl::NoDestructor<llvm::OwningArrayRef<int>> raw_int_keys{[] {
   return keys;
 }()};
 
+template <int LowZeroBits>
+static absl::NoDestructor<llvm::OwningArrayRef<LowZeroBitInt<LowZeroBits>>>
+    raw_low_zero_bit_int_keys{[] {
+      llvm::OwningArrayRef<LowZeroBitInt<LowZeroBits>> keys(MaxNumKeys);
+      for (auto [index, key] : llvm::enumerate(keys)) {
+        key = LowZeroBitInt<LowZeroBits>(index + 1);
+      }
+      return keys;
+    }()};
+
 namespace {
 
 // Allow generically dispatching over the specific key types we build and
@@ -231,6 +241,12 @@ auto GetRawKeys() -> llvm::ArrayRef<T> {
     return *raw_str_keys;
   } else if constexpr (std::is_pointer_v<T>) {
     return *raw_ptr_keys;
+  } else if constexpr (std::is_same_v<T, LowZeroBitInt<12>>) {
+    return *raw_low_zero_bit_int_keys<12>;
+  } else if constexpr (std::is_same_v<T, LowZeroBitInt<24>>) {
+    return *raw_low_zero_bit_int_keys<24>;
+  } else if constexpr (std::is_same_v<T, LowZeroBitInt<32>>) {
+    return *raw_low_zero_bit_int_keys<32>;
   } else {
     return *raw_int_keys;
   }
@@ -305,6 +321,15 @@ template auto GetKeysAndMissKeys<int*>(ssize_t size)
 template auto GetKeysAndMissKeys<llvm::StringRef>(ssize_t size)
     -> std::pair<llvm::ArrayRef<llvm::StringRef>,
                  llvm::ArrayRef<llvm::StringRef>>;
+template auto GetKeysAndMissKeys<LowZeroBitInt<12>>(ssize_t size)
+    -> std::pair<llvm::ArrayRef<LowZeroBitInt<12>>,
+                 llvm::ArrayRef<LowZeroBitInt<12>>>;
+template auto GetKeysAndMissKeys<LowZeroBitInt<24>>(ssize_t size)
+    -> std::pair<llvm::ArrayRef<LowZeroBitInt<24>>,
+                 llvm::ArrayRef<LowZeroBitInt<24>>>;
+template auto GetKeysAndMissKeys<LowZeroBitInt<32>>(ssize_t size)
+    -> std::pair<llvm::ArrayRef<LowZeroBitInt<32>>,
+                 llvm::ArrayRef<LowZeroBitInt<32>>>;
 
 template <typename T>
 auto GetKeysAndHitKeys(ssize_t table_keys_size, ssize_t lookup_keys_size)
@@ -322,6 +347,18 @@ template auto GetKeysAndHitKeys<llvm::StringRef>(ssize_t size,
                                                  ssize_t lookup_keys_size)
     -> std::pair<llvm::ArrayRef<llvm::StringRef>,
                  llvm::ArrayRef<llvm::StringRef>>;
+template auto GetKeysAndHitKeys<LowZeroBitInt<12>>(ssize_t size,
+                                                   ssize_t lookup_keys_size)
+    -> std::pair<llvm::ArrayRef<LowZeroBitInt<12>>,
+                 llvm::ArrayRef<LowZeroBitInt<12>>>;
+template auto GetKeysAndHitKeys<LowZeroBitInt<24>>(ssize_t size,
+                                                   ssize_t lookup_keys_size)
+    -> std::pair<llvm::ArrayRef<LowZeroBitInt<24>>,
+                 llvm::ArrayRef<LowZeroBitInt<24>>>;
+template auto GetKeysAndHitKeys<LowZeroBitInt<32>>(ssize_t size,
+                                                   ssize_t lookup_keys_size)
+    -> std::pair<llvm::ArrayRef<LowZeroBitInt<32>>,
+                 llvm::ArrayRef<LowZeroBitInt<32>>>;
 
 template <typename T>
 auto DumpHashStatistics(llvm::ArrayRef<T> keys) -> void {
@@ -381,6 +418,12 @@ auto DumpHashStatistics(llvm::ArrayRef<T> keys) -> void {
   }
 }
 template auto DumpHashStatistics(llvm::ArrayRef<int> keys) -> void;
+template auto DumpHashStatistics(llvm::ArrayRef<LowZeroBitInt<12>> keys)
+    -> void;
+template auto DumpHashStatistics(llvm::ArrayRef<LowZeroBitInt<24>> keys)
+    -> void;
+template auto DumpHashStatistics(llvm::ArrayRef<LowZeroBitInt<32>> keys)
+    -> void;
 template auto DumpHashStatistics(llvm::ArrayRef<int*> keys) -> void;
 template auto DumpHashStatistics(llvm::ArrayRef<llvm::StringRef> keys) -> void;
 

+ 69 - 0
common/raw_hashtable_benchmark_helpers.h

@@ -8,6 +8,8 @@
 #include <benchmark/benchmark.h>
 #include <sys/types.h>
 
+#include <boost/unordered/unordered_flat_map.hpp>
+#include <compare>
 #include <limits>
 #include <map>
 #include <vector>
@@ -55,6 +57,42 @@ auto GetKeysAndHitKeys(ssize_t table_keys_size, ssize_t lookup_keys_size)
 template <typename T>
 auto DumpHashStatistics(llvm::ArrayRef<T> keys) -> void;
 
+// A type that works like an `int` but shifting in the specified number of low
+// zero bits. This is only intended for testing hash tables with especially
+// difficult to hash integer values, it isn't meant to be used otherwise.
+template <int LowZeroBits>
+struct LowZeroBitInt {
+  int64_t shifted_value = 0;
+
+  explicit constexpr LowZeroBitInt() = default;
+  explicit constexpr LowZeroBitInt(int64_t value)
+      : shifted_value(value << LowZeroBits) {}
+
+  friend auto operator<<(llvm::raw_ostream& out, const LowZeroBitInt& value)
+      -> llvm::raw_ostream& {
+    return out << value.shifted_value;
+  }
+
+  constexpr auto operator==(const LowZeroBitInt& rhs) const -> bool = default;
+  constexpr auto operator<=>(const LowZeroBitInt& rhs) const
+      -> std::strong_ordering = default;
+
+  friend auto CarbonHashValue(const LowZeroBitInt& value, uint64_t seed)
+      -> HashCode {
+    return HashValue(value.shifted_value, seed);
+  }
+
+  template <typename H>
+  friend auto AbslHashValue(H h, const LowZeroBitInt& value) -> H {
+    return H::combine(std::move(h), value.shifted_value);
+  }
+
+  friend auto hash_value(const LowZeroBitInt& value) -> size_t {
+    boost::hash<int64_t> hasher;
+    return hasher(value.shifted_value);
+  }
+};
+
 // Convert values used in hashtable benchmarking to a bool. This is used to form
 // dependencies between values stored in the hashtable between benchmark
 // iterations.
@@ -151,6 +189,19 @@ struct CarbonHashDI<int> {
   }
 };
 
+template <int LowZeroBits>
+struct CarbonHashDI<LowZeroBitInt<LowZeroBits>> {
+  using IntT = LowZeroBitInt<LowZeroBits>;
+  static auto getEmptyKey() -> IntT { return IntT(-1); }
+  static auto getTombstoneKey() -> IntT { return IntT(-2); }
+  static auto getHashValue(const IntT val) -> unsigned {
+    return static_cast<uint64_t>(HashValue(val));
+  }
+  static auto isEqual(const IntT lhs, const IntT rhs) -> bool {
+    return lhs == rhs;
+  }
+};
+
 template <typename T>
 struct CarbonHashDI<T*> {
   static constexpr uintptr_t Log2MaxAlign = 12;
@@ -238,4 +289,22 @@ auto ReportTableMetrics(const TableT& table, benchmark::State& state) -> void {
 
 }  // namespace Carbon::RawHashtable
 
+namespace llvm {
+
+// Enable LLVM to hash our special stress testing integer type.
+template <int LowZeroBits>
+struct DenseMapInfo<Carbon::RawHashtable::LowZeroBitInt<LowZeroBits>> {
+  using IntT = Carbon::RawHashtable::LowZeroBitInt<LowZeroBits>;
+  static auto getEmptyKey() -> IntT { return IntT(-1); }
+  static auto getTombstoneKey() -> IntT { return IntT(-2); }
+  static auto getHashValue(const IntT val) -> unsigned {
+    return DenseMapInfo<int64_t>::getHashValue(val.shifted_value);
+  }
+  static auto isEqual(const IntT lhs, const IntT rhs) -> bool {
+    return lhs == rhs;
+  }
+};
+
+}  // namespace llvm
+
 #endif  // CARBON_COMMON_RAW_HASHTABLE_BENCHMARK_HELPERS_H_