set.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_COMMON_SET_H_
  5. #define CARBON_COMMON_SET_H_
  6. #include <concepts>
  7. #include "common/check.h"
  8. #include "common/hashtable_key_context.h"
  9. #include "common/raw_hashtable.h"
  10. #include "llvm/Support/Compiler.h"
  11. namespace Carbon {
  12. // Forward declarations to resolve cyclic references.
  13. template <typename KeyT, typename KeyContextT>
  14. class SetView;
  15. template <typename KeyT, typename KeyContextT>
  16. class SetBase;
  17. template <typename KeyT, ssize_t SmallSize, typename KeyContextT>
  18. class Set;
  19. // A read-only view type for a set of keys.
  20. //
  21. // This view is a cheap-to-copy type that should be passed by value, but
  22. // provides view or read-only reference semantics to the underlying set data
  23. // structure.
  24. //
  25. // This should always be preferred to a `const`-ref parameter for the `SetBase`
  26. // or `Set` type as it provides more flexibility and a cleaner API.
  27. //
  28. // Note that while this type is a read-only view, that applies to the underlying
  29. // *set* data structure, not the individual entries stored within it. Those can
  30. // be mutated freely as long as both the hashes and equality of the keys are
  31. // preserved. If we applied a deep-`const` design here, it would prevent using
  32. // this type in situations where the keys carry state (unhashed and not part of
  33. // equality) that is mutated while the associative container is not. A view of
  34. // immutable data can always be obtained by using `SetView<const T>`, and we
  35. // enable conversions to more-const views. This mirrors the semantics of views
  36. // like `std::span`.
  37. //
  38. // A specific `KeyContextT` type can optionally be provided to configure how
  39. // keys will be hashed and compared. The default is `DefaultKeyContext` which is
  40. // stateless and will hash using `Carbon::HashValue` and compare using
  41. // `operator==`. Every method accepting a lookup key or operating on the keys in
  42. // the table will also accept an instance of this type. For stateless context
  43. // types, including the default, an instance will be default constructed if not
  44. // provided to these methods. However, stateful contexts should be constructed
  45. // and passed in explicitly. The context type should be small and reasonable to
  46. // pass by value, often a wrapper or pointer to the relevant context needed for
  47. // hashing and comparing keys. For more details about the key context, see
  48. // `hashtable_key_context.h`.
  49. template <typename InputKeyT, typename InputKeyContextT = DefaultKeyContext>
  50. class SetView : RawHashtable::ViewImpl<InputKeyT, void, InputKeyContextT> {
  51. using ImplT = RawHashtable::ViewImpl<InputKeyT, void, InputKeyContextT>;
  52. public:
  53. using KeyT = typename ImplT::KeyT;
  54. using KeyContextT = typename ImplT::KeyContextT;
  55. // This type represents the result of lookup operations. It encodes whether
  56. // the lookup was a success as well as accessors for the key.
  57. class LookupResult {
  58. public:
  59. LookupResult() = default;
  60. explicit LookupResult(KeyT& key) : key_(&key) {}
  61. explicit operator bool() const { return key_ != nullptr; }
  62. auto key() const -> KeyT& { return *key_; }
  63. private:
  64. KeyT* key_ = nullptr;
  65. };
  66. // Enable implicit conversions that add `const`-ness to the key type.
  67. // NOLINTNEXTLINE(google-explicit-constructor)
  68. SetView(SetView<std::remove_const_t<KeyT>, KeyContextT> other_view)
  69. requires(!std::same_as<KeyT, std::remove_const_t<KeyT>>)
  70. : ImplT(other_view) {}
  71. // Tests whether a key is present in the set.
  72. template <typename LookupKeyT>
  73. auto Contains(LookupKeyT lookup_key,
  74. KeyContextT key_context = KeyContextT()) const -> bool;
  75. // Lookup a key in the set.
  76. template <typename LookupKeyT>
  77. auto Lookup(LookupKeyT lookup_key,
  78. KeyContextT key_context = KeyContextT()) const -> LookupResult;
  79. // Run the provided callback for every key in the set.
  80. template <typename CallbackT>
  81. void ForEach(CallbackT callback)
  82. requires(std::invocable<CallbackT, KeyT&>);
  83. // This routine is relatively inefficient and only intended for use in
  84. // benchmarking or logging of performance anomalies. The specific count
  85. // returned has no specific guarantees beyond being informative in benchmarks.
  86. // It counts how many of the keys in the hashtable have required probing
  87. // beyond their initial group of slots.
  88. //
  89. // TODO: Replace with a more general metrics routine that covers other
  90. // important aspects such as load factor, and average probe *distance*.
  91. auto CountProbedKeys(KeyContextT key_context = KeyContextT()) -> ssize_t {
  92. return ImplT::CountProbedKeys(key_context);
  93. }
  94. private:
  95. template <typename SetKeyT, ssize_t SmallSize, typename KeyContextT>
  96. friend class Set;
  97. friend class SetBase<KeyT, KeyContextT>;
  98. friend class SetView<const KeyT, KeyContextT>;
  99. using EntryT = typename ImplT::EntryT;
  100. SetView() = default;
  101. // NOLINTNEXTLINE(google-explicit-constructor): Implicit by design.
  102. SetView(ImplT base) : ImplT(base) {}
  103. SetView(ssize_t size, RawHashtable::Storage* storage)
  104. : ImplT(size, storage) {}
  105. };
  106. // A base class for a `Set` type that remains mutable while type-erasing the
  107. // `SmallSize` (SSO) template parameter.
  108. //
  109. // A pointer or reference to this type is the preferred way to pass a mutable
  110. // handle to a `Set` type across API boundaries as it avoids encoding specific
  111. // SSO sizing information while providing a near-complete mutable API.
  112. template <typename InputKeyT, typename InputKeyContextT>
  113. class SetBase
  114. : protected RawHashtable::BaseImpl<InputKeyT, void, InputKeyContextT> {
  115. protected:
  116. using ImplT = RawHashtable::BaseImpl<InputKeyT, void, InputKeyContextT>;
  117. public:
  118. using KeyT = typename ImplT::KeyT;
  119. using KeyContextT = typename ImplT::KeyContextT;
  120. using ViewT = SetView<KeyT, KeyContextT>;
  121. using LookupResult = typename ViewT::LookupResult;
  122. // The result type for insertion operations both indicates whether an insert
  123. // was needed (as opposed to the key already being in the set), and provides
  124. // access to the key.
  125. class InsertResult {
  126. public:
  127. InsertResult() = default;
  128. explicit InsertResult(bool inserted, KeyT& key)
  129. : key_(&key), inserted_(inserted) {}
  130. auto is_inserted() const -> bool { return inserted_; }
  131. auto key() const -> KeyT& { return *key_; }
  132. private:
  133. KeyT* key_;
  134. bool inserted_;
  135. };
  136. // Implicitly convertible to the relevant view type.
  137. //
  138. // NOLINTNEXTLINE(google-explicit-constructor): Designed to implicitly decay.
  139. operator ViewT() const { return this->view_impl(); }
  140. // We can't chain the above conversion with the conversions on `ViewT` to add
  141. // const, so explicitly support adding const to produce a view here.
  142. //
  143. // NOLINTNEXTLINE(google-explicit-constructor): Designed to implicitly decay.
  144. operator SetView<const KeyT, KeyContextT>() const { return ViewT(*this); }
  145. // Convenience forwarder to the view type.
  146. template <typename LookupKeyT>
  147. auto Contains(LookupKeyT lookup_key,
  148. KeyContextT key_context = KeyContextT()) const -> bool {
  149. return ViewT(*this).Contains(lookup_key, key_context);
  150. }
  151. // Convenience forwarder to the view type.
  152. template <typename LookupKeyT>
  153. auto Lookup(LookupKeyT lookup_key,
  154. KeyContextT key_context = KeyContextT()) const -> LookupResult {
  155. return ViewT(*this).Lookup(lookup_key, key_context);
  156. }
  157. // Convenience forwarder to the view type.
  158. template <typename CallbackT>
  159. void ForEach(CallbackT callback)
  160. requires(std::invocable<CallbackT, KeyT&>)
  161. {
  162. return ViewT(*this).ForEach(callback);
  163. }
  164. // Convenience forwarder to the view type.
  165. auto CountProbedKeys(KeyContextT key_context = KeyContextT()) const
  166. -> ssize_t {
  167. return ViewT(*this).CountProbedKeys(key_context);
  168. }
  169. // Insert a key into the set. If the key is already present, no insertion is
  170. // performed and that present key is available in the result. Otherwise a new
  171. // key is inserted and constructed from the argument and available in the
  172. // result.
  173. template <typename LookupKeyT>
  174. auto Insert(LookupKeyT lookup_key, KeyContextT key_context = KeyContextT())
  175. -> InsertResult;
  176. // Insert a key into the set and call the provided callback to allow in-place
  177. // construction of the key if not already present. The lookup key is passed
  178. // through to the callback so it needn't be captured and can be kept in a
  179. // register argument throughout.
  180. //
  181. // Example:
  182. // ```cpp
  183. // m.Insert("widget", [](MyStringViewType lookup_key, void* key_storage) {
  184. // new (key_storage) MyStringType(lookup_key);
  185. // });
  186. // ```
  187. template <typename LookupKeyT, typename InsertCallbackT>
  188. auto Insert(LookupKeyT lookup_key, InsertCallbackT insert_cb,
  189. KeyContextT key_context = KeyContextT()) -> InsertResult
  190. requires std::invocable<InsertCallbackT, LookupKeyT, void*>;
  191. // Erase a key from the set.
  192. template <typename LookupKeyT>
  193. auto Erase(LookupKeyT lookup_key, KeyContextT key_context = KeyContextT())
  194. -> bool;
  195. // Clear all key/value pairs from the set but leave the underlying hashtable
  196. // allocated and in place.
  197. void Clear();
  198. protected:
  199. using ImplT::ImplT;
  200. };
  201. // A data structure for a set of keys.
  202. //
  203. // This set supports small size optimization (or "SSO"). The provided
  204. // `SmallSize` type parameter indicates the size of an embedded buffer for
  205. // storing sets small enough to fit. The default is zero, which always allocates
  206. // a heap buffer on construction. When non-zero, must be a multiple of the
  207. // `MaxGroupSize` which is currently 16. The library will check that the size is
  208. // valid and provide an error at compile time if not. We don't automatically
  209. // select the next multiple or otherwise fit the size to the constraints to make
  210. // it clear in the code how much memory is used by the SSO buffer.
  211. //
  212. // This data structure optimizes heavily for small key types that are cheap to
  213. // move and even copy. Using types with large keys or expensive to copy keys may
  214. // create surprising performance bottlenecks. A `std::string` key should be fine
  215. // with generally small strings, but if some or many strings are large heap
  216. // allocations the performance of hashtable routines may be unacceptably bad and
  217. // another data structure or key design is likely preferable.
  218. //
  219. // Note that this type should typically not appear on API boundaries; either
  220. // `SetBase` or `SetView` should be used instead.
  221. template <typename InputKeyT, ssize_t SmallSize = 0,
  222. typename InputKeyContextT = DefaultKeyContext>
  223. class Set : public RawHashtable::TableImpl<SetBase<InputKeyT, InputKeyContextT>,
  224. SmallSize> {
  225. using BaseT = SetBase<InputKeyT, InputKeyContextT>;
  226. using ImplT = RawHashtable::TableImpl<BaseT, SmallSize>;
  227. public:
  228. using KeyT = typename BaseT::KeyT;
  229. Set() = default;
  230. Set(const Set& arg) = default;
  231. Set(Set&& arg) noexcept = default;
  232. // Reset the entire state of the hashtable to as it was when constructed,
  233. // throwing away any intervening allocations.
  234. void Reset();
  235. };
  236. template <typename InputKeyT, typename InputKeyContextT>
  237. template <typename LookupKeyT>
  238. auto SetView<InputKeyT, InputKeyContextT>::Contains(
  239. LookupKeyT lookup_key, KeyContextT key_context) const -> bool {
  240. return this->LookupEntry(lookup_key, key_context) != nullptr;
  241. }
  242. template <typename InputKeyT, typename InputKeyContextT>
  243. template <typename LookupKeyT>
  244. auto SetView<InputKeyT, InputKeyContextT>::Lookup(LookupKeyT lookup_key,
  245. KeyContextT key_context) const
  246. -> LookupResult {
  247. EntryT* entry = this->LookupEntry(lookup_key, key_context);
  248. if (!entry) {
  249. return LookupResult();
  250. }
  251. return LookupResult(entry->key());
  252. }
  253. template <typename InputKeyT, typename InputKeyContextT>
  254. template <typename CallbackT>
  255. void SetView<InputKeyT, InputKeyContextT>::ForEach(CallbackT callback)
  256. requires(std::invocable<CallbackT, KeyT&>)
  257. {
  258. this->ForEachEntry([callback](EntryT& entry) { callback(entry.key()); },
  259. [](auto...) {});
  260. }
  261. template <typename InputKeyT, typename InputKeyContextT>
  262. template <typename LookupKeyT>
  263. auto SetBase<InputKeyT, InputKeyContextT>::Insert(LookupKeyT lookup_key,
  264. KeyContextT key_context)
  265. -> InsertResult {
  266. return Insert(
  267. lookup_key,
  268. [](LookupKeyT lookup_key, void* key_storage) {
  269. new (key_storage) KeyT(std::move(lookup_key));
  270. },
  271. key_context);
  272. }
  273. template <typename InputKeyT, typename InputKeyContextT>
  274. template <typename LookupKeyT, typename InsertCallbackT>
  275. auto SetBase<InputKeyT, InputKeyContextT>::Insert(LookupKeyT lookup_key,
  276. InsertCallbackT insert_cb,
  277. KeyContextT key_context)
  278. -> InsertResult
  279. requires std::invocable<InsertCallbackT, LookupKeyT, void*>
  280. {
  281. auto [entry, inserted] = this->InsertImpl(lookup_key, key_context);
  282. CARBON_DCHECK(entry) << "Should always result in a valid index.";
  283. if (LLVM_LIKELY(!inserted)) {
  284. return InsertResult(false, entry->key());
  285. }
  286. insert_cb(lookup_key, static_cast<void*>(&entry->key_storage));
  287. return InsertResult(true, entry->key());
  288. }
  289. template <typename InputKeyT, typename InputKeyContextT>
  290. template <typename LookupKeyT>
  291. auto SetBase<InputKeyT, InputKeyContextT>::Erase(LookupKeyT lookup_key,
  292. KeyContextT key_context)
  293. -> bool {
  294. return this->EraseImpl(lookup_key, key_context);
  295. }
  296. template <typename InputKeyT, typename InputKeyContextT>
  297. void SetBase<InputKeyT, InputKeyContextT>::Clear() {
  298. this->ClearImpl();
  299. }
  300. template <typename InputKeyT, ssize_t SmallSize, typename InputKeyContextT>
  301. void Set<InputKeyT, SmallSize, InputKeyContextT>::Reset() {
  302. this->ResetImpl();
  303. }
  304. } // namespace Carbon
  305. #endif // CARBON_COMMON_SET_H_