Răsfoiți Sursa

Change lexical lookup to use an array instead of hash map. (#3582)

This is primarily being done for performance reasons, removing hash
lookups. The increased memory consumption is accepted.

Refactors LexicalLookup out to its own structure.
Jon Ross-Perkins 2 ani în urmă
părinte
comite
dc75295a72

+ 5 - 1
toolchain/base/value_store.h

@@ -170,7 +170,7 @@ class ValueStore
   }
 
   auto array_ref() const -> llvm::ArrayRef<ValueType> { return values_; }
-  auto size() const -> int { return values_.size(); }
+  auto size() const -> size_t { return values_.size(); }
 
  private:
   // Set inline size to 0 because these will typically be too large for the
@@ -208,6 +208,8 @@ class ValueStore<StringId> : public Yaml::Printable<ValueStore<StringId>> {
     });
   }
 
+  auto size() const -> size_t { return values_.size(); }
+
  private:
   llvm::DenseMap<llvm::StringRef, StringId> map_;
   // Set inline size to 0 because these will typically be too large for the
@@ -233,6 +235,8 @@ class StringStoreWrapper : public Printable<StringStoreWrapper<IdT>> {
 
   auto Print(llvm::raw_ostream& out) const -> void { out << *values_; }
 
+  auto size() const -> size_t { return values_->size(); }
+
  private:
   ValueStore<StringId>* values_;
 };

+ 3 - 0
toolchain/check/BUILD

@@ -44,14 +44,17 @@ cc_library(
         "decl_name_stack.h",
         "decl_state.h",
         "inst_block_stack.h",
+        "lexical_lookup.h",
         "modifiers.h",
         "pending_block.h",
         "return.h",
+        "scope_index.h",
     ],
     deps = [
         ":node_stack",
         "//common:check",
         "//common:vlog",
+        "//toolchain/base:index_base",
         "//toolchain/lex:tokenized_buffer",
         "//toolchain/parse:node_kind",
         "//toolchain/parse:tree",

+ 18 - 29
toolchain/check/context.cpp

@@ -34,7 +34,8 @@ Context::Context(const Lex::TokenizedBuffer& tokens, DiagnosticEmitter& emitter,
       inst_block_stack_("inst_block_stack_", sem_ir, vlog_stream),
       params_or_args_stack_("params_or_args_stack_", sem_ir, vlog_stream),
       args_type_info_stack_("args_type_info_stack_", sem_ir, vlog_stream),
-      decl_name_stack_(this) {
+      decl_name_stack_(this),
+      lexical_lookup_(sem_ir_->identifiers()) {
   // Inserts the "Error" and "Type" types as "used types" so that
   // canonicalization can skip them. We don't emit either for lowering.
   canonical_types_.insert({SemIR::InstId::BuiltinError, SemIR::TypeId::Error});
@@ -54,7 +55,6 @@ auto Context::VerifyOnFinish() -> void {
   // various pieces of context go out of scope. At this point, nothing should
   // remain.
   // node_stack_ will still contain top-level entities.
-  CARBON_CHECK(name_lookup_.empty()) << name_lookup_.size();
   CARBON_CHECK(scope_stack_.empty()) << scope_stack_.size();
   CARBON_CHECK(inst_block_stack_.empty()) << inst_block_stack_.size();
   CARBON_CHECK(params_or_args_stack_.empty()) << params_or_args_stack_.size();
@@ -157,14 +157,15 @@ auto Context::AddNameToLookup(Parse::NodeId name_node, SemIR::NameId name_id,
   if (current_scope().names.insert(name_id).second) {
     // TODO: Reject if we previously performed a failed lookup for this name in
     // this scope or a scope nested within it.
-    auto& lexical_results = name_lookup_[name_id];
+    auto& lexical_results = lexical_lookup_.Get(name_id);
     CARBON_CHECK(lexical_results.empty() ||
                  lexical_results.back().scope_index < current_scope_index())
         << "Failed to clean up after scope nested within the current scope";
     lexical_results.push_back(
         {.inst_id = target_id, .scope_index = current_scope_index()});
   } else {
-    DiagnoseDuplicateName(name_node, name_lookup_[name_id].back().inst_id);
+    DiagnoseDuplicateName(name_node,
+                          lexical_lookup_.Get(name_id).back().inst_id);
   }
 }
 
@@ -235,11 +236,9 @@ auto Context::LookupNameInDecl(Parse::NodeId /*parse_node*/,
     //    In this case, we're not in the correct scope to define a member of
     //    class A, so we should reject, and we achieve this by not finding the
     //    name A from the outer scope.
-    if (auto name_it = name_lookup_.find(name_id);
-        name_it != name_lookup_.end()) {
-      CARBON_CHECK(!name_it->second.empty())
-          << "Should have been erased: " << names().GetFormatted(name_id);
-      auto result = name_it->second.back();
+    auto& lexical_results = lexical_lookup_.Get(name_id);
+    if (!lexical_results.empty()) {
+      auto result = lexical_results.back();
       if (result.scope_index == current_scope_index()) {
         ResolveIfLazyImportRef(result.inst_id);
         return result.inst_id;
@@ -266,13 +265,8 @@ auto Context::LookupUnqualifiedName(Parse::NodeId parse_node,
 
   // Find the results from enclosing lexical scopes. These will be combined with
   // results from non-lexical scopes such as namespaces and classes.
-  llvm::ArrayRef<LexicalLookupResult> lexical_results;
-  if (auto name_it = name_lookup_.find(name_id);
-      name_it != name_lookup_.end()) {
-    lexical_results = name_it->second;
-    CARBON_CHECK(!lexical_results.empty())
-        << "Should have been erased: " << names().GetFormatted(name_id);
-  }
+  llvm::ArrayRef<LexicalLookup::Result> lexical_results =
+      lexical_lookup_.Get(name_id);
 
   // Walk the non-lexical scopes and perform lookups into each of them.
   for (auto [index, name_scope_id] : llvm::reverse(non_lexical_scope_stack_)) {
@@ -300,7 +294,7 @@ auto Context::LookupUnqualifiedName(Parse::NodeId parse_node,
   }
 
   // We didn't find anything at all.
-  if (!name_lookup_has_load_error_) {
+  if (!lexical_lookup_has_load_error_) {
     DiagnoseNameNotFound(parse_node, name_id);
   }
   return SemIR::InstId::BuiltinError;
@@ -367,17 +361,17 @@ auto Context::LookupQualifiedName(Parse::NodeId parse_node,
 
 auto Context::PushScope(SemIR::InstId scope_inst_id,
                         SemIR::NameScopeId scope_id,
-                        bool name_lookup_has_load_error) -> void {
+                        bool lexical_lookup_has_load_error) -> void {
   scope_stack_.push_back(
       {.index = next_scope_index_,
        .scope_inst_id = scope_inst_id,
        .scope_id = scope_id,
-       .prev_name_lookup_has_load_error = name_lookup_has_load_error_});
+       .prev_lexical_lookup_has_load_error = lexical_lookup_has_load_error_});
   if (scope_id.is_valid()) {
     non_lexical_scope_stack_.push_back({next_scope_index_, scope_id});
   }
 
-  name_lookup_has_load_error_ |= name_lookup_has_load_error;
+  lexical_lookup_has_load_error_ |= lexical_lookup_has_load_error;
 
   // TODO: Handle this case more gracefully.
   CARBON_CHECK(next_scope_index_.index != std::numeric_limits<int32_t>::max())
@@ -388,18 +382,13 @@ auto Context::PushScope(SemIR::InstId scope_inst_id,
 auto Context::PopScope() -> void {
   auto scope = scope_stack_.pop_back_val();
 
-  name_lookup_has_load_error_ = scope.prev_name_lookup_has_load_error;
+  lexical_lookup_has_load_error_ = scope.prev_lexical_lookup_has_load_error;
 
   for (const auto& str_id : scope.names) {
-    auto it = name_lookup_.find(str_id);
-    CARBON_CHECK(it->second.back().scope_index == scope.index)
+    auto& lexical_results = lexical_lookup_.Get(str_id);
+    CARBON_CHECK(lexical_results.back().scope_index == scope.index)
         << "Inconsistent scope index for name " << names().GetFormatted(str_id);
-    if (it->second.size() == 1) {
-      // Erase names that no longer resolve.
-      name_lookup_.erase(it);
-    } else {
-      it->second.pop_back();
-    }
+    lexical_results.pop_back();
   }
 
   if (scope.scope_id.is_valid()) {

+ 19 - 27
toolchain/check/context.h

@@ -12,6 +12,7 @@
 #include "toolchain/check/decl_name_stack.h"
 #include "toolchain/check/decl_state.h"
 #include "toolchain/check/inst_block_stack.h"
+#include "toolchain/check/lexical_lookup.h"
 #include "toolchain/check/node_stack.h"
 #include "toolchain/parse/tree.h"
 #include "toolchain/parse/tree_node_location_translator.h"
@@ -110,14 +111,15 @@ class Context {
       -> void;
 
   // Pushes a scope onto scope_stack_. NameScopeId::Invalid is used for new
-  // scopes. name_lookup_has_load_error is used to limit diagnostics when a
+  // scopes. lexical_lookup_has_load_error is used to limit diagnostics when a
   // given namespace may contain a mix of both successful and failed name
   // imports.
   auto PushScope(SemIR::InstId scope_inst_id = SemIR::InstId::Invalid,
                  SemIR::NameScopeId scope_id = SemIR::NameScopeId::Invalid,
-                 bool name_lookup_has_load_error = false) -> void;
+                 bool lexical_lookup_has_load_error = false) -> void;
 
-  // Pops the top scope from scope_stack_, cleaning up names from name_lookup_.
+  // Pops the top scope from scope_stack_, cleaning up names from
+  // lexical_lookup_.
   auto PopScope() -> void;
 
   // Pops scopes until we return to the specified scope index.
@@ -342,10 +344,16 @@ class Context {
 
   auto decl_state_stack() -> DeclStateStack& { return decl_state_stack_; }
 
+  auto lexical_lookup() -> LexicalLookup& { return lexical_lookup_; }
+
   // Directly expose SemIR::File data accessors for brevity in calls.
-  auto identifiers() -> StringStoreWrapper<IdentifierId>& {
+
+  // Use `lexical_lookup().AddIdentifier` to add entries. `identifiers()` is
+  // const to discourage misuse.
+  auto identifiers() -> const StringStoreWrapper<IdentifierId>& {
     return sem_ir().identifiers();
   }
+
   auto ints() -> ValueStore<IntId>& { return sem_ir().ints(); }
   auto reals() -> ValueStore<RealId>& { return sem_ir().reals(); }
   auto string_literal_values() -> StringStoreWrapper<StringLiteralValueId>& {
@@ -408,10 +416,10 @@ class Context {
     // The name scope associated with this entry, if any.
     SemIR::NameScopeId scope_id;
 
-    // The previous state of name_lookup_has_load_error_, restored on pop.
-    bool prev_name_lookup_has_load_error;
+    // The previous state of lexical_lookup_has_load_error_, restored on pop.
+    bool prev_lexical_lookup_has_load_error;
 
-    // Names which are registered with name_lookup_, and will need to be
+    // Names which are registered with lexical_lookup_, and will need to be
     // unregistered when the scope ends.
     llvm::DenseSet<SemIR::NameId> names;
 
@@ -422,14 +430,6 @@ class Context {
     // TODO: This likely needs to track things which need to be destructed.
   };
 
-  // A lookup result in the lexical lookup table `name_lookup_`.
-  struct LexicalLookupResult {
-    // The instruction that was added to lookup.
-    SemIR::InstId inst_id;
-    // The scope in which the instruction was added.
-    ScopeIndex scope_index;
-  };
-
   // Forms a canonical type ID for a type. This function is given two
   // callbacks:
   //
@@ -516,20 +516,12 @@ class Context {
   // The stack of declarations that could have modifiers.
   DeclStateStack decl_state_stack_;
 
-  // Maps identifiers to name lookup results. Values are a stack of name lookup
-  // results in the ancestor scopes. This offers constant-time lookup of names,
-  // regardless of how many scopes exist between the name declaration and
-  // reference. The corresponding scope for each lookup result is tracked, so
-  // that lexical lookup results can be interleaved with lookup results from
-  // non-lexical scopes such as classes.
-  //
-  // Names which no longer have lookup results are erased.
-  llvm::DenseMap<SemIR::NameId, llvm::SmallVector<LexicalLookupResult>>
-      name_lookup_;
+  // Tracks lexical lookup results.
+  LexicalLookup lexical_lookup_;
 
-  // Whether name_lookup_ has load errors, updated whenever scope_stack_ is
+  // Whether lexical_lookup_ has load errors, updated whenever scope_stack_ is
   // pushed or popped.
-  bool name_lookup_has_load_error_ = false;
+  bool lexical_lookup_has_load_error_ = false;
 
   // Cache of the mapping from instructions to types, to avoid recomputing the
   // folding set ID.

+ 1 - 14
toolchain/check/decl_name_stack.h

@@ -6,25 +6,12 @@
 #define CARBON_TOOLCHAIN_CHECK_DECL_NAME_STACK_H_
 
 #include "llvm/ADT/SmallVector.h"
+#include "toolchain/check/scope_index.h"
 #include "toolchain/parse/tree.h"
 #include "toolchain/sem_ir/ids.h"
 
 namespace Carbon::Check {
 
-// An index for a pushed scope. This may correspond to a permanent scope with a
-// corresponding `NameScope`, in which case a different index will be assigned
-// each time the scope is entered. Alternatively, it may be a temporary scope
-// such as is created for a block, and will only be entered once.
-//
-// `ScopeIndex` values are comparable. Lower `ScopeIndex` values correspond to
-// scopes entered earlier in the file.
-//
-// TODO: Move this struct and the name lookup code in context.h to a separate
-// file.
-struct ScopeIndex : public IndexBase, public Printable<ScopeIndex> {
-  using IndexBase::IndexBase;
-};
-
 class Context;
 
 // Provides support and stacking for qualified declaration name handling.

+ 2 - 1
toolchain/check/import.cpp

@@ -57,7 +57,8 @@ static auto CopyNameFromImportIR(Context& context,
   if (auto import_identifier_id = import_name_id.AsIdentifierId();
       import_identifier_id.is_valid()) {
     auto name = import_sem_ir.identifiers().Get(import_identifier_id);
-    return SemIR::NameId::ForIdentifier(context.identifiers().Add(name));
+    return SemIR::NameId::ForIdentifier(
+        context.lexical_lookup().AddIdentifier(name));
   }
   return import_name_id;
 }

+ 70 - 0
toolchain/check/lexical_lookup.h

@@ -0,0 +1,70 @@
+
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_TOOLCHAIN_CHECK_LEXICAL_LOOKUP_H_
+#define CARBON_TOOLCHAIN_CHECK_LEXICAL_LOOKUP_H_
+
+#include "toolchain/check/scope_index.h"
+#include "toolchain/sem_ir/file.h"
+#include "toolchain/sem_ir/ids.h"
+
+namespace Carbon::Check {
+
+// Manages lexical lookup information for NameIds.
+//
+// Values are a stack of name lookup results in the ancestor scopes. This offers
+// constant-time lookup of names, regardless of how many scopes exist between
+// the name declaration and reference. The corresponding scope for each lookup
+// result is tracked, so that lexical lookup results can be interleaved with
+// lookup results from non-lexical scopes such as classes.
+class LexicalLookup {
+ public:
+  // A lookup result.
+  struct Result {
+    // The instruction that was added to lookup.
+    SemIR::InstId inst_id;
+    // The scope in which the instruction was added.
+    ScopeIndex scope_index;
+  };
+
+  explicit LexicalLookup(StringStoreWrapper<IdentifierId>& identifiers)
+      : identifiers_(&identifiers),
+        lookup_(identifiers_->size() + SemIR::NameId::NonIndexValueCount) {}
+
+  ~LexicalLookup() {
+    CARBON_CHECK(lookup_.size() ==
+                 identifiers_->size() + SemIR::NameId::NonIndexValueCount)
+        << lookup_.size() << " must match " << identifiers_->size() << " + "
+        << SemIR::NameId::NonIndexValueCount
+        << "; something may have been added incorrectly";
+  }
+
+  // Handles both adding the identifier and resizing lookup_ to accommodate the
+  // new entry. `identifiers().Add` must not be called directly once checking
+  // has begun.
+  auto AddIdentifier(llvm::StringRef name) -> IdentifierId {
+    auto id = identifiers_->Add(name);
+    // Bear in mind that Add was not guaranteed to actually change the size.
+    lookup_.resize(identifiers_->size() + SemIR::NameId::NonIndexValueCount);
+    return id;
+  }
+
+  // Returns the lexical lookup results for a name.
+  auto Get(SemIR::NameId name_id) -> llvm::SmallVector<Result, 2>& {
+    return lookup_[name_id.index + SemIR::NameId::NonIndexValueCount];
+  }
+
+ private:
+  StringStoreWrapper<IdentifierId>* identifiers_;
+
+  // Maps identifiers to name lookup results.
+  // TODO: Consider TinyPtrVector<Result> or similar. For now, use a small size
+  // of 2 to cover the common case.
+  llvm::SmallVector<llvm::SmallVector<Result, 2>> lookup_;
+};
+
+}  // namespace Carbon::Check
+
+#endif  // CARBON_TOOLCHAIN_CHECK_LEXICAL_LOOKUP_H_

+ 26 - 0
toolchain/check/scope_index.h

@@ -0,0 +1,26 @@
+
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_TOOLCHAIN_CHECK_SCOPE_INDEX_H_
+#define CARBON_TOOLCHAIN_CHECK_SCOPE_INDEX_H_
+
+#include "toolchain/base/index_base.h"
+
+namespace Carbon::Check {
+
+// An index for a pushed scope. This may correspond to a permanent scope with a
+// corresponding `NameScope`, in which case a different index will be assigned
+// each time the scope is entered. Alternatively, it may be a temporary scope
+// such as is created for a block, and will only be entered once.
+//
+// `ScopeIndex` values are comparable. Lower `ScopeIndex` values correspond to
+// scopes entered earlier in the file.
+struct ScopeIndex : public IndexBase, public Printable<ScopeIndex> {
+  using IndexBase::IndexBase;
+};
+
+}  // namespace Carbon::Check
+
+#endif  // CARBON_TOOLCHAIN_CHECK_SCOPE_INDEX_H_

+ 7 - 0
toolchain/sem_ir/ids.h

@@ -183,6 +183,10 @@ struct NameId : public IdBase, public Printable<NameId> {
   // The name of `base`.
   static const NameId Base;
 
+  // The number of non-index (<0) that exist, and will need storage in name
+  // lookup.
+  static const int NonIndexValueCount;
+
   // Returns the NameId corresponding to a particular IdentifierId.
   static auto ForIdentifier(IdentifierId id) -> NameId {
     if (id.index >= 0) {
@@ -227,6 +231,9 @@ constexpr NameId NameId::SelfType = NameId(NameId::InvalidIndex - 2);
 constexpr NameId NameId::ReturnSlot = NameId(NameId::InvalidIndex - 3);
 constexpr NameId NameId::PackageNamespace = NameId(NameId::InvalidIndex - 4);
 constexpr NameId NameId::Base = NameId(NameId::InvalidIndex - 5);
+constexpr int NameId::NonIndexValueCount = 6;
+// Enforce the link between SpecialValueCount and the last special value.
+static_assert(NameId::NonIndexValueCount == -NameId::Base.index);
 
 // The ID of a name scope.
 struct NameScopeId : public IdBase, public Printable<NameScopeId> {