Pārlūkot izejas kodu

Expand use of CheckIRId stores (#5820)

This change makes it clearer when vectors are being indexed by
`CheckIRId`.

The only remaining vector that I would still like to change is the
`SmallVector<std::unique_ptr<CompilationUnit>>`, but because it holds
`unique_ptr`s, converting it is a little more complex. I may not bother.

Note: some of the changes to the more nuanced `SmallVector` interactions
mirror the way `SmallVector` itself takes arguments, for example when
passing ranges.
Jon Ross-Perkins 9 mēneši atpakaļ
vecāks
revīzija
bd4fbb4393

+ 21 - 3
toolchain/base/fixed_size_value_store.h

@@ -44,7 +44,7 @@ class FixedSizeValueStore {
   }
 
   // Makes a ValueStore of the specified size, initialized to a default.
-  static auto MakeWithExplicitSize(size_t size, ValueT default_value)
+  static auto MakeWithExplicitSize(size_t size, ConstRefType default_value)
       -> FixedSizeValueStore {
     FixedSizeValueStore store;
     store.values_.resize(size, default_value);
@@ -57,10 +57,21 @@ class FixedSizeValueStore {
   template <typename ValueStoreT>
     requires std::same_as<IdT, typename ValueStoreT::IdType>
   explicit FixedSizeValueStore(const ValueStoreT& size_source,
-                               ValueT default_value) {
+                               ConstRefType default_value) {
     values_.resize(size_source.size(), default_value);
   }
 
+  // Makes a ValueStore using a mapped range of `source`. The `factory_fn`
+  // receives each enumerated entry for construction of `ValueType`.
+  template <typename ValueStoreT>
+    requires std::same_as<IdT, typename ValueStoreT::IdType>
+  explicit FixedSizeValueStore(
+      const ValueStoreT& source,
+      llvm::function_ref<
+          auto(IdT, typename ValueStoreT::ConstRefType)->ValueType>
+          factory_fn)
+      : values_(llvm::map_range(source.enumerate(), factory_fn)) {}
+
   // Move-only.
   FixedSizeValueStore(FixedSizeValueStore&&) noexcept = default;
   auto operator=(FixedSizeValueStore&&) noexcept
@@ -91,7 +102,14 @@ class FixedSizeValueStore {
   }
 
   auto size() const -> size_t { return values_.size(); }
-  auto values() -> auto {
+
+  auto values()
+      -> llvm::iterator_range<typename llvm::SmallVector<ValueT, 0>::iterator> {
+    return llvm::make_range(values_.begin(), values_.end());
+  }
+
+  auto values() const -> llvm::iterator_range<
+      typename llvm::SmallVector<ValueT, 0>::const_iterator> {
     return llvm::make_range(values_.begin(), values_.end());
   }
 

+ 4 - 0
toolchain/base/value_store.h

@@ -172,6 +172,10 @@ class ValueStore
   auto size() const -> size_t { return size_; }
 
   // Makes an iterable range over references to all values in the ValueStore.
+  auto values() [[clang::lifetimebound]] -> auto {
+    return llvm::map_range(
+        llvm::seq(size_), [&](int32_t i) -> RefType { return Get(IdType(i)); });
+  }
   auto values() const [[clang::lifetimebound]] -> Range { return Range(*this); }
 
   // Makes an iterable range over pairs of the index and a reference to the

+ 1 - 0
toolchain/check/BUILD

@@ -192,6 +192,7 @@ cc_library(
         "//common:ostream",
         "//common:pretty_stack_trace_function",
         "//common:vlog",
+        "//toolchain/base:fixed_size_value_store",
         "//toolchain/base:kind_switch",
         "//toolchain/base:shared_value_stores",
         "//toolchain/base:timings",

+ 7 - 8
toolchain/check/check.cpp

@@ -361,7 +361,7 @@ static auto MaybeDumpFormattedSemIR(
 // Handles options for dumping SemIR, including verbose output.
 static auto MaybeDumpSemIR(
     llvm::ArrayRef<Unit> units,
-    llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> tree_and_subtrees_getters,
+    const Parse::GetTreeAndSubtreesStore& tree_and_subtrees_getters,
     const CheckParseTreesOptions& options) -> void {
   if (!options.vlog_stream && !options.dump_stream &&
       !options.raw_dump_stream) {
@@ -375,22 +375,21 @@ static auto MaybeDumpSemIR(
 
   for (const auto& unit : units) {
     bool include_in_dumps =
-        options.include_in_dumps[unit.sem_ir->check_ir_id().index];
+        options.include_in_dumps->Get(unit.sem_ir->check_ir_id());
     if (include_in_dumps && options.raw_dump_stream) {
       unit.sem_ir->Print(*options.raw_dump_stream,
                          options.dump_raw_sem_ir_builtins);
     }
 
     MaybeDumpFormattedSemIR(
-        *unit.sem_ir,
-        tree_and_subtrees_getters[unit.sem_ir->check_ir_id().index],
+        *unit.sem_ir, tree_and_subtrees_getters.Get(unit.sem_ir->check_ir_id()),
         include_in_dumps, options);
   }
 }
 
 auto CheckParseTrees(
     llvm::MutableArrayRef<Unit> units,
-    llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> tree_and_subtrees_getters,
+    const Parse::GetTreeAndSubtreesStore& tree_and_subtrees_getters,
     llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
     const CheckParseTreesOptions& options,
     std::shared_ptr<clang::CompilerInvocation> clang_invocation) -> void {
@@ -399,7 +398,7 @@ auto CheckParseTrees(
   llvm::SmallVector<UnitAndImports, 0> unit_infos(
       llvm::map_range(units, [&](Unit& unit) {
         return UnitAndImports(
-            &unit, tree_and_subtrees_getters[unit.sem_ir->check_ir_id().index]);
+            &unit, tree_and_subtrees_getters.Get(unit.sem_ir->check_ir_id()));
       }));
 
   Map<ImportKey, UnitAndImports*> api_map =
@@ -448,7 +447,7 @@ auto CheckParseTrees(
   for (int check_index = 0;
        check_index < static_cast<int>(ready_to_check.size()); ++check_index) {
     auto* unit_info = ready_to_check[check_index];
-    CheckUnit(unit_info, tree_and_subtrees_getters, fs, clang_invocation,
+    CheckUnit(unit_info, &tree_and_subtrees_getters, fs, clang_invocation,
               options.vlog_stream)
         .Run();
     for (auto* incoming_import : unit_info->incoming_imports) {
@@ -497,7 +496,7 @@ auto CheckParseTrees(
     // incomplete imports.
     for (auto& unit_info : unit_infos) {
       if (unit_info.imports_remaining > 0) {
-        CheckUnit(&unit_info, tree_and_subtrees_getters, fs, clang_invocation,
+        CheckUnit(&unit_info, &tree_and_subtrees_getters, fs, clang_invocation,
                   options.vlog_stream)
             .Run();
       }

+ 6 - 2
toolchain/check/check.h

@@ -13,6 +13,7 @@
 #include "toolchain/diagnostics/diagnostic_emitter.h"
 #include "toolchain/parse/tree_and_subtrees.h"
 #include "toolchain/sem_ir/file.h"
+#include "toolchain/sem_ir/ids.h"
 
 namespace Carbon::Check {
 
@@ -49,7 +50,7 @@ struct CheckParseTreesOptions {
   // Whether to include each unit in dumps. This is required when dumping
   // (either of `dump_stream` or `raw_dump_stream`), and must have entries based
   // on CheckIRId.
-  llvm::ArrayRef<bool> include_in_dumps = {};
+  const FixedSizeValueStore<SemIR::CheckIRId, bool>* include_in_dumps = nullptr;
 
   // If set, SemIR will be dumped to this.
   llvm::raw_ostream* dump_stream = nullptr;
@@ -72,9 +73,12 @@ struct CheckParseTreesOptions {
 
 // Checks a group of parse trees. This will use imports to decide the order of
 // checking.
+//
+// `units` will only contain units which should be checked, and is not indexed
+// by `CheckIRId`.
 auto CheckParseTrees(
     llvm::MutableArrayRef<Unit> units,
-    llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> tree_and_subtrees_getters,
+    const Parse::GetTreeAndSubtreesStore& tree_and_subtrees_getters,
     llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
     const CheckParseTreesOptions& options,
     std::shared_ptr<clang::CompilerInvocation> clang_invocation) -> void;

+ 13 - 10
toolchain/check/check_unit.cpp

@@ -14,6 +14,7 @@
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/VirtualFileSystem.h"
+#include "toolchain/base/fixed_size_value_store.h"
 #include "toolchain/base/kind_switch.h"
 #include "toolchain/check/diagnostic_helpers.h"
 #include "toolchain/check/generic.h"
@@ -55,15 +56,14 @@ static auto GetImportedIRCount(UnitAndImports* unit_and_imports) -> int {
 
 CheckUnit::CheckUnit(
     UnitAndImports* unit_and_imports,
-    llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> tree_and_subtrees_getters,
+    const Parse::GetTreeAndSubtreesStore* tree_and_subtrees_getters,
     llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
     std::shared_ptr<clang::CompilerInvocation> clang_invocation,
     llvm::raw_ostream* vlog_stream)
     : unit_and_imports_(unit_and_imports),
-      tree_and_subtrees_getter_(
-          tree_and_subtrees_getters
-              [unit_and_imports->unit->sem_ir->check_ir_id().index]),
-      total_ir_count_(tree_and_subtrees_getters.size()),
+      tree_and_subtrees_getter_(tree_and_subtrees_getters->Get(
+          unit_and_imports->unit->sem_ir->check_ir_id())),
+      total_ir_count_(tree_and_subtrees_getters->size()),
       fs_(std::move(fs)),
       clang_invocation_(std::move(clang_invocation)),
       emitter_(&unit_and_imports_->err_tracker, tree_and_subtrees_getters,
@@ -163,11 +163,12 @@ auto CheckUnit::InitPackageScopeAndImports() -> void {
 
 auto CheckUnit::CollectDirectImports(
     llvm::SmallVector<SemIR::ImportIR>& results,
-    llvm::MutableArrayRef<int> ir_to_result_index, SemIR::InstId import_decl_id,
-    const PackageImports& imports, bool is_local) -> void {
+    FixedSizeValueStore<SemIR::CheckIRId, int>& ir_to_result_index,
+    SemIR::InstId import_decl_id, const PackageImports& imports, bool is_local)
+    -> void {
   for (const auto& import : imports.imports) {
     const auto& direct_ir = *import.unit_info->unit->sem_ir;
-    auto& index = ir_to_result_index[direct_ir.check_ir_id().index];
+    auto& index = ir_to_result_index.Get(direct_ir.check_ir_id());
     if (index != -1) {
       // This should only happen when doing API imports for an implementation
       // file. Don't change the entry; is_export doesn't matter.
@@ -191,7 +192,9 @@ auto CheckUnit::CollectTransitiveImports(SemIR::InstId import_decl_id,
   // Track whether an IR was imported in full, including `export import`. This
   // distinguishes from IRs that are indirectly added without all names being
   // exported to this IR.
-  llvm::SmallVector<int> ir_to_result_index(total_ir_count_, -1);
+  auto ir_to_result_index =
+      FixedSizeValueStore<SemIR::CheckIRId, int>::MakeWithExplicitSize(
+          total_ir_count_, -1);
 
   // First add direct imports. This means that if an entity is imported both
   // directly and indirectly, the import path will reflect the direct import.
@@ -219,7 +222,7 @@ auto CheckUnit::CollectTransitiveImports(SemIR::InstId import_decl_id,
       }
 
       auto& indirect_index =
-          ir_to_result_index[indirect_ir.sem_ir->check_ir_id().index];
+          ir_to_result_index.Get(indirect_ir.sem_ir->check_ir_id());
       if (indirect_index == -1) {
         indirect_index = results.size();
         // TODO: In the case of a recursive `export import`, this only points at

+ 8 - 6
toolchain/check/check_unit.h

@@ -121,9 +121,11 @@ struct UnitAndImports {
 // logic in check.cpp.
 class CheckUnit {
  public:
+  // `unit_and_imports` and `tree_and_subtrees_getters` must be non-null.
+  // `vlog_stream` is optional.
   explicit CheckUnit(
       UnitAndImports* unit_and_imports,
-      llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> tree_and_subtrees_getters,
+      const Parse::GetTreeAndSubtreesStore* tree_and_subtrees_getters,
       llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
       std::shared_ptr<clang::CompilerInvocation> clang_invocation,
       llvm::raw_ostream* vlog_stream);
@@ -136,11 +138,11 @@ class CheckUnit {
   auto InitPackageScopeAndImports() -> void;
 
   // Collects direct imports, for CollectTransitiveImports.
-  auto CollectDirectImports(llvm::SmallVector<SemIR::ImportIR>& results,
-                            llvm::MutableArrayRef<int> ir_to_result_index,
-                            SemIR::InstId import_decl_id,
-                            const PackageImports& imports, bool is_local)
-      -> void;
+  auto CollectDirectImports(
+      llvm::SmallVector<SemIR::ImportIR>& results,
+      FixedSizeValueStore<SemIR::CheckIRId, int>& ir_to_result_index,
+      SemIR::InstId import_decl_id, const PackageImports& imports,
+      bool is_local) -> void;
 
   // Collects transitive imports, handling deduplication. These will be unified
   // between local_imports and api_imports.

+ 4 - 1
toolchain/check/context.cpp

@@ -9,6 +9,7 @@
 
 #include "common/check.h"
 #include "toolchain/check/deferred_definition_worklist.h"
+#include "toolchain/sem_ir/ids.h"
 
 namespace Carbon::Check {
 
@@ -29,6 +30,9 @@ Context::Context(DiagnosticEmitterBase* emitter,
       scope_stack_(sem_ir_),
       deferred_definition_worklist_(vlog_stream),
       vtable_stack_("vtable_stack_", *sem_ir, vlog_stream),
+      check_ir_map_(
+          FixedSizeValueStore<SemIR::CheckIRId, SemIR::ImportIRId>::
+              MakeWithExplicitSize(total_ir_count, SemIR::ImportIRId::None)),
       global_init_(this),
       region_stack_([this](SemIR::LocId loc_id, std::string label) {
         TODO(loc_id, label);
@@ -36,7 +40,6 @@ Context::Context(DiagnosticEmitterBase* emitter,
   // Prepare fields which relate to the number of IRs available for import.
   import_irs().Reserve(imported_ir_count);
   import_ir_constant_values_.reserve(imported_ir_count);
-  check_ir_map_.resize(total_ir_count, SemIR::ImportIRId::None);
 }
 
 auto Context::TODO(SemIR::LocId loc_id, std::string label) -> bool {

+ 3 - 2
toolchain/check/context.h

@@ -144,7 +144,8 @@ class Context {
 
   auto exports() -> llvm::SmallVector<SemIR::InstId>& { return exports_; }
 
-  auto check_ir_map() -> llvm::MutableArrayRef<SemIR::ImportIRId> {
+  auto check_ir_map()
+      -> FixedSizeValueStore<SemIR::CheckIRId, SemIR::ImportIRId>& {
     return check_ir_map_;
   }
 
@@ -353,7 +354,7 @@ class Context {
   llvm::SmallVector<SemIR::InstId> exports_;
 
   // Maps CheckIRId to ImportIRId.
-  llvm::SmallVector<SemIR::ImportIRId> check_ir_map_;
+  FixedSizeValueStore<SemIR::CheckIRId, SemIR::ImportIRId> check_ir_map_;
 
   // Per-import constant values. These refer to the main IR and mainly serve as
   // a lookup table for quick access.

+ 2 - 1
toolchain/check/diagnostic_emitter.h

@@ -16,9 +16,10 @@ namespace Carbon::Check {
 // Handles the transformation of a SemIR::LocId to a DiagnosticLoc.
 class DiagnosticEmitter : public DiagnosticEmitterBase {
  public:
+  // `consumer`, `tree_and_subtrees_getters`, and `sem_ir` must be non-null.
   explicit DiagnosticEmitter(
       Diagnostics::Consumer* consumer,
-      llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> tree_and_subtrees_getters,
+      const Parse::GetTreeAndSubtreesStore* tree_and_subtrees_getters,
       const SemIR::File* sem_ir)
       : DiagnosticEmitterBase(consumer),
         sem_ir_(sem_ir),

+ 1 - 1
toolchain/check/import_ref.cpp

@@ -64,7 +64,7 @@ auto SetSpecialImportIRs(Context& context, SemIR::ImportIR import_ir) -> void {
 
 auto AddImportIR(Context& context, SemIR::ImportIR import_ir)
     -> SemIR::ImportIRId {
-  auto& ir_id = context.check_ir_map()[import_ir.sem_ir->check_ir_id().index];
+  auto& ir_id = context.check_ir_map().Get(import_ir.sem_ir->check_ir_id());
   if (!ir_id.has_value()) {
     // Note this updates check_ir_map.
     ir_id = InternalAddImportIR(context, import_ir);

+ 1 - 0
toolchain/driver/BUILD

@@ -141,6 +141,7 @@ cc_library(
         "//toolchain/parse",
         "//toolchain/parse:tree",
         "//toolchain/sem_ir:file",
+        "//toolchain/sem_ir:typed_insts",
         "//toolchain/source:source_buffer",
         "@llvm-project//llvm:Core",
         "@llvm-project//llvm:Support",

+ 48 - 47
toolchain/driver/compile_subcommand.cpp

@@ -25,6 +25,7 @@
 #include "toolchain/lower/lower.h"
 #include "toolchain/parse/parse.h"
 #include "toolchain/parse/tree_and_subtrees.h"
+#include "toolchain/sem_ir/ids.h"
 #include "toolchain/source/source_buffer.h"
 
 namespace Carbon {
@@ -407,7 +408,8 @@ class MultiUnitCache;
 // Ties together information for a file being compiled.
 class CompilationUnit {
  public:
-  explicit CompilationUnit(int unit_index, DriverEnv* driver_env,
+  // `driver_env`, `options`, and `consumer` must be non-null.
+  explicit CompilationUnit(SemIR::CheckIRId check_ir_id, DriverEnv* driver_env,
                            const CompileOptions* options,
                            Diagnostics::Consumer* consumer,
                            llvm::StringRef input_filename);
@@ -465,8 +467,8 @@ class CompilationUnit {
   // Returns true if the current file should be included in debug dumps.
   auto IncludeInDumps() -> bool;
 
-  // The index of the unit amongst all units. Equivalent to a `CheckIRId`.
-  int unit_index_;
+  // The index of the unit amongst all units.
+  SemIR::CheckIRId check_ir_id_;
 
   DriverEnv* driver_env_;
   const CompileOptions* options_;
@@ -512,70 +514,68 @@ class CompilationUnit {
 // they may not be used.
 class MultiUnitCache {
  public:
+  using IncludeInDumpsStore = FixedSizeValueStore<SemIR::CheckIRId, bool>;
+  using TreeAndSubtreesGettersStore = Parse::GetTreeAndSubtreesStore;
+
   // This relies on construction after `units` are all initialized, which is
   // reflected by the `ArrayRef` here.
   explicit MultiUnitCache(
       const CompileOptions* options,
-      const llvm::ArrayRef<std::unique_ptr<CompilationUnit>> units)
+      llvm::ArrayRef<std::unique_ptr<CompilationUnit>> units)
       : options_(options), units_(units) {}
 
-  auto include_in_dumps() -> llvm::ArrayRef<bool> {
-    CARBON_CHECK(!units_.empty());
-    if (include_in_dumps_.empty()) {
-      BuildIncludeInDumps();
+  auto include_in_dumps() -> const IncludeInDumpsStore& {
+    if (!include_in_dumps_) {
+      include_in_dumps_.emplace(
+          IncludeInDumpsStore::MakeWithExplicitSize(units_.size(), false));
+      for (const auto& [i, unit] : llvm::enumerate(units_)) {
+        include_in_dumps_->Set(
+            SemIR::CheckIRId(i),
+            llvm::none_of(options_->exclude_dump_file_prefixes,
+                          [&](auto prefix) {
+                            return unit->input_filename().starts_with(prefix);
+                          }));
+      }
     }
-    return include_in_dumps_;
+    return *include_in_dumps_;
   }
 
-  auto tree_and_subtrees_getters()
-      -> llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> {
-    CARBON_CHECK(!units_.empty());
-    if (tree_and_subtrees_getters_.empty()) {
-      BuildTreeAndSubtreesGetters();
+  auto tree_and_subtrees_getters() -> const TreeAndSubtreesGettersStore& {
+    if (!tree_and_subtrees_getters_) {
+      tree_and_subtrees_getters_.emplace(
+          TreeAndSubtreesGettersStore::MakeWithExplicitSize(units_.size(),
+                                                            nullptr));
+      for (const auto& [i, unit] : llvm::enumerate(units_)) {
+        if (unit->has_source()) {
+          tree_and_subtrees_getters_->Set(SemIR::CheckIRId(i),
+                                          unit->get_trees_and_subtrees());
+        }
+      }
     }
-    return tree_and_subtrees_getters_;
+    return *tree_and_subtrees_getters_;
   }
 
  private:
-  auto BuildIncludeInDumps() -> void {
-    CARBON_CHECK(include_in_dumps_.empty());
-    llvm::append_range(
-        include_in_dumps_, llvm::map_range(units_, [&](const auto& unit) {
-          return llvm::none_of(
-              options_->exclude_dump_file_prefixes, [&](auto prefix) {
-                return unit->input_filename().starts_with(prefix);
-              });
-        }));
-  }
-
-  auto BuildTreeAndSubtreesGetters() -> void {
-    CARBON_CHECK(tree_and_subtrees_getters_.empty());
-    llvm::append_range(
-        tree_and_subtrees_getters_,
-        llvm::map_range(units_, [&](const auto& unit) {
-          return unit->has_source() ? unit->get_trees_and_subtrees() : nullptr;
-        }));
-  }
-
   const CompileOptions* options_;
 
   // The units being compiled.
-  const llvm::ArrayRef<std::unique_ptr<CompilationUnit>> units_;
+  llvm::ArrayRef<std::unique_ptr<CompilationUnit>> units_;
 
   // For each unit, whether it's included in dumps. Used cross-phase.
-  llvm::SmallVector<bool> include_in_dumps_;
+  std::optional<IncludeInDumpsStore> include_in_dumps_;
 
   // For each unit, the `TreeAndSubtrees` getter. Used by lowering.
-  llvm::SmallVector<Parse::GetTreeAndSubtreesFn> tree_and_subtrees_getters_;
+  std::optional<TreeAndSubtreesGettersStore> tree_and_subtrees_getters_;
 };
 
 }  // namespace
 
-CompilationUnit::CompilationUnit(int unit_index, DriverEnv* driver_env,
+CompilationUnit::CompilationUnit(SemIR::CheckIRId check_ir_id,
+                                 DriverEnv* driver_env,
                                  const CompileOptions* options,
                                  Diagnostics::Consumer* consumer,
                                  llvm::StringRef input_filename)
-    : unit_index_(unit_index),
+    : check_ir_id_(check_ir_id),
       driver_env_(driver_env),
       options_(options),
       input_filename_(input_filename),
@@ -589,7 +589,7 @@ CompilationUnit::CompilationUnit(int unit_index, DriverEnv* driver_env,
 }
 
 auto CompilationUnit::IncludeInDumps() -> bool {
-  return cache_->include_in_dumps()[unit_index_];
+  return cache_->include_in_dumps().Get(check_ir_id_);
 }
 
 auto CompilationUnit::SetMultiUnitCache(MultiUnitCache* cache) -> void {
@@ -670,9 +670,8 @@ auto CompilationUnit::GetCheckUnit() -> Check::Unit {
   tree_and_subtrees_getter_ = [this]() -> const Parse::TreeAndSubtrees& {
     return this->GetParseTreeAndSubtrees();
   };
-  sem_ir_.emplace(&*parse_tree_, SemIR::CheckIRId(unit_index_),
-                  parse_tree_->packaging_decl(), value_stores_,
-                  input_filename_);
+  sem_ir_.emplace(&*parse_tree_, check_ir_id_, parse_tree_->packaging_decl(),
+                  value_stores_, input_filename_);
   return {.consumer = consumer_,
           .value_stores = &value_stores_,
           .timings = timings_ ? &*timings_ : nullptr,
@@ -894,8 +893,10 @@ auto CompileSubcommand::Run(DriverEnv& driver_env) -> DriverResult {
   llvm::SmallVector<std::unique_ptr<CompilationUnit>> units;
   int unit_index = -1;
   auto unit_builder = [&](llvm::StringRef filename) {
-    return std::make_unique<CompilationUnit>(
-        ++unit_index, &driver_env, &options_, &driver_env.consumer, filename);
+    ++unit_index;
+    return std::make_unique<CompilationUnit>(SemIR::CheckIRId(unit_index),
+                                             &driver_env, &options_,
+                                             &driver_env.consumer, filename);
   };
   llvm::append_range(units, llvm::map_range(prelude, unit_builder));
   llvm::append_range(units,
@@ -984,7 +985,7 @@ auto CompileSubcommand::Run(DriverEnv& driver_env) -> DriverResult {
   options.vlog_stream = driver_env.vlog_stream;
   options.fuzzing = driver_env.fuzzing;
   if (options.vlog_stream || options_.dump_sem_ir || options_.dump_raw_sem_ir) {
-    options.include_in_dumps = cache.include_in_dumps();
+    options.include_in_dumps = &cache.include_in_dumps();
     if (options_.dump_sem_ir) {
       options.dump_stream = driver_env.output_stream;
     }

+ 4 - 3
toolchain/language_server/context.cpp

@@ -166,13 +166,14 @@ auto Context::File::SetText(Context& context, std::optional<int64_t> version,
   // TODO: Include the prelude.
   Check::CheckParseTreesOptions check_options;
   check_options.vlog_stream = context.vlog_stream();
+  auto getters =
+      Parse::GetTreeAndSubtreesStore::MakeWithExplicitSize(1, getter);
 
   auto clang_invocation =
       BuildClangInvocation(consumer, fs, {context.installation().clang_path()});
 
-  Check::CheckParseTrees(units,
-                         llvm::ArrayRef<Parse::GetTreeAndSubtreesFn>(getter),
-                         fs, check_options, std::move(clang_invocation));
+  Check::CheckParseTrees(units, getters, fs, check_options,
+                         std::move(clang_invocation));
 
   // Note we need to publish diagnostics even when empty.
   // TODO: Consider caching previously published diagnostics and only publishing

+ 5 - 5
toolchain/lower/context.cpp

@@ -15,12 +15,12 @@
 namespace Carbon::Lower {
 
 Context::Context(
-    llvm::LLVMContext& llvm_context,
+    llvm::LLVMContext* llvm_context,
     llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs, bool want_debug_info,
-    llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> tree_and_subtrees_getters,
+    const Parse::GetTreeAndSubtreesStore* tree_and_subtrees_getters,
     llvm::StringRef module_name, llvm::raw_ostream* vlog_stream)
-    : llvm_context_(&llvm_context),
-      llvm_module_(std::make_unique<llvm::Module>(module_name, llvm_context)),
+    : llvm_context_(llvm_context),
+      llvm_module_(std::make_unique<llvm::Module>(module_name, *llvm_context)),
       file_system_(std::move(fs)),
       di_builder_(*llvm_module_),
       di_compile_unit_(
@@ -80,7 +80,7 @@ auto Context::BuildDICompileUnit(llvm::StringRef module_name,
 
 auto Context::GetLocForDI(SemIR::AbsoluteNodeId abs_node_id) -> LocForDI {
   const auto& tree_and_subtrees =
-      tree_and_subtrees_getters()[abs_node_id.check_ir_id().index]();
+      tree_and_subtrees_getters().Get(abs_node_id.check_ir_id())();
   const auto& tokens = tree_and_subtrees.tree().tokens();
 
   if (abs_node_id.node_id().has_value()) {

+ 7 - 6
toolchain/lower/context.h

@@ -41,10 +41,12 @@ class Context {
     SemIR::SpecificId specific_id;
   };
 
+  // `llvm_context` and `tree_and_subtrees_getters` must be non-null.
+  // `vlog_stream` is optional.
   explicit Context(
-      llvm::LLVMContext& llvm_context,
+      llvm::LLVMContext* llvm_context,
       llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs, bool want_debug_info,
-      llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> tree_and_subtrees_getters,
+      const Parse::GetTreeAndSubtreesStore* tree_and_subtrees_getters,
       llvm::StringRef module_name, llvm::raw_ostream* vlog_stream);
 
   // Gets or creates the `FileContext` for a given SemIR file. If an
@@ -95,9 +97,8 @@ class Context {
   }
   auto di_builder() -> llvm::DIBuilder& { return di_builder_; }
   auto di_compile_unit() -> llvm::DICompileUnit* { return di_compile_unit_; }
-  auto tree_and_subtrees_getters()
-      -> llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> {
-    return tree_and_subtrees_getters_;
+  auto tree_and_subtrees_getters() -> const Parse::GetTreeAndSubtreesStore& {
+    return *tree_and_subtrees_getters_;
   }
 
   auto printf_int_format_string() -> llvm::Value* {
@@ -133,7 +134,7 @@ class Context {
   llvm::DICompileUnit* di_compile_unit_;
 
   // Parse trees. Used for debug information and crash diagnostics.
-  llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> tree_and_subtrees_getters_;
+  const Parse::GetTreeAndSubtreesStore* tree_and_subtrees_getters_;
 
   // The optional vlog stream.
   llvm::raw_ostream* vlog_stream_;

+ 3 - 2
toolchain/lower/file_context.cpp

@@ -53,7 +53,8 @@ FileContext::FileContext(Context& context, const SemIR::File& sem_ir,
                                                     nullptr)),
       constants_(LoweredConstantStore::MakeWithExplicitSize(
           sem_ir.insts().size(), nullptr)),
-      lowered_specifics_(sem_ir.generics(), {}),
+      lowered_specifics_(sem_ir.generics(),
+                         llvm::SmallVector<SemIR::SpecificId>()),
       coalescer_(vlog_stream_, sem_ir.specifics()),
       vtables_(decltype(vtables_)::MakeForOverwrite(sem_ir.vtables())),
       specific_vtables_(sem_ir.specifics(), nullptr) {
@@ -541,7 +542,7 @@ auto FileContext::BuildFunctionBody(SemIR::FunctionId function_id,
   // On crash, report the function we were lowering.
   PrettyStackTraceFunction stack_trace_entry([&](llvm::raw_ostream& output) {
     SemIR::DiagnosticLocConverter converter(
-        context().tree_and_subtrees_getters(), &sem_ir());
+        &context().tree_and_subtrees_getters(), &sem_ir());
     auto converted =
         converter.Convert(SemIR::LocId(declaration_function.definition_id),
                           /*token_only=*/false);

+ 1 - 1
toolchain/lower/function_context.cpp

@@ -62,7 +62,7 @@ auto FunctionContext::LowerBlockContents(SemIR::InstBlockId block_id) -> void {
   // On crash, report the instruction we were lowering.
   PrettyStackTraceFunction stack_trace_entry([&](llvm::raw_ostream& output) {
     SemIR::DiagnosticLocConverter converter(
-        file_context_->context().tree_and_subtrees_getters(), &sem_ir());
+        &file_context_->context().tree_and_subtrees_getters(), &sem_ir());
     auto converted = converter.Convert(SemIR::LocId(inst_id_for_stack_trace),
                                        /*token_only=*/false);
     converted.loc.FormatLocation(output);

+ 3 - 3
toolchain/lower/lower.cpp

@@ -17,11 +17,11 @@ namespace Carbon::Lower {
 auto LowerToLLVM(
     llvm::LLVMContext& llvm_context,
     llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
-    llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> tree_and_subtrees_getters,
+    const Parse::GetTreeAndSubtreesStore& tree_and_subtrees_getters,
     const SemIR::File& sem_ir, const LowerToLLVMOptions& options)
     -> std::unique_ptr<llvm::Module> {
-  Context context(llvm_context, std::move(fs), options.want_debug_info,
-                  tree_and_subtrees_getters, sem_ir.filename(),
+  Context context(&llvm_context, std::move(fs), options.want_debug_info,
+                  &tree_and_subtrees_getters, sem_ir.filename(),
                   options.vlog_stream);
 
   // TODO: Consider disabling instruction naming by default if we're not

+ 1 - 1
toolchain/lower/lower.h

@@ -35,7 +35,7 @@ struct LowerToLLVMOptions {
 auto LowerToLLVM(
     llvm::LLVMContext& llvm_context,
     llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
-    llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> tree_and_subtrees_getters,
+    const Parse::GetTreeAndSubtreesStore& tree_and_subtrees_getters,
     const SemIR::File& sem_ir, const LowerToLLVMOptions& options)
     -> std::unique_ptr<llvm::Module>;
 

+ 13 - 0
toolchain/parse/tree_and_subtrees.h

@@ -10,6 +10,11 @@
 #include "toolchain/lex/token_index.h"
 #include "toolchain/parse/tree.h"
 
+namespace Carbon::SemIR {
+// Forward-declared here for `GetTreeAndSubtreesStore`.
+struct CheckIRId;
+}  // namespace Carbon::SemIR
+
 namespace Carbon::Parse {
 
 // Calculates and stores subtree data for a parse tree. Supports APIs that
@@ -192,6 +197,14 @@ class TreeAndSubtrees {
 // A standard signature for a callback to support lazy construction.
 using GetTreeAndSubtreesFn = llvm::function_ref<auto()->const TreeAndSubtrees&>;
 
+// The typical storage of `GetTreeAndSubtreesFn`. Note this stores non-owning
+// references.
+//
+// This is a commonly used alias, and while it depends on SemIR, it's difficult
+// to find a better home.
+using GetTreeAndSubtreesStore =
+    FixedSizeValueStore<SemIR::CheckIRId, Parse::GetTreeAndSubtreesFn>;
+
 // A forward iterator across the siblings at a particular level in the parse
 // tree. It produces `Tree::NodeId` objects which are opaque handles and must
 // be used in conjunction with the `Tree` itself.

+ 1 - 1
toolchain/sem_ir/diagnostic_loc_converter.cpp

@@ -53,7 +53,7 @@ auto DiagnosticLocConverter::ConvertImpl(SemIR::CheckIRId check_ir_id,
     -> Diagnostics::ConvertedLoc {
   CARBON_CHECK(check_ir_id != SemIR::CheckIRId::Cpp);
   const auto& tree_and_subtrees =
-      tree_and_subtrees_getters_[check_ir_id.index]();
+      tree_and_subtrees_getters_->Get(check_ir_id)();
   return tree_and_subtrees.NodeToDiagnosticLoc(node_id, token_only);
 }
 

+ 3 - 3
toolchain/sem_ir/diagnostic_loc_converter.h

@@ -35,9 +35,9 @@ class DiagnosticLocConverter {
     Diagnostics::ConvertedLoc loc;
   };
 
-  // `sem_ir` must not be null.
+  // `tree_and_subtrees_getters` and `sem_ir` must not be null.
   explicit DiagnosticLocConverter(
-      llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> tree_and_subtrees_getters,
+      const Parse::GetTreeAndSubtreesStore* tree_and_subtrees_getters,
       const File* sem_ir)
       : tree_and_subtrees_getters_(tree_and_subtrees_getters),
         sem_ir_(sem_ir) {}
@@ -66,7 +66,7 @@ class DiagnosticLocConverter {
       -> Diagnostics::ConvertedLoc;
 
   // Converters for each SemIR.
-  llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> tree_and_subtrees_getters_;
+  const Parse::GetTreeAndSubtreesStore* tree_and_subtrees_getters_;
 
   // The current SemIR being processed.
   const File* sem_ir_;

+ 5 - 5
toolchain/sem_ir/formatter.cpp

@@ -34,10 +34,10 @@
 
 namespace Carbon::SemIR {
 
-Formatter::Formatter(const File* sem_ir,
-                     Parse::GetTreeAndSubtreesFn get_tree_and_subtrees,
-                     llvm::ArrayRef<bool> include_ir_in_dumps,
-                     bool use_dump_sem_ir_ranges)
+Formatter::Formatter(
+    const File* sem_ir, Parse::GetTreeAndSubtreesFn get_tree_and_subtrees,
+    const FixedSizeValueStore<SemIR::CheckIRId, bool>* include_ir_in_dumps,
+    bool use_dump_sem_ir_ranges)
     : sem_ir_(sem_ir),
       inst_namer_(sem_ir_),
       get_tree_and_subtrees_(get_tree_and_subtrees),
@@ -168,7 +168,7 @@ auto Formatter::IncludeChunkInOutput(size_t chunk) -> void {
 
 auto Formatter::ShouldIncludeInstByIR(InstId inst_id) -> bool {
   const auto* import_ir = GetCanonicalFileAndInstId(sem_ir_, inst_id).first;
-  return include_ir_in_dumps_[import_ir->check_ir_id().index];
+  return include_ir_in_dumps_->Get(import_ir->check_ir_id());
 }
 
 // Returns true for a `DefinitionStart` node.

+ 6 - 5
toolchain/sem_ir/formatter.h

@@ -19,10 +19,11 @@ namespace Carbon::SemIR {
 // Formatter for printing textual Semantics IR.
 class Formatter {
  public:
-  explicit Formatter(const File* sem_ir,
-                     Parse::GetTreeAndSubtreesFn get_tree_and_subtrees,
-                     llvm::ArrayRef<bool> include_ir_in_dumps,
-                     bool use_dump_sem_ir_ranges);
+  // sem_ir and include_ir_in_dumps must be non-null.
+  explicit Formatter(
+      const File* sem_ir, Parse::GetTreeAndSubtreesFn get_tree_and_subtrees,
+      const FixedSizeValueStore<SemIR::CheckIRId, bool>* include_ir_in_dumps,
+      bool use_dump_sem_ir_ranges);
 
   // Prints the SemIR into an internal buffer. Must only be called once.
   //
@@ -333,7 +334,7 @@ class Formatter {
   Parse::GetTreeAndSubtreesFn get_tree_and_subtrees_;
 
   // For each CheckIRId, whether entities from it should be formatted.
-  llvm::ArrayRef<bool> include_ir_in_dumps_;
+  const FixedSizeValueStore<SemIR::CheckIRId, bool>* include_ir_in_dumps_;
 
   // Whether to use ranges when dumping, or to dump the full SemIR.
   bool use_dump_sem_ir_ranges_;