Jelajahi Sumber

Refactor resolving a location into a SemIR library (#4876)

At present, lower depends on `Check::SemIRDiagnosticConverter` for debug
info. That was to support a quick implementation of debug info, but
isn't great because it both creates an unusual dependency on check's
implementation and relies on diagnostic structures for debug info.

This cleans that up by splitting relevant logic out to a library in
sem_ir, and having lowering use sem_ir's library instead of check's.
Additionally, this includes a small refactoring of `Parse::TreeAndSubtrees`
to allow getting locations in lowering without going through a
`DiagnosticLoc`. I'm adding `Parse::GetTreeAndSubtreesFn` because the
underlying signature is too complex to repeat in so many spots.

I chose to have `ResolveNodeId` return a `SmallVector` because the result
seemed likely to be fairly compact, but it could instead take an optional
callback to handle resolved node IDs, possibly just returning the last
entry. This could be switched if preferred.

Note this change shouldn't affect behavior; it's just moving code
around.

---------

Co-authored-by: Chandler Carruth <chandlerc@gmail.com>
Jon Ross-Perkins 1 tahun lalu
induk
melakukan
7eee9a3489

+ 2 - 0
toolchain/check/BUILD

@@ -272,8 +272,10 @@ cc_library(
         "//toolchain/diagnostics:diagnostic_emitter",
         "//toolchain/lex:token_index",
         "//toolchain/parse:tree",
+        "//toolchain/sem_ir:absolute_node_id",
         "//toolchain/sem_ir:file",
         "//toolchain/sem_ir:stringify_type",
+        "//toolchain/sem_ir:typed_insts",
         "@llvm-project//llvm:Support",
     ],
 )

+ 1 - 2
toolchain/check/check.h

@@ -23,8 +23,7 @@ struct Unit {
   Timings* timings;
 
   // Returns a lazily constructed TreeAndSubtrees.
-  llvm::function_ref<const Parse::TreeAndSubtrees&()>
-      get_parse_tree_and_subtrees;
+  Parse::GetTreeAndSubtreesFn get_parse_tree_and_subtrees;
 
   // The unit's SemIR, provided as empty and filled in by CheckParseTrees.
   SemIR::File* sem_ir;

+ 2 - 4
toolchain/check/check_unit.h

@@ -48,8 +48,7 @@ class UnitAndImportsDiagnosticConverter
     : public DiagnosticConverter<Parse::NodeId> {
  public:
   explicit UnitAndImportsDiagnosticConverter(
-      llvm::function_ref<const Parse::TreeAndSubtrees&()>
-          get_parse_tree_and_subtrees)
+      Parse::GetTreeAndSubtreesFn get_parse_tree_and_subtrees)
       : get_parse_tree_and_subtrees_(get_parse_tree_and_subtrees) {}
 
   auto ConvertLoc(Parse::NodeId node_id, ContextFnT /*context_fn*/) const
@@ -59,8 +58,7 @@ class UnitAndImportsDiagnosticConverter
   }
 
  private:
-  llvm::function_ref<const Parse::TreeAndSubtrees&()>
-      get_parse_tree_and_subtrees_;
+  Parse::GetTreeAndSubtreesFn get_parse_tree_and_subtrees_;
 };
 
 // Contains information accumulated while checking a `Unit` (primarily import

+ 1 - 2
toolchain/check/context.cpp

@@ -39,8 +39,7 @@
 namespace Carbon::Check {
 
 Context::Context(DiagnosticEmitter* emitter,
-                 llvm::function_ref<const Parse::TreeAndSubtrees&()>
-                     get_parse_tree_and_subtrees,
+                 Parse::GetTreeAndSubtreesFn get_parse_tree_and_subtrees,
                  SemIR::File* sem_ir, int imported_ir_count, int total_ir_count,
                  llvm::raw_ostream* vlog_stream)
     : emitter_(emitter),

+ 2 - 4
toolchain/check/context.h

@@ -73,8 +73,7 @@ class Context {
 
   // Stores references for work.
   explicit Context(DiagnosticEmitter* emitter,
-                   llvm::function_ref<const Parse::TreeAndSubtrees&()>
-                       get_parse_tree_and_subtrees,
+                   Parse::GetTreeAndSubtreesFn get_parse_tree_and_subtrees,
                    SemIR::File* sem_ir, int imported_ir_count,
                    int total_ir_count, llvm::raw_ostream* vlog_stream);
 
@@ -748,8 +747,7 @@ class Context {
   DiagnosticEmitter* emitter_;
 
   // Returns a lazily constructed TreeAndSubtrees.
-  llvm::function_ref<const Parse::TreeAndSubtrees&()>
-      get_parse_tree_and_subtrees_;
+  Parse::GetTreeAndSubtreesFn get_parse_tree_and_subtrees_;
 
   // The SemIR::File being added to.
   SemIR::File* sem_ir_;

+ 25 - 109
toolchain/check/sem_ir_diagnostic_converter.cpp

@@ -5,6 +5,7 @@
 #include "toolchain/check/sem_ir_diagnostic_converter.h"
 
 #include "common/raw_string_ostream.h"
+#include "toolchain/sem_ir/absolute_node_id.h"
 #include "toolchain/sem_ir/stringify_type.h"
 
 namespace Carbon::Check {
@@ -35,109 +36,34 @@ auto SemIRDiagnosticConverter::ConvertLoc(SemIRLoc loc,
 auto SemIRDiagnosticConverter::ConvertLocImpl(SemIRLoc loc,
                                               ContextFnT context_fn) const
     -> ConvertedDiagnosticLoc {
-  // Cursors for the current IR and instruction in that IR.
-  const auto* cursor_ir = sem_ir_;
-  auto cursor_inst_id = SemIR::InstId::None;
-
-  // Notes an import on the diagnostic and updates cursors to point at the
-  // imported IR.
-  auto follow_import_ref = [&](SemIR::ImportIRInstId import_ir_inst_id) {
-    auto import_ir_inst = cursor_ir->import_ir_insts().Get(import_ir_inst_id);
-    const auto& import_ir = cursor_ir->import_irs().Get(import_ir_inst.ir_id);
-    CARBON_CHECK(import_ir.decl_id.has_value(),
-                 "If we get `None` locations here, we may need to more "
-                 "thoroughly track ImportDecls.");
-
-    ConvertedDiagnosticLoc in_import_loc;
-    auto import_loc_id = cursor_ir->insts().GetLocId(import_ir.decl_id);
-    if (import_loc_id.is_node_id()) {
-      // For imports in the current file, the location is simple.
-      in_import_loc = ConvertLocInFile(cursor_ir, import_loc_id.node_id(),
-                                       loc.token_only_, context_fn);
-    } else if (import_loc_id.is_import_ir_inst_id()) {
-      // For implicit imports, we need to unravel the location a little
-      // further.
-      auto implicit_import_ir_inst =
-          cursor_ir->import_ir_insts().Get(import_loc_id.import_ir_inst_id());
-      const auto& implicit_ir =
-          cursor_ir->import_irs().Get(implicit_import_ir_inst.ir_id);
-      auto implicit_loc_id =
-          implicit_ir.sem_ir->insts().GetLocId(implicit_import_ir_inst.inst_id);
-      CARBON_CHECK(implicit_loc_id.is_node_id(),
-                   "Should only be one layer of implicit imports");
-      in_import_loc =
-          ConvertLocInFile(implicit_ir.sem_ir, implicit_loc_id.node_id(),
-                           loc.token_only_, context_fn);
-    }
-
-    // TODO: Add an "In implicit import of prelude." note for the case where we
-    // don't have a location.
-    if (import_loc_id.has_value()) {
-      // TODO: Include the name of the imported library in the diagnostic.
-      CARBON_DIAGNOSTIC(InImport, LocationInfo, "in import");
-      context_fn(in_import_loc.loc, InImport);
-    }
-
-    cursor_ir = import_ir.sem_ir;
-    cursor_inst_id = import_ir_inst.inst_id;
-  };
-
-  // If the location is is an import, follows it and returns nullopt.
-  // Otherwise, it's a parse node, so return the final location.
-  auto handle_loc =
-      [&](SemIR::LocId loc_id) -> std::optional<ConvertedDiagnosticLoc> {
-    if (loc_id.is_import_ir_inst_id()) {
-      follow_import_ref(loc_id.import_ir_inst_id());
-      return std::nullopt;
-    } else {
-      // Parse nodes always refer to the current IR.
-      return ConvertLocInFile(cursor_ir, loc_id.node_id(), loc.token_only_,
-                              context_fn);
-    }
-  };
-
-  // Handle the base location.
-  if (loc.is_inst_id_) {
-    cursor_inst_id = loc.inst_id_;
-  } else {
-    if (auto diag_loc = handle_loc(loc.loc_id_)) {
-      return *diag_loc;
+  llvm::SmallVector<SemIR::AbsoluteNodeId> absolute_node_ids =
+      loc.is_inst_id_ ? SemIR::GetAbsoluteNodeId(sem_ir_, loc.inst_id_)
+                      : SemIR::GetAbsoluteNodeId(sem_ir_, loc.loc_id_);
+
+  auto final_node_id = absolute_node_ids.pop_back_val();
+  for (const auto& absolute_node_id : absolute_node_ids) {
+    if (!absolute_node_id.node_id.has_value()) {
+      // TODO: Add an "In implicit import of prelude." note for the case where
+      // we don't have a location.
+      continue;
     }
-    CARBON_CHECK(cursor_inst_id.has_value(), "Should have been set");
+    // TODO: Include the name of the imported library in the diagnostic.
+    auto diag_loc =
+        ConvertLocInFile(absolute_node_id, loc.token_only_, context_fn);
+    CARBON_DIAGNOSTIC(InImport, LocationInfo, "in import");
+    context_fn(diag_loc.loc, InImport);
   }
 
-  while (true) {
-    if (cursor_inst_id.has_value()) {
-      auto cursor_inst = cursor_ir->insts().Get(cursor_inst_id);
-      if (auto bind_ref = cursor_inst.TryAs<SemIR::ExportDecl>();
-          bind_ref && bind_ref->value_id.has_value()) {
-        cursor_inst_id = bind_ref->value_id;
-        continue;
-      }
-
-      // If the parse node has a value, use it for the location.
-      if (auto loc_id = cursor_ir->insts().GetLocId(cursor_inst_id);
-          loc_id.has_value()) {
-        if (auto diag_loc = handle_loc(loc_id)) {
-          return *diag_loc;
-        }
-        continue;
-      }
-
-      // If a namespace has an instruction for an import, switch to looking at
-      // it.
-      if (auto ns = cursor_inst.TryAs<SemIR::Namespace>()) {
-        if (ns->import_id.has_value()) {
-          cursor_inst_id = ns->import_id;
-          continue;
-        }
-      }
-    }
+  return ConvertLocInFile(final_node_id, loc.token_only_, context_fn);
+}
 
-    // `None` parse node but not an import; just nothing to point at.
-    return ConvertLocInFile(cursor_ir, Parse::NodeId::None, loc.token_only_,
-                            context_fn);
-  }
+auto SemIRDiagnosticConverter::ConvertLocInFile(
+    SemIR::AbsoluteNodeId absolute_node_id, bool token_only,
+    ContextFnT /*context_fn*/) const -> ConvertedDiagnosticLoc {
+  const auto& tree_and_subtrees =
+      imported_trees_and_subtrees_[absolute_node_id.check_ir_id.index]();
+  return tree_and_subtrees.NodeToDiagnosticLoc(absolute_node_id.node_id,
+                                               token_only);
 }
 
 auto SemIRDiagnosticConverter::ConvertArg(llvm::Any arg) const -> llvm::Any {
@@ -195,14 +121,4 @@ auto SemIRDiagnosticConverter::ConvertArg(llvm::Any arg) const -> llvm::Any {
   return DiagnosticConverter<SemIRLoc>::ConvertArg(arg);
 }
 
-auto SemIRDiagnosticConverter::ConvertLocInFile(const SemIR::File* sem_ir,
-                                                Parse::NodeId node_id,
-                                                bool token_only,
-                                                ContextFnT /*context_fn*/) const
-    -> ConvertedDiagnosticLoc {
-  const auto& tree_and_subtrees =
-      imported_trees_and_subtrees_[sem_ir->check_ir_id().index]();
-  return tree_and_subtrees.NodeToDiagnosticLoc(node_id, token_only);
-}
-
 }  // namespace Carbon::Check

+ 6 - 7
toolchain/check/sem_ir_diagnostic_converter.h

@@ -10,17 +10,17 @@
 #include "toolchain/diagnostics/diagnostic_emitter.h"
 #include "toolchain/lex/token_index.h"
 #include "toolchain/parse/tree_and_subtrees.h"
+#include "toolchain/sem_ir/absolute_node_id.h"
 #include "toolchain/sem_ir/file.h"
+#include "toolchain/sem_ir/ids.h"
 
 namespace Carbon::Check {
 
 // Handles the transformation of a SemIRLoc to a DiagnosticLoc.
 class SemIRDiagnosticConverter : public DiagnosticConverter<SemIRLoc> {
  public:
-  using TreeFnT = llvm::function_ref<const Parse::TreeAndSubtrees&()>;
-
   explicit SemIRDiagnosticConverter(
-      llvm::ArrayRef<TreeFnT> imported_trees_and_subtrees,
+      llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> imported_trees_and_subtrees,
       const SemIR::File* sem_ir)
       : imported_trees_and_subtrees_(imported_trees_and_subtrees),
         sem_ir_(sem_ir) {}
@@ -50,12 +50,11 @@ class SemIRDiagnosticConverter : public DiagnosticConverter<SemIRLoc> {
 
   // Converts a node_id corresponding to a specific sem_ir to a diagnostic
   // location.
-  auto ConvertLocInFile(const SemIR::File* sem_ir, Parse::NodeId node_id,
-                        bool token_only, ContextFnT context_fn) const
-      -> ConvertedDiagnosticLoc;
+  auto ConvertLocInFile(SemIR::AbsoluteNodeId absolute_node_id, bool token_only,
+                        ContextFnT context_fn) const -> ConvertedDiagnosticLoc;
 
   // Converters for each SemIR.
-  llvm::ArrayRef<TreeFnT> imported_trees_and_subtrees_;
+  llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> imported_trees_and_subtrees_;
 
   // The current SemIR being processed.
   const SemIR::File* sem_ir_;

+ 21 - 15
toolchain/driver/compile_subcommand.cpp

@@ -335,19 +335,20 @@ class CompilationUnit {
 
   // Prepares per-IR lazy fetch functions which may come up in cross-IR
   // diagnostics.
-  auto PreCheck() -> llvm::function_ref<const Parse::TreeAndSubtrees&()>;
+  auto PreCheck() -> Parse::GetTreeAndSubtreesFn;
 
   // Returns information needed to check this unit.
   auto GetCheckUnit(
       SemIR::CheckIRId check_ir_id,
-      llvm::ArrayRef<llvm::function_ref<const Parse::TreeAndSubtrees&()>>
-          all_trees_and_subtrees) -> Check::Unit;
+      llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> all_trees_and_subtrees)
+      -> Check::Unit;
 
   // Runs post-check logic. Returns true if checking succeeded for the IR.
   auto PostCheck() -> void;
 
   // Lower SemIR to LLVM IR.
-  auto RunLower() -> void;
+  auto RunLower(std::optional<llvm::ArrayRef<Parse::GetTreeAndSubtreesFn>>
+                    all_trees_and_subtrees_for_debug_info) -> void;
 
   auto RunCodeGen() -> void;
 
@@ -499,8 +500,7 @@ auto CompilationUnit::RunParse() -> void {
   }
 }
 
-auto CompilationUnit::PreCheck()
-    -> llvm::function_ref<const Parse::TreeAndSubtrees&()> {
+auto CompilationUnit::PreCheck() -> Parse::GetTreeAndSubtreesFn {
   CARBON_CHECK(parse_tree_, "Must call RunParse first");
   CARBON_CHECK(!get_parse_tree_and_subtrees_, "Called PreCheck twice");
 
@@ -512,8 +512,8 @@ auto CompilationUnit::PreCheck()
 
 auto CompilationUnit::GetCheckUnit(
     SemIR::CheckIRId check_ir_id,
-    llvm::ArrayRef<llvm::function_ref<const Parse::TreeAndSubtrees&()>>
-        all_trees_and_subtrees) -> Check::Unit {
+    llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> all_trees_and_subtrees)
+    -> Check::Unit {
   CARBON_CHECK(get_parse_tree_and_subtrees_, "Must call PreCheck first");
   CARBON_CHECK(!sem_ir_converter_, "Called GetCheckUnit twice");
 
@@ -587,15 +587,17 @@ auto CompilationUnit::PostCheck() -> void {
   }
 }
 
-auto CompilationUnit::RunLower() -> void {
+auto CompilationUnit::RunLower(
+    std::optional<llvm::ArrayRef<Parse::GetTreeAndSubtreesFn>>
+        all_trees_and_subtrees_for_debug_info) -> void {
   LogCall("Lower::LowerToLLVM", "lower", [&] {
     llvm_context_ = std::make_unique<llvm::LLVMContext>();
     // TODO: Consider disabling instruction naming by default if we're not
     // producing textual LLVM IR.
     SemIR::InstNamer inst_namer(&*sem_ir_);
-    module_ = Lower::LowerToLLVM(*llvm_context_, options_.include_debug_info,
-                                 *sem_ir_converter_, input_filename_, *sem_ir_,
-                                 &inst_namer, vlog_stream_);
+    module_ = Lower::LowerToLLVM(
+        *llvm_context_, all_trees_and_subtrees_for_debug_info, input_filename_,
+        *sem_ir_, &inst_namer, vlog_stream_);
   });
   if (vlog_stream_) {
     CARBON_VLOG("*** llvm::Module ***\n");
@@ -842,8 +844,7 @@ auto CompileSubcommand::Run(DriverEnv& driver_env) -> DriverResult {
   }
 
   // Pre-check assigns IR IDs and constructs node converters.
-  llvm::SmallVector<llvm::function_ref<const Parse::TreeAndSubtrees&()>>
-      all_trees_and_subtrees;
+  llvm::SmallVector<Parse::GetTreeAndSubtreesFn> all_trees_and_subtrees;
   // This size may not match due to units that are missing source, but that's an
   // error case and not worth extra work.
   all_trees_and_subtrees.reserve(units.size());
@@ -887,8 +888,13 @@ auto CompileSubcommand::Run(DriverEnv& driver_env) -> DriverResult {
   }
 
   // Lower.
+  std::optional<llvm::ArrayRef<Parse::GetTreeAndSubtreesFn>>
+      all_trees_and_subtrees_for_debug_info;
+  if (options_.include_debug_info) {
+    all_trees_and_subtrees_for_debug_info = all_trees_and_subtrees;
+  }
   for (const auto& unit : units) {
-    unit->RunLower();
+    unit->RunLower(all_trees_and_subtrees_for_debug_info);
   }
   if (options_.phase == CompileOptions::Phase::Lower) {
     return make_result();

+ 2 - 0
toolchain/lower/BUILD

@@ -18,6 +18,7 @@ cc_library(
     deps = [
         ":context",
         "//toolchain/check:sem_ir_diagnostic_converter",
+        "//toolchain/parse:tree",
         "//toolchain/sem_ir:file",
         "//toolchain/sem_ir:inst_namer",
         "@llvm-project//llvm:Core",
@@ -50,6 +51,7 @@ cc_library(
         "//common:vlog",
         "//toolchain/base:kind_switch",
         "//toolchain/check:sem_ir_diagnostic_converter",
+        "//toolchain/sem_ir:absolute_node_id",
         "//toolchain/sem_ir:entry_point",
         "//toolchain/sem_ir:file",
         "//toolchain/sem_ir:inst",

+ 25 - 15
toolchain/lower/file_context.cpp

@@ -12,6 +12,7 @@
 #include "toolchain/lower/constant.h"
 #include "toolchain/lower/function_context.h"
 #include "toolchain/lower/mangler.h"
+#include "toolchain/sem_ir/absolute_node_id.h"
 #include "toolchain/sem_ir/entry_point.h"
 #include "toolchain/sem_ir/file.h"
 #include "toolchain/sem_ir/function.h"
@@ -22,20 +23,21 @@
 
 namespace Carbon::Lower {
 
-FileContext::FileContext(llvm::LLVMContext& llvm_context,
-                         bool include_debug_info,
-                         const Check::SemIRDiagnosticConverter& converter,
-                         llvm::StringRef module_name, const SemIR::File& sem_ir,
-                         const SemIR::InstNamer* inst_namer,
-                         llvm::raw_ostream* vlog_stream)
+FileContext::FileContext(
+    llvm::LLVMContext& llvm_context,
+    std::optional<llvm::ArrayRef<Parse::GetTreeAndSubtreesFn>>
+        all_trees_and_subtrees_for_debug_info,
+    llvm::StringRef module_name, const SemIR::File& sem_ir,
+    const SemIR::InstNamer* inst_namer, llvm::raw_ostream* vlog_stream)
     : llvm_context_(&llvm_context),
       llvm_module_(std::make_unique<llvm::Module>(module_name, llvm_context)),
       di_builder_(*llvm_module_),
       di_compile_unit_(
-          include_debug_info
+          all_trees_and_subtrees_for_debug_info
               ? BuildDICompileUnit(module_name, *llvm_module_, di_builder_)
               : nullptr),
-      converter_(converter),
+      all_trees_and_subtrees_for_debug_info_(
+          all_trees_and_subtrees_for_debug_info),
       sem_ir_(&sem_ir),
       inst_namer_(inst_namer),
       vlog_stream_(vlog_stream) {
@@ -617,13 +619,21 @@ auto FileContext::BuildGlobalVariableDecl(SemIR::VarStorage var_storage)
 }
 
 auto FileContext::GetLocForDI(SemIR::InstId inst_id) -> LocForDI {
-  auto converted = converter_.ConvertLoc(
-      inst_id, [&](DiagnosticLoc /*context_loc*/,
-                   const DiagnosticBase<>& /*context_diagnostic_base*/) {});
-  const auto& loc = converted.loc;
-  return {.filename = loc.filename,
-          .line_number = loc.line_number == -1 ? 0 : loc.line_number,
-          .column_number = loc.column_number == -1 ? 0 : loc.column_number};
+  SemIR::AbsoluteNodeId resolved = GetAbsoluteNodeId(sem_ir_, inst_id).back();
+  const auto& tree_and_subtrees =
+      (*all_trees_and_subtrees_for_debug_info_)[resolved.check_ir_id.index]();
+  const auto& tokens = tree_and_subtrees.tree().tokens();
+
+  if (resolved.node_id.has_value()) {
+    auto token = tree_and_subtrees.GetSubtreeTokenRange(resolved.node_id).begin;
+    return {.filename = tokens.source().filename(),
+            .line_number = tokens.GetLineNumber(token),
+            .column_number = tokens.GetColumnNumber(token)};
+  } else {
+    return {.filename = tokens.source().filename(),
+            .line_number = 0,
+            .column_number = 0};
+  }
 }
 
 }  // namespace Carbon::Lower

+ 9 - 7
toolchain/lower/file_context.h

@@ -27,11 +27,12 @@ class FileContext {
     int32_t column_number;
   };
 
-  explicit FileContext(llvm::LLVMContext& llvm_context, bool include_debug_info,
-                       const Check::SemIRDiagnosticConverter& converter,
-                       llvm::StringRef module_name, const SemIR::File& sem_ir,
-                       const SemIR::InstNamer* inst_namer,
-                       llvm::raw_ostream* vlog_stream);
+  explicit FileContext(
+      llvm::LLVMContext& llvm_context,
+      std::optional<llvm::ArrayRef<Parse::GetTreeAndSubtreesFn>>
+          all_trees_and_subtrees_for_debug_info,
+      llvm::StringRef module_name, const SemIR::File& sem_ir,
+      const SemIR::InstNamer* inst_namer, llvm::raw_ostream* vlog_stream);
 
   // Lowers the SemIR::File to LLVM IR. Should only be called once, and handles
   // the main execution loop.
@@ -128,8 +129,9 @@ class FileContext {
   // The DICompileUnit, if any - null implies debug info is not being emitted.
   llvm::DICompileUnit* di_compile_unit_;
 
-  // The source location converter.
-  const Check::SemIRDiagnosticConverter& converter_;
+  // The trees are only provided when debug info should be emitted.
+  std::optional<llvm::ArrayRef<Parse::GetTreeAndSubtreesFn>>
+      all_trees_and_subtrees_for_debug_info_;
 
   // The input SemIR.
   const SemIR::File* const sem_ir_;

+ 5 - 4
toolchain/lower/lower.cpp

@@ -8,14 +8,15 @@
 
 namespace Carbon::Lower {
 
-auto LowerToLLVM(llvm::LLVMContext& llvm_context, bool include_debug_info,
-                 const Check::SemIRDiagnosticConverter& converter,
+auto LowerToLLVM(llvm::LLVMContext& llvm_context,
+                 std::optional<llvm::ArrayRef<Parse::GetTreeAndSubtreesFn>>
+                     all_trees_and_subtrees_for_debug_info,
                  llvm::StringRef module_name, const SemIR::File& sem_ir,
                  const SemIR::InstNamer* inst_namer,
                  llvm::raw_ostream* vlog_stream)
     -> std::unique_ptr<llvm::Module> {
-  FileContext context(llvm_context, include_debug_info, converter, module_name,
-                      sem_ir, inst_namer, vlog_stream);
+  FileContext context(llvm_context, all_trees_and_subtrees_for_debug_info,
+                      module_name, sem_ir, inst_namer, vlog_stream);
   return context.Run();
 }
 

+ 5 - 3
toolchain/lower/lower.h

@@ -5,17 +5,19 @@
 #ifndef CARBON_TOOLCHAIN_LOWER_LOWER_H_
 #define CARBON_TOOLCHAIN_LOWER_LOWER_H_
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
-#include "toolchain/check/sem_ir_diagnostic_converter.h"
+#include "toolchain/parse/tree_and_subtrees.h"
 #include "toolchain/sem_ir/file.h"
 #include "toolchain/sem_ir/inst_namer.h"
 
 namespace Carbon::Lower {
 
 // Lowers SemIR to LLVM IR.
-auto LowerToLLVM(llvm::LLVMContext& llvm_context, bool include_debug_info,
-                 const Check::SemIRDiagnosticConverter& converter,
+auto LowerToLLVM(llvm::LLVMContext& llvm_context,
+                 std::optional<llvm::ArrayRef<Parse::GetTreeAndSubtreesFn>>
+                     all_trees_and_subtrees_for_debug_info,
                  llvm::StringRef module_name, const SemIR::File& sem_ir,
                  const SemIR::InstNamer* inst_namer,
                  llvm::raw_ostream* vlog_stream)

+ 1 - 0
toolchain/parse/BUILD

@@ -143,6 +143,7 @@ cc_library(
         "//common:ostream",
         "//common:struct_reflection",
         "//toolchain/base:value_store",
+        "//toolchain/lex:token_index",
         "//toolchain/lex:tokenized_buffer",
         "@llvm-project//llvm:Support",
     ],

+ 36 - 28
toolchain/parse/tree_and_subtrees.cpp

@@ -4,6 +4,8 @@
 
 #include "toolchain/parse/tree_and_subtrees.h"
 
+#include "toolchain/lex/token_index.h"
+
 namespace Carbon::Parse {
 
 TreeAndSubtrees::TreeAndSubtrees(const Lex::TokenizedBuffer& tokens,
@@ -110,10 +112,10 @@ auto TreeAndSubtrees::Verify() const -> ErrorOr<Success> {
 
 auto TreeAndSubtrees::postorder(NodeId n) const
     -> llvm::iterator_range<Tree::PostorderIterator> {
-  // The postorder ends after this node, the root, and begins at the start of
+  // The postorder ends after this node, the root, and starts at the start of
   // its subtree.
-  int start_index = n.index - subtree_sizes_[n.index] + 1;
-  return Tree::PostorderIterator::MakeRange(NodeId(start_index), n);
+  int begin_index = n.index - subtree_sizes_[n.index] + 1;
+  return Tree::PostorderIterator::MakeRange(NodeId(begin_index), n);
 }
 
 auto TreeAndSubtrees::children(NodeId n) const
@@ -239,6 +241,24 @@ auto TreeAndSubtrees::CollectMemUsage(MemUsage& mem_usage,
                     subtree_sizes_);
 }
 
+auto TreeAndSubtrees::GetSubtreeTokenRange(NodeId node_id) const -> TokenRange {
+  TokenRange range = {.begin = tree_->node_token(node_id),
+                      .end = Lex::TokenIndex::None};
+  range.end = range.begin;
+  for (NodeId desc : postorder(node_id)) {
+    Lex::TokenIndex desc_token = tree_->node_token(desc);
+    if (!desc_token.has_value()) {
+      continue;
+    }
+    if (desc_token < range.begin) {
+      range.begin = desc_token;
+    } else if (desc_token > range.end) {
+      range.end = desc_token;
+    }
+  }
+  return range;
+}
+
 auto TreeAndSubtrees::NodeToDiagnosticLoc(NodeId node_id, bool token_only) const
     -> ConvertedDiagnosticLoc {
   // Support the invalid token as a way to emit only the filename, when there
@@ -253,37 +273,25 @@ auto TreeAndSubtrees::NodeToDiagnosticLoc(NodeId node_id, bool token_only) const
 
   // Construct a location that encompasses all tokens that descend from this
   // node (including the root).
-  Lex::TokenIndex start_token = tree_->node_token(node_id);
-  Lex::TokenIndex end_token = start_token;
-  for (NodeId desc : postorder(node_id)) {
-    Lex::TokenIndex desc_token = tree_->node_token(desc);
-    if (!desc_token.has_value()) {
-      continue;
-    }
-    if (desc_token < start_token) {
-      start_token = desc_token;
-    } else if (desc_token > end_token) {
-      end_token = desc_token;
-    }
-  }
-  auto start_loc = tree_->tokens().TokenToDiagnosticLoc(start_token);
-  if (start_token == end_token) {
-    return start_loc;
+  TokenRange token_range = GetSubtreeTokenRange(node_id);
+  auto begin_loc = tree_->tokens().TokenToDiagnosticLoc(token_range.begin);
+  if (token_range.begin == token_range.end) {
+    return begin_loc;
   }
-  auto end_loc = tree_->tokens().TokenToDiagnosticLoc(end_token);
-  start_loc.last_byte_offset = end_loc.last_byte_offset;
+  auto end_loc = tree_->tokens().TokenToDiagnosticLoc(token_range.end);
+  begin_loc.last_byte_offset = end_loc.last_byte_offset;
   // For multiline locations we simply return the rest of the line for now
   // since true multiline locations are not yet supported.
-  if (start_loc.loc.line_number != end_loc.loc.line_number) {
-    start_loc.loc.length =
-        start_loc.loc.line.size() - start_loc.loc.column_number + 1;
+  if (begin_loc.loc.line_number != end_loc.loc.line_number) {
+    begin_loc.loc.length =
+        begin_loc.loc.line.size() - begin_loc.loc.column_number + 1;
   } else {
-    if (start_loc.loc.column_number != end_loc.loc.column_number) {
-      start_loc.loc.length = end_loc.loc.column_number + end_loc.loc.length -
-                             start_loc.loc.column_number;
+    if (begin_loc.loc.column_number != end_loc.loc.column_number) {
+      begin_loc.loc.length = end_loc.loc.column_number + end_loc.loc.length -
+                             begin_loc.loc.column_number;
     }
   }
-  return start_loc;
+  return begin_loc;
 }
 
 auto TreeAndSubtrees::SiblingIterator::Print(llvm::raw_ostream& output) const

+ 13 - 0
toolchain/parse/tree_and_subtrees.h

@@ -6,6 +6,7 @@
 #define CARBON_TOOLCHAIN_PARSE_TREE_AND_SUBTREES_H_
 
 #include "llvm/ADT/SmallVector.h"
+#include "toolchain/lex/token_index.h"
 #include "toolchain/parse/tree.h"
 
 namespace Carbon::Parse {
@@ -16,6 +17,12 @@ namespace Carbon::Parse {
 // This requires a complete tree.
 class TreeAndSubtrees {
  public:
+  // A range of tokens, returned by GetSubtreeTokenRange.
+  struct TokenRange {
+    Lex::TokenIndex begin;
+    Lex::TokenIndex end;
+  };
+
   class SiblingIterator;
 
   explicit TreeAndSubtrees(const Lex::TokenizedBuffer& tokens,
@@ -107,6 +114,9 @@ class TreeAndSubtrees {
   auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
       -> void;
 
+  // Returns the range of tokens in the node's subtree.
+  auto GetSubtreeTokenRange(NodeId node_id) const -> TokenRange;
+
   // Converts the node to a diagnostic location, covering either the full
   // subtree or only the token.
   auto NodeToDiagnosticLoc(NodeId node_id, bool token_only) const
@@ -183,6 +193,9 @@ class TreeAndSubtrees {
   llvm::SmallVector<int32_t> subtree_sizes_;
 };
 
+// A standard signature for a callback to support lazy construction.
+using GetTreeAndSubtreesFn = llvm::function_ref<const TreeAndSubtrees&()>;
+
 // A forward iterator across the siblings at a particular level in the parse
 // tree. It produces `Tree::NodeId` objects which are opaque handles and must
 // be used in conjunction with the `Tree` itself.

+ 12 - 0
toolchain/sem_ir/BUILD

@@ -194,6 +194,18 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "absolute_node_id",
+    srcs = ["absolute_node_id.cpp"],
+    hdrs = ["absolute_node_id.h"],
+    deps = [
+        ":file",
+        ":typed_insts",
+        "//toolchain/parse:tree",
+        "@llvm-project//llvm:Support",
+    ],
+)
+
 cc_library(
     name = "dump",
     srcs = ["dump.cpp"],

+ 129 - 0
toolchain/sem_ir/absolute_node_id.cpp

@@ -0,0 +1,129 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "toolchain/sem_ir/absolute_node_id.h"
+
+#include "toolchain/sem_ir/ids.h"
+
+namespace Carbon::SemIR {
+
+// Appends the import's location to `absolute_node_ids` (when it resolves to a
+// parse node) and updates the cursors to point at the imported IR.
+static auto FollowImportRef(
+    llvm::SmallVector<AbsoluteNodeId>& absolute_node_ids,
+    const File*& cursor_ir, InstId& cursor_inst_id,
+    ImportIRInstId import_ir_inst_id) -> void {
+  auto import_ir_inst = cursor_ir->import_ir_insts().Get(import_ir_inst_id);
+  const auto& import_ir = cursor_ir->import_irs().Get(import_ir_inst.ir_id);
+  CARBON_CHECK(import_ir.decl_id.has_value(),
+               "If we get `None` locations here, we may need to more "
+               "thoroughly track ImportDecls.");
+
+  auto import_loc_id = cursor_ir->insts().GetLocId(import_ir.decl_id);
+  if (import_loc_id.is_node_id()) {
+    // For imports in the current file, the location is simple.
+    absolute_node_ids.push_back({.check_ir_id = cursor_ir->check_ir_id(),
+                                 .node_id = import_loc_id.node_id()});
+  } else if (import_loc_id.is_import_ir_inst_id()) {
+    // For implicit imports, we need to unravel the location a little
+    // further.
+    auto implicit_import_ir_inst =
+        cursor_ir->import_ir_insts().Get(import_loc_id.import_ir_inst_id());
+    const auto& implicit_ir =
+        cursor_ir->import_irs().Get(implicit_import_ir_inst.ir_id);
+    auto implicit_loc_id =
+        implicit_ir.sem_ir->insts().GetLocId(implicit_import_ir_inst.inst_id);
+    CARBON_CHECK(implicit_loc_id.is_node_id(),
+                 "Should only be one layer of implicit imports");
+    absolute_node_ids.push_back(
+        {.check_ir_id = implicit_ir.sem_ir->check_ir_id(),
+         .node_id = implicit_loc_id.node_id()});
+  }
+
+  cursor_ir = import_ir.sem_ir;
+  cursor_inst_id = import_ir_inst.inst_id;
+}
+
+// Returns true if this is the final parse node location. If the location is an
+// import, follows it and returns false.
+static auto HandleLocId(llvm::SmallVector<AbsoluteNodeId>& absolute_node_ids,
+                        const File*& cursor_ir, InstId& cursor_inst_id,
+                        LocId loc_id) -> bool {
+  if (loc_id.is_import_ir_inst_id()) {
+    FollowImportRef(absolute_node_ids, cursor_ir, cursor_inst_id,
+                    loc_id.import_ir_inst_id());
+    return false;
+  } else {
+    // Parse nodes always refer to the current IR.
+    absolute_node_ids.push_back(
+        {.check_ir_id = cursor_ir->check_ir_id(), .node_id = loc_id.node_id()});
+    return true;
+  }
+}
+
+// Loops through imported instructions until the actual instruction is found.
+static auto GetAbsoluteNodeIdImpl(
+    llvm::SmallVector<AbsoluteNodeId>& absolute_node_ids, const File* cursor_ir,
+    InstId cursor_inst_id) -> void {
+  while (true) {
+    if (cursor_inst_id.has_value()) {
+      auto cursor_inst = cursor_ir->insts().Get(cursor_inst_id);
+      if (auto bind_ref = cursor_inst.TryAs<ExportDecl>();
+          bind_ref && bind_ref->value_id.has_value()) {
+        cursor_inst_id = bind_ref->value_id;
+        continue;
+      }
+
+      // If the parse node has a value, use it for the location.
+      if (auto loc_id = cursor_ir->insts().GetLocId(cursor_inst_id);
+          loc_id.has_value()) {
+        if (HandleLocId(absolute_node_ids, cursor_ir, cursor_inst_id, loc_id)) {
+          return;
+        }
+        continue;
+      }
+
+      // If a namespace has an instruction for an import, switch to looking at
+      // it.
+      if (auto ns = cursor_inst.TryAs<Namespace>()) {
+        if (ns->import_id.has_value()) {
+          cursor_inst_id = ns->import_id;
+          continue;
+        }
+      }
+    }
+
+    // `None` parse node but not an import; just nothing to point at.
+    absolute_node_ids.push_back({.check_ir_id = cursor_ir->check_ir_id(),
+                                 .node_id = Parse::NodeId::None});
+    return;
+  }
+}
+
+auto GetAbsoluteNodeId(const File* sem_ir, InstId inst_id)
+    -> llvm::SmallVector<AbsoluteNodeId> {
+  llvm::SmallVector<AbsoluteNodeId> absolute_node_ids;
+  GetAbsoluteNodeIdImpl(absolute_node_ids, sem_ir, inst_id);
+  return absolute_node_ids;
+}
+
+auto GetAbsoluteNodeId(const File* sem_ir, LocId loc_id)
+    -> llvm::SmallVector<AbsoluteNodeId> {
+  llvm::SmallVector<AbsoluteNodeId> absolute_node_ids;
+  if (!loc_id.has_value()) {
+    absolute_node_ids.push_back(
+        {.check_ir_id = sem_ir->check_ir_id(), .node_id = Parse::NodeId::None});
+    return absolute_node_ids;
+  }
+  const File* cursor_ir = sem_ir;
+  InstId cursor_inst_id = InstId::None;
+  if (HandleLocId(absolute_node_ids, cursor_ir, cursor_inst_id, loc_id)) {
+    return absolute_node_ids;
+  }
+  CARBON_CHECK(cursor_inst_id.has_value(), "Should be set by HandleLocId");
+  GetAbsoluteNodeIdImpl(absolute_node_ids, cursor_ir, cursor_inst_id);
+  return absolute_node_ids;
+}
+
+}  // namespace Carbon::SemIR

+ 33 - 0
toolchain/sem_ir/absolute_node_id.h

@@ -0,0 +1,33 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_TOOLCHAIN_SEM_IR_ABSOLUTE_NODE_ID_H_
+#define CARBON_TOOLCHAIN_SEM_IR_ABSOLUTE_NODE_ID_H_
+
+#include "toolchain/parse/tree_and_subtrees.h"
+#include "toolchain/sem_ir/file.h"
+#include "toolchain/sem_ir/ids.h"
+
+namespace Carbon::SemIR {
+
+// A specific node location in a file.
+struct AbsoluteNodeId {
+  CheckIRId check_ir_id;
+  Parse::NodeId node_id;
+};
+
+// Resolves the `InstId` to a series of `NodeId`s, which may be in different
+// files. The vector will have one entry if there were no imports, and multiple
+// entries when imports are traversed. The final entry is the actual
+// declaration.
+auto GetAbsoluteNodeId(const File* sem_ir, InstId inst_id)
+    -> llvm::SmallVector<AbsoluteNodeId>;
+
+// Similar to the above overload, but starting at a `LocId`.
+auto GetAbsoluteNodeId(const File* sem_ir, LocId loc_id)
+    -> llvm::SmallVector<AbsoluteNodeId>;
+
+}  // namespace Carbon::SemIR
+
+#endif  // CARBON_TOOLCHAIN_SEM_IR_ABSOLUTE_NODE_ID_H_