View source

Support for locations in transitively imported C++ code (#7093)

Instead of treating all C++ code as coming from a single synthetic
`CheckIRId`, track the `SemIR::File` associated with each C++ location.
This is necessary since each `SemIR::File` has a distinct `CppFile` and
therefore distinct `SourceLocation`s and `ClangSourceLocId`s.

Assisted-by: Gemini via Antigravity
Richard Smith 1 week ago
parent
commit
709776ad1c

+ 3 - 3
toolchain/check/BUILD

@@ -179,7 +179,7 @@ cc_library(
         "//toolchain/lex:tokenized_buffer",
         "//toolchain/parse:node_kind",
         "//toolchain/parse:tree",
-        "//toolchain/sem_ir:absolute_node_id",
+        "//toolchain/sem_ir:absolute_node_ref",
         "//toolchain/sem_ir:clang_decl",
         "//toolchain/sem_ir:cpp_file",
         "//toolchain/sem_ir:expr_info",
@@ -258,7 +258,7 @@ cc_library(
         "//toolchain/parse:node_category",
         "//toolchain/parse:node_kind",
         "//toolchain/parse:tree",
-        "//toolchain/sem_ir:absolute_node_id",
+        "//toolchain/sem_ir:absolute_node_ref",
         "//toolchain/sem_ir:entry_point",
         "//toolchain/sem_ir:expr_info",
         "//toolchain/sem_ir:file",
@@ -338,7 +338,7 @@ cc_library(
         "//toolchain/diagnostics:emitter",
         "//toolchain/lex:token_index",
         "//toolchain/parse:tree",
-        "//toolchain/sem_ir:absolute_node_id",
+        "//toolchain/sem_ir:absolute_node_ref",
         "//toolchain/sem_ir:diagnostic_loc_converter",
         "//toolchain/sem_ir:file",
         "//toolchain/sem_ir:stringify",

+ 1 - 1
toolchain/check/context.cpp

@@ -33,7 +33,7 @@ Context::Context(DiagnosticEmitterBase* emitter,
       deferred_definition_worklist_(vlog_stream),
       generic_region_stack_(vlog_stream),
       vtable_stack_("vtable_stack_", *sem_ir, vlog_stream),
-      check_ir_map_(CheckIRToImpportIRStore::MakeWithExplicitSize(
+      check_ir_map_(CheckIRToImportIRStore::MakeWithExplicitSize(
           total_ir_count_, SemIR::ImportIRId::None)),
       global_init_(this),
       region_stack_([this](SemIR::LocId loc_id, std::string label) {

+ 3 - 3
toolchain/check/context.h

@@ -159,9 +159,9 @@ class Context {
 
   auto exports() -> llvm::SmallVector<SemIR::InstId>& { return exports_; }
 
-  using CheckIRToImpportIRStore =
+  using CheckIRToImportIRStore =
       FixedSizeValueStore<SemIR::CheckIRId, SemIR::ImportIRId>;
-  auto check_ir_map() -> CheckIRToImpportIRStore& { return check_ir_map_; }
+  auto check_ir_map() -> CheckIRToImportIRStore& { return check_ir_map_; }
 
   auto import_ir_constant_values()
       -> llvm::SmallVector<SemIR::ConstantValueStore, 0>& {
@@ -457,7 +457,7 @@ class Context {
   llvm::SmallVector<SemIR::InstId> exports_;
 
   // Maps CheckIRId to ImportIRId.
-  CheckIRToImpportIRStore check_ir_map_;
+  CheckIRToImportIRStore check_ir_map_;
 
   // Per-import constant values. These refer to the main IR and mainly serve as
   // a lookup table for quick access.

+ 9 - 9
toolchain/check/cpp/location.cpp

@@ -4,7 +4,7 @@
 
 #include "toolchain/check/cpp/location.h"
 
-#include "toolchain/sem_ir/absolute_node_id.h"
+#include "toolchain/sem_ir/absolute_node_ref.h"
 #include "toolchain/sem_ir/ids.h"
 
 namespace Carbon::Check {
@@ -62,20 +62,20 @@ auto GetCppLocation(Context& context, SemIR::LocId loc_id)
 
   // Break down the `LocId` into an import path. If that ends in a C++ location,
   // we can just return that directly.
-  llvm::SmallVector<SemIR::AbsoluteNodeId> absolute_node_ids =
-      SemIR::GetAbsoluteNodeId(&context.sem_ir(), loc_id);
-  if (absolute_node_ids.back().check_ir_id() == SemIR::CheckIRId::Cpp) {
-    return context.sem_ir().clang_source_locs().Get(
-        absolute_node_ids.back().clang_source_loc_id());
+  llvm::SmallVector<SemIR::AbsoluteNodeRef> absolute_node_refs =
+      SemIR::GetAbsoluteNodeRef(&context.sem_ir(), loc_id);
+  const auto& final_node = absolute_node_refs.back();
+  if (final_node.is_cpp()) {
+    return final_node.file()->clang_source_locs().Get(
+        final_node.clang_source_loc_id());
   }
 
   // This is a location in Carbon code; get or create a corresponding file in
   // Clang and build a corresponding location.
-  auto absolute_node_id = absolute_node_ids.back();
-  auto [ir, start_loc] = GetFileInfo(context, absolute_node_id.check_ir_id());
+  auto [ir, start_loc] = GetFileInfo(context, final_node.check_ir_id());
   const auto& tree = ir->parse_tree();
   auto offset =
-      tree.tokens().GetByteOffset(tree.node_token(absolute_node_id.node_id()));
+      tree.tokens().GetByteOffset(tree.node_token(final_node.node_id()));
   return start_loc.getLocWithOffset(offset);
 }
 

+ 1 - 1
toolchain/check/diagnostic_emitter.cpp

@@ -10,7 +10,7 @@
 
 #include "common/raw_string_ostream.h"
 #include "toolchain/check/diagnostic_helpers.h"
-#include "toolchain/sem_ir/absolute_node_id.h"
+#include "toolchain/sem_ir/absolute_node_ref.h"
 #include "toolchain/sem_ir/diagnostic_loc_converter.h"
 #include "toolchain/sem_ir/ids.h"
 #include "toolchain/sem_ir/stringify.h"

+ 1 - 1
toolchain/check/handle_loop_statement.cpp

@@ -15,7 +15,7 @@
 #include "toolchain/check/pattern.h"
 #include "toolchain/check/pattern_match.h"
 #include "toolchain/check/type.h"
-#include "toolchain/sem_ir/absolute_node_id.h"
+#include "toolchain/sem_ir/absolute_node_ref.h"
 #include "toolchain/sem_ir/ids.h"
 
 namespace Carbon::Check {

+ 2 - 2
toolchain/check/import_ref.cpp

@@ -4579,8 +4579,8 @@ auto ImportRefResolver::FindResolvedConstId(SemIR::InstId inst_id)
     }
     auto ir_inst = cursor_ir->import_ir_insts().Get(import_ir_inst_id);
     if (ir_inst.ir_id() == SemIR::ImportIRId::Cpp) {
-      local_context().TODO(SemIR::LocId::None,
-                           "Unsupported: Importing C++ indirectly");
+      auto loc_id = SemIR::LocId(AddImportIRInst(*this, inst_id));
+      local_context().TODO(loc_id, "Unsupported: Importing C++ indirectly");
       SetResolvedConstId(inst_id, result.indirect_insts,
                          SemIR::ErrorInst::ConstantId);
       result.const_id = SemIR::ErrorInst::ConstantId;

+ 6 - 2
toolchain/check/testdata/interop/cpp/basics/import/import.carbon

@@ -62,11 +62,15 @@ import Cpp library "struct.h";
 alias MyStructAlias = Cpp.MyStruct;
 
 // --- fail_todo_import_struct_api.carbon
-// CHECK:STDERR: fail_todo_import_struct_api.carbon: error: semantics TODO: `Unsupported: Importing C++ indirectly` [SemanticsTodo]
-// CHECK:STDERR:
 
 library "[[@TEST_NAME]]";
 
+// CHECK:STDERR: fail_todo_import_struct_api.carbon:[[@LINE+6]]:1: in import [InImport]
+// CHECK:STDERR: struct_api.carbon:4:10: in file included here [InCppInclude]
+// CHECK:STDERR: ./struct.h:2:8: error: semantics TODO: `Unsupported: Importing C++ indirectly` [SemanticsTodo]
+// CHECK:STDERR: struct MyStruct { void Foo(); };
+// CHECK:STDERR:        ^
+// CHECK:STDERR:
 import library "struct_api";
 
 fn F() {

+ 1 - 1
toolchain/lower/BUILD

@@ -71,7 +71,7 @@ cc_library(
         "//toolchain/base:fixed_size_value_store",
         "//toolchain/base:kind_switch",
         "//toolchain/parse:tree",
-        "//toolchain/sem_ir:absolute_node_id",
+        "//toolchain/sem_ir:absolute_node_ref",
         "//toolchain/sem_ir:clang_decl",
         "//toolchain/sem_ir:diagnostic_loc_converter",
         "//toolchain/sem_ir:entry_point",

+ 14 - 1
toolchain/lower/context.cpp

@@ -94,7 +94,20 @@ auto Context::BuildDICompileUnit(llvm::StringRef module_name,
                                       /*RV=*/0);
 }
 
-auto Context::GetLocForDI(SemIR::AbsoluteNodeId abs_node_id) -> LocForDI {
+auto Context::GetLocForDI(SemIR::AbsoluteNodeRef abs_node_id) -> LocForDI {
+  if (abs_node_id.is_cpp()) {
+    const SemIR::File* file = abs_node_id.file();
+    // TODO: Consider asking our cpp_code_generator to map the location to a
+    // debug location, in order to use Clang's rules for (eg) macro handling.
+    auto loc = file->clang_source_locs().Get(abs_node_id.clang_source_loc_id());
+    auto presumed_loc = file->cpp_file()->source_manager().getPresumedLoc(loc);
+    return {
+        .filename = presumed_loc.getFilename(),
+        .line_number = static_cast<int32_t>(presumed_loc.getLine()),
+        .column_number = static_cast<int32_t>(presumed_loc.getColumn()),
+    };
+  }
+
   const auto& tree_and_subtrees =
       tree_and_subtrees_getters().Get(abs_node_id.check_ir_id())();
   const auto& tokens = tree_and_subtrees.tree().tokens();

+ 2 - 2
toolchain/lower/context.h

@@ -16,7 +16,7 @@
 #include "toolchain/base/fixed_size_value_store.h"
 #include "toolchain/lower/options.h"
 #include "toolchain/parse/tree_and_subtrees.h"
-#include "toolchain/sem_ir/absolute_node_id.h"
+#include "toolchain/sem_ir/absolute_node_ref.h"
 #include "toolchain/sem_ir/ids.h"
 #include "toolchain/sem_ir/inst_namer.h"
 
@@ -72,7 +72,7 @@ class Context {
   auto Finalize() && -> std::unique_ptr<llvm::Module>;
 
   // Returns location information for use with DebugInfo.
-  auto GetLocForDI(SemIR::AbsoluteNodeId abs_node_id) -> LocForDI;
+  auto GetLocForDI(SemIR::AbsoluteNodeRef abs_node_id) -> LocForDI;
 
   // Returns a lowered value to use for a value of type `type`.
   auto GetTypeAsValue() -> llvm::Constant* {

+ 3 - 16
toolchain/lower/file_context.cpp

@@ -24,7 +24,7 @@
 #include "toolchain/lower/function_context.h"
 #include "toolchain/lower/options.h"
 #include "toolchain/lower/specific_coalescer.h"
-#include "toolchain/sem_ir/absolute_node_id.h"
+#include "toolchain/sem_ir/absolute_node_ref.h"
 #include "toolchain/sem_ir/diagnostic_loc_converter.h"
 #include "toolchain/sem_ir/entry_point.h"
 #include "toolchain/sem_ir/expr_info.h"
@@ -1566,21 +1566,8 @@ auto FileContext::BuildNonCppGlobalVariableDecl(SemIR::VarStorage var_storage)
 }
 
 auto FileContext::GetLocForDI(SemIR::InstId inst_id) -> Context::LocForDI {
-  auto abs_node_id = GetAbsoluteNodeId(sem_ir_, SemIR::LocId(inst_id)).back();
-
-  if (abs_node_id.check_ir_id() == SemIR::CheckIRId::Cpp) {
-    // TODO: Consider asking our cpp_code_generator to map the location to a
-    // debug location, in order to use Clang's rules for (eg) macro handling.
-    auto loc =
-        sem_ir().clang_source_locs().Get(abs_node_id.clang_source_loc_id());
-    auto presumed_loc =
-        sem_ir().cpp_file()->source_manager().getPresumedLoc(loc);
-    return {.filename = presumed_loc.getFilename(),
-            .line_number = static_cast<int32_t>(presumed_loc.getLine()),
-            .column_number = static_cast<int32_t>(presumed_loc.getColumn())};
-  }
-
-  return context().GetLocForDI(abs_node_id);
+  auto abs_node_ref = GetAbsoluteNodeRef(sem_ir_, SemIR::LocId(inst_id)).back();
+  return context().GetLocForDI(abs_node_ref);
 }
 
 auto FileContext::BuildVtable(const SemIR::Vtable& vtable,

+ 4 - 4
toolchain/sem_ir/BUILD

@@ -253,9 +253,9 @@ cc_library(
 )
 
 cc_library(
-    name = "absolute_node_id",
-    srcs = ["absolute_node_id.cpp"],
-    hdrs = ["absolute_node_id.h"],
+    name = "absolute_node_ref",
+    srcs = ["absolute_node_ref.cpp"],
+    hdrs = ["absolute_node_ref.h"],
     deps = [
         ":file",
         ":typed_insts",
@@ -281,7 +281,7 @@ cc_library(
     srcs = ["diagnostic_loc_converter.cpp"],
     hdrs = ["diagnostic_loc_converter.h"],
     deps = [
-        ":absolute_node_id",
+        ":absolute_node_ref",
         ":file",
         ":typed_insts",
         "//toolchain/diagnostics:emitter",

+ 0 - 71
toolchain/sem_ir/absolute_node_id.h

@@ -1,71 +0,0 @@
-// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
-// Exceptions. See /LICENSE for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#ifndef CARBON_TOOLCHAIN_SEM_IR_ABSOLUTE_NODE_ID_H_
-#define CARBON_TOOLCHAIN_SEM_IR_ABSOLUTE_NODE_ID_H_
-
-#include "toolchain/parse/tree_and_subtrees.h"
-#include "toolchain/sem_ir/file.h"
-#include "toolchain/sem_ir/ids.h"
-
-namespace Carbon::SemIR {
-
-// A specific node location in a file. Can refer to a Clang source location
-// within imported C++ code.
-class AbsoluteNodeId {
- public:
-  // A specific node location in a file.
-  explicit AbsoluteNodeId(CheckIRId check_ir_id, Parse::NodeId node_id)
-      : check_ir_id_(check_ir_id), node_id_(node_id) {
-    CARBON_CHECK(check_ir_id != CheckIRId::Cpp);
-  }
-
-  // A Clang source location within imported C++ code.
-  explicit AbsoluteNodeId(ClangSourceLocId clang_source_loc_id)
-      : check_ir_id_(CheckIRId::Cpp),
-        clang_source_loc_id_(clang_source_loc_id) {}
-
-  // For a specific node location in a file, the ID of the IR.
-  // For Clang source location, this returns `Cpp`.
-  auto check_ir_id() const -> CheckIRId { return check_ir_id_; }
-
-  // The specific node location in a file. Must be called only if
-  // `check_ir_id()` doesn't return `Cpp`.
-  auto node_id() const -> Parse::NodeId {
-    CARBON_CHECK(check_ir_id() != CheckIRId::Cpp);
-    return node_id_;
-  }
-
-  // The Clang source location. Must be called only if `check_ir_id()` returns
-  // `Cpp`.
-  auto clang_source_loc_id() const -> ClangSourceLocId {
-    CARBON_CHECK(check_ir_id() == CheckIRId::Cpp);
-    return clang_source_loc_id_;
-  }
-
- private:
-  // See `check_ir_id()`.
-  CheckIRId check_ir_id_;
-
-  union {
-    // See `node_id()`.
-    Parse::NodeId node_id_;
-    // See `clang_source_loc_id()`.
-    ClangSourceLocId clang_source_loc_id_;
-  };
-};
-
-// Resolves the `LocId` to a series of `NodeId`s, which may be in different
-// files. The vector will have one entry if there were no imports, and multiple
-// entries when imports are traversed. The final entry is the actual
-// declaration.
-//
-// Note that the `LocId` here is typically not canonical, and it uses that fact
-// for non-canonical locations built from an `ExportDecl` instruction.
-auto GetAbsoluteNodeId(const File* sem_ir, LocId loc_id)
-    -> llvm::SmallVector<AbsoluteNodeId>;
-
-}  // namespace Carbon::SemIR
-
-#endif  // CARBON_TOOLCHAIN_SEM_IR_ABSOLUTE_NODE_ID_H_

+ 26 - 27
toolchain/sem_ir/absolute_node_id.cpp → toolchain/sem_ir/absolute_node_ref.cpp

@@ -2,7 +2,7 @@
 // Exceptions. See /LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-#include "toolchain/sem_ir/absolute_node_id.h"
+#include "toolchain/sem_ir/absolute_node_ref.h"
 
 #include "toolchain/sem_ir/ids.h"
 
@@ -11,13 +11,13 @@ namespace Carbon::SemIR {
 // Follows an imported instruction location to find the sequence of import
 // locations and the ultimately imported location.
 static auto FollowImportRef(
-    llvm::SmallVector<AbsoluteNodeId>& absolute_node_ids,
+    llvm::SmallVector<AbsoluteNodeRef>& absolute_node_refs,
     const File*& cursor_ir, InstId& cursor_inst_id,
     ImportIRInstId import_ir_inst_id) -> bool {
   auto import_ir_inst = cursor_ir->import_ir_insts().Get(import_ir_inst_id);
   if (import_ir_inst.ir_id() == ImportIRId::Cpp) {
-    absolute_node_ids.push_back(
-        AbsoluteNodeId(import_ir_inst.clang_source_loc_id()));
+    absolute_node_refs.push_back(
+        AbsoluteNodeRef(cursor_ir, import_ir_inst.clang_source_loc_id()));
     return true;
   }
 
@@ -42,8 +42,8 @@ static auto FollowImportRef(
           implicit_import_ir_inst.inst_id());
       CARBON_CHECK(implicit_loc_id.kind() == LocId::Kind::NodeId,
                    "Should only be one layer of implicit imports");
-      absolute_node_ids.push_back(AbsoluteNodeId(
-          implicit_ir.sem_ir->check_ir_id(), implicit_loc_id.node_id()));
+      absolute_node_refs.push_back(
+          AbsoluteNodeRef(implicit_ir.sem_ir, implicit_loc_id.node_id()));
       break;
     }
 
@@ -52,8 +52,8 @@ static auto FollowImportRef(
 
     case LocId::Kind::NodeId: {
       // For imports in the current file, the location is simple.
-      absolute_node_ids.push_back(
-          AbsoluteNodeId(cursor_ir->check_ir_id(), import_loc_id.node_id()));
+      absolute_node_refs.push_back(
+          AbsoluteNodeRef(cursor_ir, import_loc_id.node_id()));
       break;
     }
   }
@@ -65,19 +65,19 @@ static auto FollowImportRef(
 
 // Returns true if this is the final parse node location. If the location is an
 // import, follows it and returns false.
-static auto HandleLocId(llvm::SmallVector<AbsoluteNodeId>& absolute_node_ids,
+static auto HandleLocId(llvm::SmallVector<AbsoluteNodeRef>& absolute_node_refs,
                         const File*& cursor_ir, InstId& cursor_inst_id,
                         LocId loc_id) -> bool {
   switch (loc_id.kind()) {
     case LocId::Kind::ImportIRInstId: {
-      return FollowImportRef(absolute_node_ids, cursor_ir, cursor_inst_id,
+      return FollowImportRef(absolute_node_refs, cursor_ir, cursor_inst_id,
                              loc_id.import_ir_inst_id());
     }
 
     case LocId::Kind::NodeId: {
       // Parse nodes always refer to the current IR.
-      absolute_node_ids.push_back(
-          AbsoluteNodeId(cursor_ir->check_ir_id(), loc_id.node_id()));
+      absolute_node_refs.push_back(
+          AbsoluteNodeRef(cursor_ir, loc_id.node_id()));
       return true;
     }
 
@@ -88,9 +88,9 @@ static auto HandleLocId(llvm::SmallVector<AbsoluteNodeId>& absolute_node_ids,
 }
 
 // Loops through imported instructions until the actual instruction is found.
-static auto GetAbsoluteNodeIdImpl(
-    llvm::SmallVector<AbsoluteNodeId>& absolute_node_ids, const File* cursor_ir,
-    InstId cursor_inst_id) -> void {
+static auto GetAbsoluteNodeRefImpl(
+    llvm::SmallVector<AbsoluteNodeRef>& absolute_node_refs,
+    const File* cursor_ir, InstId cursor_inst_id) -> void {
   while (cursor_inst_id.has_value()) {
     auto cursor_inst = cursor_ir->insts().Get(cursor_inst_id);
     if (auto bind_ref = cursor_inst.TryAs<ExportDecl>();
@@ -102,7 +102,7 @@ static auto GetAbsoluteNodeIdImpl(
     // If the parse node has a value, use it for the location.
     if (auto loc_id = cursor_ir->insts().GetCanonicalLocId(cursor_inst_id);
         loc_id.has_value()) {
-      if (HandleLocId(absolute_node_ids, cursor_ir, cursor_inst_id, loc_id)) {
+      if (HandleLocId(absolute_node_refs, cursor_ir, cursor_inst_id, loc_id)) {
         return;
       }
       continue;
@@ -119,37 +119,36 @@ static auto GetAbsoluteNodeIdImpl(
   }
 
   // `None` parse node but not an import; just nothing to point at.
-  absolute_node_ids.push_back(
-      AbsoluteNodeId(cursor_ir->check_ir_id(), Parse::NodeId::None));
+  absolute_node_refs.push_back(AbsoluteNodeRef(cursor_ir, Parse::NodeId::None));
 }
 
-auto GetAbsoluteNodeId(const File* sem_ir, LocId loc_id)
-    -> llvm::SmallVector<AbsoluteNodeId> {
-  llvm::SmallVector<AbsoluteNodeId> absolute_node_ids;
+auto GetAbsoluteNodeRef(const File* sem_ir, LocId loc_id)
+    -> llvm::SmallVector<AbsoluteNodeRef> {
+  llvm::SmallVector<AbsoluteNodeRef> absolute_node_refs;
   switch (loc_id.kind()) {
     case LocId::Kind::None:
-      absolute_node_ids.push_back(
-          AbsoluteNodeId(sem_ir->check_ir_id(), Parse::NodeId::None));
+      absolute_node_refs.push_back(
+          AbsoluteNodeRef(sem_ir, Parse::NodeId::None));
       break;
 
     case LocId::Kind::InstId:
-      GetAbsoluteNodeIdImpl(absolute_node_ids, sem_ir, loc_id.inst_id());
+      GetAbsoluteNodeRefImpl(absolute_node_refs, sem_ir, loc_id.inst_id());
       break;
 
     case LocId::Kind::ImportIRInstId:
     case LocId::Kind::NodeId: {
       const File* cursor_ir = sem_ir;
       InstId cursor_inst_id = InstId::None;
-      if (HandleLocId(absolute_node_ids, cursor_ir, cursor_inst_id,
+      if (HandleLocId(absolute_node_refs, cursor_ir, cursor_inst_id,
                       cursor_ir->insts().GetCanonicalLocId(loc_id))) {
         break;
       }
       CARBON_CHECK(cursor_inst_id.has_value(), "Should be set by HandleLocId");
-      GetAbsoluteNodeIdImpl(absolute_node_ids, cursor_ir, cursor_inst_id);
+      GetAbsoluteNodeRefImpl(absolute_node_refs, cursor_ir, cursor_inst_id);
       break;
     }
   }
-  return absolute_node_ids;
+  return absolute_node_refs;
 }
 
 }  // namespace Carbon::SemIR

+ 77 - 0
toolchain/sem_ir/absolute_node_ref.h

@@ -0,0 +1,77 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_TOOLCHAIN_SEM_IR_ABSOLUTE_NODE_REF_H_
+#define CARBON_TOOLCHAIN_SEM_IR_ABSOLUTE_NODE_REF_H_
+
+#include "toolchain/parse/tree_and_subtrees.h"
+#include "toolchain/sem_ir/file.h"
+#include "toolchain/sem_ir/ids.h"
+
+namespace Carbon::SemIR {
+
+// A location tied to a `File`: either a `Parse::NodeId` in that file's Carbon
+// source, or a `ClangSourceLocId` for C++ code imported by that file, which
+// must be looked up in that same file's `clang_source_locs()`.
+class AbsoluteNodeRef {
+ public:
+  // A Carbon location: parse node `node_id` (possibly `None`) in `file`.
+  explicit AbsoluteNodeRef(const File* file, Parse::NodeId node_id)
+      : file_(file), is_cpp_(false), node_id_(node_id) {}
+
+  // A C++ location: `clang_source_loc_id` indexes `file`'s Clang locations.
+  explicit AbsoluteNodeRef(const File* file,
+                           ClangSourceLocId clang_source_loc_id)
+      : file_(file), is_cpp_(true), clang_source_loc_id_(clang_source_loc_id) {}
+
+  // The file this location belongs to, exactly as passed to the constructor.
+  auto file() const -> const File* { return file_; }
+
+  // The check IR ID of `file()`; dereferences it, so it must be non-null.
+  auto check_ir_id() const -> CheckIRId { return file_->check_ir_id(); }
+
+  // Whether this is a C++ location rather than a Carbon parse node.
+  auto is_cpp() const -> bool { return is_cpp_; }
+
+  // The parse node within `file()`. Guarded by a `CARBON_CHECK` that
+  // `is_cpp()` is false, since the ID shares storage with the C++ location.
+  auto node_id() const -> Parse::NodeId {
+    CARBON_CHECK(!is_cpp());
+    return node_id_;
+  }
+
+  // The Clang source location ID. Guarded by `CARBON_CHECK(is_cpp())`.
+  auto clang_source_loc_id() const -> ClangSourceLocId {
+    CARBON_CHECK(is_cpp());
+    return clang_source_loc_id_;
+  }
+
+ private:
+  // See `file()`. Both union members below are interpreted relative to it.
+  const File* file_;
+
+  // Discriminator for the union below: true selects `clang_source_loc_id_`.
+  bool is_cpp_;
+
+  union {
+    // Active when `is_cpp_` is false. See `node_id()`.
+    Parse::NodeId node_id_;
+    // Active when `is_cpp_` is true. See `clang_source_loc_id()`.
+    ClangSourceLocId clang_source_loc_id_;
+  };
+};
+
+// Resolves the `LocId` to a series of `AbsoluteNodeRef`s, which may be in
+// different files. The vector will have one entry if there were no imports, and
+// multiple entries when imports are traversed. The final entry is the actual
+// declaration.
+//
+// Note that the `LocId` here is typically not canonical, and it uses that fact
+// for non-canonical locations built from an `ExportDecl` instruction.
+auto GetAbsoluteNodeRef(const File* sem_ir, LocId loc_id)
+    -> llvm::SmallVector<AbsoluteNodeRef>;
+
+}  // namespace Carbon::SemIR
+
+#endif  // CARBON_TOOLCHAIN_SEM_IR_ABSOLUTE_NODE_REF_H_

+ 32 - 28
toolchain/sem_ir/diagnostic_loc_converter.cpp

@@ -111,35 +111,38 @@ class ClangImportCollector : public clang::DiagnosticRenderer {
 auto DiagnosticLocConverter::ConvertWithImports(LocId loc_id,
                                                 bool token_only) const
     -> LocAndImports {
-  llvm::SmallVector<AbsoluteNodeId> absolute_node_ids =
-      GetAbsoluteNodeId(sem_ir_, loc_id);
-  auto final_node_id = absolute_node_ids.pop_back_val();
+  llvm::SmallVector<AbsoluteNodeRef> absolute_node_refs =
+      GetAbsoluteNodeRef(sem_ir_, loc_id);
+  auto final_node = absolute_node_refs.pop_back_val();
 
   // Convert the final location.
-  LocAndImports result = {.loc = ConvertImpl(final_node_id, token_only)};
+  LocAndImports result = {.loc = ConvertImpl(final_node, token_only)};
 
   // Convert the import locations.
-  for (const auto& absolute_node_id : absolute_node_ids) {
-    if (!absolute_node_id.node_id().has_value()) {
+  for (const auto& absolute_node_ref : absolute_node_refs) {
+    if (!absolute_node_ref.node_id().has_value()) {
       // TODO: Add an `ImportLoc` pointing at the prelude for the case where
       // we don't have a location.
       continue;
     }
-    result.imports.push_back({.loc = ConvertImpl(absolute_node_id, false).loc});
+    result.imports.push_back(
+        {.loc = ConvertImpl(absolute_node_ref, false).loc});
   }
 
   // Convert the C++ import locations.
-  if (final_node_id.check_ir_id() == CheckIRId::Cpp) {
-    const SemIR::CppFile* cpp_file = sem_ir_->cpp_file();
-    CARBON_CHECK(cpp_file, "Converting C++ location before C++ file is set");
+  if (final_node.is_cpp()) {
+    const File* file = final_node.file();
+    CARBON_CHECK(file->cpp_file(),
+                 "Converting C++ location before C++ file is set");
 
     // Collect the location backtrace that Clang would use for an error here.
-    ClangImportCollector(cpp_file->lang_options(),
-                         cpp_file->diagnostic_options(), &result.imports)
+    ClangImportCollector(file->cpp_file()->lang_options(),
+                         file->cpp_file()->diagnostic_options(),
+                         &result.imports)
         .emitDiagnostic(
-            clang::FullSourceLoc(sem_ir_->clang_source_locs().Get(
-                                     final_node_id.clang_source_loc_id()),
-                                 cpp_file->source_manager()),
+            clang::FullSourceLoc(
+                file->clang_source_locs().Get(final_node.clang_source_loc_id()),
+                file->cpp_file()->source_manager()),
             clang::DiagnosticsEngine::Error, "", {}, {});
   }
 
@@ -148,39 +151,40 @@ auto DiagnosticLocConverter::ConvertWithImports(LocId loc_id,
 
 auto DiagnosticLocConverter::Convert(LocId loc_id, bool token_only) const
     -> Diagnostics::ConvertedLoc {
-  llvm::SmallVector<AbsoluteNodeId> absolute_node_ids =
-      GetAbsoluteNodeId(sem_ir_, loc_id);
-  return ConvertImpl(absolute_node_ids.back(), token_only);
+  llvm::SmallVector<AbsoluteNodeRef> absolute_node_refs =
+      GetAbsoluteNodeRef(sem_ir_, loc_id);
+  return ConvertImpl(absolute_node_refs.back(), token_only);
 }
 
-auto DiagnosticLocConverter::ConvertImpl(AbsoluteNodeId absolute_node_id,
+auto DiagnosticLocConverter::ConvertImpl(AbsoluteNodeRef absolute_node_ref,
                                          bool token_only) const
     -> Diagnostics::ConvertedLoc {
-  if (absolute_node_id.check_ir_id() == CheckIRId::Cpp) {
-    return ConvertImpl(absolute_node_id.clang_source_loc_id());
+  if (absolute_node_ref.is_cpp()) {
+    return ConvertImpl(absolute_node_ref.file(),
+                       absolute_node_ref.clang_source_loc_id());
   }
 
-  return ConvertImpl(absolute_node_id.check_ir_id(), absolute_node_id.node_id(),
-                     token_only);
+  return ConvertImpl(absolute_node_ref.check_ir_id(),
+                     absolute_node_ref.node_id(), token_only);
 }
 
 auto DiagnosticLocConverter::ConvertImpl(CheckIRId check_ir_id,
                                          Parse::NodeId node_id,
                                          bool token_only) const
     -> Diagnostics::ConvertedLoc {
-  CARBON_CHECK(check_ir_id != CheckIRId::Cpp);
   const auto& tree_and_subtrees =
       tree_and_subtrees_getters_->Get(check_ir_id)();
   return tree_and_subtrees.NodeToDiagnosticLoc(node_id, token_only);
 }
 
 auto DiagnosticLocConverter::ConvertImpl(
-    ClangSourceLocId clang_source_loc_id) const -> Diagnostics::ConvertedLoc {
+    const File* file, ClangSourceLocId clang_source_loc_id) const
+    -> Diagnostics::ConvertedLoc {
   clang::SourceLocation clang_loc =
-      sem_ir_->clang_source_locs().Get(clang_source_loc_id);
+      file->clang_source_locs().Get(clang_source_loc_id);
 
-  CARBON_CHECK(sem_ir_->cpp_file());
-  const auto& src_mgr = sem_ir_->cpp_file()->source_manager();
+  CARBON_CHECK(file->cpp_file());
+  const auto& src_mgr = file->cpp_file()->source_manager();
   clang::PresumedLoc presumed_loc = src_mgr.getPresumedLoc(clang_loc);
   if (presumed_loc.isInvalid()) {
     return Diagnostics::ConvertedLoc();

+ 4 - 4
toolchain/sem_ir/diagnostic_loc_converter.h

@@ -9,7 +9,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "toolchain/diagnostics/emitter.h"
 #include "toolchain/parse/tree_and_subtrees.h"
-#include "toolchain/sem_ir/absolute_node_id.h"
+#include "toolchain/sem_ir/absolute_node_ref.h"
 #include "toolchain/sem_ir/file.h"
 #include "toolchain/sem_ir/ids.h"
 
@@ -71,9 +71,9 @@ class DiagnosticLocConverter {
       -> Diagnostics::ConvertedLoc;
 
  private:
-  // Converts an `absolute_node_id` in either a Carbon file or C++ import to a
+  // Converts an `absolute_node_ref` in either a Carbon file or C++ import to a
   // diagnostic location.
-  auto ConvertImpl(AbsoluteNodeId absolute_node_id, bool token_only) const
+  auto ConvertImpl(AbsoluteNodeRef absolute_node_ref, bool token_only) const
       -> Diagnostics::ConvertedLoc;
 
   // Converts a `node_id` corresponding to a specific check IR to a diagnostic
@@ -82,7 +82,7 @@ class DiagnosticLocConverter {
                    bool token_only) const -> Diagnostics::ConvertedLoc;
 
   // Converts a location pointing into C++ code to a diagnostic location.
-  auto ConvertImpl(ClangSourceLocId clang_source_loc_id) const
+  auto ConvertImpl(const File* file, ClangSourceLocId clang_source_loc_id) const
       -> Diagnostics::ConvertedLoc;
 
   // Converters for each SemIR.

+ 1 - 5
toolchain/sem_ir/ids.cpp

@@ -47,11 +47,7 @@ auto ConstantId::Print(llvm::raw_ostream& out, bool disambiguate) const
 }
 
 auto CheckIRId::Print(llvm::raw_ostream& out) const -> void {
-  if (*this == Cpp) {
-    out << Label << "(Cpp)";
-  } else {
-    IdBase::Print(out);
-  }
+  IdBase::Print(out);
 }
 
 auto GenericInstIndex::Print(llvm::raw_ostream& out) const -> void {

+ 0 - 5
toolchain/sem_ir/ids.h

@@ -312,15 +312,10 @@ struct FunctionId : public IdBase<FunctionId> {
 struct CheckIRId : public IdBase<CheckIRId> {
   static constexpr llvm::StringLiteral Label = "check_ir";
 
-  // Used when referring to the imported C++.
-  static const CheckIRId Cpp;
-
   using IdBase::IdBase;
   auto Print(llvm::raw_ostream& out) const -> void;
 };
 
-inline constexpr CheckIRId CheckIRId::Cpp = CheckIRId(NoneIndex - 1);
-
 // The ID of a `Class`.
 struct ClassId : public IdBase<ClassId> {
   static constexpr llvm::StringLiteral Label = "class";