Переглянути джерело

Refactor LocId to merge in SemIRLoc (#5284)

The main goal of this is to collapse the LocId and SemIRLoc types into a
single type, eliminating the need for APIs to decide which to use. This
originated from discussion about UnwrapSemIRLoc in #5169. Although that
was removed in #5202, it's probably still a good direction for LocId.

This changes the packing of LocId to allow adding InstId, making it
tri-modal: ImportIRInstId, InstId, or NodeId. This has a side-effect of
reducing the available space for ImportIRInstId, although not by much
due to the pre-existing `ImplicitBit` behavior. If needed, we could also
probably play with packing a bit more since `ImplicitBit` really only
applies to `NodeId`, but I was trying to keep the logic a little
simpler. Note `TokenOnlyBit` can still apply to `ImportIRInstId`.

This leaves in place a typedef for SemIRLoc -- I intend to clean that up
separately.

Some Discord discussion is
[here](https://discord.com/channels/655572317891461132/655578254970716160/1353755830058745959).
Jon Ross-Perkins 1 рік тому
батько
коміт
fe29224016

+ 2 - 1
toolchain/check/check_unit.cpp

@@ -407,7 +407,8 @@ auto CheckUnit::CheckRequiredDeclarations() -> void {
         function.extern_library_id == context_.sem_ir().library_id()) {
       auto function_loc_id =
           context_.insts().GetLocId(function.non_owning_decl_id);
-      CARBON_CHECK(function_loc_id.is_import_ir_inst_id());
+      CARBON_CHECK(function_loc_id.kind() ==
+                   SemIR::LocId::Kind::ImportIRInstId);
       auto import_ir_id = context_.sem_ir()
                               .import_ir_insts()
                               .Get(function_loc_id.import_ir_inst_id())

+ 9 - 8
toolchain/check/diagnostic_emitter.cpp

@@ -10,9 +10,10 @@
 
 namespace Carbon::Check {
 
-auto DiagnosticEmitter::ConvertLoc(SemIRLoc loc, ContextFnT context_fn) const
+auto DiagnosticEmitter::ConvertLoc(SemIR::LocId loc_id,
+                                   ContextFnT context_fn) const
     -> Diagnostics::ConvertedLoc {
-  auto converted = ConvertLocImpl(loc, context_fn);
+  auto converted = ConvertLocImpl(loc_id, context_fn);
 
   // Use the token when possible, but -1 is the default value.
   auto last_offset = -1;
@@ -32,12 +33,13 @@ auto DiagnosticEmitter::ConvertLoc(SemIRLoc loc, ContextFnT context_fn) const
   return converted;
 }
 
-auto DiagnosticEmitter::ConvertLocImpl(SemIRLoc loc,
+auto DiagnosticEmitter::ConvertLocImpl(SemIR::LocId loc_id,
                                        ContextFnT context_fn) const
     -> Diagnostics::ConvertedLoc {
   llvm::SmallVector<SemIR::AbsoluteNodeId> absolute_node_ids =
-      loc.is_inst_id_ ? SemIR::GetAbsoluteNodeId(sem_ir_, loc.inst_id_)
-                      : SemIR::GetAbsoluteNodeId(sem_ir_, loc.loc_id_);
+      SemIR::GetAbsoluteNodeId(sem_ir_, loc_id);
+  bool token_only =
+      loc_id.kind() != SemIR::LocId::Kind::InstId && loc_id.is_token_only();
 
   auto final_node_id = absolute_node_ids.pop_back_val();
   for (const auto& absolute_node_id : absolute_node_ids) {
@@ -47,13 +49,12 @@ auto DiagnosticEmitter::ConvertLocImpl(SemIRLoc loc,
       continue;
     }
     // TODO: Include the name of the imported library in the diagnostic.
-    auto diag_loc =
-        ConvertLocInFile(absolute_node_id, loc.token_only_, context_fn);
+    auto diag_loc = ConvertLocInFile(absolute_node_id, token_only, context_fn);
     CARBON_DIAGNOSTIC(InImport, LocationInfo, "in import");
     context_fn(diag_loc.loc, InImport);
   }
 
-  return ConvertLocInFile(final_node_id, loc.token_only_, context_fn);
+  return ConvertLocInFile(final_node_id, token_only, context_fn);
 }
 
 auto DiagnosticEmitter::ConvertLocInFile(SemIR::AbsoluteNodeId absolute_node_id,

+ 2 - 2
toolchain/check/diagnostic_emitter.h

@@ -43,12 +43,12 @@ class DiagnosticEmitter : public DiagnosticEmitterBase {
   // For the last byte offset, this uses `last_token_` exclusively for imported
   // locations, or `loc` if it's in the same file and (for whatever reason)
   // later.
-  auto ConvertLoc(SemIRLoc loc, ContextFnT context_fn) const
+  auto ConvertLoc(SemIR::LocId loc_id, ContextFnT context_fn) const
       -> Diagnostics::ConvertedLoc override;
 
  private:
   // Implements `ConvertLoc`, but without `last_token_` applied.
-  auto ConvertLocImpl(SemIRLoc loc, ContextFnT context_fn) const
+  auto ConvertLocImpl(SemIR::LocId loc_id, ContextFnT context_fn) const
       -> Diagnostics::ConvertedLoc;
 
   // Converts a node_id corresponding to a specific sem_ir to a diagnostic

+ 6 - 37
toolchain/check/diagnostic_helpers.h

@@ -11,40 +11,13 @@
 
 namespace Carbon::Check {
 
-// Tracks a location for diagnostic use, which is either a parse node or an inst
-// ID which can be translated to a parse node. Used when code needs to support
-// multiple possible ways of reporting a diagnostic location.
-class SemIRLoc {
- public:
-  // NOLINTNEXTLINE(google-explicit-constructor)
-  SemIRLoc(SemIR::InstId inst_id)
-      : inst_id_(inst_id), is_inst_id_(true), token_only_(false) {}
-
-  // NOLINTNEXTLINE(google-explicit-constructor)
-  SemIRLoc(Parse::NodeId node_id) : SemIRLoc(node_id, false) {}
+// TODO: Update code to use LocId directly.
+using SemIRLoc = SemIR::LocId;
 
-  // NOLINTNEXTLINE(google-explicit-constructor)
-  SemIRLoc(SemIR::LocId loc_id) : SemIRLoc(loc_id, false) {}
-
-  // If `token_only` is true, refers to the specific node; otherwise, refers to
-  // the node and its children.
-  explicit SemIRLoc(SemIR::LocId loc_id, bool token_only)
-      : loc_id_(loc_id), is_inst_id_(false), token_only_(token_only) {}
-
- private:
-  // Only allow member access for diagnostics.
-  friend class DiagnosticEmitter;
-  // And also for eval to unwrap a LocId for calling into the rest of Check.
-  friend class UnwrapSemIRLoc;
-
-  union {
-    SemIR::InstId inst_id_;
-    SemIR::LocId loc_id_;
-  };
-
-  bool is_inst_id_;
-  bool token_only_;
-};
+// TODO: Consider instead changing calls to `SemIR::LocId::TokenOnly(...)`.
+inline auto TokenOnly(SemIR::LocId loc_id) -> SemIRLoc {
+  return loc_id.ToTokenOnly();
+}
 
 // We define the emitter separately for dependencies, so only provide a base
 // here.
@@ -57,10 +30,6 @@ using DiagnosticBuilder = DiagnosticEmitterBase::Builder;
 // can add contextual notes as appropriate.
 using MakeDiagnosticBuilderFn = llvm::function_ref<auto()->DiagnosticBuilder>;
 
-inline auto TokenOnly(SemIR::LocId loc_id) -> SemIRLoc {
-  return SemIRLoc(loc_id, true);
-}
-
 // An expression with a constant value, for rendering in a diagnostic. The
 // diagnostic rendering will include enclosing "`"s.
 struct InstIdAsConstant {

+ 32 - 23
toolchain/check/dump.cpp

@@ -108,29 +108,38 @@ LLVM_DUMP_METHOD static auto Dump(const Context& context,
 LLVM_DUMP_METHOD static auto Dump(const Context& context, SemIR::LocId loc_id)
     -> std::string {
   RawStringOstream out;
-  if (!loc_id.has_value()) {
-    out << "LocId(<none>)";
-    return out.TakeStr();
-  }
-
-  if (loc_id.is_node_id()) {
-    auto token = context.parse_tree().node_token(loc_id.node_id());
-    auto line = context.tokens().GetLineNumber(token);
-    auto col = context.tokens().GetColumnNumber(token);
-    const char* implicit = loc_id.is_implicit() ? " implicit" : "";
-    out << "LocId(" << FormatEscaped(context.sem_ir().filename()) << ":" << line
-        << ":" << col << implicit << ")";
-  } else {
-    CARBON_CHECK(loc_id.is_import_ir_inst_id());
-
-    auto import_ir_id = context.sem_ir()
-                            .import_ir_insts()
-                            .Get(loc_id.import_ir_inst_id())
-                            .ir_id;
-    const auto* import_file =
-        context.sem_ir().import_irs().Get(import_ir_id).sem_ir;
-    out << "LocId(import from \"" << FormatEscaped(import_file->filename())
-        << "\")";
+  switch (loc_id.kind()) {
+    case SemIR::LocId::Kind::None: {
+      out << "LocId(<none>)";
+      break;
+    }
+
+    case SemIR::LocId::Kind::ImportIRInstId: {
+      auto import_ir_id = context.sem_ir()
+                              .import_ir_insts()
+                              .Get(loc_id.import_ir_inst_id())
+                              .ir_id;
+      const auto* import_file =
+          context.sem_ir().import_irs().Get(import_ir_id).sem_ir;
+      out << "LocId(import from \"" << FormatEscaped(import_file->filename())
+          << "\")";
+      break;
+    }
+
+    case SemIR::LocId::Kind::InstId: {
+      out << "LocId(" << SemIR::Dump(context.sem_ir(), loc_id.inst_id()) << ")";
+      break;
+    }
+
+    case SemIR::LocId::Kind::NodeId: {
+      auto token = context.parse_tree().node_token(loc_id.node_id());
+      auto line = context.tokens().GetLineNumber(token);
+      auto col = context.tokens().GetColumnNumber(token);
+      const char* implicit = loc_id.is_implicit() ? " implicit" : "";
+      out << "LocId(" << FormatEscaped(context.sem_ir().filename()) << ":"
+          << line << ":" << col << implicit << ")";
+      break;
+    }
   }
   return out.TakeStr();
 }

+ 11 - 6
toolchain/check/import.cpp

@@ -105,13 +105,18 @@ static auto MakeImportedNamespaceLocIdAndInst(Context& context,
     // TODO: Either document the use-case for this, or require a location.
     return SemIR::LocIdAndInst::NoLoc(namespace_inst);
   }
-  if (import_loc_id.is_import_ir_inst_id()) {
-    return MakeImportedLocIdAndInst(context, import_loc_id.import_ir_inst_id(),
-                                    namespace_inst);
+  switch (import_loc_id.kind()) {
+    case SemIR::LocId::Kind::ImportIRInstId:
+      return MakeImportedLocIdAndInst(
+          context, import_loc_id.import_ir_inst_id(), namespace_inst);
+    case SemIR::LocId::Kind::NodeId:
+    case SemIR::LocId::Kind::None:
+      return SemIR::LocIdAndInst(context.parse_tree().As<Parse::AnyNamespaceId>(
+                                     import_loc_id.node_id()),
+                                 namespace_inst);
+    case SemIR::LocId::Kind::InstId:
+      CARBON_FATAL("Unexpected LocId kind");
   }
-  return SemIR::LocIdAndInst(
-      context.parse_tree().As<Parse::AnyNamespaceId>(import_loc_id.node_id()),
-      namespace_inst);
 }
 
 auto AddImportNamespace(Context& context, SemIR::TypeId namespace_type_id,

+ 2 - 2
toolchain/check/import_ref.cpp

@@ -103,7 +103,7 @@ static auto GetCanonicalImportIRInst(Context& context,
     // Step through an instruction with an imported location to the imported
     // instruction.
     auto loc_id = cursor_ir->insts().GetLocId(cursor_inst_id);
-    if (loc_id.is_import_ir_inst_id()) {
+    if (loc_id.kind() == SemIR::LocId::Kind::ImportIRInstId) {
       auto import_ir_inst =
           cursor_ir->import_ir_insts().Get(loc_id.import_ir_inst_id());
       cursor_ir = cursor_ir->import_irs().Get(import_ir_inst.ir_id).sem_ir;
@@ -595,7 +595,7 @@ class ImportRefResolver : public ImportContext {
 
     while (true) {
       auto loc_id = cursor_ir->insts().GetLocId(cursor_inst_id);
-      if (!loc_id.is_import_ir_inst_id()) {
+      if (loc_id.kind() != SemIR::LocId::Kind::ImportIRInstId) {
         return result;
       }
       auto ir_inst =

+ 1 - 1
toolchain/driver/compile_subcommand.cpp

@@ -560,7 +560,7 @@ auto CompilationUnit::PostCheck() -> void {
       const SemIR::File* file = &*sem_ir_;
       while (true) {
         auto loc_id = file->insts().GetLocId(entity_inst_id);
-        if (!loc_id.is_import_ir_inst_id()) {
+        if (loc_id.kind() != SemIR::LocId::Kind::ImportIRInstId) {
           return true;
         }
         auto import_ir_inst =

+ 1 - 1
toolchain/lower/file_context.cpp

@@ -733,7 +733,7 @@ auto FileContext::BuildVtable(const SemIR::Class& class_info)
 
   auto first_owning_decl_loc =
       sem_ir().insts().GetLocId(class_info.first_owning_decl_id);
-  if (first_owning_decl_loc.is_import_ir_inst_id()) {
+  if (first_owning_decl_loc.kind() == SemIR::LocId::Kind::ImportIRInstId) {
     // Emit a declaration of an imported vtable using a(n opaque) pointer type.
     // This doesn't have to match the definition that appears elsewhere, it'll
     // still get merged correctly.

+ 68 - 38
toolchain/sem_ir/absolute_node_id.cpp

@@ -21,24 +21,36 @@ static auto FollowImportRef(
                "thoroughly track ImportDecls.");
 
   auto import_loc_id = cursor_ir->insts().GetLocId(import_ir.decl_id);
-  if (import_loc_id.is_node_id()) {
-    // For imports in the current file, the location is simple.
-    absolute_node_ids.push_back({.check_ir_id = cursor_ir->check_ir_id(),
-                                 .node_id = import_loc_id.node_id()});
-  } else if (import_loc_id.is_import_ir_inst_id()) {
-    // For implicit imports, we need to unravel the location a little
-    // further.
-    auto implicit_import_ir_inst =
-        cursor_ir->import_ir_insts().Get(import_loc_id.import_ir_inst_id());
-    const auto& implicit_ir =
-        cursor_ir->import_irs().Get(implicit_import_ir_inst.ir_id);
-    auto implicit_loc_id =
-        implicit_ir.sem_ir->insts().GetLocId(implicit_import_ir_inst.inst_id);
-    CARBON_CHECK(implicit_loc_id.is_node_id(),
-                 "Should only be one layer of implicit imports");
-    absolute_node_ids.push_back(
-        {.check_ir_id = implicit_ir.sem_ir->check_ir_id(),
-         .node_id = implicit_loc_id.node_id()});
+  switch (import_loc_id.kind()) {
+    case SemIR::LocId::Kind::None:
+      break;
+
+    case SemIR::LocId::Kind::ImportIRInstId: {
+      // For implicit imports, we need to unravel the location a little
+      // further.
+      auto implicit_import_ir_inst =
+          cursor_ir->import_ir_insts().Get(import_loc_id.import_ir_inst_id());
+      const auto& implicit_ir =
+          cursor_ir->import_irs().Get(implicit_import_ir_inst.ir_id);
+      auto implicit_loc_id =
+          implicit_ir.sem_ir->insts().GetLocId(implicit_import_ir_inst.inst_id);
+      CARBON_CHECK(implicit_loc_id.kind() == SemIR::LocId::Kind::NodeId,
+                   "Should only be one layer of implicit imports");
+      absolute_node_ids.push_back(
+          {.check_ir_id = implicit_ir.sem_ir->check_ir_id(),
+           .node_id = implicit_loc_id.node_id()});
+      break;
+    }
+
+    case SemIR::LocId::Kind::InstId:
+      CARBON_FATAL("Unexpected LocId: {0}", import_loc_id);
+
+    case SemIR::LocId::Kind::NodeId: {
+      // For imports in the current file, the location is simple.
+      absolute_node_ids.push_back({.check_ir_id = cursor_ir->check_ir_id(),
+                                   .node_id = import_loc_id.node_id()});
+      break;
+    }
   }
 
   cursor_ir = import_ir.sem_ir;
@@ -50,15 +62,23 @@ static auto FollowImportRef(
 static auto HandleLocId(llvm::SmallVector<AbsoluteNodeId>& absolute_node_ids,
                         const File*& cursor_ir, InstId& cursor_inst_id,
                         LocId loc_id) -> bool {
-  if (loc_id.is_import_ir_inst_id()) {
-    FollowImportRef(absolute_node_ids, cursor_ir, cursor_inst_id,
-                    loc_id.import_ir_inst_id());
-    return false;
-  } else {
-    // Parse nodes always refer to the current IR.
-    absolute_node_ids.push_back(
-        {.check_ir_id = cursor_ir->check_ir_id(), .node_id = loc_id.node_id()});
-    return true;
+  switch (loc_id.kind()) {
+    case SemIR::LocId::Kind::ImportIRInstId: {
+      FollowImportRef(absolute_node_ids, cursor_ir, cursor_inst_id,
+                      loc_id.import_ir_inst_id());
+      return false;
+    }
+
+    case SemIR::LocId::Kind::NodeId: {
+      // Parse nodes always refer to the current IR.
+      absolute_node_ids.push_back({.check_ir_id = cursor_ir->check_ir_id(),
+                                   .node_id = loc_id.node_id()});
+      return true;
+    }
+
+    case SemIR::LocId::Kind::None:
+    case SemIR::LocId::Kind::InstId:
+      CARBON_FATAL("Unexpected LocId: {0}", loc_id);
   }
 }
 
@@ -108,18 +128,28 @@ auto GetAbsoluteNodeId(const File* sem_ir, InstId inst_id)
 auto GetAbsoluteNodeId(const File* sem_ir, LocId loc_id)
     -> llvm::SmallVector<AbsoluteNodeId> {
   llvm::SmallVector<AbsoluteNodeId> absolute_node_ids;
-  if (!loc_id.has_value()) {
-    absolute_node_ids.push_back(
-        {.check_ir_id = sem_ir->check_ir_id(), .node_id = Parse::NodeId::None});
-    return absolute_node_ids;
-  }
-  const File* cursor_ir = sem_ir;
-  InstId cursor_inst_id = InstId::None;
-  if (HandleLocId(absolute_node_ids, cursor_ir, cursor_inst_id, loc_id)) {
-    return absolute_node_ids;
+  switch (loc_id.kind()) {
+    case SemIR::LocId::Kind::None:
+      absolute_node_ids.push_back({.check_ir_id = sem_ir->check_ir_id(),
+                                   .node_id = Parse::NodeId::None});
+      break;
+
+    case SemIR::LocId::Kind::InstId:
+      absolute_node_ids = GetAbsoluteNodeId(sem_ir, loc_id.inst_id());
+      break;
+
+    case SemIR::LocId::Kind::ImportIRInstId:
+    case SemIR::LocId::Kind::NodeId: {
+      const File* cursor_ir = sem_ir;
+      InstId cursor_inst_id = InstId::None;
+      if (HandleLocId(absolute_node_ids, cursor_ir, cursor_inst_id, loc_id)) {
+        break;
+      }
+      CARBON_CHECK(cursor_inst_id.has_value(), "Should be set by HandleLocId");
+      GetAbsoluteNodeIdImpl(absolute_node_ids, cursor_ir, cursor_inst_id);
+      break;
+    }
   }
-  CARBON_CHECK(cursor_inst_id.has_value(), "Should be set by HandleLocId");
-  GetAbsoluteNodeIdImpl(absolute_node_ids, cursor_ir, cursor_inst_id);
   return absolute_node_ids;
 }
 

+ 22 - 15
toolchain/sem_ir/formatter.cpp

@@ -571,7 +571,7 @@ class FormatterImpl {
     // that IR in the output before the `{` or `;`.
     if (first_owning_decl_id.has_value()) {
       auto loc_id = sem_ir_->insts().GetLocId(first_owning_decl_id);
-      if (loc_id.is_import_ir_inst_id()) {
+      if (loc_id.kind() == SemIR::LocId::Kind::ImportIRInstId) {
         auto import_ir_id =
             sem_ir_->import_ir_insts().Get(loc_id.import_ir_inst_id()).ir_id;
         const auto* import_file =
@@ -1142,20 +1142,27 @@ class FormatterImpl {
       // instruction as a last resort.
       const auto& import_ir = sem_ir_->import_irs().Get(import_ir_inst.ir_id);
       auto loc_id = import_ir.sem_ir->insts().GetLocId(import_ir_inst.inst_id);
-      if (!loc_id.has_value()) {
-        out_ << import_ir_inst.inst_id << " [no loc]";
-      } else if (loc_id.is_import_ir_inst_id()) {
-        // TODO: Probably don't want to format each indirection, but maybe reuse
-        // GetCanonicalImportIRInst?
-        out_ << import_ir_inst.inst_id << " [indirect]";
-      } else if (loc_id.is_node_id()) {
-        // Formats a NodeId from the import.
-        const auto& tree = import_ir.sem_ir->parse_tree();
-        auto token = tree.node_token(loc_id.node_id());
-        out_ << "loc" << tree.tokens().GetLineNumber(token) << "_"
-             << tree.tokens().GetColumnNumber(token);
-      } else {
-        CARBON_FATAL("Unexpected LocId: {0}", loc_id);
+      switch (loc_id.kind()) {
+        case SemIR::LocId::Kind::None: {
+          out_ << import_ir_inst.inst_id << " [no loc]";
+          break;
+        }
+        case SemIR::LocId::Kind::ImportIRInstId: {
+          // TODO: Probably don't want to format each indirection, but maybe
+          // reuse GetCanonicalImportIRInst?
+          out_ << import_ir_inst.inst_id << " [indirect]";
+          break;
+        }
+        case SemIR::LocId::Kind::NodeId: {
+          // Formats a NodeId from the import.
+          const auto& tree = import_ir.sem_ir->parse_tree();
+          auto token = tree.node_token(loc_id.node_id());
+          out_ << "loc" << tree.tokens().GetLineNumber(token) << "_"
+               << tree.tokens().GetColumnNumber(token);
+          break;
+        }
+        case SemIR::LocId::Kind::InstId:
+          CARBON_FATAL("Unexpected LocId: {0}", loc_id);
       }
     }
     out_ << ", " << loaded_label;

+ 13 - 5
toolchain/sem_ir/ids.cpp

@@ -166,11 +166,19 @@ auto LibraryNameId::Print(llvm::raw_ostream& out) const -> void {
 }
 
 auto LocId::Print(llvm::raw_ostream& out) const -> void {
-  out << Label << "_";
-  if (is_node_id() || !has_value()) {
-    out << node_id();
-  } else {
-    out << import_ir_inst_id();
+  switch (kind()) {
+    case Kind::None:
+      IdBase::Print(out);
+      break;
+    case Kind::ImportIRInstId:
+      out << Label << "_" << import_ir_inst_id();
+      break;
+    case Kind::InstId:
+      out << Label << "_" << inst_id();
+      break;
+    case Kind::NodeId:
+      out << Label << "_" << node_id();
+      break;
   }
 }
 

+ 122 - 38
toolchain/sem_ir/ids.h

@@ -5,6 +5,8 @@
 #ifndef CARBON_TOOLCHAIN_SEM_IR_IDS_H_
 #define CARBON_TOOLCHAIN_SEM_IR_IDS_H_
 
+#include <limits>
+
 #include "common/check.h"
 #include "common/ostream.h"
 #include "toolchain/base/index_base.h"
@@ -41,6 +43,9 @@ struct InstId : public IdBase<InstId> {
   static constexpr llvm::StringLiteral Label = "inst";
   using ValueType = Inst;
 
+  // The maximum ID, inclusive.
+  static constexpr int Max = std::numeric_limits<int32_t>::max();
+
   // Represents the result of a name lookup that is temporarily disallowed
   // because the name is currently being initialized.
   static const InstId InitTombstone;
@@ -792,78 +797,157 @@ struct ImportIRInstId : public IdBase<ImportIRInstId> {
   using ValueType = ImportIRInst;
 
   // ImportIRInstId is restricted so that it can fit into LocId.
-  static constexpr int32_t Bits = 29;
+  static constexpr int32_t BitsWithNodeId = 29;
 
   // The maximum ID, non-inclusive.
-  static constexpr int Max = 1 << Bits;
+  static constexpr int Max = (1 << BitsWithNodeId) - Parse::NodeId::Max - 2;
 
   constexpr explicit ImportIRInstId(int32_t index) : IdBase(index) {
     CARBON_DCHECK(index < Max, "Index out of range: {0}", index);
   }
 };
 
-// A SemIR location used as the location of instructions.
+// A SemIR location used as the location of instructions. This contains either a
+// InstId, NodeId, ImportIRInstId, or None. The intent is that any of these can
+// indicate the source of an instruction, and also be used to associate a line
+// in diagnostics.
+//
+// The structure is:
+// - None: The standard NoneIndex for all Id types, -1.
+// - InstId: positive values including zero; a full 31 bits.
+//   - [0, 1 << 31)
+// - NodeId: negative values starting after None; the 24 bit NodeId range.
+//   - [-2, -2 - (1 << 24))
+// - ImportIRInstId: remaining negative values; after NodeId, fills out negative
+//   values to 29 bits.
+//   - [-2 - (1 << 24), -(1 << 29))
 //
-// Contents:
-// - index > None: A Parse::NodeId in the current IR.
-// - index < None: An ImportIRInstId.
-// - index == None: Can be used for either.
+// In addition, two bits are used for flags: `ImplicitBit` and `TokenOnlyBit`.
+// Note that these can only be used with negative, non-`InstId` values.
 struct LocId : public IdBase<LocId> {
-  static constexpr llvm::StringLiteral Label = "loc";
+  // The contained index kind.
+  enum class Kind {
+    None,
+    ImportIRInstId,
+    InstId,
+    NodeId,
+  };
 
-  // This bit, if set for a node ID location, indicates a location for
-  // operations performed implicitly.
-  static const int32_t ImplicitBit = 1 << 30;
+  static constexpr llvm::StringLiteral Label = "loc";
 
   using IdBase::IdBase;
 
+  // NOLINTNEXTLINE(google-explicit-constructor)
+  constexpr LocId(ImportIRInstId import_ir_inst_id)
+      : IdBase(import_ir_inst_id.has_value()
+                   ? FirstImportIRInstId - import_ir_inst_id.index
+                   : NoneIndex) {}
+
+  // NOLINTNEXTLINE(google-explicit-constructor)
+  constexpr LocId(InstId inst_id) : IdBase(inst_id.index) {}
+
   // NOLINTNEXTLINE(google-explicit-constructor)
   constexpr LocId(Parse::NoneNodeId /*none*/) : IdBase(NoneIndex) {}
 
   // NOLINTNEXTLINE(google-explicit-constructor)
-  constexpr LocId(Parse::NodeId node_id) : IdBase(node_id.index) {
-    CARBON_CHECK(node_id.has_value() == has_value(), "{0}", index);
-    CARBON_CHECK(!is_implicit(), "{0}", index);
+  constexpr LocId(Parse::NodeId node_id)
+      : IdBase(FirstNodeId - node_id.index) {}
+
+  // Forms an equivalent LocId for a desugared location.  Requires a
+  // non-`InstId` location.
+  // TODO: Rename to something like `ToDesugared`.
+  auto ToImplicit() const -> LocId {
+    // This should only be called for NodeId or ImportIRInstId, but we only set
+    // the flag for NodeId.
+    CARBON_CHECK(kind() != Kind::InstId);
+    if (kind() == Kind::NodeId) {
+      return LocId(index & ~ImplicitBit);
+    }
+    return *this;
   }
 
-  // NOLINTNEXTLINE(google-explicit-constructor)
-  constexpr LocId(ImportIRInstId inst_id)
-      : IdBase(NoneIndex + ImportIRInstId::NoneIndex - inst_id.index) {
-    CARBON_CHECK(inst_id.has_value() == has_value(), "{0}", index);
-    CARBON_CHECK(index & ImplicitBit, "{0}", index);
+  // Forms an equivalent `LocId` for a token-only diagnostic location.  Requires
+  // a non-`InstId` location.
+  auto ToTokenOnly() const -> LocId {
+    CARBON_CHECK(kind() != Kind::InstId);
+    if (has_value()) {
+      return LocId(index & ~TokenOnlyBit);
+    }
+    return *this;
   }
 
-  // Forms an equivalent LocId for an implicit location.
-  auto ToImplicit() const -> LocId {
-    // For import IR locations and the `None` location, the implicit bit is
-    // always set, so this is a no-op.
-    return LocId(index | ImplicitBit);
+  // Returns the kind of the `LocId`.
+  auto kind() const -> Kind {
+    if (!has_value()) {
+      return Kind::None;
+    }
+    if (index >= 0) {
+      return Kind::InstId;
+    }
+    if (index_without_flags() <= FirstImportIRInstId) {
+      return Kind::ImportIRInstId;
+    }
+    return Kind::NodeId;
   }
 
-  auto is_node_id() const -> bool { return index > NoneIndex; }
-  auto is_import_ir_inst_id() const -> bool { return index < NoneIndex; }
+  // Returns true if the location corresponds to desugared instructions.
+  // Requires a non-`InstId` location.
   auto is_implicit() const -> bool {
-    return is_node_id() && (index & ImplicitBit) != 0;
+    CARBON_CHECK(kind() != Kind::InstId);
+    return (kind() == Kind::NodeId) && (index & ImplicitBit) == 0;
   }
 
-  // This is allowed to return `NodeId::None`, but should never be used for
-  // `InstId` other than `InstId::None`.
-  auto node_id() const -> Parse::NodeId {
+  // Returns true if the location is token-only for diagnostics. Requires a
+  // non-`InstId` location.
+  auto is_token_only() const -> bool {
+    CARBON_CHECK(kind() != Kind::InstId);
+    return (index & TokenOnlyBit) == 0;
+  }
+
+  // Returns the equivalent `ImportIRInstId` when `kind()` matches or is `None`.
+  auto import_ir_inst_id() const -> ImportIRInstId {
     if (!has_value()) {
-      return Parse::NodeId::None;
+      return ImportIRInstId::None;
     }
-    CARBON_CHECK(is_node_id());
-    return Parse::NodeId(index & ~ImplicitBit);
+    CARBON_CHECK(kind() == Kind::ImportIRInstId, "{0}", index);
+    return ImportIRInstId(FirstImportIRInstId - index_without_flags());
   }
 
-  // This is allowed to return `InstId::None`, but should never be used for
-  // `NodeId` other than `NodeId::None`.
-  auto import_ir_inst_id() const -> ImportIRInstId {
-    CARBON_CHECK(is_import_ir_inst_id() || !has_value());
-    return ImportIRInstId(NoneIndex + ImportIRInstId::NoneIndex - index);
+  // Returns the equivalent `InstId` when `kind()` matches or is `None`.
+  auto inst_id() const -> InstId {
+    CARBON_CHECK(kind() == Kind::None || kind() == Kind::InstId, "{0}", index);
+    return InstId(index);
+  }
+
+  // Returns the equivalent `NodeId` when `kind()` matches or is `None`.
+  auto node_id() const -> Parse::NodeId {
+    if (!has_value()) {
+      return Parse::NodeId::None;
+    }
+    CARBON_CHECK(kind() == Kind::NodeId, "{0}", index);
+    return Parse::NodeId(FirstNodeId - index_without_flags());
   }
 
   auto Print(llvm::raw_ostream& out) const -> void;
+
+ private:
+  // Whether a location corresponds to desugared instructions. This only applies
+  // for `NodeId`.
+  static constexpr int32_t ImplicitBit = 1 << 30;
+
+  // See `token_only` for the use. This only applies for `NodeId` and
+  // `ImportIRInstId`.
+  static constexpr int32_t TokenOnlyBit = 1 << 29;
+
+  // The value of the 0 index for each of `NodeId` and `ImportIRInstId`.
+  static constexpr int32_t FirstNodeId = NoneIndex - 1;
+  static constexpr int32_t FirstImportIRInstId =
+      FirstNodeId - Parse::NodeId::Max;
+
+  auto index_without_flags() const -> int32_t {
+    CARBON_DCHECK(index < NoneIndex, "Only for NodeId and ImportIRInstId");
+    return index | ImplicitBit | TokenOnlyBit;
+  }
 };
 
 // Polymorphic id for fields in `Any[...]` typed instruction category. Used for

+ 109 - 34
toolchain/sem_ir/ids_test.cpp

@@ -14,49 +14,124 @@ namespace {
 
 using ::testing::Eq;
 
-TEST(IdsTest, LocIdIsNone) {
-  LocId loc_id = Parse::NodeId::None;
+TEST(IdsTest, LocIdValues) {
+  // This testing should match the ranges documented on LocId.
+  EXPECT_THAT(static_cast<LocId>(Parse::NodeId::None).index, Eq(-1));
+
+  EXPECT_THAT(static_cast<LocId>(InstId(0)).index, Eq(0));
+  EXPECT_THAT(
+      static_cast<LocId>(InstId(std::numeric_limits<int32_t>::max())).index,
+      Eq(std::numeric_limits<int32_t>::max()));
+
+  EXPECT_THAT(static_cast<LocId>(Parse::NodeId(0)).index, Eq(-2));
+  EXPECT_THAT(static_cast<LocId>(Parse::NodeId(Parse::NodeId::Max - 1)).index,
+              Eq(-2 - (1 << 24) + 1));
+
+  EXPECT_THAT(static_cast<LocId>(ImportIRInstId(0)).index, Eq(-2 - (1 << 24)));
+  EXPECT_THAT(static_cast<LocId>(ImportIRInstId(ImportIRInstId::Max - 1)).index,
+              Eq(-(1 << 29) + 1));
+}
+
+// A standard parameterized test for (implicit, token_only, index).
+class IdsTestWithParam
+    : public testing::TestWithParam<std::tuple<bool, bool, int32_t>> {
+ public:
+  explicit IdsTestWithParam() {
+    llvm::errs() << "implicit=" << is_implicit()
+                 << ", token_only=" << is_token_only()
+                 << ", index=" << std::get<2>(GetParam()) << "\n";
+  }
+
+  // Returns IdT with its matching LocId form. Sets flags based on test
+  // parameters.
+  template <typename IdT>
+  auto BuildIdAndLocId() -> std::pair<IdT, LocId> {
+    auto [implicit, token_only, index] = GetParam();
+    IdT id(index);
+    LocId loc_id = id;
+    if (implicit) {
+      loc_id = loc_id.ToImplicit();
+    }
+    if (token_only) {
+      loc_id = loc_id.ToTokenOnly();
+    }
+    return {id, loc_id};
+  }
+
+  auto is_implicit() -> bool { return std::get<0>(GetParam()); }
+  auto is_token_only() -> bool { return std::get<1>(GetParam()); }
+};
+
+// Returns a test case generator for edge-case values.
+static auto GetValueRange(int32_t max) -> auto {
+  return testing::Values(0, 1, max - 2, max - 1);
+}
+
+// Returns a test case generator for `IdsTestWithParam` uses.
+static auto CombineWithFlags(auto value_range) -> auto {
+  return testing::Combine(testing::Bool(), testing::Bool(), value_range);
+}
+
+class LocIdAsNoneTestWithParam : public IdsTestWithParam {};
+
+INSTANTIATE_TEST_SUITE_P(
+    LocIdAsNoneTest, LocIdAsNoneTestWithParam,
+    CombineWithFlags(testing::Values(Parse::NodeId::NoneIndex)));
+
+TEST_P(LocIdAsNoneTestWithParam, Test) {
+  auto [_, loc_id] = BuildIdAndLocId<Parse::NodeId>();
   EXPECT_FALSE(loc_id.has_value());
-  EXPECT_FALSE(loc_id.is_node_id());
-  EXPECT_FALSE(loc_id.is_import_ir_inst_id());
+  EXPECT_THAT(loc_id.kind(), Eq(SemIR::LocId::Kind::None));
   EXPECT_FALSE(loc_id.is_implicit());
+  EXPECT_THAT(loc_id.import_ir_inst_id(), Eq(ImportIRInstId::None));
+  EXPECT_THAT(loc_id.inst_id(), Eq(InstId::None));
   EXPECT_THAT(loc_id.node_id(),
               // The actual type is NoneNodeId, so cast to NodeId.
               Eq<Parse::NodeId>(Parse::NodeId::None));
-  EXPECT_THAT(loc_id.import_ir_inst_id(), Eq(ImportIRInstId::None));
 }
 
-TEST(IdsTest, LocIdIsNodeId) {
-  for (auto index : {0, 1, Parse::NodeId::Max - 2, Parse::NodeId::Max - 1}) {
-    SCOPED_TRACE(llvm::formatv("Index: {0}", index));
-    Parse::NodeId node_id(index);
-    LocId loc_id = node_id;
-    EXPECT_TRUE(loc_id.has_value());
-    EXPECT_TRUE(loc_id.is_node_id());
-    EXPECT_FALSE(loc_id.is_import_ir_inst_id());
-    EXPECT_FALSE(loc_id.is_implicit());
-    EXPECT_THAT(loc_id.node_id(), node_id);
-
-    loc_id = loc_id.ToImplicit();
-    EXPECT_TRUE(loc_id.has_value());
-    EXPECT_TRUE(loc_id.is_node_id());
-    EXPECT_FALSE(loc_id.is_import_ir_inst_id());
-    EXPECT_TRUE(loc_id.is_implicit());
-    EXPECT_THAT(loc_id.node_id(), node_id);
-  }
+class LocIdAsImportIRInstIdTest : public IdsTestWithParam {};
+
+INSTANTIATE_TEST_SUITE_P(Test, LocIdAsImportIRInstIdTest,
+                         CombineWithFlags(GetValueRange(ImportIRInstId::Max)));
+
+TEST_P(LocIdAsImportIRInstIdTest, Test) {
+  auto [import_ir_inst_id, loc_id] = BuildIdAndLocId<ImportIRInstId>();
+  EXPECT_TRUE(loc_id.has_value());
+  ASSERT_THAT(loc_id.kind(), Eq(SemIR::LocId::Kind::ImportIRInstId));
+  EXPECT_THAT(loc_id.import_ir_inst_id(), import_ir_inst_id);
+  EXPECT_FALSE(loc_id.is_implicit());
+  EXPECT_THAT(loc_id.is_token_only(), Eq(is_token_only()));
 }
 
-TEST(IdsTest, LocIdIsImportIRInstId) {
-  for (auto index : {0, 1, ImportIRInstId::Max - 2, ImportIRInstId::Max - 1}) {
-    SCOPED_TRACE(llvm::formatv("Index: {0}", index));
-    ImportIRInstId import_ir_inst_id(index);
-    LocId loc_id = import_ir_inst_id;
-    EXPECT_TRUE(loc_id.has_value());
-    EXPECT_FALSE(loc_id.is_node_id());
-    EXPECT_TRUE(loc_id.is_import_ir_inst_id());
-    EXPECT_FALSE(loc_id.is_implicit());
-    EXPECT_THAT(loc_id.import_ir_inst_id(), import_ir_inst_id);
-  }
+class LocIdAsInstIdTest : public IdsTestWithParam {};
+
+INSTANTIATE_TEST_SUITE_P(
+    Test, LocIdAsInstIdTest,
+    testing::Combine(testing::Values(false), testing::Values(false),
+                     GetValueRange(std::numeric_limits<int32_t>::max())));
+
+TEST_P(LocIdAsInstIdTest, Test) {
+  auto [inst_id, loc_id] = BuildIdAndLocId<InstId>();
+  EXPECT_TRUE(loc_id.has_value());
+  ASSERT_THAT(loc_id.kind(), Eq(SemIR::LocId::Kind::InstId));
+  EXPECT_THAT(loc_id.inst_id(), inst_id);
+  // Note that `is_implicit` and `is_token_only` are invalid to use with
+  // `InstId`.
+}
+
+class LocIdAsNodeIdTest : public IdsTestWithParam {};
+
+INSTANTIATE_TEST_SUITE_P(Test, LocIdAsNodeIdTest,
+                         CombineWithFlags(GetValueRange(Parse::NodeId::Max)));
+
+TEST_P(LocIdAsNodeIdTest, Test) {
+  auto [node_id, loc_id] = BuildIdAndLocId<Parse::NodeId>();
+  EXPECT_TRUE(loc_id.has_value());
+  ASSERT_THAT(loc_id.kind(), Eq(SemIR::LocId::Kind::NodeId));
+  EXPECT_THAT(loc_id.node_id(), node_id);
+  EXPECT_THAT(loc_id.is_implicit(), Eq(is_implicit()));
+  EXPECT_THAT(loc_id.is_token_only(), Eq(is_token_only()));
 }
 
 }  // namespace

+ 4 - 4
toolchain/sem_ir/inst_namer.cpp

@@ -164,8 +164,8 @@ auto InstNamer::GetNameFor(ScopeId scope_id, InstId inst_id) const
     RawStringOstream out;
     out << "<unexpected>." << inst_id;
     auto loc_id = sem_ir_->insts().GetLocId(inst_id);
-    // TODO: Consider handling inst_id cases.
-    if (loc_id.is_node_id()) {
+    // TODO: Consider handling other kinds.
+    if (loc_id.kind() == SemIR::LocId::Kind::NodeId) {
       const auto& tree = sem_ir_->parse_tree();
       auto token = tree.node_token(loc_id.node_id());
       out << ".loc" << tree.tokens().GetLineNumber(token) << "_"
@@ -255,9 +255,9 @@ auto InstNamer::Namespace::AllocateName(
   }
 
   // Append location information to try to disambiguate.
-  // TODO: Consider handling inst_id cases.
   if (auto* loc_id = std::get_if<LocId>(&loc_id_or_fingerprint)) {
-    if (loc_id->is_node_id()) {
+    // TODO: Consider handling other kinds.
+    if (loc_id->kind() == SemIR::LocId::Kind::NodeId) {
       const auto& tree = inst_namer.sem_ir_->parse_tree();
       auto token = tree.node_token(loc_id->node_id());
       llvm::raw_string_ostream(name)