Selaa lähdekoodia

Support defining instruction categories with a common representation (#3569)

Add a mechanism to define instruction categories, to support inspecting
the common representation of similar kinds of instruction. Use that
mechanism to make formatting of branch instructions slightly more
type-safe.

The idea here is to use the existing `Inst` mechanism for converting to
and from structs, extended to operate on a struct representing multiple
different kinds of instruction. In this case, the concrete kind of
instruction is stored in the struct in a `kind` field, rather than being
implied by the type.

Factored out of #3555 where this mechanism is used to provide a common
interface for runtime and symbolic name bindings.
Richard Smith 2 vuotta sitten
vanhempi
sitoutus
07efa026de

+ 3 - 4
common/struct_reflection.h

@@ -75,10 +75,9 @@ constexpr auto CountFields() -> int {
   if constexpr (CanListInitialize<T, Fields...>(0)) {
     return CountFields<T, true, Fields..., AnyField<T>>();
   } else if constexpr (AnyWorkedSoFar) {
-    // Note: Compare against the maximum number of fields supported *PLUS 1*.
-    static_assert(sizeof...(Fields) <= 7,
-                  "Unsupported: too many fields in struct");
-    return sizeof...(Fields) - 1;
+    constexpr int NumFields = sizeof...(Fields) - 1;
+    static_assert(NumFields <= 6, "Unsupported: too many fields in struct");
+    return NumFields;
   } else if constexpr (sizeof...(Fields) > 32) {
     // If we go too far without finding a working initializer, something
     // probably went wrong with our calculation. Bail out before we recurse too

+ 13 - 22
toolchain/sem_ir/formatter.cpp

@@ -352,12 +352,11 @@ class InstNamer {
 
   // Finds and adds a suitable block label for the given SemIR instruction that
   // represents some kind of branch.
-  auto AddBlockLabel(ScopeIndex scope_idx, InstBlockId block_id, Inst inst)
-      -> void {
+  auto AddBlockLabel(ScopeIndex scope_idx, AnyBranch branch) -> void {
     llvm::StringRef name;
-    switch (parse_tree_.node_kind(inst.parse_node())) {
+    switch (parse_tree_.node_kind(branch.parse_node)) {
       case Parse::NodeKind::IfExprIf:
-        switch (inst.kind()) {
+        switch (branch.kind) {
           case BranchIf::Kind:
             name = "if.expr.then";
             break;
@@ -373,7 +372,7 @@ class InstNamer {
         break;
 
       case Parse::NodeKind::IfCondition:
-        switch (inst.kind()) {
+        switch (branch.kind) {
           case BranchIf::Kind:
             name = "if.then";
             break;
@@ -390,10 +389,10 @@ class InstNamer {
         break;
 
       case Parse::NodeKind::ShortCircuitOperandAnd:
-        name = inst.Is<BranchIf>() ? "and.rhs" : "and.result";
+        name = branch.kind == BranchIf::Kind ? "and.rhs" : "and.result";
         break;
       case Parse::NodeKind::ShortCircuitOperandOr:
-        name = inst.Is<BranchIf>() ? "or.rhs" : "or.result";
+        name = branch.kind == BranchIf::Kind ? "or.rhs" : "or.result";
         break;
 
       case Parse::NodeKind::WhileConditionStart:
@@ -401,7 +400,7 @@ class InstNamer {
         break;
 
       case Parse::NodeKind::WhileCondition:
-        switch (inst.kind()) {
+        switch (branch.kind) {
           case InstKind::BranchIf:
             name = "while.body";
             break;
@@ -417,7 +416,7 @@ class InstNamer {
         break;
     }
 
-    AddBlockLabel(scope_idx, block_id, name.str(), inst.parse_node());
+    AddBlockLabel(scope_idx, branch.target_id, name.str(), branch.parse_node);
   }
 
   auto CollectNamesInBlock(ScopeIndex scope_idx, InstBlockId block_id) -> void {
@@ -446,6 +445,10 @@ class InstNamer {
             (sem_ir_.names().GetIRBaseName(name_id).str() + suffix).str());
       };
 
+      if (auto branch = inst.TryAs<AnyBranch>()) {
+        AddBlockLabel(scope_idx, *branch);
+      }
+
       switch (inst.kind()) {
         case AddrPattern::Kind: {
           // TODO: We need to assign names to parameters that appear in
@@ -455,18 +458,6 @@ class InstNamer {
           CollectNamesInBlock(scope_idx, inst.As<AddrPattern>().inner_id);
           break;
         }
-        case Branch::Kind: {
-          AddBlockLabel(scope_idx, inst.As<Branch>().target_id, inst);
-          break;
-        }
-        case BranchIf::Kind: {
-          AddBlockLabel(scope_idx, inst.As<BranchIf>().target_id, inst);
-          break;
-        }
-        case BranchWithArg::Kind: {
-          AddBlockLabel(scope_idx, inst.As<BranchWithArg>().target_id, inst);
-          break;
-        }
         case SpliceBlock::Kind: {
           CollectNamesInBlock(scope_idx, inst.As<SpliceBlock>().block_id);
           break;
@@ -827,7 +818,7 @@ class Formatter {
   template <typename InstT>
   auto FormatInstructionRHS(InstT inst) -> void {
     // By default, an instruction has a comma-separated argument list.
-    using Info = TypedInstArgsInfo<InstT>;
+    using Info = InstLikeTypeInfo<InstT>;
     if constexpr (Info::NumArgs == 2) {
       FormatArgs(Info::template Get<0>(inst), Info::template Get<1>(inst));
     } else if constexpr (Info::NumArgs == 1) {

+ 3 - 3
toolchain/sem_ir/inst.cpp

@@ -22,9 +22,9 @@ auto Inst::Print(llvm::raw_ostream& out) const -> void {
   // clang warns on unhandled enum values; clang-tidy is incorrect here.
   // NOLINTNEXTLINE(bugprone-switch-missing-default-case)
   switch (kind_) {
-#define CARBON_SEM_IR_INST_KIND(Name)      \
-  case Name::Kind:                         \
-    print_args(TypedInstArgsInfo<Name>()); \
+#define CARBON_SEM_IR_INST_KIND(Name)     \
+  case Name::Kind:                        \
+    print_args(InstLikeTypeInfo<Name>()); \
     break;
 #include "toolchain/sem_ir/inst_kind.def"
   }

+ 101 - 30
toolchain/sem_ir/inst.h

@@ -6,6 +6,7 @@
 #define CARBON_TOOLCHAIN_SEM_IR_INST_H_
 
 #include <cstdint>
+#include <type_traits>
 
 #include "common/check.h"
 #include "common/ostream.h"
@@ -18,16 +19,27 @@
 
 namespace Carbon::SemIR {
 
-// Data about the arguments of a typed instruction, to aid in type erasure. The
-// `KindT` parameter is used to check that `TypedInst` is a typed instruction.
-template <typename TypedInst,
-          const InstKind::Definition& KindT = TypedInst::Kind>
-struct TypedInstArgsInfo {
+// Information about an instruction-like type, which is a type that an Inst can
+// be converted to and from. The `Enabled` parameter is used to check
+// requirements on the type in the specializations of this template.
+template <typename InstLikeType, bool Enabled = true>
+struct InstLikeTypeInfo;
+
+// A helper base class for instruction-like types that are structs.
+template <typename InstLikeType>
+struct InstLikeTypeInfoBase {
+  // The derived class. Useful to allow SFINAE on whether a type is
+  // instruction-like: `typename InstLikeTypeInfo<T>::Self` is valid only if `T`
+  // is instruction-like.
+  using Self = InstLikeTypeInfo<InstLikeType>;
+
   // A corresponding std::tuple<...> type.
-  using Tuple = decltype(StructReflection::AsTuple(std::declval<TypedInst>()));
+  using Tuple =
+      decltype(StructReflection::AsTuple(std::declval<InstLikeType>()));
 
-  static constexpr int FirstArgField =
-      HasParseNodeMember<TypedInst> + HasTypeIdMember<TypedInst>;
+  static constexpr int FirstArgField = HasKindMemberAsField<InstLikeType> +
+                                       HasParseNodeMember<InstLikeType> +
+                                       HasTypeIdMember<InstLikeType>;
 
   static constexpr int NumArgs = std::tuple_size_v<Tuple> - FirstArgField;
   static_assert(NumArgs <= 2,
@@ -37,11 +49,55 @@ struct TypedInstArgsInfo {
   using ArgType = std::tuple_element_t<FirstArgField + N, Tuple>;
 
   template <int N>
-  static auto Get(TypedInst inst) -> ArgType<N> {
+  static auto Get(InstLikeType inst) -> ArgType<N> {
     return std::get<FirstArgField + N>(StructReflection::AsTuple(inst));
   }
 };
 
+// A particular type of instruction is instruction-like.
+template <typename TypedInst>
+struct InstLikeTypeInfo<
+    TypedInst,
+    (bool)std::is_same_v<const InstKind::Definition, decltype(TypedInst::Kind)>>
+    : InstLikeTypeInfoBase<TypedInst> {
+  static_assert(!HasKindMemberAsField<TypedInst>,
+                "Instruction type should not have a kind field");
+  static auto GetKind(TypedInst) -> InstKind { return TypedInst::Kind; }
+  static auto IsKind(InstKind kind) -> bool { return kind == TypedInst::Kind; }
+  // A name that can be streamed to an llvm::raw_ostream.
+  static auto DebugName() -> InstKind { return TypedInst::Kind; }
+};
+
+// An instruction category is instruction-like.
+template <typename InstCat>
+struct InstLikeTypeInfo<
+    InstCat, (bool)std::is_same_v<const InstKind&, decltype(InstCat::Kinds[0])>>
+    : InstLikeTypeInfoBase<InstCat> {
+  static_assert(HasKindMemberAsField<InstCat>,
+                "Instruction category should have a kind field");
+  static auto GetKind(InstCat cat) -> InstKind { return cat.kind; }
+  static auto IsKind(InstKind kind) -> bool {
+    for (InstKind k : InstCat::Kinds) {
+      if (k == kind) {
+        return true;
+      }
+    }
+    return false;
+  }
+  // A name that can be streamed to an llvm::raw_ostream.
+  static auto DebugName() -> std::string {
+    std::string str;
+    llvm::raw_string_ostream out(str);
+    out << "{";
+    llvm::ListSeparator sep;
+    for (auto kind : InstCat::Kinds) {
+      out << sep << kind;
+    }
+    out << "}";
+    return out.str();
+  }
+};
+
 // A type-erased representation of a SemIR instruction, that may be constructed
 // from the specific kinds of instruction defined in `typed_insts.h`. This
 // provides access to common fields present on most or all kinds of
@@ -54,28 +110,34 @@ struct TypedInstArgsInfo {
 // In addition, kind-specific data can be accessed by casting to the specific
 // kind of instruction:
 //
-// - Use `inst.kind()` or `Is<TypedInst>` to determine what kind of instruction
-// it is.
-// - Cast to a specific type using `inst.As<TypedInst>()`
-//   - Using the wrong kind in `inst.As<TypedInst>()` is a programming error,
+// - Use `inst.kind()` or `Is<InstLikeType>` to determine what kind of
+//   instruction it is.
+// - Cast to a specific type using `inst.As<InstLikeType>()`
+//   - Using the wrong kind in `inst.As<InstLikeType>()` is a programming error,
 //     and will CHECK-fail in debug modes (opt may too, but it's not an API
 //     guarantee).
-// - Use `inst.TryAs<TypedInst>()` to safely access type-specific instruction
-// data
-//   where the instruction's kind is not known.
+// - Use `inst.TryAs<InstLikeType>()` to safely access type-specific instruction
+//   data where the instruction's kind is not known.
 class Inst : public Printable<Inst> {
  public:
-  template <typename TypedInst, typename Info = TypedInstArgsInfo<TypedInst>>
+  template <typename TypedInst,
+            typename Info = typename InstLikeTypeInfo<TypedInst>::Self>
   // NOLINTNEXTLINE(google-explicit-constructor)
   Inst(TypedInst typed_inst)
       : parse_node_(Parse::NodeId::Invalid),
-        kind_(TypedInst::Kind),
+        // Always overwritten below.
+        kind_(InstKind::Create({})),
         type_id_(TypeId::Invalid),
         arg0_(InstId::InvalidIndex),
         arg1_(InstId::InvalidIndex) {
     if constexpr (HasParseNodeMember<TypedInst>) {
       parse_node_ = typed_inst.parse_node;
     }
+    if constexpr (HasKindMemberAsField<TypedInst>) {
+      kind_ = typed_inst.kind;
+    } else {
+      kind_ = TypedInst::Kind;
+    }
     if constexpr (HasTypeIdMember<TypedInst>) {
       type_id_ = typed_inst.type_id;
     }
@@ -88,31 +150,39 @@ class Inst : public Printable<Inst> {
   }
 
   // Returns whether this instruction has the specified type.
-  template <typename TypedInst>
+  template <typename TypedInst, typename Info = InstLikeTypeInfo<TypedInst>>
   auto Is() const -> bool {
-    return kind() == TypedInst::Kind;
+    return Info::IsKind(kind());
   }
 
   // Casts this instruction to the given typed instruction, which must match the
   // instruction's kind, and returns the typed instruction.
-  template <typename TypedInst, typename Info = TypedInstArgsInfo<TypedInst>>
+  template <typename TypedInst, typename Info = InstLikeTypeInfo<TypedInst>>
   auto As() const -> TypedInst {
     CARBON_CHECK(Is<TypedInst>()) << "Casting inst of kind " << kind()
-                                  << " to wrong kind " << TypedInst::Kind;
-    auto build_with_type_id_and_args = [&](auto... type_id_and_args) {
+                                  << " to wrong kind " << Info::DebugName();
+    auto build_with_parse_node_onwards = [&](auto... parse_node_onwards) {
+      if constexpr (HasKindMemberAsField<TypedInst>) {
+        return TypedInst{kind(), parse_node_onwards...};
+      } else {
+        return TypedInst{parse_node_onwards...};
+      }
+    };
+
+    auto build_with_type_id_onwards = [&](auto... type_id_onwards) {
       if constexpr (HasParseNodeMember<TypedInst>) {
-        return TypedInst{decltype(TypedInst::parse_node)(parse_node()),
-                         type_id_and_args...};
+        return build_with_parse_node_onwards(
+            decltype(TypedInst::parse_node)(parse_node()), type_id_onwards...);
       } else {
-        return TypedInst{type_id_and_args...};
+        return build_with_parse_node_onwards(type_id_onwards...);
       }
     };
 
     auto build_with_args = [&](auto... args) {
       if constexpr (HasTypeIdMember<TypedInst>) {
-        return build_with_type_id_and_args(type_id(), args...);
+        return build_with_type_id_onwards(type_id(), args...);
       } else {
-        return build_with_type_id_and_args(args...);
+        return build_with_type_id_onwards(args...);
       }
     };
 
@@ -190,8 +260,9 @@ class Inst : public Printable<Inst> {
 // may be worth investigating further.
 static_assert(sizeof(Inst) == 20, "Unexpected Inst size");
 
-// Typed instructions can be printed by converting them to instructions.
-template <typename TypedInst, typename = TypedInstArgsInfo<TypedInst>>
+// Instruction-like types can be printed by converting them to instructions.
+template <typename TypedInst,
+          typename = typename InstLikeTypeInfo<TypedInst>::Self>
 inline auto operator<<(llvm::raw_ostream& out, TypedInst inst)
     -> llvm::raw_ostream& {
   Inst(inst).Print(out);

+ 43 - 11
toolchain/sem_ir/typed_insts.h

@@ -13,8 +13,10 @@
 
 // Representations for specific kinds of instructions.
 //
-// Each type should be a struct with up to four members:
+// Each type should be a struct with the following members, in this order:
 //
+// - Either a `Kind` constant, or a `Kinds` constant and an `InstKind kind;`
+//   member. These are described below.
 // - Optionally, a `Parse::NodeId parse_node;` member, for instructions with an
 //   associated location. Almost all instructions should have this, with
 //   exceptions being things that are generated internally, without any relation
@@ -25,17 +27,27 @@
 //   `Namespace`, for which a placeholder type should be used.
 // - Up to two `[...]Id` members describing the contents of the struct.
 //
-// The field names here matter -- the first two fields must have the names
-// specified above, when present. When converting to a `SemIR::Inst`, they will
-// become the parse node and type associated with the type-erased instruction.
+// The field names here matter -- the fields must have the names specified
+// above, when present. When converting to a `SemIR::Inst`, the `kind`,
+// `parse_node`, and `type_id` fields will become the kind, parse node, and
+// type associated with the type-erased instruction.
 //
-// In addition, each type provides a constant `Kind` that associates the type
-// with a particular member of the `InstKind` enumeration. This `Kind`
-// declaration also defines the instruction kind by calling `InstKind::Define`
-// and specifying additional information about the instruction kind. This
-// information is available through the member functions of the `InstKind` value
-// declared in `inst_kind.h`, and includes the name used in textual IR and
-// whether the instruction is a terminator instruction.
+// Each type that describes a single kind of instructions provides a constant
+// `Kind` that associates the type with a particular member of the `InstKind`
+// enumeration. This `Kind` declaration also defines the instruction kind by
+// calling `InstKind::Define` and specifying additional information about the
+// instruction kind. This information is available through the member functions
+// of the `InstKind` value declared in `inst_kind.h`, and includes the name
+// used in textual IR and whether the instruction is a terminator instruction.
+//
+// Struct types can also be provided for categories of instructions with a
+// common representation, to allow the common representation to be accessed
+// conveniently. In this case, instead of providing a constant `Kind` member,
+// the struct should have a constant `InstKind Kinds[];` member that lists the
+// kinds of instructions in the category, and an `InstKind kind;` member that
+// is used to identify the specific kind of the instruction. Separate struct
+// types still need to be defined for each instruction kind in the category.
+
 namespace Carbon::SemIR {
 
 struct AddressOf {
@@ -164,6 +176,19 @@ struct BoundMethod {
   InstId function_id;
 };
 
+// Common representation for all kinds of `Branch*` node.
+struct AnyBranch {
+  static constexpr InstKind Kinds[] = {InstKind::Branch, InstKind::BranchIf,
+                                       InstKind::BranchWithArg};
+
+  InstKind kind;
+  Parse::NodeId parse_node;
+  // Branches don't produce a value, so have no type.
+  InstBlockId target_id;
+  // Kind-specific data.
+  int32_t arg1;
+};
+
 struct Branch {
   static constexpr auto Kind =
       InstKind::Branch.Define("br", TerminatorKind::Terminator);
@@ -657,6 +682,13 @@ struct VarStorage {
   NameId name_id;
 };
 
+// HasKindMemberAsField<T> is true if T has a `InstKind kind` field, as opposed
+// to a `static constexpr InstKind::Definition Kind` member or no kind at all.
+template <typename T, typename KindType = InstKind T::*>
+inline constexpr bool HasKindMemberAsField = false;
+template <typename T>
+inline constexpr bool HasKindMemberAsField<T, decltype(&T::kind)> = true;
+
 // HasParseNodeMember<T> is true if T has a `U parse_node` field,
 // where `U` extends `Parse::NodeId`.
 template <typename T, bool Enabled = true>