Selaa lähdekoodia

Rewrite semantics towards a more pure instruction model (#2320)

This rewrites semantics towards a more pure instruction model, in pursuit of the simple instruction-style output.

I think I can get this approach to type-check as it goes along, but obviously this change doesn't prove that yet. I'm separating it out because it's a large rewrite of the semantics structure, tossing out a lot of what was there before. But I think it does help towards several requests, like setting up a clear path for consolidating duplicate identifiers and making the node style more standardized.

I expect to need to pass multiple args to function calls; that would probably mean storing vectors of args, similar to how identifiers and integer literals are stored in this change.

This removes the semantics namespace because (a) it was getting annoying writing the `::` everywhere, and (b) I think the preference in Carbon is to avoid namespaces (@chandlerc asked not to put SemanticsIR/SemanticsFactory in a namespace, which is the crux of the issue). However, it's still necessary to avoid name conflicts, so I just prefix everything with "Semantics" (still a lot of typing, but no `::`).
Jon Ross-Perkins 3 vuotta sitten
vanhempi
sitoutus
1f8508204b

+ 1 - 1
toolchain/parser/parse_node_kind.h

@@ -33,7 +33,7 @@ class ParseNodeKind {
   };
 
  public:
-  // The formatting for this macro is weird due to a `clang-format` bug. See
+  // `clang-format` has a bug with spacing around `->` returns in macros. See
   // https://bugs.llvm.org/show_bug.cgi?id=48320 for details.
 #define CARBON_PARSE_NODE_KIND(Name)            \
   static constexpr auto Name()->ParseNodeKind { \

+ 11 - 16
toolchain/semantics/BUILD

@@ -5,34 +5,30 @@
 package(default_visibility = ["//visibility:public"])
 
 cc_library(
-    name = "nodes",
-    hdrs = [
-        "node_kind.h",
-        "node_ref.h",
-        "nodes/binary_operator.h",
-        "nodes/function.h",
-        "nodes/integer_literal.h",
-        "nodes/return.h",
-        "nodes/set_name.h",
-    ],
+    name = "semantics_node_kind",
+    srcs = ["semantics_node_kind.cpp"],
+    hdrs = ["semantics_node_kind.h"],
+    textual_hdrs = ["semantics_node_kind.def"],
     deps = [
-        "//common:check",
         "//common:ostream",
-        "//toolchain/parser:parse_tree",
         "@llvm-project//llvm:Support",
     ],
 )
 
 cc_library(
     name = "semantics_ir",
-    srcs = ["semantics_ir.cpp"],
+    srcs = [
+        "semantics_ir.cpp",
+        "semantics_node.cpp",
+    ],
     hdrs = [
-        "node_store.h",
         "semantics_ir.h",
+        "semantics_node.h",
     ],
     deps = [
-        ":nodes",
+        ":semantics_node_kind",
         "//common:check",
+        "//common:ostream",
         "//toolchain/lexer:tokenized_buffer",
         "//toolchain/parser:parse_tree",
         "@llvm-project//llvm:Support",
@@ -44,7 +40,6 @@ cc_library(
     srcs = ["semantics_ir_factory.cpp"],
     hdrs = ["semantics_ir_factory.h"],
     deps = [
-        ":nodes",
         ":semantics_ir",
         "//common:check",
         "//toolchain/lexer:token_kind",

+ 0 - 35
toolchain/semantics/node_kind.h

@@ -1,35 +0,0 @@
-// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
-// Exceptions. See /LICENSE for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#ifndef CARBON_TOOLCHAIN_SEMANTICS_NODE_KIND_H_
-#define CARBON_TOOLCHAIN_SEMANTICS_NODE_KIND_H_
-
-#include <cstdint>
-
-#include "common/ostream.h"
-
-namespace Carbon::Semantics {
-
-// Type-safe storage of Node IDs.
-struct NodeId {
-  explicit NodeId(int32_t id) : id(id) {}
-
-  void Print(llvm::raw_ostream& out) const { out << "%" << id; }
-
-  int32_t id;
-};
-
-// Meta node information for declarations.
-enum class NodeKind {
-  BinaryOperator,
-  Function,
-  IntegerLiteral,
-  Return,
-  SetName,
-  Invalid,
-};
-
-}  // namespace Carbon::Semantics
-
-#endif  // CARBON_TOOLCHAIN_SEMANTICS_NODE_KIND_H_

+ 0 - 47
toolchain/semantics/node_ref.h

@@ -1,47 +0,0 @@
-// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
-// Exceptions. See /LICENSE for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#ifndef CARBON_TOOLCHAIN_SEMANTICS_NODE_REF_H_
-#define CARBON_TOOLCHAIN_SEMANTICS_NODE_REF_H_
-
-#include <cstdint>
-
-#include "common/ostream.h"
-#include "toolchain/semantics/node_kind.h"
-
-namespace Carbon::Semantics {
-
-// Type-safe storage of NodeStore indices.
-struct NodeStoreIndex {
-  explicit NodeStoreIndex(int32_t index) : index(index) {}
-
-  int32_t index;
-};
-
-// The standard structure for nodes.
-//
-// This flyweight pattern is used so that each subtype can be stored in its own
-// vector, minimizing memory consumption and heap fragmentation when large
-// quantities are being created.
-class NodeRef {
- public:
-  NodeRef() : NodeRef(NodeKind::Invalid, NodeStoreIndex(-1)) {}
-
-  auto kind() -> NodeKind { return kind_; }
-
- private:
-  template <typename... StoredNodeT>
-  friend class NodeStoreBase;
-
-  NodeRef(NodeKind kind, NodeStoreIndex index) : kind_(kind), index_(index) {}
-
-  NodeKind kind_;
-
-  // The index of the named entity within its list.
-  NodeStoreIndex index_;
-};
-
-}  // namespace Carbon::Semantics
-
-#endif  // CARBON_TOOLCHAIN_SEMANTICS_NODE_REF_H_

+ 0 - 60
toolchain/semantics/node_store.h

@@ -1,60 +0,0 @@
-// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
-// Exceptions. See /LICENSE for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#ifndef CARBON_TOOLCHAIN_SEMANTICS_NODE_STORE_H_
-#define CARBON_TOOLCHAIN_SEMANTICS_NODE_STORE_H_
-
-#include <tuple>
-
-#include "common/check.h"
-#include "llvm/ADT/SmallVector.h"
-#include "toolchain/semantics/node_ref.h"
-#include "toolchain/semantics/nodes/binary_operator.h"
-#include "toolchain/semantics/nodes/function.h"
-#include "toolchain/semantics/nodes/integer_literal.h"
-#include "toolchain/semantics/nodes/return.h"
-#include "toolchain/semantics/nodes/set_name.h"
-
-namespace Carbon::Semantics {
-
-// Provides storage for nodes, indexed by Nodes.
-//
-// This uses templating versus either a macro or repeated functions to provide
-// per-type storage.
-template <typename... StoredNodeT>
-class NodeStoreBase {
- public:
-  // Stores the provided node, returning a pointer to it.
-  template <typename NodeT>
-  auto Store(NodeT node) -> NodeRef {
-    auto& node_store = std::get<static_cast<size_t>(NodeT::Kind)>(node_stores_);
-    NodeStoreIndex index(node_store.size());
-    node_store.push_back(node);
-    return NodeRef(NodeT::Kind, index);
-  }
-
-  // Returns the requested node. Requires that the pointer is valid for this
-  // store.
-  template <typename NodeT>
-  auto Get(NodeRef node_ref) const -> const NodeT& {
-    CARBON_CHECK(node_ref.index_.index >= 0);
-    CARBON_CHECK(node_ref.kind_ == NodeT::Kind)
-        << "Kind mismatch: " << static_cast<int>(node_ref.kind_) << " vs "
-        << static_cast<int>(NodeT::Kind);
-    auto& node_store = std::get<static_cast<size_t>(NodeT::Kind)>(node_stores_);
-    CARBON_CHECK(static_cast<size_t>(node_ref.index_.index) <
-                 node_store.size());
-    return node_store[node_ref.index_.index];
-  }
-
- private:
-  std::tuple<llvm::SmallVector<StoredNodeT, 0>...> node_stores_;
-};
-
-using NodeStore =
-    NodeStoreBase<BinaryOperator, Function, IntegerLiteral, Return, SetName>;
-
-}  // namespace Carbon::Semantics
-
-#endif  // CARBON_TOOLCHAIN_SEMANTICS_NODE_STORE_H_

+ 0 - 53
toolchain/semantics/nodes/binary_operator.h

@@ -1,53 +0,0 @@
-// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
-// Exceptions. See /LICENSE for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#ifndef CARBON_TOOLCHAIN_SEMANTICS_NODES_BINARY_OPERATOR_H_
-#define CARBON_TOOLCHAIN_SEMANTICS_NODES_BINARY_OPERATOR_H_
-
-#include "common/ostream.h"
-#include "toolchain/parser/parse_tree.h"
-#include "toolchain/semantics/node_kind.h"
-
-namespace Carbon::Semantics {
-
-// Represents a binary operator, such as `+` in `1 + 2`.
-class BinaryOperator {
- public:
-  enum class Op {
-    Add,
-  };
-
-  static constexpr NodeKind Kind = NodeKind::BinaryOperator;
-
-  explicit BinaryOperator(ParseTree::Node node, NodeId id, Op op, NodeId lhs_id,
-                          NodeId rhs_id)
-      : node_(node), id_(id), op_(op), lhs_id_(lhs_id), rhs_id_(rhs_id) {}
-
-  void Print(llvm::raw_ostream& out) const {
-    out << "BinaryOperator(" << id_ << ", ";
-    switch (op_) {
-      case Op::Add:
-        out << "+";
-        break;
-    }
-    out << ", " << lhs_id_ << ", " << rhs_id_ << ")";
-  }
-
-  auto node() const -> ParseTree::Node { return node_; }
-  auto id() const -> NodeId { return id_; }
-  auto op() const -> Op { return op_; }
-  auto lhs_id() const -> NodeId { return lhs_id_; }
-  auto rhs_id() const -> NodeId { return rhs_id_; }
-
- private:
-  ParseTree::Node node_;
-  NodeId id_;
-  Op op_;
-  NodeId lhs_id_;
-  NodeId rhs_id_;
-};
-
-}  // namespace Carbon::Semantics
-
-#endif  // CARBON_TOOLCHAIN_SEMANTICS_NODES_BINARY_OPERATOR_H_

+ 0 - 54
toolchain/semantics/nodes/function.h

@@ -1,54 +0,0 @@
-// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
-// Exceptions. See /LICENSE for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#ifndef CARBON_TOOLCHAIN_SEMANTICS_NODES_FUNCTION_H_
-#define CARBON_TOOLCHAIN_SEMANTICS_NODES_FUNCTION_H_
-
-#include "common/ostream.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "toolchain/parser/parse_tree.h"
-#include "toolchain/semantics/node_kind.h"
-#include "toolchain/semantics/node_ref.h"
-
-namespace Carbon::Semantics {
-
-// Represents `fn name(params...) [-> return_expr] body`.
-class Function {
- public:
-  static constexpr NodeKind Kind = NodeKind::Function;
-
-  Function(ParseTree::Node node, NodeId id, llvm::SmallVector<NodeRef> body)
-      : node_(node), id_(id), body_(std::move(body)) {}
-
-  void Print(
-      llvm::raw_ostream& out, int indent,
-      std::function<void(int, llvm::ArrayRef<NodeRef>)> print_block) const {
-    out << "Function(\n";
-    int content_intent = indent + 4;
-    out.indent(content_intent);
-    out << id_ << ",\n";
-    out.indent(content_intent);
-    print_block(content_intent, body_);
-    out << ")";
-  }
-
-  auto node() const -> ParseTree::Node { return node_; }
-  auto id() const -> NodeId { return id_; }
-  auto set_body(llvm::SmallVector<NodeRef, 0> body) { body_ = std::move(body); }
-  auto body() const -> llvm::ArrayRef<NodeRef> { return body_; }
-
- private:
-  // The FunctionDeclaration node.
-  ParseTree::Node node_;
-
-  // The function's ID.
-  NodeId id_;
-
-  llvm::SmallVector<NodeRef> body_;
-};
-
-}  // namespace Carbon::Semantics
-
-#endif  // CARBON_TOOLCHAIN_SEMANTICS_NODES_FUNCTION_H_

+ 0 - 39
toolchain/semantics/nodes/integer_literal.h

@@ -1,39 +0,0 @@
-// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
-// Exceptions. See /LICENSE for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#ifndef CARBON_TOOLCHAIN_SEMANTICS_NODES_INTEGER_LITERAL_H_
-#define CARBON_TOOLCHAIN_SEMANTICS_NODES_INTEGER_LITERAL_H_
-
-#include "common/ostream.h"
-#include "toolchain/parser/parse_tree.h"
-#include "toolchain/semantics/node_kind.h"
-
-namespace Carbon::Semantics {
-
-// Represents all kinds of literals: `1`, `i32`, etc.
-class IntegerLiteral {
- public:
-  static constexpr NodeKind Kind = NodeKind::IntegerLiteral;
-
-  explicit IntegerLiteral(ParseTree::Node node, NodeId id,
-                          const llvm::APInt& value)
-      : node_(node), id_(id), value_(&value) {}
-
-  void Print(llvm::raw_ostream& out) const {
-    out << "IntegerLiteral(" << id_ << ", " << *value_ << ")";
-  }
-
-  auto node() const -> ParseTree::Node { return node_; }
-  auto id() const -> NodeId { return id_; }
-  auto value() const -> const llvm::APInt& { return *value_; }
-
- private:
-  ParseTree::Node node_;
-  NodeId id_;
-  const llvm::APInt* value_;
-};
-
-}  // namespace Carbon::Semantics
-
-#endif  // CARBON_TOOLCHAIN_SEMANTICS_NODES_INTEGER_LITERAL_H_

+ 0 - 45
toolchain/semantics/nodes/return.h

@@ -1,45 +0,0 @@
-// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
-// Exceptions. See /LICENSE for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#ifndef CARBON_TOOLCHAIN_SEMANTICS_NODES_RETURN_H_
-#define CARBON_TOOLCHAIN_SEMANTICS_NODES_RETURN_H_
-
-#include "common/ostream.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "toolchain/parser/parse_tree.h"
-#include "toolchain/semantics/node_kind.h"
-#include "toolchain/semantics/node_ref.h"
-
-namespace Carbon::Semantics {
-
-// Represents `return [expr];`
-class Return {
- public:
-  static constexpr NodeKind Kind = NodeKind::Return;
-
-  Return(ParseTree::Node node, llvm::Optional<NodeId> target_id)
-      : node_(node), target_id_(target_id) {}
-
-  void Print(llvm::raw_ostream& out) const {
-    out << "Return(";
-    if (target_id_) {
-      out << *target_id_;
-    } else {
-      out << "None";
-    }
-    out << ")";
-  }
-
-  auto node() const -> ParseTree::Node { return node_; }
-  auto target_id() const -> const llvm::Optional<NodeId>& { return target_id_; }
-
- private:
-  ParseTree::Node node_;
-  llvm::Optional<NodeId> target_id_;
-};
-
-}  // namespace Carbon::Semantics
-
-#endif  // CARBON_TOOLCHAIN_SEMANTICS_NODES_RETURN_H_

+ 0 - 44
toolchain/semantics/nodes/set_name.h

@@ -1,44 +0,0 @@
-// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
-// Exceptions. See /LICENSE for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#ifndef CARBON_TOOLCHAIN_SEMANTICS_NODES_SET_NAME_H_
-#define CARBON_TOOLCHAIN_SEMANTICS_NODES_SET_NAME_H_
-
-#include "common/ostream.h"
-#include "llvm/ADT/SmallVector.h"
-#include "toolchain/parser/parse_tree.h"
-#include "toolchain/semantics/node_kind.h"
-
-namespace Carbon::Semantics {
-
-// Represents `fn name(params...) [-> return_expr] body`.
-class SetName {
- public:
-  static constexpr NodeKind Kind = NodeKind::SetName;
-
-  SetName(ParseTree::Node node, llvm::StringRef name, NodeId target_id)
-      : node_(node), name_(name), target_id_(target_id) {}
-
-  void Print(llvm::raw_ostream& out) const {
-    out << "SetName(`" << name_ << "`, " << target_id_ << ")";
-  }
-
-  auto node() const -> ParseTree::Node { return node_; }
-  auto name() const -> llvm::StringRef { return name_; }
-  auto target_id() const -> NodeId { return target_id_; }
-
- private:
-  // The name node.
-  ParseTree::Node node_;
-
-  // The name to assign.
-  llvm::StringRef name_;
-
-  // The ID being named.
-  NodeId target_id_;
-};
-
-}  // namespace Carbon::Semantics
-
-#endif  // CARBON_TOOLCHAIN_SEMANTICS_NODES_SET_NAME_H_

+ 38 - 39
toolchain/semantics/semantics_ir.cpp

@@ -7,53 +7,52 @@
 #include "common/check.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "toolchain/lexer/tokenized_buffer.h"
+#include "toolchain/semantics/semantics_node.h"
 
 namespace Carbon {
 
 auto SemanticsIR::Print(llvm::raw_ostream& out) const -> void {
-  PrintBlock(out, 0, root_block());
-  out << "\n";
-}
+  out << "identifiers = {\n";
+  for (int32_t i = 0; i < static_cast<int32_t>(identifiers_.size()); ++i) {
+    out.indent(2);
+    out << SemanticsIdentifierId(i) << " = \"" << identifiers_[i] << "\";\n";
+  }
+  out << "},\n";
 
-auto SemanticsIR::PrintBlock(llvm::raw_ostream& out, int indent,
-                             llvm::ArrayRef<Semantics::NodeRef> node_refs) const
-    -> void {
-  out << "{\n";
-  int child_indent = indent + 2;
-  for (const auto& node_ref : node_refs) {
-    out.indent(child_indent);
-    Print(out, child_indent, node_ref);
-    out << ",\n";
+  out << "integer_literals = {\n";
+  for (int32_t i = 0; i < static_cast<int32_t>(integer_literals_.size()); ++i) {
+    out.indent(2);
+    out << SemanticsIntegerLiteralId(i) << " = " << integer_literals_[i]
+        << ";\n";
   }
-  out.indent(indent);
-  out << "}";
-}
+  out << "},\n";
+
+  out << "nodes = {\n";
+  int indent = 2;
+  for (int32_t i = 0; i < static_cast<int32_t>(nodes_.size()); ++i) {
+    SemanticsNode node = nodes_[i];
+
+    // Adjust indent for block contents.
+    switch (node.kind()) {
+      case SemanticsNodeKind::CodeBlockStart():
+      case SemanticsNodeKind::FunctionDefinitionStart():
+        out.indent(indent);
+        indent += 2;
+        break;
+      case SemanticsNodeKind::CodeBlockEnd():
+      case SemanticsNodeKind::FunctionDefinitionEnd():
+        indent -= 2;
+        out.indent(indent);
+        break;
+      default:
+        // No indentation change.
+        out.indent(indent);
+        break;
+    }
 
-auto SemanticsIR::Print(llvm::raw_ostream& out, int indent,
-                        Semantics::NodeRef node_ref) const -> void {
-  switch (node_ref.kind()) {
-    case Semantics::NodeKind::BinaryOperator:
-      nodes_.Get<Semantics::BinaryOperator>(node_ref).Print(out);
-      return;
-    case Semantics::NodeKind::Function:
-      nodes_.Get<Semantics::Function>(node_ref).Print(
-          out, indent,
-          [&](int block_indent, llvm::ArrayRef<Semantics::NodeRef> block) {
-            PrintBlock(out, block_indent, block);
-          });
-      return;
-    case Semantics::NodeKind::IntegerLiteral:
-      nodes_.Get<Semantics::IntegerLiteral>(node_ref).Print(out);
-      return;
-    case Semantics::NodeKind::Return:
-      nodes_.Get<Semantics::Return>(node_ref).Print(out);
-      return;
-    case Semantics::NodeKind::SetName:
-      nodes_.Get<Semantics::SetName>(node_ref).Print(out);
-      return;
-    case Semantics::NodeKind::Invalid:
-      CARBON_FATAL() << "Invalid NodeRef kind";
+    out << SemanticsNodeId(i) << " = " << node << ";\n";
   }
+  out << "}\n";
 }
 
 }  // namespace Carbon

+ 23 - 16
toolchain/semantics/semantics_ir.h

@@ -8,7 +8,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "toolchain/parser/parse_tree.h"
-#include "toolchain/semantics/node_store.h"
+#include "toolchain/semantics/semantics_node.h"
 
 namespace Carbon::Testing {
 class SemanticsIRForTest;
@@ -19,30 +19,37 @@ namespace Carbon {
 // Provides semantic analysis on a ParseTree.
 class SemanticsIR {
  public:
-  // File-level declarations.
-  auto root_block() const -> llvm::ArrayRef<Semantics::NodeRef> {
-    return root_block_;
-  }
-
   // Prints the full IR.
   auto Print(llvm::raw_ostream& out) const -> void;
 
-  auto PrintBlock(llvm::raw_ostream& out, int indent,
-                  llvm::ArrayRef<Semantics::NodeRef> node_refs) const -> void;
-
-  // Prints the node information.
-  auto Print(llvm::raw_ostream& out, int indent,
-             Semantics::NodeRef node_ref) const -> void;
-
  private:
   friend class SemanticsIRFactory;
-  friend class Testing::SemanticsIRForTest;
 
   explicit SemanticsIR(const ParseTree& parse_tree)
       : parse_tree_(&parse_tree) {}
 
-  Semantics::NodeStore nodes_;
-  llvm::SmallVector<Semantics::NodeRef, 0> root_block_;
+  auto AddIdentifier(llvm::StringRef identifier) -> SemanticsIdentifierId {
+    SemanticsIdentifierId id(identifiers_.size());
+    identifiers_.push_back(identifier);
+    return id;
+  }
+
+  auto AddIntegerLiteral(llvm::APInt integer_literal)
+      -> SemanticsIntegerLiteralId {
+    SemanticsIntegerLiteralId id(integer_literals_.size());
+    integer_literals_.push_back(integer_literal);
+    return id;
+  }
+
+  auto AddNode(SemanticsNode node) -> SemanticsNodeId {
+    SemanticsNodeId id(nodes_.size());
+    nodes_.push_back(node);
+    return id;
+  }
+
+  llvm::SmallVector<llvm::StringRef> identifiers_;
+  llvm::SmallVector<llvm::APInt> integer_literals_;
+  llvm::SmallVector<SemanticsNode> nodes_;
 
   const ParseTree* parse_tree_;
 };

+ 36 - 141
toolchain/semantics/semantics_ir_factory.cpp

@@ -6,13 +6,10 @@
 
 #include <stack>
 
-#include "common/check.h"
 #include "toolchain/lexer/token_kind.h"
 #include "toolchain/lexer/tokenized_buffer.h"
 #include "toolchain/parser/parse_node_kind.h"
-#include "toolchain/semantics/node_ref.h"
-#include "toolchain/semantics/nodes/binary_operator.h"
-#include "toolchain/semantics/nodes/integer_literal.h"
+#include "toolchain/semantics/semantics_node.h"
 
 namespace Carbon {
 
@@ -23,107 +20,36 @@ auto SemanticsIRFactory::Build(const TokenizedBuffer& tokens,
   return builder.semantics_;
 }
 
-// An entry in the stack for traversing the ParseTree.
-// TODO: This is badly structured, and a redesign may be able to get rid of
-// the need for `sem_ir`. Need to keep thinking about this, but for now
-// this setup allows for test consistency.
-// Alternately, maybe think about if we can group semantics for multiple nodes
-// so that we aren't constantly copying/reallocating vectors of NodeRefs.
-struct TraversalStackEntry {
-  explicit TraversalStackEntry(ParseTree::Node parse_node)
-      : parse_node(parse_node) {}
-
-  TraversalStackEntry(ParseTree::Node parse_node,
-                      llvm::SmallVector<Semantics::NodeRef, 0> sem_ir)
-      : parse_node(parse_node), sem_ir(std::move(sem_ir)) {}
-
-  TraversalStackEntry(ParseTree::Node parse_node,
-                      llvm::SmallVector<Semantics::NodeRef, 0> sem_ir,
-                      Semantics::NodeId result_id)
-      : parse_node(parse_node),
-        sem_ir(std::move(sem_ir)),
-        result_id(result_id) {}
-
-  ParseTree::Node parse_node;
-  llvm::SmallVector<Semantics::NodeRef> sem_ir;
-  llvm::Optional<Semantics::NodeId> result_id;
-};
-
-// Converts a TokenKind to a BinaryOperator operator.
-static auto GetBinaryOp(TokenKind kind) -> Semantics::BinaryOperator::Op {
-  switch (kind) {
-    case TokenKind::Plus():
-      return Semantics::BinaryOperator::Op::Add;
-    default:
-      CARBON_FATAL() << "Unrecognized token kind: " << kind.Name();
-  }
-}
-
 void SemanticsIRFactory::Build() {
-  llvm::SmallVector<TraversalStackEntry> node_stack;
   auto range = parse_tree().postorder();
   for (auto it = range.begin();; ++it) {
     auto parse_node = *it;
     switch (auto parse_kind = parse_tree().node_kind(parse_node)) {
       case ParseNodeKind::DeclaredName(): {
-        // DeclaredNames will be acted upon as part of the declaring construct.
-        node_stack.push_back(TraversalStackEntry(parse_node));
+        auto text = parse_tree().GetNodeText(parse_node);
+        auto identifier_id = semantics_.AddIdentifier(text);
+        Push(parse_node, SemanticsNode::MakeIdentifier(identifier_id));
         break;
       }
       case ParseNodeKind::FunctionDefinition(): {
         // Merges code block children up under the FunctionDefinitionStart.
-        llvm::SmallVector<Semantics::NodeRef> body;
-        while (parse_tree().node_kind(node_stack.back().parse_node) !=
+        while (parse_tree().node_kind(node_stack_.back().parse_node) !=
                ParseNodeKind::FunctionDefinitionStart()) {
-          body.insert(body.begin(), node_stack.back().sem_ir.begin(),
-                      node_stack.back().sem_ir.end());
-          node_stack.pop_back();
+          node_stack_.pop_back();
         }
-
-        // Next is the FunctionDefinitionStart.
-        llvm::SmallVector<Semantics::NodeRef> sig =
-            std::move(node_stack.back().sem_ir);
-        node_stack.pop_back();
-
-        // TODO: This replacement is in particular why I want to change
-        // the IR setup now, but for now I want to just produce output that
-        // satisfies tests without changes.
-        auto orig_function = semantics_.nodes_.Get<Semantics::Function>(sig[0]);
-        auto orig_set_name = semantics_.nodes_.Get<Semantics::SetName>(sig[1]);
-        llvm::SmallVector<Semantics::NodeRef> function_sem_ir;
-        auto function_id = next_id();
-        function_sem_ir.push_back(semantics_.nodes_.Store(
-            Semantics::Function(orig_function.node(), function_id, body)));
-        function_sem_ir.push_back(semantics_.nodes_.Store(Semantics::SetName(
-            orig_set_name.node(), orig_set_name.name(), function_id)));
-
-        node_stack.push_back(
-            TraversalStackEntry(parse_node, std::move(function_sem_ir)));
+        Pop(ParseNodeKind::FunctionDefinitionStart());
+        semantics_.AddNode(SemanticsNode::MakeFunctionDefinitionEnd());
+        Push(parse_node);
         break;
       }
       case ParseNodeKind::FunctionDefinitionStart(): {
-        // TODO: Skip over the parameter list for now.
-        node_stack.pop_back();
-
-        // TODO: At this point, it should be possible to forward-declare the
-        // function so that it can be called from its code block. For now, we
-        // just assemble the semantic function to associate the body.
-        llvm::SmallVector<Semantics::NodeRef> sem_ir;
-
-        auto function_id = next_id();
-        sem_ir.push_back(semantics_.nodes_.Store(
-            Semantics::Function(parse_node, function_id, {})));
-
-        auto name_node = node_stack.back().parse_node;
-        sem_ir.push_back(semantics_.nodes_.Store(Semantics::SetName(
-            name_node, parse_tree().GetNodeText(name_node), function_id)));
-        node_stack.pop_back();
-
-        // Do nothing with the `fn`.
-        node_stack.pop_back();
-
-        node_stack.push_back(
-            TraversalStackEntry(parse_node, std::move(sem_ir)));
+        Pop(ParseNodeKind::ParameterList());
+        auto name_node_id = PopWithResult(ParseNodeKind::DeclaredName());
+        Pop(ParseNodeKind::FunctionIntroducer());
+        auto decl_id = semantics_.AddNode(
+            SemanticsNode::MakeFunctionDeclaration(name_node_id));
+        semantics_.AddNode(SemanticsNode::MakeFunctionDefinitionStart(decl_id));
+        Push(parse_node);
         break;
       }
       case ParseNodeKind::FileEnd(): {
@@ -131,77 +57,48 @@ void SemanticsIRFactory::Build() {
         CARBON_CHECK(it == range.end())
             << "FileEnd should always be last, found "
             << parse_tree().node_kind(*it);
-
-        for (const auto& entry : node_stack) {
-          semantics_.root_block_.append(entry.sem_ir.begin(),
-                                        entry.sem_ir.end());
-        }
         return;
       }
       case ParseNodeKind::InfixOperator(): {
-        llvm::SmallVector<Semantics::NodeRef> sem_ir;
-
-        sem_ir.insert(sem_ir.begin(), node_stack.back().sem_ir.begin(),
-                      node_stack.back().sem_ir.end());
-        auto rhs_id = *node_stack.back().result_id;
-        node_stack.pop_back();
-
-        sem_ir.insert(sem_ir.begin(), node_stack.back().sem_ir.begin(),
-                      node_stack.back().sem_ir.end());
-        auto lhs_id = *node_stack.back().result_id;
-        node_stack.pop_back();
+        auto rhs_id = PopWithResult();
+        auto lhs_id = PopWithResult();
 
         // Figure out the operator for the token.
         auto token = parse_tree().node_token(parse_node);
-        auto token_kind = tokens_->GetKind(token);
-        auto op = GetBinaryOp(token_kind);
-
-        auto literal_id = next_id();
-        sem_ir.push_back(semantics_.nodes_.Store(Semantics::BinaryOperator(
-            parse_node, literal_id, op, lhs_id, rhs_id)));
-        node_stack.push_back(
-            TraversalStackEntry(parse_node, std::move(sem_ir), literal_id));
+        switch (auto token_kind = tokens_->GetKind(token)) {
+          case TokenKind::Plus():
+            Push(parse_node,
+                 SemanticsNode::MakeBinaryOperatorAdd(lhs_id, rhs_id));
+            break;
+          default:
+            CARBON_FATAL() << "Unrecognized token kind: " << token_kind.Name();
+        }
         break;
       }
       case ParseNodeKind::Literal(): {
-        auto literal_id = next_id();
-
-        llvm::SmallVector<Semantics::NodeRef> sem_ir;
         auto token = parse_tree().node_token(parse_node);
         switch (auto token_kind = tokens_->GetKind(token)) {
           case TokenKind::IntegerLiteral(): {
-            sem_ir.push_back(semantics_.nodes_.Store(Semantics::IntegerLiteral(
-                parse_node, literal_id, tokens_->GetIntegerLiteral(token))));
+            auto id =
+                semantics_.AddIntegerLiteral(tokens_->GetIntegerLiteral(token));
+            Push(parse_node, SemanticsNode::MakeIntegerLiteral(id));
             break;
           }
           default:
             CARBON_FATAL() << "Unhandled kind: " << token_kind.Name();
         }
-        node_stack.push_back(
-            TraversalStackEntry(parse_node, std::move(sem_ir), literal_id));
         break;
       }
       case ParseNodeKind::ReturnStatement(): {
-        CARBON_CHECK(parse_tree().node_kind(node_stack.back().parse_node) ==
-                     ParseNodeKind::StatementEnd());
-        node_stack.pop_back();
+        Pop(ParseNodeKind::StatementEnd());
 
         // TODO: Restructure ReturnStatement so that we can do this without
         // looking at the subtree size.
         if (parse_tree().node_subtree_size(parse_node) == 2) {
-          node_stack.push_back(TraversalStackEntry(
-              parse_node, {semantics_.nodes_.Store(
-                              Semantics::Return(parse_node, llvm::None))}));
+          Push(parse_node, SemanticsNode::MakeReturn());
         } else {
-          // Return should only ever have one expression child.
-          llvm::SmallVector<Semantics::NodeRef> sem_ir =
-              std::move(node_stack.back().sem_ir);
-          Semantics::NodeId result_id = *node_stack.back().result_id;
-          node_stack.pop_back();
-          sem_ir.push_back(semantics_.nodes_.Store(
-              Semantics::Return(parse_node, result_id)));
-          node_stack.push_back(
-              TraversalStackEntry(parse_node, std::move(sem_ir)));
+          auto arg = PopWithResult();
+          Push(parse_node, SemanticsNode::MakeReturnExpression(arg));
         }
         break;
       }
@@ -210,17 +107,15 @@ void SemanticsIRFactory::Build() {
         // it's unused and only stored so that node counts match.
         // TODO: Reorder with ParameterListStart so that we can traverse without
         // subtree_size.
-        CARBON_CHECK(parse_tree().node_kind(node_stack.back().parse_node) ==
-                     ParseNodeKind::ParameterListEnd());
-        node_stack.pop_back();
-        node_stack.push_back(TraversalStackEntry(parse_node));
+        Pop(ParseNodeKind::ParameterListEnd());
+        Push(parse_node);
         break;
       }
       case ParseNodeKind::FunctionIntroducer():
       case ParseNodeKind::ParameterListEnd():
       case ParseNodeKind::StatementEnd(): {
         // The token has no action, but we still track it for the stack.
-        node_stack.push_back(TraversalStackEntry(parse_node));
+        Push(parse_node);
         break;
       }
       default: {

+ 42 - 6
toolchain/semantics/semantics_ir_factory.h

@@ -5,6 +5,7 @@
 #ifndef CARBON_TOOLCHAIN_SEMANTICS_SEMANTICS_IR_FACTORY_H_
 #define CARBON_TOOLCHAIN_SEMANTICS_SEMANTICS_IR_FACTORY_H_
 
+#include "common/check.h"
 #include "toolchain/parser/parse_tree.h"
 #include "toolchain/semantics/semantics_ir.h"
 
@@ -18,15 +19,50 @@ class SemanticsIRFactory {
       -> SemanticsIR;
 
  private:
+  struct TraversalStackEntry {
+    ParseTree::Node parse_node;
+    llvm::Optional<SemanticsNodeId> result_id;
+  };
+
   explicit SemanticsIRFactory(const TokenizedBuffer& tokens,
                               const ParseTree& parse_tree)
       : tokens_(&tokens), semantics_(parse_tree) {}
 
-  void Build();
+  auto Build() -> void;
+
+  // Pushes a stack entry for `parse_node` that carries no result ID.
+  auto Push(ParseTree::Node parse_node) -> void {
+    node_stack_.push_back({parse_node, llvm::None});
+  }
+
+  // Adds `node` to the SemanticsIR being built, then pushes a stack entry for
+  // `parse_node` whose result ID is the ID returned by `AddNode`.
+  auto Push(ParseTree::Node parse_node, SemanticsNode node) -> void {
+    auto node_id = semantics_.AddNode(node);
+    node_stack_.push_back({parse_node, node_id});
+  }
+
+  // Pops the top stack entry. CHECK-fails unless the entry's parse node has
+  // kind `pop_parse_kind` and the entry carries no result ID.
+  auto Pop(ParseNodeKind pop_parse_kind) -> void {
+    auto back = node_stack_.back();
+    auto parse_kind = parse_tree().node_kind(back.parse_node);
+    CARBON_CHECK(parse_kind == pop_parse_kind)
+        << "Expected " << pop_parse_kind << ", found " << parse_kind;
+    CARBON_CHECK(!back.result_id) << "Expected no result ID on " << parse_kind;
+    node_stack_.pop_back();
+  }
+
+  // Pops the top stack entry and returns its result ID. CHECK-fails if the
+  // entry carries no result ID, mirroring the inverse check in `Pop`.
+  auto PopWithResult() -> SemanticsNodeId {
+    auto back = node_stack_.back();
+    // Verify the optional holds a value before dereferencing it; reading an
+    // empty optional would otherwise yield a garbage node ID.
+    CARBON_CHECK(back.result_id)
+        << "Expected a result ID on "
+        << parse_tree().node_kind(back.parse_node);
+    auto node_id = *back.result_id;
+    node_stack_.pop_back();
+    return node_id;
+  }
 
-  // Returns a unique ID for the SemanticsIR.
-  auto next_id() -> Semantics::NodeId {
-    return Semantics::NodeId(id_counter_++);
+  // Pops the top stack entry and returns its result ID. CHECK-fails unless the
+  // entry's parse node has kind `pop_parse_kind` and the entry carries a result
+  // ID.
+  auto PopWithResult(ParseNodeKind pop_parse_kind) -> SemanticsNodeId {
+    auto back = node_stack_.back();
+    auto parse_kind = parse_tree().node_kind(back.parse_node);
+    // Run both checks before dereferencing the optional so that a mismatched
+    // entry is reported instead of reading an empty result ID.
+    CARBON_CHECK(parse_kind == pop_parse_kind)
+        << "Expected " << pop_parse_kind << ", found " << parse_kind;
+    CARBON_CHECK(back.result_id) << "Expected a result ID on " << parse_kind;
+    auto node_id = *back.result_id;
+    node_stack_.pop_back();
+    return node_id;
   }
 
   // Convenience accessor.
@@ -38,8 +74,8 @@ class SemanticsIRFactory {
   // The SemanticsIR being constructed.
   SemanticsIR semantics_;
 
-  // A counter for unique IDs.
-  int32_t id_counter_ = 0;
+  // The stack during Build. Will contain file-level parse nodes on return.
+  llvm::SmallVector<TraversalStackEntry> node_stack_;
 };
 
 }  // namespace Carbon

+ 42 - 0
toolchain/semantics/semantics_node.cpp

@@ -0,0 +1,42 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "toolchain/semantics/semantics_node.h"
+
+namespace Carbon {
+
+static auto PrintArgs(llvm::raw_ostream& /*out*/,
+                      const SemanticsNodeArgs::None /*no_args*/) {}
+
+static auto PrintArgs(llvm::raw_ostream& out, SemanticsNodeId one_node) {
+  out << one_node;
+}
+
+static auto PrintArgs(llvm::raw_ostream& out, SemanticsTwoNodeIds two_nodes) {
+  out << two_nodes.nodes[0] << ", " << two_nodes.nodes[1];
+}
+
+static auto PrintArgs(llvm::raw_ostream& out,
+                      SemanticsIdentifierId identifier) {
+  out << identifier;
+}
+
+static auto PrintArgs(llvm::raw_ostream& out,
+                      SemanticsIntegerLiteralId integer_literal) {
+  out << integer_literal;
+}
+
+void SemanticsNode::Print(llvm::raw_ostream& out) const {
+  out << kind_ << "(";
+  switch (kind_) {
+#define CARBON_SEMANTICS_NODE_KIND(Name, Args) \
+  case SemanticsNodeKind::Name():              \
+    PrintArgs(out, one_of_args_.Args);         \
+    break;
+#include "toolchain/semantics/semantics_node_kind.def"
+  }
+  out << ")";
+}
+
+}  // namespace Carbon

+ 137 - 0
toolchain/semantics/semantics_node.h

@@ -0,0 +1,137 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_TOOLCHAIN_SEMANTICS_SEMANTICS_NODE_H_
+#define CARBON_TOOLCHAIN_SEMANTICS_SEMANTICS_NODE_H_
+
+#include <cstdint>
+
+#include "common/ostream.h"
+#include "toolchain/semantics/semantics_node_kind.h"
+
+namespace Carbon {
+
+// Type-safe storage of Node IDs.
+struct SemanticsNodeId {
+  SemanticsNodeId() : id(-1) {}
+  explicit SemanticsNodeId(int32_t id) : id(id) {}
+  SemanticsNodeId(SemanticsNodeId const&) = default;
+  auto operator=(const SemanticsNodeId& other) -> SemanticsNodeId& = default;
+
+  void Print(llvm::raw_ostream& out) const { out << "%" << id; }
+
+  int32_t id;
+};
+
+// Type-safe storage of identifiers.
+struct SemanticsIdentifierId {
+  SemanticsIdentifierId() : id(-1) {}
+  explicit SemanticsIdentifierId(int32_t id) : id(id) {}
+
+  void Print(llvm::raw_ostream& out) const { out << "%s" << id; }
+
+  int32_t id;
+};
+
+// Type-safe storage of integer literals.
+struct SemanticsIntegerLiteralId {
+  SemanticsIntegerLiteralId() : id(-1) {}
+  explicit SemanticsIntegerLiteralId(int32_t id) : id(id) {}
+
+  void Print(llvm::raw_ostream& out) const { out << "%s" << id; }
+
+  int32_t id;
+};
+
+struct SemanticsTwoNodeIds {
+  SemanticsNodeId nodes[2];
+};
+
+union SemanticsNodeArgs {
+  struct None {};
+
+  SemanticsNodeArgs() : no_args() {}
+  explicit SemanticsNodeArgs(SemanticsNodeId one_node) : one_node(one_node) {}
+  explicit SemanticsNodeArgs(SemanticsTwoNodeIds two_nodes)
+      : two_nodes(two_nodes) {}
+  explicit SemanticsNodeArgs(SemanticsIdentifierId identifier)
+      : identifier(identifier) {}
+  explicit SemanticsNodeArgs(SemanticsIntegerLiteralId integer_literal)
+      : integer_literal(integer_literal) {}
+
+  None no_args;
+  SemanticsNodeId one_node;
+  SemanticsTwoNodeIds two_nodes;
+  SemanticsIdentifierId identifier;
+  SemanticsIntegerLiteralId integer_literal;
+};
+// TODO: This is currently 8 bytes only because of two_nodes; others are only 4
+// bytes. The NodeKind is 1 byte; if we reduced this structure to 7 bytes (3.5
+// bytes per node), we could potentially change SemanticsNode from 12 bytes to 8
+// bytes. This may be worth investigating further.
+static_assert(sizeof(SemanticsNodeArgs) == 8,
+              "Unexpected SemanticsNodeArgs size");
+
+// The standard structure for nodes.
+class SemanticsNode {
+ public:
+  // Define factory functions for each node kind. These should improve type
+  // safety by enforcing argument counts.
+  // `clang-format` has a bug with spacing around `->` returns here. See
+  // https://bugs.llvm.org/show_bug.cgi?id=48320 for details.
+#define CARBON_SEMANTICS_MAKE_no_args(Name)                               \
+  static auto Make##Name()->SemanticsNode {                               \
+    return SemanticsNode(SemanticsNodeKind::Name(), SemanticsNodeArgs()); \
+  }
+#define CARBON_SEMANTICS_MAKE_one_node(Name)                        \
+  static auto Make##Name(SemanticsNodeId one_node)->SemanticsNode { \
+    return SemanticsNode(SemanticsNodeKind::Name(),                 \
+                         SemanticsNodeArgs(one_node));              \
+  }
+#define CARBON_SEMANTICS_MAKE_two_nodes(Name)                          \
+  static auto Make##Name(SemanticsNodeId node1, SemanticsNodeId node2) \
+      ->SemanticsNode {                                                \
+    return SemanticsNode(                                              \
+        SemanticsNodeKind::Name(),                                     \
+        SemanticsNodeArgs(SemanticsTwoNodeIds{node1, node2}));         \
+  }
+#define CARBON_SEMANTICS_MAKE_identifier(Name)                              \
+  static auto Make##Name(SemanticsIdentifierId identifier)->SemanticsNode { \
+    return SemanticsNode(SemanticsNodeKind::Name(),                         \
+                         SemanticsNodeArgs(identifier));                    \
+  }
+#define CARBON_SEMANTICS_MAKE_integer_literal(Name)                 \
+  static auto Make##Name(SemanticsIntegerLiteralId integer_literal) \
+      ->SemanticsNode {                                             \
+    return SemanticsNode(SemanticsNodeKind::Name(),                 \
+                         SemanticsNodeArgs(integer_literal));       \
+  }
+
+#define CARBON_SEMANTICS_NODE_KIND(Name, ArgsType) \
+  CARBON_SEMANTICS_MAKE_##ArgsType(Name)
+#include "toolchain/semantics/semantics_node_kind.def"
+
+#undef CARBON_SEMANTICS_MAKE_no_args
+#undef CARBON_SEMANTICS_MAKE_one_node
+#undef CARBON_SEMANTICS_MAKE_two_nodes
+#undef CARBON_SEMANTICS_MAKE_identifier
+#undef CARBON_SEMANTICS_MAKE_integer_literal
+
+  SemanticsNode() : kind_(SemanticsNodeKind::Invalid()) {}
+
+  // Returns the kind of this node. Const so it can be called on const nodes.
+  auto kind() const -> SemanticsNodeKind { return kind_; }
+
+  void Print(llvm::raw_ostream& out) const;
+
+ private:
+  SemanticsNode(SemanticsNodeKind kind, SemanticsNodeArgs one_of_args)
+      : kind_(kind), one_of_args_(one_of_args) {}
+
+  SemanticsNodeKind kind_;
+
+  SemanticsNodeArgs one_of_args_;
+};
+
+}  // namespace Carbon
+
+#endif  // CARBON_TOOLCHAIN_SEMANTICS_SEMANTICS_NODE_H_

+ 19 - 0
toolchain/semantics/semantics_node_kind.cpp

@@ -0,0 +1,19 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "toolchain/semantics/semantics_node_kind.h"
+
+#include "llvm/ADT/StringRef.h"
+
+namespace Carbon {
+
+auto SemanticsNodeKind::name() const -> llvm::StringRef {
+  static constexpr llvm::StringLiteral Names[] = {
+#define CARBON_SEMANTICS_NODE_KIND(Name, ...) #Name,
+#include "toolchain/semantics/semantics_node_kind.def"
+  };
+  return Names[static_cast<int>(kind_)];
+}
+
+}  // namespace Carbon

+ 50 - 0
toolchain/semantics/semantics_node_kind.def

@@ -0,0 +1,50 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Note that this is an X-macro header.
+//
+// It does not use `#include` guards, and instead is designed to be `#include`ed
+// after the x-macro is defined in order for its inclusion to expand to the
+// desired output. The x-macro for this header is `CARBON_SEMANTICS_NODE_KIND`.
+// The definition provided will be removed at the end of this file to clean up.
+
+#ifndef CARBON_SEMANTICS_NODE_KIND
+#error "Must define the x-macro to use this file."
+#endif
+
+// No args.
+CARBON_SEMANTICS_NODE_KIND(Invalid, no_args)
+
+// Two nodes: lhs and rhs.
+CARBON_SEMANTICS_NODE_KIND(BinaryOperatorAdd, two_nodes)
+
+// No args.
+CARBON_SEMANTICS_NODE_KIND(CodeBlockStart, no_args)
+
+// No args.
+CARBON_SEMANTICS_NODE_KIND(CodeBlockEnd, no_args)
+
+// One node: the name.
+// TODO: Add a declaration scope as a second arg.
+CARBON_SEMANTICS_NODE_KIND(FunctionDeclaration, one_node)
+
+// One node: the FunctionDeclaration.
+CARBON_SEMANTICS_NODE_KIND(FunctionDefinitionStart, one_node)
+
+// No args.
+CARBON_SEMANTICS_NODE_KIND(FunctionDefinitionEnd, no_args)
+
+// The declared IdentifierId.
+CARBON_SEMANTICS_NODE_KIND(Identifier, identifier)
+
+// The declared IntegerLiteralId.
+CARBON_SEMANTICS_NODE_KIND(IntegerLiteral, integer_literal)
+
+// No args.
+CARBON_SEMANTICS_NODE_KIND(Return, no_args)
+
+// One node: the return expression.
+CARBON_SEMANTICS_NODE_KIND(ReturnExpression, one_node)
+
+#undef CARBON_SEMANTICS_NODE_KIND

+ 66 - 0
toolchain/semantics/semantics_node_kind.h

@@ -0,0 +1,66 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_TOOLCHAIN_SEMANTICS_SEMANTICS_NODE_KIND_H_
+#define CARBON_TOOLCHAIN_SEMANTICS_SEMANTICS_NODE_KIND_H_
+
+#include <cstdint>
+
+#include "common/ostream.h"
+
+namespace Carbon {
+
+class SemanticsNodeKind {
+ private:
+  // Note that this must be declared earlier in the class so that its type can
+  // be used, for example in the conversion operator.
+  enum class KindEnum : uint8_t {
+#define CARBON_SEMANTICS_NODE_KIND(Name, ...) Name,
+#include "toolchain/semantics/semantics_node_kind.def"
+  };
+
+ public:
+  // `clang-format` has a bug with spacing around `->` returns in macros. See
+  // https://bugs.llvm.org/show_bug.cgi?id=48320 for details.
+#define CARBON_SEMANTICS_NODE_KIND(Name, ...)       \
+  static constexpr auto Name()->SemanticsNodeKind { \
+    return SemanticsNodeKind(KindEnum::Name);       \
+  }
+#include "toolchain/semantics/semantics_node_kind.def"
+
+  // The default constructor is deleted because objects of this type should
+  // always be constructed using the above factory functions for each unique
+  // kind.
+  SemanticsNodeKind() = delete;
+
+  friend auto operator==(SemanticsNodeKind lhs, SemanticsNodeKind rhs) -> bool {
+    return lhs.kind_ == rhs.kind_;
+  }
+  friend auto operator!=(SemanticsNodeKind lhs, SemanticsNodeKind rhs) -> bool {
+    return lhs.kind_ != rhs.kind_;
+  }
+
+  // Gets a friendly name for the node kind for logging or debugging.
+  [[nodiscard]] auto name() const -> llvm::StringRef;
+
+  // Enable conversion to our private enum, including in a `constexpr` context,
+  // to enable usage in `switch` and `case`. The enum remains private and
+  // nothing else should be using this function.
+  // NOLINTNEXTLINE(google-explicit-constructor)
+  constexpr operator KindEnum() const { return kind_; }
+
+  void Print(llvm::raw_ostream& out) const { out << name(); }
+
+ private:
+  constexpr explicit SemanticsNodeKind(KindEnum k) : kind_(k) {}
+
+  KindEnum kind_;
+};
+
+// We expect the node kind to fit compactly into 8 bits.
+static_assert(sizeof(SemanticsNodeKind) == 1, "Kind objects include padding!");
+
+}  // namespace Carbon
+
+#endif  // CARBON_TOOLCHAIN_SEMANTICS_SEMANTICS_NODE_KIND_H_

+ 5 - 1
toolchain/semantics/testdata/empty.carbon

@@ -4,5 +4,9 @@
 //
 // AUTOUPDATE
 // RUN: %{carbon-run-semantics}
-// CHECK:STDOUT: {
+// CHECK:STDOUT: identifiers = {
+// CHECK:STDOUT: },
+// CHECK:STDOUT: integer_literals = {
+// CHECK:STDOUT: },
+// CHECK:STDOUT: nodes = {
 // CHECK:STDOUT: }

+ 10 - 6
toolchain/semantics/testdata/function/basic.carbon

@@ -4,12 +4,16 @@
 //
 // AUTOUPDATE
 // RUN: %{carbon-run-semantics}
-// CHECK:STDOUT: {
-// CHECK:STDOUT:   Function(
-// CHECK:STDOUT:       %1,
-// CHECK:STDOUT:       {
-// CHECK:STDOUT:       }),
-// CHECK:STDOUT:   SetName(`Foo`, %1),
+// CHECK:STDOUT: identifiers = {
+// CHECK:STDOUT:   %s0 = "Foo";
+// CHECK:STDOUT: },
+// CHECK:STDOUT: integer_literals = {
+// CHECK:STDOUT: },
+// CHECK:STDOUT: nodes = {
+// CHECK:STDOUT:   %0 = Identifier(%s0);
+// CHECK:STDOUT:   %1 = FunctionDeclaration(%0);
+// CHECK:STDOUT:   %2 = FunctionDefinitionStart(%1);
+// CHECK:STDOUT:   %3 = FunctionDefinitionEnd();
 // CHECK:STDOUT: }
 
 fn Foo() {}

+ 20 - 16
toolchain/semantics/testdata/function/order.carbon

@@ -4,22 +4,26 @@
 //
 // AUTOUPDATE
 // RUN: %{carbon-run-semantics}
-// CHECK:STDOUT: {
-// CHECK:STDOUT:   Function(
-// CHECK:STDOUT:       %1,
-// CHECK:STDOUT:       {
-// CHECK:STDOUT:       }),
-// CHECK:STDOUT:   SetName(`Foo`, %1),
-// CHECK:STDOUT:   Function(
-// CHECK:STDOUT:       %3,
-// CHECK:STDOUT:       {
-// CHECK:STDOUT:       }),
-// CHECK:STDOUT:   SetName(`Bar`, %3),
-// CHECK:STDOUT:   Function(
-// CHECK:STDOUT:       %5,
-// CHECK:STDOUT:       {
-// CHECK:STDOUT:       }),
-// CHECK:STDOUT:   SetName(`Baz`, %5),
+// CHECK:STDOUT: identifiers = {
+// CHECK:STDOUT:   %s0 = "Foo";
+// CHECK:STDOUT:   %s1 = "Bar";
+// CHECK:STDOUT:   %s2 = "Baz";
+// CHECK:STDOUT: },
+// CHECK:STDOUT: integer_literals = {
+// CHECK:STDOUT: },
+// CHECK:STDOUT: nodes = {
+// CHECK:STDOUT:   %0 = Identifier(%s0);
+// CHECK:STDOUT:   %1 = FunctionDeclaration(%0);
+// CHECK:STDOUT:   %2 = FunctionDefinitionStart(%1);
+// CHECK:STDOUT:   %3 = FunctionDefinitionEnd();
+// CHECK:STDOUT:   %4 = Identifier(%s1);
+// CHECK:STDOUT:   %5 = FunctionDeclaration(%4);
+// CHECK:STDOUT:   %6 = FunctionDefinitionStart(%5);
+// CHECK:STDOUT:   %7 = FunctionDefinitionEnd();
+// CHECK:STDOUT:   %8 = Identifier(%s2);
+// CHECK:STDOUT:   %9 = FunctionDeclaration(%8);
+// CHECK:STDOUT:   %10 = FunctionDefinitionStart(%9);
+// CHECK:STDOUT:   %11 = FunctionDefinitionEnd();
 // CHECK:STDOUT: }
 
 fn Foo() {}

+ 16 - 10
toolchain/semantics/testdata/return/binary_op.carbon

@@ -4,16 +4,22 @@
 //
 // AUTOUPDATE
 // RUN: %{carbon-run-semantics}
-// CHECK:STDOUT: {
-// CHECK:STDOUT:   Function(
-// CHECK:STDOUT:       %4,
-// CHECK:STDOUT:       {
-// CHECK:STDOUT:         IntegerLiteral(%1, 12),
-// CHECK:STDOUT:         IntegerLiteral(%2, 34),
-// CHECK:STDOUT:         BinaryOperator(%3, +, %1, %2),
-// CHECK:STDOUT:         Return(%3),
-// CHECK:STDOUT:       }),
-// CHECK:STDOUT:   SetName(`Main`, %4),
+// CHECK:STDOUT: identifiers = {
+// CHECK:STDOUT:   %s0 = "Main";
+// CHECK:STDOUT: },
+// CHECK:STDOUT: integer_literals = {
+// CHECK:STDOUT:   %s0 = 12;
+// CHECK:STDOUT:   %s1 = 34;
+// CHECK:STDOUT: },
+// CHECK:STDOUT: nodes = {
+// CHECK:STDOUT:   %0 = Identifier(%s0);
+// CHECK:STDOUT:   %1 = FunctionDeclaration(%0);
+// CHECK:STDOUT:   %2 = FunctionDefinitionStart(%1);
+// CHECK:STDOUT:     %3 = IntegerLiteral(%s0);
+// CHECK:STDOUT:     %4 = IntegerLiteral(%s1);
+// CHECK:STDOUT:     %5 = BinaryOperatorAdd(%3, %4);
+// CHECK:STDOUT:     %6 = ReturnExpression(%5);
+// CHECK:STDOUT:   %7 = FunctionDefinitionEnd();
 // CHECK:STDOUT: }
 
 fn Main() {

+ 13 - 8
toolchain/semantics/testdata/return/literal.carbon

@@ -4,14 +4,19 @@
 //
 // AUTOUPDATE
 // RUN: %{carbon-run-semantics}
-// CHECK:STDOUT: {
-// CHECK:STDOUT:   Function(
-// CHECK:STDOUT:       %2,
-// CHECK:STDOUT:       {
-// CHECK:STDOUT:         IntegerLiteral(%1, 0),
-// CHECK:STDOUT:         Return(%1),
-// CHECK:STDOUT:       }),
-// CHECK:STDOUT:   SetName(`Main`, %2),
+// CHECK:STDOUT: identifiers = {
+// CHECK:STDOUT:   %s0 = "Main";
+// CHECK:STDOUT: },
+// CHECK:STDOUT: integer_literals = {
+// CHECK:STDOUT:   %s0 = 0;
+// CHECK:STDOUT: },
+// CHECK:STDOUT: nodes = {
+// CHECK:STDOUT:   %0 = Identifier(%s0);
+// CHECK:STDOUT:   %1 = FunctionDeclaration(%0);
+// CHECK:STDOUT:   %2 = FunctionDefinitionStart(%1);
+// CHECK:STDOUT:     %3 = IntegerLiteral(%s0);
+// CHECK:STDOUT:     %4 = ReturnExpression(%3);
+// CHECK:STDOUT:   %5 = FunctionDefinitionEnd();
 // CHECK:STDOUT: }
 
 fn Main() {

+ 11 - 7
toolchain/semantics/testdata/return/trivial.carbon

@@ -4,13 +4,17 @@
 //
 // AUTOUPDATE
 // RUN: %{carbon-run-semantics}
-// CHECK:STDOUT: {
-// CHECK:STDOUT:   Function(
-// CHECK:STDOUT:       %1,
-// CHECK:STDOUT:       {
-// CHECK:STDOUT:         Return(None),
-// CHECK:STDOUT:       }),
-// CHECK:STDOUT:   SetName(`Main`, %1),
+// CHECK:STDOUT: identifiers = {
+// CHECK:STDOUT:   %s0 = "Main";
+// CHECK:STDOUT: },
+// CHECK:STDOUT: integer_literals = {
+// CHECK:STDOUT: },
+// CHECK:STDOUT: nodes = {
+// CHECK:STDOUT:   %0 = Identifier(%s0);
+// CHECK:STDOUT:   %1 = FunctionDeclaration(%0);
+// CHECK:STDOUT:   %2 = FunctionDefinitionStart(%1);
+// CHECK:STDOUT:     %3 = Return();
+// CHECK:STDOUT:   %4 = FunctionDefinitionEnd();
 // CHECK:STDOUT: }
 
 fn Main() {