semantics_ir.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/semantics/semantics_ir.h"
  5. #include "common/check.h"
  6. #include "llvm/ADT/Sequence.h"
  7. #include "llvm/ADT/SmallVector.h"
  8. #include "toolchain/base/pretty_stack_trace_function.h"
  9. #include "toolchain/parser/parse_tree_node_location_translator.h"
  10. #include "toolchain/semantics/semantics_builtin_kind.h"
  11. #include "toolchain/semantics/semantics_context.h"
  12. #include "toolchain/semantics/semantics_node.h"
  13. #include "toolchain/semantics/semantics_node_kind.h"
  14. namespace Carbon::SemIR {
  15. auto File::MakeBuiltinIR() -> File {
  16. File semantics_ir(/*builtin_ir=*/nullptr);
  17. semantics_ir.nodes_.reserve(BuiltinKind::ValidCount);
  18. // Error uses a self-referential type so that it's not accidentally treated as
  19. // a normal type. Every other builtin is a type, including the
  20. // self-referential TypeType.
  21. #define CARBON_SEMANTICS_BUILTIN_KIND(Name, ...) \
  22. semantics_ir.nodes_.push_back(Node::Builtin::Make( \
  23. BuiltinKind::Name, BuiltinKind::Name == BuiltinKind::Error \
  24. ? TypeId::Error \
  25. : TypeId::TypeType));
  26. #include "toolchain/semantics/semantics_builtin_kind.def"
  27. CARBON_CHECK(semantics_ir.node_blocks_.size() == 1)
  28. << "BuildBuiltins should only have the empty block, actual: "
  29. << semantics_ir.node_blocks_.size();
  30. CARBON_CHECK(semantics_ir.nodes_.size() == BuiltinKind::ValidCount)
  31. << "BuildBuiltins should produce " << BuiltinKind::ValidCount
  32. << " nodes, actual: " << semantics_ir.nodes_.size();
  33. return semantics_ir;
  34. }
  35. auto File::MakeFromParseTree(const File& builtin_ir,
  36. const TokenizedBuffer& tokens,
  37. const ParseTree& parse_tree,
  38. DiagnosticConsumer& consumer,
  39. llvm::raw_ostream* vlog_stream) -> File {
  40. File semantics_ir(&builtin_ir);
  41. // Copy builtins over.
  42. semantics_ir.nodes_.resize_for_overwrite(BuiltinKind::ValidCount);
  43. static constexpr auto BuiltinIR = CrossReferenceIRId(0);
  44. for (int i : llvm::seq(BuiltinKind::ValidCount)) {
  45. // We can reuse the type node ID because the offsets of cross-references
  46. // will be the same in this IR.
  47. auto type = builtin_ir.nodes_[i].type_id();
  48. semantics_ir.nodes_[i] =
  49. Node::CrossReference::Make(type, BuiltinIR, NodeId(i));
  50. }
  51. ParseTreeNodeLocationTranslator translator(&tokens, &parse_tree);
  52. ErrorTrackingDiagnosticConsumer err_tracker(consumer);
  53. DiagnosticEmitter<ParseTree::Node> emitter(translator, err_tracker);
  54. Check::Context context(tokens, emitter, parse_tree, semantics_ir,
  55. vlog_stream);
  56. PrettyStackTraceFunction context_dumper(
  57. [&](llvm::raw_ostream& output) { context.PrintForStackDump(output); });
  58. // Add a block for the ParseTree.
  59. context.node_block_stack().Push();
  60. context.PushScope();
  61. // Loops over all nodes in the tree. On some errors, this may return early,
  62. // for example if an unrecoverable state is encountered.
  63. for (auto parse_node : parse_tree.postorder()) {
  64. switch (auto parse_kind = parse_tree.node_kind(parse_node)) {
  65. #define CARBON_PARSE_NODE_KIND(Name) \
  66. case ParseNodeKind::Name: { \
  67. if (!Check::Handle##Name(context, parse_node)) { \
  68. semantics_ir.has_errors_ = true; \
  69. return semantics_ir; \
  70. } \
  71. break; \
  72. }
  73. #include "toolchain/parser/parse_node_kind.def"
  74. }
  75. }
  76. // Pop information for the file-level scope.
  77. semantics_ir.top_node_block_id_ = context.node_block_stack().Pop();
  78. context.PopScope();
  79. context.VerifyOnFinish();
  80. semantics_ir.has_errors_ = err_tracker.seen_error();
  81. #ifndef NDEBUG
  82. if (auto verify = semantics_ir.Verify(); !verify.ok()) {
  83. CARBON_FATAL() << semantics_ir
  84. << "Built invalid semantics IR: " << verify.error() << "\n";
  85. }
  86. #endif
  87. return semantics_ir;
  88. }
  89. auto File::Verify() const -> ErrorOr<Success> {
  90. // Invariants don't necessarily hold for invalid IR.
  91. if (has_errors_) {
  92. return Success();
  93. }
  94. // Check that every code block has a terminator sequence that appears at the
  95. // end of the block.
  96. for (const Function& function : functions_) {
  97. for (NodeBlockId block_id : function.body_block_ids) {
  98. TerminatorKind prior_kind = TerminatorKind::NotTerminator;
  99. for (NodeId node_id : GetNodeBlock(block_id)) {
  100. TerminatorKind node_kind = GetNode(node_id).kind().terminator_kind();
  101. if (prior_kind == TerminatorKind::Terminator) {
  102. return Error(llvm::formatv("Node {0} in block {1} follows terminator",
  103. node_id, block_id));
  104. }
  105. if (prior_kind > node_kind) {
  106. return Error(
  107. llvm::formatv("Non-terminator node {0} in block {1} follows "
  108. "terminator sequence",
  109. node_id, block_id));
  110. }
  111. prior_kind = node_kind;
  112. }
  113. if (prior_kind != TerminatorKind::Terminator) {
  114. return Error(llvm::formatv("No terminator in block {0}", block_id));
  115. }
  116. }
  117. }
  118. // TODO: Check that a node only references other nodes that are either global
  119. // or that dominate it.
  120. return Success();
  121. }
  122. static constexpr int Indent = 2;
  123. template <typename T>
  124. static auto PrintList(llvm::raw_ostream& out, llvm::StringLiteral name,
  125. const llvm::SmallVector<T>& list) {
  126. out << name << ": [\n";
  127. for (const auto& element : list) {
  128. out.indent(Indent);
  129. out << element << ",\n";
  130. }
  131. out << "]\n";
  132. }
  133. template <typename T>
  134. static auto PrintBlock(llvm::raw_ostream& out, llvm::StringLiteral block_name,
  135. const llvm::SmallVector<T>& blocks) {
  136. out << block_name << ": [\n";
  137. for (const auto& block : blocks) {
  138. out.indent(Indent);
  139. out << "[\n";
  140. for (const auto& node : block) {
  141. out.indent(2 * Indent);
  142. out << node << ",\n";
  143. }
  144. out.indent(Indent);
  145. out << "],\n";
  146. }
  147. out << "]\n";
  148. }
  149. auto File::Print(llvm::raw_ostream& out, bool include_builtins) const -> void {
  150. out << "cross_reference_irs_size: " << cross_reference_irs_.size() << "\n";
  151. PrintList(out, "functions", functions_);
  152. PrintList(out, "integer_literals", integer_literals_);
  153. PrintList(out, "real_literals", real_literals_);
  154. PrintList(out, "strings", strings_);
  155. PrintList(out, "types", types_);
  156. PrintBlock(out, "type_blocks", type_blocks_);
  157. out << "nodes: [\n";
  158. for (int i = include_builtins ? 0 : BuiltinKind::ValidCount;
  159. i < static_cast<int>(nodes_.size()); ++i) {
  160. const auto& element = nodes_[i];
  161. out.indent(Indent);
  162. out << element << ",\n";
  163. }
  164. out << "]\n";
  165. PrintBlock(out, "node_blocks", node_blocks_);
  166. }
  167. // Map a node kind representing a type into an integer describing the
  168. // precedence of that type's syntax. Higher numbers correspond to higher
  169. // precedence.
  170. static auto GetTypePrecedence(NodeKind kind) -> int {
  171. switch (kind) {
  172. case NodeKind::ArrayType:
  173. case NodeKind::Builtin:
  174. case NodeKind::StructType:
  175. case NodeKind::TupleType:
  176. return 0;
  177. case NodeKind::ConstType:
  178. return -1;
  179. case NodeKind::PointerType:
  180. return -2;
  181. case NodeKind::CrossReference:
  182. // TODO: Once we support stringification of cross-references, we'll need
  183. // to determine the precedence of the target of the cross-reference. For
  184. // now, all cross-references refer to builtin types from the prelude.
  185. return 0;
  186. case NodeKind::AddressOf:
  187. case NodeKind::ArrayIndex:
  188. case NodeKind::ArrayValue:
  189. case NodeKind::Assign:
  190. case NodeKind::BinaryOperatorAdd:
  191. case NodeKind::BlockArg:
  192. case NodeKind::BoolLiteral:
  193. case NodeKind::Branch:
  194. case NodeKind::BranchIf:
  195. case NodeKind::BranchWithArg:
  196. case NodeKind::Call:
  197. case NodeKind::Dereference:
  198. case NodeKind::FunctionDeclaration:
  199. case NodeKind::IntegerLiteral:
  200. case NodeKind::Invalid:
  201. case NodeKind::Namespace:
  202. case NodeKind::Parameter:
  203. case NodeKind::RealLiteral:
  204. case NodeKind::Return:
  205. case NodeKind::ReturnExpression:
  206. case NodeKind::StringLiteral:
  207. case NodeKind::StructAccess:
  208. case NodeKind::StructTypeField:
  209. case NodeKind::StructValue:
  210. case NodeKind::StubReference:
  211. case NodeKind::TupleIndex:
  212. case NodeKind::TupleValue:
  213. case NodeKind::UnaryOperatorNot:
  214. case NodeKind::VarStorage:
  215. CARBON_FATAL() << "GetTypePrecedence for non-type node kind " << kind;
  216. }
  217. }
  218. auto File::StringifyType(TypeId type_id, bool in_type_context) const
  219. -> std::string {
  220. std::string str;
  221. llvm::raw_string_ostream out(str);
  222. struct Step {
  223. // The node to print.
  224. NodeId node_id;
  225. // The index into node_id to print. Not used by all types.
  226. int index = 0;
  227. auto Next() const -> Step {
  228. return {.node_id = node_id, .index = index + 1};
  229. }
  230. };
  231. auto outer_node_id = GetTypeAllowBuiltinTypes(type_id);
  232. llvm::SmallVector<Step> steps = {{.node_id = outer_node_id}};
  233. while (!steps.empty()) {
  234. auto step = steps.pop_back_val();
  235. // Invalid node IDs will use the default invalid printing.
  236. if (!step.node_id.is_valid()) {
  237. out << step.node_id;
  238. continue;
  239. }
  240. // Builtins have designated labels.
  241. if (step.node_id.index < BuiltinKind::ValidCount) {
  242. out << BuiltinKind::FromInt(step.node_id.index).label();
  243. continue;
  244. }
  245. auto node = GetNode(step.node_id);
  246. switch (node.kind()) {
  247. case NodeKind::ArrayType: {
  248. auto [bound_id, type_id] = node.GetAsArrayType();
  249. if (step.index == 0) {
  250. out << "[";
  251. steps.push_back(step.Next());
  252. steps.push_back({.node_id = GetTypeAllowBuiltinTypes(type_id)});
  253. } else if (step.index == 1) {
  254. out << "; " << GetArrayBoundValue(bound_id) << "]";
  255. }
  256. break;
  257. }
  258. case NodeKind::ConstType: {
  259. if (step.index == 0) {
  260. out << "const ";
  261. // Add parentheses if required.
  262. auto inner_type_node_id =
  263. GetTypeAllowBuiltinTypes(node.GetAsConstType());
  264. if (GetTypePrecedence(GetNode(inner_type_node_id).kind()) <
  265. GetTypePrecedence(node.kind())) {
  266. out << "(";
  267. steps.push_back(step.Next());
  268. }
  269. steps.push_back({.node_id = inner_type_node_id});
  270. } else if (step.index == 1) {
  271. out << ")";
  272. }
  273. break;
  274. }
  275. case NodeKind::PointerType: {
  276. if (step.index == 0) {
  277. steps.push_back(step.Next());
  278. steps.push_back(
  279. {.node_id = GetTypeAllowBuiltinTypes(node.GetAsPointerType())});
  280. } else if (step.index == 1) {
  281. out << "*";
  282. }
  283. break;
  284. }
  285. case NodeKind::StructType: {
  286. auto refs = GetNodeBlock(node.GetAsStructType());
  287. if (refs.empty()) {
  288. out << "{}";
  289. break;
  290. } else if (step.index == 0) {
  291. out << "{";
  292. } else if (step.index < static_cast<int>(refs.size())) {
  293. out << ", ";
  294. } else {
  295. out << "}";
  296. break;
  297. }
  298. steps.push_back(step.Next());
  299. steps.push_back({.node_id = refs[step.index]});
  300. break;
  301. }
  302. case NodeKind::StructTypeField: {
  303. auto [name_id, type_id] = node.GetAsStructTypeField();
  304. out << "." << GetString(name_id) << ": ";
  305. steps.push_back({.node_id = GetTypeAllowBuiltinTypes(type_id)});
  306. break;
  307. }
  308. case NodeKind::TupleType: {
  309. auto refs = GetTypeBlock(node.GetAsTupleType());
  310. if (refs.empty()) {
  311. out << "()";
  312. break;
  313. } else if (step.index == 0) {
  314. out << "(";
  315. } else if (step.index < static_cast<int>(refs.size())) {
  316. out << ", ";
  317. } else {
  318. // A tuple of one element has a comma to disambiguate from an
  319. // expression.
  320. if (step.index == 1) {
  321. out << ",";
  322. }
  323. out << ")";
  324. break;
  325. }
  326. steps.push_back(step.Next());
  327. steps.push_back(
  328. {.node_id = GetTypeAllowBuiltinTypes(refs[step.index])});
  329. break;
  330. }
  331. case NodeKind::AddressOf:
  332. case NodeKind::ArrayIndex:
  333. case NodeKind::ArrayValue:
  334. case NodeKind::Assign:
  335. case NodeKind::BinaryOperatorAdd:
  336. case NodeKind::BlockArg:
  337. case NodeKind::BoolLiteral:
  338. case NodeKind::Branch:
  339. case NodeKind::BranchIf:
  340. case NodeKind::BranchWithArg:
  341. case NodeKind::Builtin:
  342. case NodeKind::Call:
  343. case NodeKind::Dereference:
  344. case NodeKind::CrossReference:
  345. case NodeKind::FunctionDeclaration:
  346. case NodeKind::IntegerLiteral:
  347. case NodeKind::Namespace:
  348. case NodeKind::Parameter:
  349. case NodeKind::RealLiteral:
  350. case NodeKind::Return:
  351. case NodeKind::ReturnExpression:
  352. case NodeKind::StringLiteral:
  353. case NodeKind::StructAccess:
  354. case NodeKind::StructValue:
  355. case NodeKind::StubReference:
  356. case NodeKind::TupleIndex:
  357. case NodeKind::TupleValue:
  358. case NodeKind::UnaryOperatorNot:
  359. case NodeKind::VarStorage:
  360. // We don't need to handle stringification for nodes that don't show up
  361. // in errors, but make it clear what's going on so that it's clearer
  362. // when stringification is needed.
  363. out << "<cannot stringify " << step.node_id << ">";
  364. break;
  365. case NodeKind::Invalid:
  366. llvm_unreachable("NodeKind::Invalid is never used.");
  367. }
  368. }
  369. // For `{}` or any tuple type, we've printed a non-type expression, so add a
  370. // conversion to type `type` if it's not implied by the context.
  371. if (!in_type_context) {
  372. auto outer_node = GetNode(outer_node_id);
  373. if (outer_node.kind() == NodeKind::TupleType ||
  374. (outer_node.kind() == NodeKind::StructType &&
  375. GetNodeBlock(outer_node.GetAsStructType()).empty())) {
  376. out << " as type";
  377. }
  378. }
  379. return str;
  380. }
  381. auto GetExpressionCategory(const File& semantics_ir, NodeId node_id)
  382. -> ExpressionCategory {
  383. const File* ir = &semantics_ir;
  384. while (true) {
  385. auto node = ir->GetNode(node_id);
  386. switch (node.kind()) {
  387. case NodeKind::Invalid:
  388. case NodeKind::Assign:
  389. case NodeKind::Branch:
  390. case NodeKind::BranchIf:
  391. case NodeKind::BranchWithArg:
  392. case NodeKind::FunctionDeclaration:
  393. case NodeKind::Namespace:
  394. case NodeKind::Return:
  395. case NodeKind::ReturnExpression:
  396. case NodeKind::StructTypeField:
  397. return ExpressionCategory::NotExpression;
  398. case NodeKind::CrossReference: {
  399. auto [xref_id, xref_node_id] = node.GetAsCrossReference();
  400. ir = &semantics_ir.GetCrossReferenceIR(xref_id);
  401. node_id = xref_node_id;
  402. continue;
  403. }
  404. case NodeKind::Call:
  405. // TODO: This should eventually be Initializing.
  406. return ExpressionCategory::Value;
  407. case NodeKind::AddressOf:
  408. case NodeKind::ArrayType:
  409. case NodeKind::BinaryOperatorAdd:
  410. case NodeKind::BlockArg:
  411. case NodeKind::BoolLiteral:
  412. case NodeKind::Builtin:
  413. case NodeKind::ConstType:
  414. case NodeKind::IntegerLiteral:
  415. case NodeKind::Parameter:
  416. case NodeKind::PointerType:
  417. case NodeKind::RealLiteral:
  418. case NodeKind::StringLiteral:
  419. case NodeKind::StructType:
  420. case NodeKind::TupleType:
  421. case NodeKind::UnaryOperatorNot:
  422. return ExpressionCategory::Value;
  423. case NodeKind::ArrayIndex: {
  424. auto [base_id, index_id] = node.GetAsArrayIndex();
  425. node_id = base_id;
  426. continue;
  427. }
  428. case NodeKind::StructAccess: {
  429. auto [base_id, member_index] = node.GetAsStructAccess();
  430. node_id = base_id;
  431. continue;
  432. }
  433. case NodeKind::TupleIndex: {
  434. auto [base_id, index_id] = node.GetAsTupleIndex();
  435. node_id = base_id;
  436. continue;
  437. }
  438. case NodeKind::StubReference: {
  439. node_id = node.GetAsStubReference();
  440. continue;
  441. }
  442. case NodeKind::ArrayValue:
  443. case NodeKind::StructValue:
  444. case NodeKind::TupleValue:
  445. // TODO: Eventually these will depend on the context in which the value
  446. // is used, and could be either Value or Initializing. We may want
  447. // different node kinds for a struct/tuple initializer versus a
  448. // struct/tuple value construction.
  449. return ExpressionCategory::Value;
  450. case NodeKind::Dereference:
  451. case NodeKind::VarStorage:
  452. return ExpressionCategory::DurableReference;
  453. }
  454. }
  455. }
  456. } // namespace Carbon::SemIR