formatter.cpp 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/sem_ir/formatter.h"
  5. #include "llvm/ADT/Sequence.h"
  6. #include "llvm/ADT/StringExtras.h"
  7. #include "llvm/ADT/StringMap.h"
  8. #include "llvm/Support/SaveAndRestore.h"
  9. #include "toolchain/lex/tokenized_buffer.h"
  10. #include "toolchain/parse/tree.h"
  11. namespace Carbon::SemIR {
  12. namespace {
  13. // Assigns names to nodes, blocks, and scopes in the Semantics IR.
  14. //
  15. // TODOs / future work ideas:
  16. // - Add a documentation file for the textual format and link to the
  17. // naming section here.
  18. // - Consider representing literals as just `literal` in the IR and using the
  19. // type to distinguish.
  20. class NodeNamer {
  21. public:
  22. // int32_t matches the input value size.
  23. // NOLINTNEXTLINE(performance-enum-size)
  24. enum class ScopeIndex : int32_t {
  25. None = -1,
  26. Package = 0,
  27. };
  28. static_assert(sizeof(ScopeIndex) == sizeof(FunctionId));
  29. NodeNamer(const Lex::TokenizedBuffer& tokenized_buffer,
  30. const Parse::Tree& parse_tree, const File& semantics_ir)
  31. : tokenized_buffer_(tokenized_buffer),
  32. parse_tree_(parse_tree),
  33. semantics_ir_(semantics_ir) {
  34. nodes.resize(semantics_ir.nodes_size());
  35. labels.resize(semantics_ir.node_blocks_size());
  36. scopes.resize(1 + semantics_ir.functions_size() +
  37. semantics_ir.classes_size());
  38. // Build the package scope.
  39. GetScopeInfo(ScopeIndex::Package).name =
  40. globals.AddNameUnchecked("package");
  41. CollectNamesInBlock(ScopeIndex::Package, semantics_ir.top_node_block_id());
  42. // Build each function scope.
  43. for (int i : llvm::seq(semantics_ir.functions_size())) {
  44. auto fn_id = FunctionId(i);
  45. auto fn_scope = GetScopeFor(fn_id);
  46. const auto& fn = semantics_ir.GetFunction(fn_id);
  47. // TODO: Provide a location for the function for use as a
  48. // disambiguator.
  49. auto fn_loc = Parse::Node::Invalid;
  50. GetScopeInfo(fn_scope).name = globals.AllocateName(
  51. *this, fn_loc,
  52. fn.name_id.is_valid() ? semantics_ir.strings().Get(fn.name_id).str()
  53. : "");
  54. CollectNamesInBlock(fn_scope, fn.param_refs_id);
  55. if (fn.return_slot_id.is_valid()) {
  56. nodes[fn.return_slot_id.index] = {
  57. fn_scope,
  58. GetScopeInfo(fn_scope).nodes.AllocateName(
  59. *this, semantics_ir.GetNode(fn.return_slot_id).parse_node(),
  60. "return")};
  61. }
  62. if (!fn.body_block_ids.empty()) {
  63. AddBlockLabel(fn_scope, fn.body_block_ids.front(), "entry", fn_loc);
  64. }
  65. for (auto block_id : fn.body_block_ids) {
  66. CollectNamesInBlock(fn_scope, block_id);
  67. }
  68. for (auto block_id : fn.body_block_ids) {
  69. AddBlockLabel(fn_scope, block_id);
  70. }
  71. }
  72. // Build each class scope.
  73. for (int i : llvm::seq(semantics_ir.classes_size())) {
  74. auto class_id = ClassId(i);
  75. auto class_scope = GetScopeFor(class_id);
  76. const auto& class_info = semantics_ir.GetClass(class_id);
  77. // TODO: Provide a location for the class for use as a
  78. // disambiguator.
  79. auto class_loc = Parse::Node::Invalid;
  80. GetScopeInfo(class_scope).name = globals.AllocateName(
  81. *this, class_loc,
  82. class_info.name_id.is_valid()
  83. ? semantics_ir.strings().Get(class_info.name_id).str()
  84. : "");
  85. AddBlockLabel(class_scope, class_info.body_block_id, "class", class_loc);
  86. CollectNamesInBlock(class_scope, class_info.body_block_id);
  87. }
  88. }
  89. // Returns the scope index corresponding to a function.
  90. auto GetScopeFor(FunctionId fn_id) -> ScopeIndex {
  91. return static_cast<ScopeIndex>(1 + fn_id.index);
  92. }
  93. // Returns the scope index corresponding to a class.
  94. auto GetScopeFor(ClassId class_id) -> ScopeIndex {
  95. return static_cast<ScopeIndex>(1 + semantics_ir_.functions_size() +
  96. class_id.index);
  97. }
  98. // Returns the IR name to use for a function.
  99. auto GetNameFor(FunctionId fn_id) -> llvm::StringRef {
  100. if (!fn_id.is_valid()) {
  101. return "invalid";
  102. }
  103. return GetScopeInfo(GetScopeFor(fn_id)).name.str();
  104. }
  105. // Returns the IR name to use for a class.
  106. auto GetNameFor(ClassId class_id) -> llvm::StringRef {
  107. if (!class_id.is_valid()) {
  108. return "invalid";
  109. }
  110. return GetScopeInfo(GetScopeFor(class_id)).name.str();
  111. }
  112. // Returns the IR name to use for a node, when referenced from a given scope.
  113. auto GetNameFor(ScopeIndex scope_idx, NodeId node_id) -> std::string {
  114. if (!node_id.is_valid()) {
  115. return "invalid";
  116. }
  117. // Check for a builtin.
  118. if (node_id.index < BuiltinKind::ValidCount) {
  119. return BuiltinKind::FromInt(node_id.index).label().str();
  120. }
  121. auto& [node_scope, node_name] = nodes[node_id.index];
  122. if (!node_name) {
  123. // This should not happen in valid IR.
  124. return "<unexpected noderef " + llvm::itostr(node_id.index) + ">";
  125. }
  126. if (node_scope == scope_idx) {
  127. return node_name.str().str();
  128. }
  129. return (GetScopeInfo(node_scope).name.str() + "." + node_name.str()).str();
  130. }
  131. // Returns the IR name to use for a label, when referenced from a given scope.
  132. auto GetLabelFor(ScopeIndex scope_idx, NodeBlockId block_id) -> std::string {
  133. if (!block_id.is_valid()) {
  134. return "!invalid";
  135. }
  136. auto& [label_scope, label_name] = labels[block_id.index];
  137. if (!label_name) {
  138. // This should not happen in valid IR.
  139. return "<unexpected nodeblockref " + llvm::itostr(block_id.index) + ">";
  140. }
  141. if (label_scope == scope_idx) {
  142. return label_name.str().str();
  143. }
  144. return (GetScopeInfo(label_scope).name.str() + "." + label_name.str())
  145. .str();
  146. }
  147. private:
  148. // A space in which unique names can be allocated.
  149. struct Namespace {
  150. // A result of a name lookup.
  151. struct NameResult;
  152. // A name in a namespace, which might be redirected to refer to another name
  153. // for disambiguation purposes.
  154. class Name {
  155. public:
  156. Name() : value_(nullptr) {}
  157. explicit Name(llvm::StringMapIterator<NameResult> it) : value_(&*it) {}
  158. explicit operator bool() const { return value_; }
  159. auto str() const -> llvm::StringRef {
  160. llvm::StringMapEntry<NameResult>* value = value_;
  161. CARBON_CHECK(value) << "cannot print a null name";
  162. while (value->second.ambiguous && value->second.fallback) {
  163. value = value->second.fallback.value_;
  164. }
  165. return value->first();
  166. }
  167. auto SetFallback(Name name) -> void { value_->second.fallback = name; }
  168. auto SetAmbiguous() -> void { value_->second.ambiguous = true; }
  169. private:
  170. llvm::StringMapEntry<NameResult>* value_ = nullptr;
  171. };
  172. struct NameResult {
  173. bool ambiguous = false;
  174. Name fallback = Name();
  175. };
  176. llvm::StringRef prefix;
  177. llvm::StringMap<NameResult> allocated = {};
  178. int unnamed_count = 0;
  179. auto AddNameUnchecked(llvm::StringRef name) -> Name {
  180. return Name(allocated.insert({name, NameResult()}).first);
  181. }
  182. auto AllocateName(const NodeNamer& namer, Parse::Node node,
  183. std::string name = "") -> Name {
  184. // The best (shortest) name for this node so far, and the current name
  185. // for it.
  186. Name best;
  187. Name current;
  188. // Add `name` as a name for this entity.
  189. auto add_name = [&](bool mark_ambiguous = true) {
  190. auto [it, added] = allocated.insert({name, NameResult()});
  191. Name new_name = Name(it);
  192. if (!added) {
  193. if (mark_ambiguous) {
  194. // This name was allocated for a different node. Mark it as
  195. // ambiguous and keep looking for a name for this node.
  196. new_name.SetAmbiguous();
  197. }
  198. } else {
  199. if (!best) {
  200. best = new_name;
  201. } else {
  202. CARBON_CHECK(current);
  203. current.SetFallback(new_name);
  204. }
  205. current = new_name;
  206. }
  207. return added;
  208. };
  209. // All names start with the prefix.
  210. name.insert(0, prefix);
  211. // Use the given name if it's available and not just the prefix.
  212. if (name.size() > prefix.size()) {
  213. add_name();
  214. }
  215. // Append location information to try to disambiguate.
  216. if (node.is_valid()) {
  217. auto token = namer.parse_tree_.node_token(node);
  218. llvm::raw_string_ostream(name)
  219. << ".loc" << namer.tokenized_buffer_.GetLineNumber(token);
  220. add_name();
  221. llvm::raw_string_ostream(name)
  222. << "_" << namer.tokenized_buffer_.GetColumnNumber(token);
  223. add_name();
  224. }
  225. // Append numbers until we find an available name.
  226. name += ".";
  227. auto name_size_without_counter = name.size();
  228. for (int counter = 1;; ++counter) {
  229. name.resize(name_size_without_counter);
  230. llvm::raw_string_ostream(name) << counter;
  231. if (add_name(/*mark_ambiguous=*/false)) {
  232. return best;
  233. }
  234. }
  235. }
  236. };
  237. // A named scope that contains named entities.
  238. struct Scope {
  239. Namespace::Name name;
  240. Namespace nodes = {.prefix = "%"};
  241. Namespace labels = {.prefix = "!"};
  242. };
  243. auto GetScopeInfo(ScopeIndex scope_idx) -> Scope& {
  244. return scopes[static_cast<int>(scope_idx)];
  245. }
  246. auto AddBlockLabel(ScopeIndex scope_idx, NodeBlockId block_id,
  247. std::string name = "",
  248. Parse::Node parse_node = Parse::Node::Invalid) -> void {
  249. if (!block_id.is_valid() || labels[block_id.index].second) {
  250. return;
  251. }
  252. if (parse_node == Parse::Node::Invalid) {
  253. if (const auto& block = semantics_ir_.GetNodeBlock(block_id);
  254. !block.empty()) {
  255. parse_node = semantics_ir_.GetNode(block.front()).parse_node();
  256. }
  257. }
  258. labels[block_id.index] = {scope_idx,
  259. GetScopeInfo(scope_idx).labels.AllocateName(
  260. *this, parse_node, std::move(name))};
  261. }
  262. // Finds and adds a suitable block label for the given semantics node that
  263. // represents some kind of branch.
  264. auto AddBlockLabel(ScopeIndex scope_idx, NodeBlockId block_id, Node node)
  265. -> void {
  266. llvm::StringRef name;
  267. switch (parse_tree_.node_kind(node.parse_node())) {
  268. case Parse::NodeKind::IfExpressionIf:
  269. switch (node.kind()) {
  270. case BranchIf::Kind:
  271. name = "if.expr.then";
  272. break;
  273. case Branch::Kind:
  274. name = "if.expr.else";
  275. break;
  276. case BranchWithArg::Kind:
  277. name = "if.expr.result";
  278. break;
  279. default:
  280. break;
  281. }
  282. break;
  283. case Parse::NodeKind::IfCondition:
  284. switch (node.kind()) {
  285. case BranchIf::Kind:
  286. name = "if.then";
  287. break;
  288. case Branch::Kind:
  289. name = "if.else";
  290. break;
  291. default:
  292. break;
  293. }
  294. break;
  295. case Parse::NodeKind::IfStatement:
  296. name = "if.done";
  297. break;
  298. case Parse::NodeKind::ShortCircuitOperand: {
  299. bool is_rhs = node.Is<BranchIf>();
  300. bool is_and = tokenized_buffer_.GetKind(parse_tree_.node_token(
  301. node.parse_node())) == Lex::TokenKind::And;
  302. name = is_and ? (is_rhs ? "and.rhs" : "and.result")
  303. : (is_rhs ? "or.rhs" : "or.result");
  304. break;
  305. }
  306. case Parse::NodeKind::WhileConditionStart:
  307. name = "while.cond";
  308. break;
  309. case Parse::NodeKind::WhileCondition:
  310. switch (node.kind()) {
  311. case NodeKind::BranchIf:
  312. name = "while.body";
  313. break;
  314. case NodeKind::Branch:
  315. name = "while.done";
  316. break;
  317. default:
  318. break;
  319. }
  320. break;
  321. default:
  322. break;
  323. }
  324. AddBlockLabel(scope_idx, block_id, name.str(), node.parse_node());
  325. }
  326. auto CollectNamesInBlock(ScopeIndex scope_idx, NodeBlockId block_id) -> void {
  327. if (!block_id.is_valid()) {
  328. return;
  329. }
  330. Scope& scope = GetScopeInfo(scope_idx);
  331. // Use bound names where available. Otherwise, assign a backup name.
  332. for (auto node_id : semantics_ir_.GetNodeBlock(block_id)) {
  333. if (!node_id.is_valid()) {
  334. continue;
  335. }
  336. auto node = semantics_ir_.GetNode(node_id);
  337. auto add_node_name = [&](std::string name) {
  338. nodes[node_id.index] = {scope_idx, scope.nodes.AllocateName(
  339. *this, node.parse_node(), name)};
  340. };
  341. auto add_node_name_id = [&](StringId name_id) {
  342. if (name_id.is_valid()) {
  343. add_node_name(semantics_ir_.strings().Get(name_id).str());
  344. } else {
  345. add_node_name("");
  346. }
  347. };
  348. switch (node.kind()) {
  349. case Branch::Kind: {
  350. AddBlockLabel(scope_idx, node.As<Branch>().target_id, node);
  351. break;
  352. }
  353. case BranchIf::Kind: {
  354. AddBlockLabel(scope_idx, node.As<BranchIf>().target_id, node);
  355. break;
  356. }
  357. case BranchWithArg::Kind: {
  358. AddBlockLabel(scope_idx, node.As<BranchWithArg>().target_id, node);
  359. break;
  360. }
  361. case SpliceBlock::Kind: {
  362. CollectNamesInBlock(scope_idx, node.As<SpliceBlock>().block_id);
  363. break;
  364. }
  365. case BindName::Kind: {
  366. add_node_name_id(node.As<BindName>().name_id);
  367. continue;
  368. }
  369. case FunctionDeclaration::Kind: {
  370. add_node_name_id(
  371. semantics_ir_
  372. .GetFunction(node.As<FunctionDeclaration>().function_id)
  373. .name_id);
  374. continue;
  375. }
  376. case ClassDeclaration::Kind: {
  377. add_node_name_id(
  378. semantics_ir_.GetClass(node.As<ClassDeclaration>().class_id)
  379. .name_id);
  380. continue;
  381. }
  382. case NameReference::Kind: {
  383. add_node_name(semantics_ir_.strings()
  384. .Get(node.As<NameReference>().name_id)
  385. .str() +
  386. ".ref");
  387. continue;
  388. }
  389. case Parameter::Kind: {
  390. add_node_name_id(node.As<Parameter>().name_id);
  391. continue;
  392. }
  393. case VarStorage::Kind: {
  394. // TODO: Eventually this name will be optional, and we'll want to
  395. // provide something like `var` as a default. However, that's not
  396. // possible right now so cannot be tested.
  397. add_node_name_id(node.As<VarStorage>().name_id);
  398. continue;
  399. }
  400. default: {
  401. break;
  402. }
  403. }
  404. // Sequentially number all remaining values.
  405. if (node.kind().value_kind() != NodeValueKind::None) {
  406. add_node_name("");
  407. }
  408. }
  409. }
  410. const Lex::TokenizedBuffer& tokenized_buffer_;
  411. const Parse::Tree& parse_tree_;
  412. const File& semantics_ir_;
  413. Namespace globals = {.prefix = "@"};
  414. std::vector<std::pair<ScopeIndex, Namespace::Name>> nodes;
  415. std::vector<std::pair<ScopeIndex, Namespace::Name>> labels;
  416. std::vector<Scope> scopes;
  417. };
  418. } // namespace
  419. // Formatter for printing textual Semantics IR.
  420. class Formatter {
  421. public:
  422. explicit Formatter(const Lex::TokenizedBuffer& tokenized_buffer,
  423. const Parse::Tree& parse_tree, const File& semantics_ir,
  424. llvm::raw_ostream& out)
  425. : semantics_ir_(semantics_ir),
  426. out_(out),
  427. node_namer_(tokenized_buffer, parse_tree, semantics_ir) {}
  428. auto Format() -> void {
  429. out_ << "file \"" << semantics_ir_.filename() << "\" {\n";
  430. // TODO: Include information from the package declaration, once we
  431. // fully support it.
  432. // TODO: Handle the case where there are multiple top-level node blocks.
  433. // For example, there may be branching in the initializer of a global or a
  434. // type expression.
  435. if (auto block_id = semantics_ir_.top_node_block_id();
  436. block_id.is_valid()) {
  437. llvm::SaveAndRestore package_scope(scope_,
  438. NodeNamer::ScopeIndex::Package);
  439. FormatCodeBlock(block_id);
  440. }
  441. out_ << "}\n";
  442. for (int i : llvm::seq(semantics_ir_.classes_size())) {
  443. FormatClass(ClassId(i));
  444. }
  445. for (int i : llvm::seq(semantics_ir_.functions_size())) {
  446. FormatFunction(FunctionId(i));
  447. }
  448. }
  449. auto FormatClass(ClassId id) -> void {
  450. const Class& class_info = semantics_ir_.GetClass(id);
  451. out_ << "\nclass ";
  452. FormatClassName(id);
  453. llvm::SaveAndRestore class_scope(scope_, node_namer_.GetScopeFor(id));
  454. if (class_info.scope_id.is_valid()) {
  455. out_ << " {\n";
  456. FormatCodeBlock(class_info.body_block_id);
  457. out_ << "\n!members:";
  458. FormatNameScope(class_info.scope_id, "", "\n .");
  459. out_ << "\n}\n";
  460. } else {
  461. out_ << ";\n";
  462. }
  463. }
  464. auto FormatFunction(FunctionId id) -> void {
  465. const Function& fn = semantics_ir_.GetFunction(id);
  466. out_ << "\nfn ";
  467. FormatFunctionName(id);
  468. out_ << "(";
  469. llvm::SaveAndRestore function_scope(scope_, node_namer_.GetScopeFor(id));
  470. llvm::ListSeparator sep;
  471. for (const NodeId param_id : semantics_ir_.GetNodeBlock(fn.param_refs_id)) {
  472. out_ << sep;
  473. if (!param_id.is_valid()) {
  474. out_ << "invalid";
  475. continue;
  476. }
  477. FormatNodeName(param_id);
  478. out_ << ": ";
  479. FormatType(semantics_ir_.GetNode(param_id).type_id());
  480. }
  481. out_ << ")";
  482. if (fn.return_type_id.is_valid()) {
  483. out_ << " -> ";
  484. if (fn.return_slot_id.is_valid()) {
  485. FormatNodeName(fn.return_slot_id);
  486. out_ << ": ";
  487. }
  488. FormatType(fn.return_type_id);
  489. }
  490. if (!fn.body_block_ids.empty()) {
  491. out_ << " {";
  492. for (auto block_id : fn.body_block_ids) {
  493. out_ << "\n";
  494. FormatLabel(block_id);
  495. out_ << ":\n";
  496. FormatCodeBlock(block_id);
  497. }
  498. out_ << "}\n";
  499. } else {
  500. out_ << ";\n";
  501. }
  502. }
  503. auto FormatCodeBlock(NodeBlockId block_id) -> void {
  504. if (!block_id.is_valid()) {
  505. return;
  506. }
  507. for (const NodeId node_id : semantics_ir_.GetNodeBlock(block_id)) {
  508. FormatInstruction(node_id);
  509. }
  510. }
  511. auto FormatNameScope(NameScopeId id, llvm::StringRef separator,
  512. llvm::StringRef prefix) -> void {
  513. // Name scopes aren't kept in any particular order. Sort the entries before
  514. // we print them for stability and consistency.
  515. llvm::SmallVector<std::pair<NodeId, StringId>> entries;
  516. for (auto [name_id, node_id] : semantics_ir_.GetNameScope(id)) {
  517. entries.push_back({node_id, name_id});
  518. }
  519. llvm::sort(entries,
  520. [](auto a, auto b) { return a.first.index < b.first.index; });
  521. llvm::ListSeparator sep(separator);
  522. for (auto [node_id, name_id] : entries) {
  523. out_ << sep << prefix;
  524. FormatString(name_id);
  525. out_ << " = ";
  526. FormatNodeName(node_id);
  527. }
  528. }
  529. auto FormatInstruction(NodeId node_id) -> void {
  530. if (!node_id.is_valid()) {
  531. Indent();
  532. out_ << "invalid\n";
  533. return;
  534. }
  535. FormatInstruction(node_id, semantics_ir_.GetNode(node_id));
  536. }
  537. auto FormatInstruction(NodeId node_id, Node node) -> void {
  538. // clang warns on unhandled enum values; clang-tidy is incorrect here.
  539. // NOLINTNEXTLINE(bugprone-switch-missing-default-case)
  540. switch (node.kind()) {
  541. #define CARBON_SEM_IR_NODE_KIND(NodeT) \
  542. case NodeT::Kind: \
  543. FormatInstruction(node_id, node.As<NodeT>()); \
  544. break;
  545. #include "toolchain/sem_ir/node_kind.def"
  546. }
  547. }
  548. auto Indent() -> void { out_.indent(indent_); }
  549. template <typename NodeT>
  550. auto FormatInstruction(NodeId node_id, NodeT node) -> void {
  551. Indent();
  552. FormatInstructionLHS(node_id, node);
  553. out_ << NodeT::Kind.ir_name();
  554. FormatInstructionRHS(node);
  555. out_ << "\n";
  556. }
  557. auto FormatInstructionLHS(NodeId node_id, Node node) -> void {
  558. switch (node.kind().value_kind()) {
  559. case NodeValueKind::Typed:
  560. FormatNodeName(node_id);
  561. out_ << ": ";
  562. switch (GetExpressionCategory(semantics_ir_, node_id)) {
  563. case ExpressionCategory::NotExpression:
  564. case ExpressionCategory::Error:
  565. case ExpressionCategory::Value:
  566. case ExpressionCategory::Mixed:
  567. break;
  568. case ExpressionCategory::DurableReference:
  569. case ExpressionCategory::EphemeralReference:
  570. out_ << "ref ";
  571. break;
  572. case ExpressionCategory::Initializing:
  573. out_ << "init ";
  574. break;
  575. }
  576. FormatType(node.type_id());
  577. out_ << " = ";
  578. break;
  579. case NodeValueKind::None:
  580. break;
  581. }
  582. }
  583. template <typename NodeT>
  584. auto FormatInstructionRHS(NodeT node) -> void {
  585. // By default, an instruction has a comma-separated argument list.
  586. using Info = TypedNodeArgsInfo<NodeT>;
  587. if constexpr (Info::NumArgs == 2) {
  588. FormatArgs(Info::template Get<0>(node), Info::template Get<1>(node));
  589. } else if constexpr (Info::NumArgs == 1) {
  590. FormatArgs(Info::template Get<0>(node));
  591. } else {
  592. FormatArgs();
  593. }
  594. }
  595. auto FormatInstructionRHS(BlockArg node) -> void {
  596. out_ << " ";
  597. FormatLabel(node.block_id);
  598. }
  599. auto FormatInstruction(NodeId /*node_id*/, BranchIf node) -> void {
  600. if (!in_terminator_sequence_) {
  601. Indent();
  602. }
  603. out_ << "if ";
  604. FormatNodeName(node.cond_id);
  605. out_ << " " << Branch::Kind.ir_name() << " ";
  606. FormatLabel(node.target_id);
  607. out_ << " else ";
  608. in_terminator_sequence_ = true;
  609. }
  610. auto FormatInstruction(NodeId /*node_id*/, BranchWithArg node) -> void {
  611. if (!in_terminator_sequence_) {
  612. Indent();
  613. }
  614. out_ << BranchWithArg::Kind.ir_name() << " ";
  615. FormatLabel(node.target_id);
  616. out_ << "(";
  617. FormatNodeName(node.arg_id);
  618. out_ << ")\n";
  619. in_terminator_sequence_ = false;
  620. }
  621. auto FormatInstruction(NodeId /*node_id*/, Branch node) -> void {
  622. if (!in_terminator_sequence_) {
  623. Indent();
  624. }
  625. out_ << Branch::Kind.ir_name() << " ";
  626. FormatLabel(node.target_id);
  627. out_ << "\n";
  628. in_terminator_sequence_ = false;
  629. }
  630. auto FormatInstructionRHS(ArrayInit node) -> void {
  631. out_ << " ";
  632. FormatArg(node.tuple_id);
  633. llvm::ArrayRef<NodeId> inits_and_return_slot =
  634. semantics_ir_.GetNodeBlock(node.inits_and_return_slot_id);
  635. auto inits = inits_and_return_slot.drop_back(1);
  636. auto return_slot_id = inits_and_return_slot.back();
  637. out_ << ", (";
  638. llvm::ListSeparator sep;
  639. for (auto node_id : inits) {
  640. out_ << sep;
  641. FormatArg(node_id);
  642. }
  643. out_ << ')';
  644. FormatReturnSlot(return_slot_id);
  645. }
  646. auto FormatInstructionRHS(Call node) -> void {
  647. out_ << " ";
  648. FormatArg(node.callee_id);
  649. llvm::ArrayRef<NodeId> args = semantics_ir_.GetNodeBlock(node.args_id);
  650. bool has_return_slot =
  651. GetInitializingRepresentation(semantics_ir_, node.type_id)
  652. .has_return_slot();
  653. NodeId return_slot_id = NodeId::Invalid;
  654. if (has_return_slot) {
  655. return_slot_id = args.back();
  656. args = args.drop_back();
  657. }
  658. llvm::ListSeparator sep;
  659. out_ << '(';
  660. for (auto node_id : args) {
  661. out_ << sep;
  662. FormatArg(node_id);
  663. }
  664. out_ << ')';
  665. if (has_return_slot) {
  666. FormatReturnSlot(return_slot_id);
  667. }
  668. }
  669. auto FormatInstructionRHS(InitializeFrom node) -> void {
  670. FormatArgs(node.src_id);
  671. FormatReturnSlot(node.dest_id);
  672. }
  673. auto FormatInstructionRHS(CrossReference node) -> void {
  674. // TODO: Figure out a way to make this meaningful. We'll need some way to
  675. // name cross-reference IRs, perhaps by the node ID of the import?
  676. out_ << " " << node.ir_id << "." << node.node_id;
  677. }
  678. auto FormatInstructionRHS(SpliceBlock node) -> void {
  679. FormatArgs(node.result_id);
  680. out_ << " {";
  681. if (!semantics_ir_.GetNodeBlock(node.block_id).empty()) {
  682. out_ << "\n";
  683. indent_ += 2;
  684. FormatCodeBlock(node.block_id);
  685. indent_ -= 2;
  686. Indent();
  687. }
  688. out_ << "}";
  689. }
  690. // StructTypeFields are formatted as part of their StructType.
  691. auto FormatInstruction(NodeId /*node_id*/, StructTypeField /*node*/) -> void {
  692. }
  693. auto FormatInstructionRHS(StructType node) -> void {
  694. out_ << " {";
  695. llvm::ListSeparator sep;
  696. for (auto field_id : semantics_ir_.GetNodeBlock(node.fields_id)) {
  697. out_ << sep << ".";
  698. auto field = semantics_ir_.GetNodeAs<StructTypeField>(field_id);
  699. FormatString(field.name_id);
  700. out_ << ": ";
  701. FormatType(field.field_type_id);
  702. }
  703. out_ << "}";
  704. }
  705. auto FormatArgs() -> void {}
  706. template <typename... Args>
  707. auto FormatArgs(Args... args) -> void {
  708. out_ << ' ';
  709. llvm::ListSeparator sep;
  710. ((out_ << sep, FormatArg(args)), ...);
  711. }
  712. auto FormatArg(BoolValue v) -> void { out_ << v; }
  713. auto FormatArg(BuiltinKind kind) -> void { out_ << kind.label(); }
  714. auto FormatArg(FunctionId id) -> void { FormatFunctionName(id); }
  715. auto FormatArg(ClassId id) -> void { FormatClassName(id); }
  716. auto FormatArg(IntegerId id) -> void {
  717. semantics_ir_.integers().Get(id).print(out_, /*isSigned=*/false);
  718. }
  719. auto FormatArg(MemberIndex index) -> void { out_ << index; }
  720. auto FormatArg(NameScopeId id) -> void {
  721. out_ << '{';
  722. FormatNameScope(id, ", ", ".");
  723. out_ << '}';
  724. }
  725. auto FormatArg(NodeId id) -> void { FormatNodeName(id); }
  726. auto FormatArg(NodeBlockId id) -> void {
  727. out_ << '(';
  728. llvm::ListSeparator sep;
  729. for (auto node_id : semantics_ir_.GetNodeBlock(id)) {
  730. out_ << sep;
  731. FormatArg(node_id);
  732. }
  733. out_ << ')';
  734. }
  735. auto FormatArg(RealId id) -> void {
  736. // TODO: Format with a `.` when the exponent is near zero.
  737. const auto& real = semantics_ir_.reals().Get(id);
  738. real.mantissa.print(out_, /*isSigned=*/false);
  739. out_ << (real.is_decimal ? 'e' : 'p') << real.exponent;
  740. }
  741. auto FormatArg(StringId id) -> void {
  742. out_ << '"';
  743. out_.write_escaped(semantics_ir_.strings().Get(id), /*UseHexEscapes=*/true);
  744. out_ << '"';
  745. }
  746. auto FormatArg(TypeId id) -> void { FormatType(id); }
  747. auto FormatArg(TypeBlockId id) -> void {
  748. out_ << '(';
  749. llvm::ListSeparator sep;
  750. for (auto type_id : semantics_ir_.GetTypeBlock(id)) {
  751. out_ << sep;
  752. FormatArg(type_id);
  753. }
  754. out_ << ')';
  755. }
  756. auto FormatReturnSlot(NodeId dest_id) -> void {
  757. out_ << " to ";
  758. FormatArg(dest_id);
  759. }
  760. auto FormatNodeName(NodeId id) -> void {
  761. out_ << node_namer_.GetNameFor(scope_, id);
  762. }
  763. auto FormatLabel(NodeBlockId id) -> void {
  764. out_ << node_namer_.GetLabelFor(scope_, id);
  765. }
  766. auto FormatString(StringId id) -> void {
  767. out_ << semantics_ir_.strings().Get(id);
  768. }
  769. auto FormatFunctionName(FunctionId id) -> void {
  770. out_ << node_namer_.GetNameFor(id);
  771. }
  772. auto FormatClassName(ClassId id) -> void {
  773. out_ << node_namer_.GetNameFor(id);
  774. }
  775. auto FormatType(TypeId id) -> void {
  776. if (!id.is_valid()) {
  777. out_ << "invalid";
  778. } else {
  779. out_ << semantics_ir_.StringifyType(id, /*in_type_context=*/true);
  780. }
  781. }
  782. private:
  783. const File& semantics_ir_;
  784. llvm::raw_ostream& out_;
  785. NodeNamer node_namer_;
  786. NodeNamer::ScopeIndex scope_ = NodeNamer::ScopeIndex::None;
  787. bool in_terminator_sequence_ = false;
  788. int indent_ = 2;
  789. };
  790. auto FormatFile(const Lex::TokenizedBuffer& tokenized_buffer,
  791. const Parse::Tree& parse_tree, const File& semantics_ir,
  792. llvm::raw_ostream& out) -> void {
  793. Formatter(tokenized_buffer, parse_tree, semantics_ir, out).Format();
  794. }
  795. } // namespace Carbon::SemIR