formatter.cpp 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/sem_ir/formatter.h"
  5. #include "llvm/ADT/Sequence.h"
  6. #include "llvm/ADT/StringExtras.h"
  7. #include "llvm/ADT/StringMap.h"
  8. #include "llvm/Support/SaveAndRestore.h"
  9. #include "toolchain/base/value_store.h"
  10. #include "toolchain/lex/tokenized_buffer.h"
  11. #include "toolchain/parse/tree.h"
  12. #include "toolchain/sem_ir/ids.h"
  13. namespace Carbon::SemIR {
  14. namespace {
  15. // Assigns names to instructions, blocks, and scopes in the Semantics IR.
  16. //
  17. // TODOs / future work ideas:
  18. // - Add a documentation file for the textual format and link to the
  19. // naming section here.
  20. // - Consider representing literals as just `literal` in the IR and using the
  21. // type to distinguish.
  22. class InstNamer {
  23. public:
  // Index of a naming scope. `File` and `Constants` are fixed scopes; the
  // per-entity scopes returned by the `GetScopeFor` overloads start at
  // `FirstFunction`. `None` is the scope the `Formatter` starts out in.
  24. // int32_t matches the input value size.
  25. // NOLINTNEXTLINE(performance-enum-size)
  26. enum class ScopeIndex : int32_t {
  27. None = -1,
  28. File = 0,
  29. Constants = 1,
  30. FirstFunction = 2,
  31. };
  // Scope indices are computed by offsetting `FunctionId` indices, so the two
  // types are required to have the same size.
  32. static_assert(sizeof(ScopeIndex) == sizeof(FunctionId));
  33. InstNamer(const Lex::TokenizedBuffer& tokenized_buffer,
  34. const Parse::Tree& parse_tree, const File& sem_ir)
  35. : tokenized_buffer_(tokenized_buffer),
  36. parse_tree_(parse_tree),
  37. sem_ir_(sem_ir) {
  38. insts.resize(sem_ir.insts().size());
  39. labels.resize(sem_ir.inst_blocks().size());
  40. scopes.resize(static_cast<int32_t>(ScopeIndex::FirstFunction) +
  41. sem_ir.functions().size() + sem_ir.classes().size() +
  42. sem_ir.interfaces().size());
  43. // Build the constants scope.
  44. GetScopeInfo(ScopeIndex::Constants).name =
  45. globals.AddNameUnchecked("constants");
  46. CollectNamesInBlock(ScopeIndex::Constants, sem_ir.constants().array_ref());
  47. // Build the file scope.
  48. GetScopeInfo(ScopeIndex::File).name = globals.AddNameUnchecked("file");
  49. CollectNamesInBlock(ScopeIndex::File, sem_ir.top_inst_block_id());
  50. // Build each function scope.
  51. for (auto [i, fn] : llvm::enumerate(sem_ir.functions().array_ref())) {
  52. auto fn_id = FunctionId(i);
  53. auto fn_scope = GetScopeFor(fn_id);
  54. // TODO: Provide a location for the function for use as a
  55. // disambiguator.
  56. auto fn_loc = Parse::NodeId::Invalid;
  57. GetScopeInfo(fn_scope).name = globals.AllocateName(
  58. *this, fn_loc, sem_ir.names().GetIRBaseName(fn.name_id).str());
  59. CollectNamesInBlock(fn_scope, fn.implicit_param_refs_id);
  60. CollectNamesInBlock(fn_scope, fn.param_refs_id);
  61. if (fn.return_slot_id.is_valid()) {
  62. insts[fn.return_slot_id.index] = {
  63. fn_scope,
  64. GetScopeInfo(fn_scope).insts.AllocateName(
  65. *this, sem_ir.insts().Get(fn.return_slot_id).parse_node(),
  66. "return")};
  67. }
  68. if (!fn.body_block_ids.empty()) {
  69. AddBlockLabel(fn_scope, fn.body_block_ids.front(), "entry", fn_loc);
  70. }
  71. for (auto block_id : fn.body_block_ids) {
  72. CollectNamesInBlock(fn_scope, block_id);
  73. }
  74. for (auto block_id : fn.body_block_ids) {
  75. AddBlockLabel(fn_scope, block_id);
  76. }
  77. }
  78. // Build each class scope.
  79. for (auto [i, class_info] : llvm::enumerate(sem_ir.classes().array_ref())) {
  80. auto class_id = ClassId(i);
  81. auto class_scope = GetScopeFor(class_id);
  82. // TODO: Provide a location for the class for use as a
  83. // disambiguator.
  84. auto class_loc = Parse::NodeId::Invalid;
  85. GetScopeInfo(class_scope).name = globals.AllocateName(
  86. *this, class_loc,
  87. sem_ir.names().GetIRBaseName(class_info.name_id).str());
  88. AddBlockLabel(class_scope, class_info.body_block_id, "class", class_loc);
  89. CollectNamesInBlock(class_scope, class_info.body_block_id);
  90. }
  91. // Build each interface scope.
  92. for (auto [i, interface_info] :
  93. llvm::enumerate(sem_ir.interfaces().array_ref())) {
  94. auto interface_id = InterfaceId(i);
  95. auto interface_scope = GetScopeFor(interface_id);
  96. // TODO: Provide a location for the interface for use as a
  97. // disambiguator.
  98. auto interface_loc = Parse::NodeId::Invalid;
  99. GetScopeInfo(interface_scope).name = globals.AllocateName(
  100. *this, interface_loc,
  101. sem_ir.names().GetIRBaseName(interface_info.name_id).str());
  102. AddBlockLabel(interface_scope, interface_info.body_block_id, "interface",
  103. interface_loc);
  104. CollectNamesInBlock(interface_scope, interface_info.body_block_id);
  105. }
  106. }
  107. // Returns the scope index corresponding to a function.
  108. auto GetScopeFor(FunctionId fn_id) -> ScopeIndex {
  109. return static_cast<ScopeIndex>(
  110. static_cast<int32_t>(ScopeIndex::FirstFunction) + fn_id.index);
  111. }
  112. // Returns the scope index corresponding to a class.
  113. auto GetScopeFor(ClassId class_id) -> ScopeIndex {
  114. return static_cast<ScopeIndex>(
  115. static_cast<int32_t>(ScopeIndex::FirstFunction) +
  116. sem_ir_.functions().size() + class_id.index);
  117. }
  118. // Returns the scope index corresponding to an interface.
  119. auto GetScopeFor(InterfaceId interface_id) -> ScopeIndex {
  120. return static_cast<ScopeIndex>(
  121. static_cast<int32_t>(ScopeIndex::FirstFunction) +
  122. sem_ir_.functions().size() + sem_ir_.classes().size() +
  123. interface_id.index);
  124. }
  125. // Returns the IR name to use for a function.
  126. auto GetNameFor(FunctionId fn_id) -> llvm::StringRef {
  127. if (!fn_id.is_valid()) {
  128. return "invalid";
  129. }
  130. return GetScopeInfo(GetScopeFor(fn_id)).name.str();
  131. }
  132. // Returns the IR name to use for a class.
  133. auto GetNameFor(ClassId class_id) -> llvm::StringRef {
  134. if (!class_id.is_valid()) {
  135. return "invalid";
  136. }
  137. return GetScopeInfo(GetScopeFor(class_id)).name.str();
  138. }
  139. // Returns the IR name to use for an interface.
  140. auto GetNameFor(InterfaceId interface_id) -> llvm::StringRef {
  141. if (!interface_id.is_valid()) {
  142. return "invalid";
  143. }
  144. return GetScopeInfo(GetScopeFor(interface_id)).name.str();
  145. }
  146. // Returns the IR name to use for an instruction, when referenced from a given
  147. // scope.
  148. auto GetNameFor(ScopeIndex scope_idx, InstId inst_id) -> std::string {
  149. if (!inst_id.is_valid()) {
  150. return "invalid";
  151. }
  152. // Check for a builtin.
  153. if (inst_id.index < BuiltinKind::ValidCount) {
  154. return BuiltinKind::FromInt(inst_id.index).label().str();
  155. }
  156. if (inst_id == InstId::PackageNamespace) {
  157. return "package";
  158. }
  159. auto& [inst_scope, inst_name] = insts[inst_id.index];
  160. if (!inst_name) {
  161. // This should not happen in valid IR.
  162. std::string str;
  163. llvm::raw_string_ostream(str) << "<unexpected instref " << inst_id << ">";
  164. return str;
  165. }
  166. if (inst_scope == scope_idx) {
  167. return inst_name.str().str();
  168. }
  169. return (GetScopeInfo(inst_scope).name.str() + "." + inst_name.str()).str();
  170. }
  171. // Returns the IR name to use for a label, when referenced from a given scope.
  172. auto GetLabelFor(ScopeIndex scope_idx, InstBlockId block_id) -> std::string {
  173. if (!block_id.is_valid()) {
  174. return "!invalid";
  175. }
  176. auto& [label_scope, label_name] = labels[block_id.index];
  177. if (!label_name) {
  178. // This should not happen in valid IR.
  179. std::string str;
  180. llvm::raw_string_ostream(str)
  181. << "<unexpected instblockref " << block_id << ">";
  182. return str;
  183. }
  184. if (label_scope == scope_idx) {
  185. return label_name.str().str();
  186. }
  187. return (GetScopeInfo(label_scope).name.str() + "." + label_name.str())
  188. .str();
  189. }
  190. private:
  191. // A space in which unique names can be allocated.
  192. struct Namespace {
  193. // A result of a name lookup.
  194. struct NameResult;
  195. // A name in a namespace, which might be redirected to refer to another name
  196. // for disambiguation purposes.
  197. class Name {
  198. public:
  199. Name() : value_(nullptr) {}
  200. explicit Name(llvm::StringMapIterator<NameResult> it) : value_(&*it) {}
  201. explicit operator bool() const { return value_; }
  202. auto str() const -> llvm::StringRef {
  203. llvm::StringMapEntry<NameResult>* value = value_;
  204. CARBON_CHECK(value) << "cannot print a null name";
  205. while (value->second.ambiguous && value->second.fallback) {
  206. value = value->second.fallback.value_;
  207. }
  208. return value->first();
  209. }
  210. auto SetFallback(Name name) -> void { value_->second.fallback = name; }
  211. auto SetAmbiguous() -> void { value_->second.ambiguous = true; }
  212. private:
  213. llvm::StringMapEntry<NameResult>* value_ = nullptr;
  214. };
  215. struct NameResult {
  216. bool ambiguous = false;
  217. Name fallback = Name();
  218. };
  219. llvm::StringRef prefix;
  220. llvm::StringMap<NameResult> allocated = {};
  221. int unnamed_count = 0;
  222. auto AddNameUnchecked(llvm::StringRef name) -> Name {
  223. return Name(allocated.insert({name, NameResult()}).first);
  224. }
  225. auto AllocateName(const InstNamer& namer, Parse::NodeId node,
  226. std::string name = "") -> Name {
  227. // The best (shortest) name for this instruction so far, and the current
  228. // name for it.
  229. Name best;
  230. Name current;
  231. // Add `name` as a name for this entity.
  232. auto add_name = [&](bool mark_ambiguous = true) {
  233. auto [it, added] = allocated.insert({name, NameResult()});
  234. Name new_name = Name(it);
  235. if (!added) {
  236. if (mark_ambiguous) {
  237. // This name was allocated for a different instruction. Mark it as
  238. // ambiguous and keep looking for a name for this instruction.
  239. new_name.SetAmbiguous();
  240. }
  241. } else {
  242. if (!best) {
  243. best = new_name;
  244. } else {
  245. CARBON_CHECK(current);
  246. current.SetFallback(new_name);
  247. }
  248. current = new_name;
  249. }
  250. return added;
  251. };
  252. // All names start with the prefix.
  253. name.insert(0, prefix);
  254. // Use the given name if it's available and not just the prefix.
  255. if (name.size() > prefix.size()) {
  256. add_name();
  257. }
  258. // Append location information to try to disambiguate.
  259. if (node.is_valid()) {
  260. auto token = namer.parse_tree_.node_token(node);
  261. llvm::raw_string_ostream(name)
  262. << ".loc" << namer.tokenized_buffer_.GetLineNumber(token);
  263. add_name();
  264. llvm::raw_string_ostream(name)
  265. << "_" << namer.tokenized_buffer_.GetColumnNumber(token);
  266. add_name();
  267. }
  268. // Append numbers until we find an available name.
  269. name += ".";
  270. auto name_size_without_counter = name.size();
  271. for (int counter = 1;; ++counter) {
  272. name.resize(name_size_without_counter);
  273. llvm::raw_string_ostream(name) << counter;
  274. if (add_name(/*mark_ambiguous=*/false)) {
  275. return best;
  276. }
  277. }
  278. }
  279. };
  280. // A named scope that contains named entities.
  //
  // Instruction names and block labels are allocated in separate namespaces,
  // with the prefixes `%` and `!` respectively.
  281. struct Scope {
  // The scope's own name, used to qualify references made from other scopes.
  282. Namespace::Name name;
  283. Namespace insts = {.prefix = "%"};
  284. Namespace labels = {.prefix = "!"};
  285. };
  286. auto GetScopeInfo(ScopeIndex scope_idx) -> Scope& {
  287. return scopes[static_cast<int>(scope_idx)];
  288. }
  289. auto AddBlockLabel(ScopeIndex scope_idx, InstBlockId block_id,
  290. std::string name = "",
  291. Parse::NodeId parse_node = Parse::NodeId::Invalid)
  292. -> void {
  293. if (!block_id.is_valid() || labels[block_id.index].second) {
  294. return;
  295. }
  296. if (parse_node == Parse::NodeId::Invalid) {
  297. if (const auto& block = sem_ir_.inst_blocks().Get(block_id);
  298. !block.empty()) {
  299. parse_node = sem_ir_.insts().Get(block.front()).parse_node();
  300. }
  301. }
  302. labels[block_id.index] = {scope_idx,
  303. GetScopeInfo(scope_idx).labels.AllocateName(
  304. *this, parse_node, std::move(name))};
  305. }
  306. // Finds and adds a suitable block label for the given SemIR instruction that
  307. // represents some kind of branch.
  308. auto AddBlockLabel(ScopeIndex scope_idx, InstBlockId block_id, Inst inst)
  309. -> void {
  310. llvm::StringRef name;
  311. switch (parse_tree_.node_kind(inst.parse_node())) {
  312. case Parse::NodeKind::IfExprIf:
  313. switch (inst.kind()) {
  314. case BranchIf::Kind:
  315. name = "if.expr.then";
  316. break;
  317. case Branch::Kind:
  318. name = "if.expr.else";
  319. break;
  320. case BranchWithArg::Kind:
  321. name = "if.expr.result";
  322. break;
  323. default:
  324. break;
  325. }
  326. break;
  327. case Parse::NodeKind::IfCondition:
  328. switch (inst.kind()) {
  329. case BranchIf::Kind:
  330. name = "if.then";
  331. break;
  332. case Branch::Kind:
  333. name = "if.else";
  334. break;
  335. default:
  336. break;
  337. }
  338. break;
  339. case Parse::NodeKind::IfStatement:
  340. name = "if.done";
  341. break;
  342. case Parse::NodeKind::ShortCircuitOperandAnd:
  343. name = inst.Is<BranchIf>() ? "and.rhs" : "and.result";
  344. break;
  345. case Parse::NodeKind::ShortCircuitOperandOr:
  346. name = inst.Is<BranchIf>() ? "or.rhs" : "or.result";
  347. break;
  348. case Parse::NodeKind::WhileConditionStart:
  349. name = "while.cond";
  350. break;
  351. case Parse::NodeKind::WhileCondition:
  352. switch (inst.kind()) {
  353. case InstKind::BranchIf:
  354. name = "while.body";
  355. break;
  356. case InstKind::Branch:
  357. name = "while.done";
  358. break;
  359. default:
  360. break;
  361. }
  362. break;
  363. default:
  364. break;
  365. }
  366. AddBlockLabel(scope_idx, block_id, name.str(), inst.parse_node());
  367. }
  368. auto CollectNamesInBlock(ScopeIndex scope_idx, InstBlockId block_id) -> void {
  369. if (block_id.is_valid()) {
  370. CollectNamesInBlock(scope_idx, sem_ir_.inst_blocks().Get(block_id));
  371. }
  372. }
  373. auto CollectNamesInBlock(ScopeIndex scope_idx, llvm::ArrayRef<InstId> block)
  374. -> void {
  375. Scope& scope = GetScopeInfo(scope_idx);
  376. // Use bound names where available. Otherwise, assign a backup name.
  377. for (auto inst_id : block) {
  378. if (!inst_id.is_valid()) {
  379. continue;
  380. }
  381. auto inst = sem_ir_.insts().Get(inst_id);
  382. auto add_inst_name = [&](std::string name) {
  383. insts[inst_id.index] = {scope_idx, scope.insts.AllocateName(
  384. *this, inst.parse_node(), name)};
  385. };
  386. auto add_inst_name_id = [&](NameId name_id, llvm::StringRef suffix = "") {
  387. add_inst_name(
  388. (sem_ir_.names().GetIRBaseName(name_id).str() + suffix).str());
  389. };
  390. switch (inst.kind()) {
  391. case Branch::Kind: {
  392. AddBlockLabel(scope_idx, inst.As<Branch>().target_id, inst);
  393. break;
  394. }
  395. case BranchIf::Kind: {
  396. AddBlockLabel(scope_idx, inst.As<BranchIf>().target_id, inst);
  397. break;
  398. }
  399. case BranchWithArg::Kind: {
  400. AddBlockLabel(scope_idx, inst.As<BranchWithArg>().target_id, inst);
  401. break;
  402. }
  403. case SpliceBlock::Kind: {
  404. CollectNamesInBlock(scope_idx, inst.As<SpliceBlock>().block_id);
  405. break;
  406. }
  407. case BindName::Kind: {
  408. add_inst_name_id(inst.As<BindName>().name_id);
  409. continue;
  410. }
  411. case FunctionDecl::Kind: {
  412. add_inst_name_id(sem_ir_.functions()
  413. .Get(inst.As<FunctionDecl>().function_id)
  414. .name_id);
  415. continue;
  416. }
  417. case ClassDecl::Kind: {
  418. add_inst_name_id(
  419. sem_ir_.classes().Get(inst.As<ClassDecl>().class_id).name_id,
  420. ".decl");
  421. continue;
  422. }
  423. case ClassType::Kind: {
  424. add_inst_name_id(
  425. sem_ir_.classes().Get(inst.As<ClassType>().class_id).name_id);
  426. continue;
  427. }
  428. case Import::Kind: {
  429. add_inst_name("import");
  430. continue;
  431. }
  432. case InterfaceDecl::Kind: {
  433. add_inst_name_id(sem_ir_.interfaces()
  434. .Get(inst.As<InterfaceDecl>().interface_id)
  435. .name_id,
  436. ".decl");
  437. continue;
  438. }
  439. case NameRef::Kind: {
  440. add_inst_name_id(inst.As<NameRef>().name_id, ".ref");
  441. continue;
  442. }
  443. case Param::Kind: {
  444. add_inst_name_id(inst.As<Param>().name_id);
  445. continue;
  446. }
  447. case SelfParam::Kind: {
  448. add_inst_name(inst.As<SelfParam>().is_addr_self.index ? "self.addr"
  449. : "self");
  450. continue;
  451. }
  452. case VarStorage::Kind: {
  453. add_inst_name_id(inst.As<VarStorage>().name_id, ".var");
  454. continue;
  455. }
  456. default: {
  457. break;
  458. }
  459. }
  460. // Sequentially number all remaining values.
  461. if (inst.kind().value_kind() != InstValueKind::None) {
  462. add_inst_name("");
  463. }
  464. }
  465. }
  // Inputs: the token buffer and parse tree are used to look up line and
  // column numbers when disambiguating names by location.
  466. const Lex::TokenizedBuffer& tokenized_buffer_;
  467. const Parse::Tree& parse_tree_;
  468. const File& sem_ir_;
  // Namespace for scope names (`@file`, `@constants` and per-entity scopes).
  469. Namespace globals = {.prefix = "@"};
  // Owning scope and allocated name for each instruction, indexed by
  // `InstId::index`.
  470. std::vector<std::pair<ScopeIndex, Namespace::Name>> insts;
  // Owning scope and allocated label for each instruction block, indexed by
  // `InstBlockId::index`.
  471. std::vector<std::pair<ScopeIndex, Namespace::Name>> labels;
  // Per-scope state, indexed by `ScopeIndex`.
  472. std::vector<Scope> scopes;
  473. };
  474. } // namespace
  475. // Formatter for printing textual Semantics IR.
  476. class Formatter {
  477. public:
  478. explicit Formatter(const Lex::TokenizedBuffer& tokenized_buffer,
  479. const Parse::Tree& parse_tree, const File& sem_ir,
  480. llvm::raw_ostream& out)
  481. : sem_ir_(sem_ir),
  482. out_(out),
  483. inst_namer_(tokenized_buffer, parse_tree, sem_ir) {}
  484. // Prints the SemIR.
  485. //
  486. // Constants are printed first and may be referenced by later sections,
  487. // including file-scoped instructions. The file scope may contain entity
  488. // declarations which are defined later, such as classes.
  489. auto Format() -> void {
  490. out_ << "--- " << sem_ir_.filename() << "\n\n";
  491. FormatConstants();
  492. out_ << "file {\n";
  493. // TODO: Handle the case where there are multiple top-level instruction
  494. // blocks. For example, there may be branching in the initializer of a
  495. // global or a type expression.
  496. if (auto block_id = sem_ir_.top_inst_block_id(); block_id.is_valid()) {
  497. llvm::SaveAndRestore file_scope(scope_, InstNamer::ScopeIndex::File);
  498. FormatCodeBlock(block_id);
  499. }
  500. out_ << "}\n";
  501. for (int i : llvm::seq(sem_ir_.interfaces().size())) {
  502. FormatInterface(InterfaceId(i));
  503. }
  504. for (int i : llvm::seq(sem_ir_.classes().size())) {
  505. FormatClass(ClassId(i));
  506. }
  507. for (int i : llvm::seq(sem_ir_.functions().size())) {
  508. FormatFunction(FunctionId(i));
  509. }
  510. // End-of-file newline.
  511. out_ << "\n";
  512. }
  513. auto FormatConstants() -> void {
  514. if (!sem_ir_.constants().size()) {
  515. return;
  516. }
  517. llvm::SaveAndRestore constants_scope(scope_,
  518. InstNamer::ScopeIndex::Constants);
  519. out_ << "constants {\n";
  520. FormatCodeBlock(sem_ir_.constants().array_ref());
  521. out_ << "}\n\n";
  522. }
  523. auto FormatClass(ClassId id) -> void {
  524. const Class& class_info = sem_ir_.classes().Get(id);
  525. out_ << "\nclass ";
  526. FormatClassName(id);
  527. llvm::SaveAndRestore class_scope(scope_, inst_namer_.GetScopeFor(id));
  528. if (class_info.scope_id.is_valid()) {
  529. out_ << " {\n";
  530. FormatCodeBlock(class_info.body_block_id);
  531. out_ << "\n!members:";
  532. FormatNameScope(class_info.scope_id, "", "\n .");
  533. out_ << "\n}\n";
  534. } else {
  535. out_ << ";\n";
  536. }
  537. }
  538. auto FormatInterface(InterfaceId id) -> void {
  539. const Interface& interface_info = sem_ir_.interfaces().Get(id);
  540. out_ << "\ninterface ";
  541. FormatInterfaceName(id);
  542. llvm::SaveAndRestore interface_scope(scope_, inst_namer_.GetScopeFor(id));
  543. if (interface_info.scope_id.is_valid()) {
  544. out_ << " {\n";
  545. FormatCodeBlock(interface_info.body_block_id);
  546. out_ << "\n!members:";
  547. FormatNameScope(interface_info.scope_id, "", "\n .");
  548. out_ << "\n}\n";
  549. } else {
  550. out_ << ";\n";
  551. }
  552. }
  553. auto FormatFunction(FunctionId id) -> void {
  554. const Function& fn = sem_ir_.functions().Get(id);
  555. out_ << "\nfn ";
  556. FormatFunctionName(id);
  557. llvm::SaveAndRestore function_scope(scope_, inst_namer_.GetScopeFor(id));
  558. if (fn.implicit_param_refs_id != InstBlockId::Empty) {
  559. out_ << "[";
  560. FormatParamList(fn.implicit_param_refs_id);
  561. out_ << "]";
  562. }
  563. out_ << "(";
  564. FormatParamList(fn.param_refs_id);
  565. out_ << ")";
  566. if (fn.return_type_id.is_valid()) {
  567. out_ << " -> ";
  568. if (fn.return_slot_id.is_valid()) {
  569. FormatInstName(fn.return_slot_id);
  570. out_ << ": ";
  571. }
  572. FormatType(fn.return_type_id);
  573. }
  574. if (!fn.body_block_ids.empty()) {
  575. out_ << " {";
  576. for (auto block_id : fn.body_block_ids) {
  577. out_ << "\n";
  578. FormatLabel(block_id);
  579. out_ << ":\n";
  580. FormatCodeBlock(block_id);
  581. }
  582. out_ << "}\n";
  583. } else {
  584. out_ << ";\n";
  585. }
  586. }
  587. auto FormatParamList(InstBlockId param_refs_id) -> void {
  588. llvm::ListSeparator sep;
  589. for (const InstId param_id : sem_ir_.inst_blocks().Get(param_refs_id)) {
  590. out_ << sep;
  591. if (!param_id.is_valid()) {
  592. out_ << "invalid";
  593. continue;
  594. }
  595. FormatInstName(param_id);
  596. out_ << ": ";
  597. FormatType(sem_ir_.insts().Get(param_id).type_id());
  598. }
  599. }
  600. auto FormatCodeBlock(InstBlockId block_id) -> void {
  601. if (block_id.is_valid()) {
  602. FormatCodeBlock(sem_ir_.inst_blocks().Get(block_id));
  603. }
  604. }
  605. auto FormatCodeBlock(llvm::ArrayRef<InstId> block) -> void {
  606. for (const InstId inst_id : block) {
  607. FormatInstruction(inst_id);
  608. }
  609. }
  610. auto FormatNameScope(NameScopeId id, llvm::StringRef separator,
  611. llvm::StringRef prefix) -> void {
  612. // Name scopes aren't kept in any particular order. Sort the entries before
  613. // we print them for stability and consistency.
  614. llvm::SmallVector<std::pair<InstId, NameId>> entries;
  615. for (auto [name_id, inst_id] : sem_ir_.name_scopes().Get(id)) {
  616. entries.push_back({inst_id, name_id});
  617. }
  618. llvm::sort(entries,
  619. [](auto a, auto b) { return a.first.index < b.first.index; });
  620. llvm::ListSeparator sep(separator);
  621. for (auto [inst_id, name_id] : entries) {
  622. out_ << sep << prefix;
  623. FormatName(name_id);
  624. out_ << " = ";
  625. FormatInstName(inst_id);
  626. }
  627. }
  628. auto FormatInstruction(InstId inst_id) -> void {
  629. if (!inst_id.is_valid()) {
  630. Indent();
  631. out_ << "invalid\n";
  632. return;
  633. }
  634. FormatInstruction(inst_id, sem_ir_.insts().Get(inst_id));
  635. }
  // Dispatches on the dynamic kind of `inst` and forwards to the
  // `FormatInstruction` overload for the corresponding typed instruction.
  636. auto FormatInstruction(InstId inst_id, Inst inst) -> void {
  637. // clang warns on unhandled enum values; clang-tidy is incorrect here.
  638. // NOLINTNEXTLINE(bugprone-switch-missing-default-case)
  639. switch (inst.kind()) {
  // Each use of CARBON_SEM_IR_INST_KIND in the included .def file expands to
  // one `case`; presumably the file lists every instruction kind.
  640. #define CARBON_SEM_IR_INST_KIND(InstT) \
  641. case InstT::Kind: \
  642. FormatInstruction(inst_id, inst.As<InstT>()); \
  643. break;
  644. #include "toolchain/sem_ir/inst_kind.def"
  645. }
  646. }
  647. auto Indent() -> void { out_.indent(indent_); }
  648. template <typename InstT>
  649. auto FormatInstruction(InstId inst_id, InstT inst) -> void {
  650. Indent();
  651. FormatInstructionLHS(inst_id, inst);
  652. out_ << InstT::Kind.ir_name();
  653. FormatInstructionRHS(inst);
  654. out_ << "\n";
  655. }
  656. auto FormatInstructionLHS(InstId inst_id, Inst inst) -> void {
  657. switch (inst.kind().value_kind()) {
  658. case InstValueKind::Typed:
  659. FormatInstName(inst_id);
  660. out_ << ": ";
  661. switch (GetExprCategory(sem_ir_, inst_id)) {
  662. case ExprCategory::NotExpr:
  663. case ExprCategory::Error:
  664. case ExprCategory::Value:
  665. case ExprCategory::Mixed:
  666. break;
  667. case ExprCategory::DurableRef:
  668. case ExprCategory::EphemeralRef:
  669. out_ << "ref ";
  670. break;
  671. case ExprCategory::Initializing:
  672. out_ << "init ";
  673. break;
  674. }
  675. FormatType(inst.type_id());
  676. out_ << " = ";
  677. break;
  678. case InstValueKind::None:
  679. break;
  680. }
  681. }
  682. // Print ClassDecl with type-like semantics even though it lacks a type_id.
  683. auto FormatInstructionLHS(InstId inst_id, ClassDecl /*inst*/) -> void {
  684. FormatInstName(inst_id);
  685. out_ << " = ";
  686. }
  687. // Print InterfaceDecl with type-like semantics even though it lacks a
  688. // type_id.
  689. auto FormatInstructionLHS(InstId inst_id, InterfaceDecl /*inst*/) -> void {
  690. FormatInstName(inst_id);
  691. out_ << " = ";
  692. }
  693. template <typename InstT>
  694. auto FormatInstructionRHS(InstT inst) -> void {
  695. // By default, an instruction has a comma-separated argument list.
  696. using Info = TypedInstArgsInfo<InstT>;
  697. if constexpr (Info::NumArgs == 2) {
  698. FormatArgs(Info::template Get<0>(inst), Info::template Get<1>(inst));
  699. } else if constexpr (Info::NumArgs == 1) {
  700. FormatArgs(Info::template Get<0>(inst));
  701. } else {
  702. FormatArgs();
  703. }
  704. }
  705. auto FormatInstructionRHS(BlockArg inst) -> void {
  706. out_ << " ";
  707. FormatLabel(inst.block_id);
  708. }
  709. auto FormatInstruction(InstId /*inst_id*/, BranchIf inst) -> void {
  710. if (!in_terminator_sequence_) {
  711. Indent();
  712. }
  713. out_ << "if ";
  714. FormatInstName(inst.cond_id);
  715. out_ << " " << Branch::Kind.ir_name() << " ";
  716. FormatLabel(inst.target_id);
  717. out_ << " else ";
  718. in_terminator_sequence_ = true;
  719. }
  720. auto FormatInstruction(InstId /*inst_id*/, BranchWithArg inst) -> void {
  721. if (!in_terminator_sequence_) {
  722. Indent();
  723. }
  724. out_ << BranchWithArg::Kind.ir_name() << " ";
  725. FormatLabel(inst.target_id);
  726. out_ << "(";
  727. FormatInstName(inst.arg_id);
  728. out_ << ")\n";
  729. in_terminator_sequence_ = false;
  730. }
  731. auto FormatInstruction(InstId /*inst_id*/, Branch inst) -> void {
  732. if (!in_terminator_sequence_) {
  733. Indent();
  734. }
  735. out_ << Branch::Kind.ir_name() << " ";
  736. FormatLabel(inst.target_id);
  737. out_ << "\n";
  738. in_terminator_sequence_ = false;
  739. }
  740. auto FormatInstructionRHS(Call inst) -> void {
  741. out_ << " ";
  742. FormatArg(inst.callee_id);
  743. if (!inst.args_id.is_valid()) {
  744. out_ << "(<invalid>)";
  745. return;
  746. }
  747. llvm::ArrayRef<InstId> args = sem_ir_.inst_blocks().Get(inst.args_id);
  748. bool has_return_slot = GetInitRepr(sem_ir_, inst.type_id).has_return_slot();
  749. InstId return_slot_id = InstId::Invalid;
  750. if (has_return_slot) {
  751. return_slot_id = args.back();
  752. args = args.drop_back();
  753. }
  754. llvm::ListSeparator sep;
  755. out_ << '(';
  756. for (auto inst_id : args) {
  757. out_ << sep;
  758. FormatArg(inst_id);
  759. }
  760. out_ << ')';
  761. if (has_return_slot) {
  762. FormatReturnSlot(return_slot_id);
  763. }
  764. }
  765. auto FormatInstructionRHS(ArrayInit inst) -> void {
  766. FormatArgs(inst.inits_id);
  767. FormatReturnSlot(inst.dest_id);
  768. }
  769. auto FormatInstructionRHS(InitializeFrom inst) -> void {
  770. FormatArgs(inst.src_id);
  771. FormatReturnSlot(inst.dest_id);
  772. }
  773. auto FormatInstructionRHS(StructInit init) -> void {
  774. FormatArgs(init.elements_id);
  775. FormatReturnSlot(init.dest_id);
  776. }
  777. auto FormatInstructionRHS(TupleInit init) -> void {
  778. FormatArgs(init.elements_id);
  779. FormatReturnSlot(init.dest_id);
  780. }
  781. auto FormatInstructionRHS(CrossRef inst) -> void {
  782. // TODO: Figure out a way to make this meaningful. We'll need some way to
  783. // name cross-reference IRs, perhaps by the instruction ID of the import?
  784. out_ << " " << inst.ir_id << "." << inst.inst_id;
  785. }
  786. auto FormatInstructionRHS(SpliceBlock inst) -> void {
  787. FormatArgs(inst.result_id);
  788. out_ << " {";
  789. if (!sem_ir_.inst_blocks().Get(inst.block_id).empty()) {
  790. out_ << "\n";
  791. indent_ += 2;
  792. FormatCodeBlock(inst.block_id);
  793. indent_ -= 2;
  794. Indent();
  795. }
  796. out_ << "}";
  797. }
  798. // StructTypeFields are formatted as part of their StructType.
  799. auto FormatInstruction(InstId /*inst_id*/, StructTypeField /*inst*/) -> void {
  800. }
  801. auto FormatInstructionRHS(StructType inst) -> void {
  802. out_ << " {";
  803. llvm::ListSeparator sep;
  804. for (auto field_id : sem_ir_.inst_blocks().Get(inst.fields_id)) {
  805. out_ << sep << ".";
  806. auto field = sem_ir_.insts().GetAs<StructTypeField>(field_id);
  807. FormatName(field.name_id);
  808. out_ << ": ";
  809. FormatType(field.field_type_id);
  810. }
  811. out_ << "}";
  812. }
  813. auto FormatArgs() -> void {}
  814. template <typename... Args>
  815. auto FormatArgs(Args... args) -> void {
  816. out_ << ' ';
  817. llvm::ListSeparator sep;
  818. ((out_ << sep, FormatArg(args)), ...);
  819. }
  820. auto FormatArg(BoolValue v) -> void { out_ << v; }
  821. auto FormatArg(BuiltinKind kind) -> void { out_ << kind.label(); }
  822. auto FormatArg(FunctionId id) -> void { FormatFunctionName(id); }
  823. auto FormatArg(ClassId id) -> void { FormatClassName(id); }
  824. auto FormatArg(InterfaceId id) -> void { FormatInterfaceName(id); }
  825. auto FormatArg(CrossRefIRId id) -> void { out_ << id; }
  826. auto FormatArg(IntId id) -> void {
  827. sem_ir_.ints().Get(id).print(out_, /*isSigned=*/false);
  828. }
  829. auto FormatArg(ElementIndex index) -> void { out_ << index; }
  830. auto FormatArg(NameScopeId id) -> void {
  831. out_ << '{';
  832. FormatNameScope(id, ", ", ".");
  833. out_ << '}';
  834. }
  835. auto FormatArg(InstId id) -> void { FormatInstName(id); }
  836. auto FormatArg(InstBlockId id) -> void {
  837. out_ << '(';
  838. llvm::ListSeparator sep;
  839. for (auto inst_id : sem_ir_.inst_blocks().Get(id)) {
  840. out_ << sep;
  841. FormatArg(inst_id);
  842. }
  843. out_ << ')';
  844. }
  845. auto FormatArg(RealId id) -> void {
  846. // TODO: Format with a `.` when the exponent is near zero.
  847. const auto& real = sem_ir_.reals().Get(id);
  848. real.mantissa.print(out_, /*isSigned=*/false);
  849. out_ << (real.is_decimal ? 'e' : 'p') << real.exponent;
  850. }
  851. auto FormatArg(StringLiteralId id) -> void {
  852. out_ << '"';
  853. out_.write_escaped(sem_ir_.string_literals().Get(id),
  854. /*UseHexEscapes=*/true);
  855. out_ << '"';
  856. }
  857. auto FormatArg(NameId id) -> void { FormatName(id); }
  858. auto FormatArg(TypeId id) -> void { FormatType(id); }
  859. auto FormatArg(TypeBlockId id) -> void {
  860. out_ << '(';
  861. llvm::ListSeparator sep;
  862. for (auto type_id : sem_ir_.type_blocks().Get(id)) {
  863. out_ << sep;
  864. FormatArg(type_id);
  865. }
  866. out_ << ')';
  867. }
  868. auto FormatReturnSlot(InstId dest_id) -> void {
  869. out_ << " to ";
  870. FormatArg(dest_id);
  871. }
  872. auto FormatName(NameId id) -> void {
  873. out_ << sem_ir_.names().GetFormatted(id);
  874. }
  875. auto FormatInstName(InstId id) -> void {
  876. out_ << inst_namer_.GetNameFor(scope_, id);
  877. }
  878. auto FormatLabel(InstBlockId id) -> void {
  879. out_ << inst_namer_.GetLabelFor(scope_, id);
  880. }
  881. auto FormatFunctionName(FunctionId id) -> void {
  882. out_ << inst_namer_.GetNameFor(id);
  883. }
  884. auto FormatClassName(ClassId id) -> void {
  885. out_ << inst_namer_.GetNameFor(id);
  886. }
  887. auto FormatInterfaceName(InterfaceId id) -> void {
  888. out_ << inst_namer_.GetNameFor(id);
  889. }
  890. auto FormatType(TypeId id) -> void {
  891. if (!id.is_valid()) {
  892. out_ << "invalid";
  893. } else {
  894. out_ << sem_ir_.StringifyType(id);
  895. }
  896. }
  897. private:
  // The IR being printed and the stream it is printed to.
  898. const File& sem_ir_;
  899. llvm::raw_ostream& out_;
  // Supplies the names and labels used in the output.
  900. InstNamer inst_namer_;
  // The scope currently being printed; names owned by other scopes are
  // printed qualified.
  901. InstNamer::ScopeIndex scope_ = InstNamer::ScopeIndex::None;
  // Set after a BranchIf is printed so that the following branch continues
  // on the same line instead of being indented.
  902. bool in_terminator_sequence_ = false;
  // Number of spaces written by Indent().
  903. int indent_ = 2;
  904. };
  905. auto FormatFile(const Lex::TokenizedBuffer& tokenized_buffer,
  906. const Parse::Tree& parse_tree, const File& sem_ir,
  907. llvm::raw_ostream& out) -> void {
  908. Formatter(tokenized_buffer, parse_tree, sem_ir, out).Format();
  909. }
  910. } // namespace Carbon::SemIR