inst_namer.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/sem_ir/inst_namer.h"
  5. #include "common/ostream.h"
  6. #include "toolchain/base/kind_switch.h"
  7. #include "toolchain/base/value_store.h"
  8. #include "toolchain/lex/tokenized_buffer.h"
  9. #include "toolchain/parse/tree.h"
  10. #include "toolchain/sem_ir/builtin_function_kind.h"
  11. #include "toolchain/sem_ir/function.h"
  12. #include "toolchain/sem_ir/ids.h"
  13. #include "toolchain/sem_ir/inst_kind.h"
  14. #include "toolchain/sem_ir/typed_insts.h"
  15. namespace Carbon::SemIR {
  16. InstNamer::InstNamer(const Lex::TokenizedBuffer& tokenized_buffer,
  17. const Parse::Tree& parse_tree, const File& sem_ir)
  18. : tokenized_buffer_(tokenized_buffer),
  19. parse_tree_(parse_tree),
  20. sem_ir_(sem_ir) {
  21. insts.resize(sem_ir.insts().size());
  22. labels.resize(sem_ir.inst_blocks().size());
  23. scopes.resize(static_cast<size_t>(GetScopeFor(NumberOfScopesTag())));
  24. // Build the constants scope.
  25. CollectNamesInBlock(ScopeId::Constants, sem_ir.constants().array_ref());
  26. // Build the ImportRef scope.
  27. CollectNamesInBlock(ScopeId::ImportRefs,
  28. sem_ir.inst_blocks().Get(SemIR::InstBlockId::ImportRefs));
  29. // Build the file scope.
  30. CollectNamesInBlock(ScopeId::File, sem_ir.top_inst_block_id());
  31. // Build each function scope.
  32. for (auto [i, fn] : llvm::enumerate(sem_ir.functions().array_ref())) {
  33. auto fn_id = FunctionId(i);
  34. auto fn_scope = GetScopeFor(fn_id);
  35. // TODO: Provide a location for the function for use as a
  36. // disambiguator.
  37. auto fn_loc = Parse::NodeId::Invalid;
  38. GetScopeInfo(fn_scope).name = globals.AllocateName(
  39. *this, fn_loc, sem_ir.names().GetIRBaseName(fn.name_id).str());
  40. CollectNamesInBlock(fn_scope, fn.implicit_param_refs_id);
  41. CollectNamesInBlock(fn_scope, fn.param_refs_id);
  42. if (fn.return_storage_id.is_valid()) {
  43. insts[fn.return_storage_id.index] = {
  44. fn_scope,
  45. GetScopeInfo(fn_scope).insts.AllocateName(
  46. *this, sem_ir.insts().GetLocId(fn.return_storage_id), "return")};
  47. }
  48. if (!fn.body_block_ids.empty()) {
  49. AddBlockLabel(fn_scope, fn.body_block_ids.front(), "entry", fn_loc);
  50. }
  51. for (auto block_id : fn.body_block_ids) {
  52. CollectNamesInBlock(fn_scope, block_id);
  53. }
  54. for (auto block_id : fn.body_block_ids) {
  55. AddBlockLabel(fn_scope, block_id);
  56. }
  57. }
  58. // Build each class scope.
  59. for (auto [i, class_info] : llvm::enumerate(sem_ir.classes().array_ref())) {
  60. auto class_id = ClassId(i);
  61. auto class_scope = GetScopeFor(class_id);
  62. // TODO: Provide a location for the class for use as a disambiguator.
  63. auto class_loc = Parse::NodeId::Invalid;
  64. GetScopeInfo(class_scope).name = globals.AllocateName(
  65. *this, class_loc,
  66. sem_ir.names().GetIRBaseName(class_info.name_id).str());
  67. AddBlockLabel(class_scope, class_info.body_block_id, "class", class_loc);
  68. CollectNamesInBlock(class_scope, class_info.body_block_id);
  69. }
  70. // Build each interface scope.
  71. for (auto [i, interface_info] :
  72. llvm::enumerate(sem_ir.interfaces().array_ref())) {
  73. auto interface_id = InterfaceId(i);
  74. auto interface_scope = GetScopeFor(interface_id);
  75. // TODO: Provide a location for the interface for use as a disambiguator.
  76. auto interface_loc = Parse::NodeId::Invalid;
  77. GetScopeInfo(interface_scope).name = globals.AllocateName(
  78. *this, interface_loc,
  79. sem_ir.names().GetIRBaseName(interface_info.name_id).str());
  80. AddBlockLabel(interface_scope, interface_info.body_block_id, "interface",
  81. interface_loc);
  82. CollectNamesInBlock(interface_scope, interface_info.body_block_id);
  83. }
  84. // Build each impl scope.
  85. for (auto [i, impl_info] : llvm::enumerate(sem_ir.impls().array_ref())) {
  86. auto impl_id = ImplId(i);
  87. auto impl_scope = GetScopeFor(impl_id);
  88. // TODO: Provide a location for the impl for use as a disambiguator.
  89. auto impl_loc = Parse::NodeId::Invalid;
  90. // TODO: Invent a name based on the self and constraint types.
  91. GetScopeInfo(impl_scope).name =
  92. globals.AllocateName(*this, impl_loc, "impl");
  93. AddBlockLabel(impl_scope, impl_info.body_block_id, "impl", impl_loc);
  94. CollectNamesInBlock(impl_scope, impl_info.body_block_id);
  95. }
  96. }
  97. auto InstNamer::GetScopeName(ScopeId scope) const -> std::string {
  98. switch (scope) {
  99. case ScopeId::None:
  100. return "<invalid scope>";
  101. // These are treated as SemIR keywords.
  102. case ScopeId::File:
  103. return "file";
  104. case ScopeId::ImportRefs:
  105. return "imports";
  106. case ScopeId::Constants:
  107. return "constants";
  108. // For everything else, use an @ prefix.
  109. default:
  110. return ("@" + GetScopeInfo(scope).name.str()).str();
  111. }
  112. }
  113. auto InstNamer::GetUnscopedNameFor(InstId inst_id) const -> llvm::StringRef {
  114. if (!inst_id.is_valid()) {
  115. return "";
  116. }
  117. const auto& inst_name = insts[inst_id.index].second;
  118. return inst_name ? inst_name.str() : "";
  119. }
  120. auto InstNamer::GetNameFor(ScopeId scope_id, InstId inst_id) const
  121. -> std::string {
  122. if (!inst_id.is_valid()) {
  123. return "invalid";
  124. }
  125. // Check for a builtin.
  126. if (inst_id.is_builtin()) {
  127. return inst_id.builtin_inst_kind().label().str();
  128. }
  129. if (inst_id == InstId::PackageNamespace) {
  130. return "package";
  131. }
  132. const auto& [inst_scope, inst_name] = insts[inst_id.index];
  133. if (!inst_name) {
  134. // This should not happen in valid IR.
  135. std::string str;
  136. llvm::raw_string_ostream str_stream(str);
  137. str_stream << "<unexpected>." << inst_id;
  138. auto loc_id = sem_ir_.insts().GetLocId(inst_id);
  139. // TODO: Consider handling inst_id cases.
  140. if (loc_id.is_node_id()) {
  141. auto token = parse_tree_.node_token(loc_id.node_id());
  142. str_stream << ".loc" << tokenized_buffer_.GetLineNumber(token) << "_"
  143. << tokenized_buffer_.GetColumnNumber(token);
  144. }
  145. return str;
  146. }
  147. if (inst_scope == scope_id) {
  148. return ("%" + inst_name.str()).str();
  149. }
  150. return (GetScopeName(inst_scope) + ".%" + inst_name.str()).str();
  151. }
  152. auto InstNamer::GetUnscopedLabelFor(InstBlockId block_id) const
  153. -> llvm::StringRef {
  154. if (!block_id.is_valid()) {
  155. return "";
  156. }
  157. const auto& label_name = labels[block_id.index].second;
  158. return label_name ? label_name.str() : "";
  159. }
  160. // Returns the IR name to use for a label, when referenced from a given scope.
  161. auto InstNamer::GetLabelFor(ScopeId scope_id, InstBlockId block_id) const
  162. -> std::string {
  163. if (!block_id.is_valid()) {
  164. return "!invalid";
  165. }
  166. const auto& [label_scope, label_name] = labels[block_id.index];
  167. if (!label_name) {
  168. // This should not happen in valid IR.
  169. std::string str;
  170. llvm::raw_string_ostream(str)
  171. << "<unexpected instblockref " << block_id << ">";
  172. return str;
  173. }
  174. if (label_scope == scope_id) {
  175. return ("!" + label_name.str()).str();
  176. }
  177. return (GetScopeName(label_scope) + ".!" + label_name.str()).str();
  178. }
  179. auto InstNamer::Namespace::Name::str() const -> llvm::StringRef {
  180. llvm::StringMapEntry<NameResult>* value = value_;
  181. CARBON_CHECK(value) << "cannot print a null name";
  182. while (value->second.ambiguous && value->second.fallback) {
  183. value = value->second.fallback.value_;
  184. }
  185. return value->first();
  186. }
  187. auto InstNamer::Namespace::AllocateName(const InstNamer& inst_namer,
  188. SemIR::LocId loc_id, std::string name)
  189. -> Name {
  190. // The best (shortest) name for this instruction so far, and the current
  191. // name for it.
  192. Name best;
  193. Name current;
  194. // Add `name` as a name for this entity.
  195. auto add_name = [&](bool mark_ambiguous = true) {
  196. auto [it, added] = allocated.insert({name, NameResult()});
  197. Name new_name = Name(it);
  198. if (!added) {
  199. if (mark_ambiguous) {
  200. // This name was allocated for a different instruction. Mark it as
  201. // ambiguous and keep looking for a name for this instruction.
  202. new_name.SetAmbiguous();
  203. }
  204. } else {
  205. if (!best) {
  206. best = new_name;
  207. } else {
  208. CARBON_CHECK(current);
  209. current.SetFallback(new_name);
  210. }
  211. current = new_name;
  212. }
  213. return added;
  214. };
  215. // Use the given name if it's available.
  216. if (!name.empty()) {
  217. add_name();
  218. }
  219. // Append location information to try to disambiguate.
  220. // TODO: Consider handling inst_id cases.
  221. if (loc_id.is_node_id()) {
  222. auto token = inst_namer.parse_tree_.node_token(loc_id.node_id());
  223. llvm::raw_string_ostream(name)
  224. << ".loc" << inst_namer.tokenized_buffer_.GetLineNumber(token);
  225. add_name();
  226. llvm::raw_string_ostream(name)
  227. << "_" << inst_namer.tokenized_buffer_.GetColumnNumber(token);
  228. add_name();
  229. }
  230. // Append numbers until we find an available name.
  231. name += ".";
  232. auto name_size_without_counter = name.size();
  233. for (int counter = 1;; ++counter) {
  234. name.resize(name_size_without_counter);
  235. llvm::raw_string_ostream(name) << counter;
  236. if (add_name(/*mark_ambiguous=*/false)) {
  237. return best;
  238. }
  239. }
  240. }
  241. auto InstNamer::AddBlockLabel(ScopeId scope_id, InstBlockId block_id,
  242. std::string name, SemIR::LocId loc_id) -> void {
  243. if (!block_id.is_valid() || labels[block_id.index].second) {
  244. return;
  245. }
  246. if (!loc_id.is_valid()) {
  247. if (const auto& block = sem_ir_.inst_blocks().Get(block_id);
  248. !block.empty()) {
  249. loc_id = sem_ir_.insts().GetLocId(block.front());
  250. }
  251. }
  252. labels[block_id.index] = {
  253. scope_id, GetScopeInfo(scope_id).labels.AllocateName(*this, loc_id,
  254. std::move(name))};
  255. }
  256. // Finds and adds a suitable block label for the given SemIR instruction that
  257. // represents some kind of branch.
  258. auto InstNamer::AddBlockLabel(ScopeId scope_id, SemIR::LocId loc_id,
  259. AnyBranch branch) -> void {
  260. llvm::StringRef name;
  261. switch (parse_tree_.node_kind(loc_id.node_id())) {
  262. case Parse::NodeKind::IfExprIf:
  263. switch (branch.kind) {
  264. case BranchIf::Kind:
  265. name = "if.expr.then";
  266. break;
  267. case Branch::Kind:
  268. name = "if.expr.else";
  269. break;
  270. case BranchWithArg::Kind:
  271. name = "if.expr.result";
  272. break;
  273. default:
  274. break;
  275. }
  276. break;
  277. case Parse::NodeKind::IfCondition:
  278. switch (branch.kind) {
  279. case BranchIf::Kind:
  280. name = "if.then";
  281. break;
  282. case Branch::Kind:
  283. name = "if.else";
  284. break;
  285. default:
  286. break;
  287. }
  288. break;
  289. case Parse::NodeKind::IfStatement:
  290. name = "if.done";
  291. break;
  292. case Parse::NodeKind::ShortCircuitOperandAnd:
  293. name = branch.kind == BranchIf::Kind ? "and.rhs" : "and.result";
  294. break;
  295. case Parse::NodeKind::ShortCircuitOperandOr:
  296. name = branch.kind == BranchIf::Kind ? "or.rhs" : "or.result";
  297. break;
  298. case Parse::NodeKind::WhileConditionStart:
  299. name = "while.cond";
  300. break;
  301. case Parse::NodeKind::WhileCondition:
  302. switch (branch.kind) {
  303. case BranchIf::Kind:
  304. name = "while.body";
  305. break;
  306. case Branch::Kind:
  307. name = "while.done";
  308. break;
  309. default:
  310. break;
  311. }
  312. break;
  313. default:
  314. break;
  315. }
  316. AddBlockLabel(scope_id, branch.target_id, name.str(), loc_id);
  317. }
  318. auto InstNamer::CollectNamesInBlock(ScopeId scope_id, InstBlockId block_id)
  319. -> void {
  320. if (block_id.is_valid()) {
  321. CollectNamesInBlock(scope_id, sem_ir_.inst_blocks().Get(block_id));
  322. }
  323. }
  324. auto InstNamer::CollectNamesInBlock(ScopeId scope_id,
  325. llvm::ArrayRef<InstId> block) -> void {
  326. Scope& scope = GetScopeInfo(scope_id);
  327. // Use bound names where available. Otherwise, assign a backup name.
  328. for (auto inst_id : block) {
  329. if (!inst_id.is_valid()) {
  330. continue;
  331. }
  332. auto untyped_inst = sem_ir_.insts().Get(inst_id);
  333. auto add_inst_name = [&](std::string name) {
  334. insts[inst_id.index] = {
  335. scope_id, scope.insts.AllocateName(
  336. *this, sem_ir_.insts().GetLocId(inst_id), name)};
  337. };
  338. auto add_inst_name_id = [&](NameId name_id, llvm::StringRef suffix = "") {
  339. add_inst_name(
  340. (sem_ir_.names().GetIRBaseName(name_id).str() + suffix).str());
  341. };
  342. if (auto branch = untyped_inst.TryAs<AnyBranch>()) {
  343. AddBlockLabel(scope_id, sem_ir_.insts().GetLocId(inst_id), *branch);
  344. }
  345. CARBON_KIND_SWITCH(untyped_inst) {
  346. case CARBON_KIND(AddrPattern inst): {
  347. // TODO: We need to assign names to parameters that appear in
  348. // function declarations, which may be nested within a pattern. For
  349. // now, just look through `addr`, but we should find a better way to
  350. // visit parameters.
  351. CollectNamesInBlock(scope_id, inst.inner_id);
  352. break;
  353. }
  354. case CARBON_KIND(AssociatedConstantDecl inst): {
  355. add_inst_name_id(inst.name_id);
  356. continue;
  357. }
  358. case BindAlias::Kind:
  359. case BindName::Kind:
  360. case BindSymbolicName::Kind:
  361. case ExportDecl::Kind: {
  362. auto inst = untyped_inst.As<AnyBindNameOrExportDecl>();
  363. add_inst_name_id(
  364. sem_ir_.entity_names().Get(inst.entity_name_id).name_id);
  365. continue;
  366. }
  367. case CARBON_KIND(Call inst): {
  368. auto callee_function =
  369. SemIR::GetCalleeFunction(sem_ir_, inst.callee_id);
  370. if (!callee_function.function_id.is_valid()) {
  371. break;
  372. }
  373. const auto& function =
  374. sem_ir_.functions().Get(callee_function.function_id);
  375. // Name the call's result based on the callee.
  376. if (function.builtin_function_kind !=
  377. SemIR::BuiltinFunctionKind::None) {
  378. // For a builtin, use the builtin name. Otherwise, we'd typically pick
  379. // the name `Op` below, which is probably not very useful.
  380. add_inst_name(function.builtin_function_kind.name().str());
  381. continue;
  382. }
  383. add_inst_name_id(function.name_id, ".call");
  384. continue;
  385. }
  386. case CARBON_KIND(ClassDecl inst): {
  387. add_inst_name_id(sem_ir_.classes().Get(inst.class_id).name_id, ".decl");
  388. CollectNamesInBlock(scope_id, inst.decl_block_id);
  389. continue;
  390. }
  391. case CARBON_KIND(ClassType inst): {
  392. add_inst_name_id(sem_ir_.classes().Get(inst.class_id).name_id);
  393. continue;
  394. }
  395. case CARBON_KIND(FunctionDecl inst): {
  396. add_inst_name_id(sem_ir_.functions().Get(inst.function_id).name_id,
  397. ".decl");
  398. CollectNamesInBlock(scope_id, inst.decl_block_id);
  399. continue;
  400. }
  401. case CARBON_KIND(FunctionType inst): {
  402. add_inst_name_id(sem_ir_.functions().Get(inst.function_id).name_id,
  403. ".type");
  404. continue;
  405. }
  406. case CARBON_KIND(GenericClassType inst): {
  407. add_inst_name_id(sem_ir_.classes().Get(inst.class_id).name_id, ".type");
  408. continue;
  409. }
  410. case CARBON_KIND(GenericInterfaceType inst): {
  411. add_inst_name_id(sem_ir_.interfaces().Get(inst.interface_id).name_id,
  412. ".type");
  413. continue;
  414. }
  415. case CARBON_KIND(ImplDecl inst): {
  416. CollectNamesInBlock(scope_id, inst.decl_block_id);
  417. break;
  418. }
  419. case CARBON_KIND(ImportDecl inst): {
  420. if (inst.package_id.is_valid()) {
  421. add_inst_name_id(inst.package_id, ".import");
  422. } else {
  423. add_inst_name("default.import");
  424. }
  425. break;
  426. }
  427. case ImportRefUnloaded::Kind:
  428. case ImportRefLoaded::Kind: {
  429. add_inst_name("import_ref");
  430. // When building import refs, we frequently add instructions without
  431. // a block. Constants that refer to them need to be separately
  432. // named.
  433. auto const_id = sem_ir_.constant_values().Get(inst_id);
  434. if (const_id.is_valid() && const_id.is_template()) {
  435. auto const_inst_id = sem_ir_.constant_values().GetInstId(const_id);
  436. if (!insts[const_inst_id.index].second) {
  437. CollectNamesInBlock(ScopeId::ImportRefs, const_inst_id);
  438. }
  439. }
  440. continue;
  441. }
  442. case CARBON_KIND(InterfaceDecl inst): {
  443. add_inst_name_id(sem_ir_.interfaces().Get(inst.interface_id).name_id,
  444. ".decl");
  445. CollectNamesInBlock(scope_id, inst.decl_block_id);
  446. continue;
  447. }
  448. case CARBON_KIND(NameRef inst): {
  449. add_inst_name_id(inst.name_id, ".ref");
  450. continue;
  451. }
  452. // The namespace is specified here due to the name conflict.
  453. case CARBON_KIND(SemIR::Namespace inst): {
  454. add_inst_name_id(sem_ir_.name_scopes().Get(inst.name_scope_id).name_id);
  455. continue;
  456. }
  457. case CARBON_KIND(Param inst): {
  458. add_inst_name_id(inst.name_id);
  459. continue;
  460. }
  461. case CARBON_KIND(SpliceBlock inst): {
  462. CollectNamesInBlock(scope_id, inst.block_id);
  463. break;
  464. }
  465. case CARBON_KIND(StructValue inst): {
  466. if (auto fn_ty = sem_ir_.types().TryGetAs<FunctionType>(inst.type_id)) {
  467. add_inst_name_id(sem_ir_.functions().Get(fn_ty->function_id).name_id);
  468. } else if (auto generic_class_ty =
  469. sem_ir_.types().TryGetAs<GenericClassType>(
  470. inst.type_id)) {
  471. add_inst_name_id(
  472. sem_ir_.classes().Get(generic_class_ty->class_id).name_id);
  473. } else if (auto generic_interface_ty =
  474. sem_ir_.types().TryGetAs<GenericInterfaceType>(
  475. inst.type_id)) {
  476. add_inst_name_id(sem_ir_.interfaces()
  477. .Get(generic_interface_ty->interface_id)
  478. .name_id);
  479. } else {
  480. add_inst_name("struct");
  481. }
  482. continue;
  483. }
  484. case CARBON_KIND(TupleValue inst): {
  485. if (sem_ir_.types().Is<ArrayType>(inst.type_id)) {
  486. add_inst_name("array");
  487. } else {
  488. add_inst_name("tuple");
  489. }
  490. continue;
  491. }
  492. case CARBON_KIND(VarStorage inst): {
  493. add_inst_name_id(inst.name_id, ".var");
  494. continue;
  495. }
  496. default: {
  497. break;
  498. }
  499. }
  500. // Sequentially number all remaining values.
  501. if (untyped_inst.kind().value_kind() != InstValueKind::None) {
  502. add_inst_name("");
  503. }
  504. }
  505. }
  506. } // namespace Carbon::SemIR