file_context.cpp 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/lower/file_context.h"
  5. #include <memory>
  6. #include <optional>
  7. #include <string>
  8. #include <utility>
  9. #include "clang/CodeGen/ModuleBuilder.h"
  10. #include "common/check.h"
  11. #include "common/pretty_stack_trace_function.h"
  12. #include "common/vlog.h"
  13. #include "llvm/ADT/STLExtras.h"
  14. #include "llvm/ADT/Sequence.h"
  15. #include "llvm/Linker/Linker.h"
  16. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  17. #include "llvm/Transforms/Utils/ModuleUtils.h"
  18. #include "toolchain/base/kind_switch.h"
  19. #include "toolchain/lower/clang_global_decl.h"
  20. #include "toolchain/lower/constant.h"
  21. #include "toolchain/lower/function_context.h"
  22. #include "toolchain/lower/mangler.h"
  23. #include "toolchain/lower/specific_coalescer.h"
  24. #include "toolchain/sem_ir/absolute_node_id.h"
  25. #include "toolchain/sem_ir/diagnostic_loc_converter.h"
  26. #include "toolchain/sem_ir/entry_point.h"
  27. #include "toolchain/sem_ir/expr_info.h"
  28. #include "toolchain/sem_ir/file.h"
  29. #include "toolchain/sem_ir/function.h"
  30. #include "toolchain/sem_ir/generic.h"
  31. #include "toolchain/sem_ir/ids.h"
  32. #include "toolchain/sem_ir/inst.h"
  33. #include "toolchain/sem_ir/inst_categories.h"
  34. #include "toolchain/sem_ir/inst_kind.h"
  35. #include "toolchain/sem_ir/pattern.h"
  36. #include "toolchain/sem_ir/stringify.h"
  37. #include "toolchain/sem_ir/typed_insts.h"
  38. namespace Carbon::Lower {
  39. FileContext::FileContext(Context& context, const SemIR::File& sem_ir,
  40. const SemIR::InstNamer* inst_namer,
  41. llvm::raw_ostream* vlog_stream)
  42. : context_(&context),
  43. sem_ir_(&sem_ir),
  44. inst_namer_(inst_namer),
  45. vlog_stream_(vlog_stream),
  46. functions_(LoweredFunctionStore::MakeForOverwrite(sem_ir.functions())),
  47. specific_functions_(sem_ir.specifics(), nullptr),
  48. types_(LoweredTypeStore::MakeWithExplicitSize(
  49. sem_ir.insts().GetIdTag(), sem_ir.insts().size(), nullptr)),
  50. constants_(LoweredConstantStore::MakeWithExplicitSize(
  51. sem_ir.insts().GetIdTag(), sem_ir.insts().size(), nullptr)),
  52. lowered_specifics_(sem_ir.generics(),
  53. llvm::SmallVector<SemIR::SpecificId>()),
  54. coalescer_(vlog_stream_, sem_ir.specifics()),
  55. vtables_(decltype(vtables_)::MakeForOverwrite(sem_ir.vtables())),
  56. specific_vtables_(sem_ir.specifics(), nullptr) {
  57. // Initialization that relies on invariants of the class.
  58. cpp_code_generator_ = CreateCppCodeGenerator();
  59. CARBON_CHECK(!sem_ir.has_errors(),
  60. "Generating LLVM IR from invalid SemIR::File is unsupported.");
  61. }
  62. // TODO: Move this to lower.cpp.
  63. auto FileContext::PrepareToLower() -> void {
  64. if (cpp_code_generator_) {
  65. // Clang code generation should not actually modify the AST, but isn't
  66. // const-correct.
  67. cpp_code_generator_->Initialize(
  68. const_cast<clang::ASTContext&>(clang_ast_unit()->getASTContext()));
  69. // Work around `visitLocalTopLevelDecls` not being const. It doesn't modify
  70. // the AST unit other than triggering deserialization.
  71. auto* non_const_ast_unit = const_cast<clang::ASTUnit*>(clang_ast_unit());
  72. // Emit any top-level declarations now.
  73. // TODO: This may miss things that we need to emit which are handed to the
  74. // ASTConsumer in other ways. Instead of doing this, we should create the
  75. // CodeGenerator earlier and register it as an ASTConsumer before we parse
  76. // the C++ inputs.
  77. non_const_ast_unit->visitLocalTopLevelDecls(
  78. cpp_code_generator_.get(),
  79. [](void* codegen_ptr, const clang::Decl* decl) {
  80. auto* codegen = static_cast<clang::CodeGenerator*>(codegen_ptr);
  81. // CodeGenerator won't modify the declaration it's given, but we can
  82. // only call it via the ASTConsumer interface which doesn't know that.
  83. auto* non_const_decl = const_cast<clang::Decl*>(decl);
  84. codegen->HandleTopLevelDecl(clang::DeclGroupRef(non_const_decl));
  85. return true;
  86. });
  87. }
  88. // Lower all types that were required to be complete.
  89. for (auto type_id : sem_ir_->types().complete_types()) {
  90. if (type_id.index >= 0) {
  91. types_.Set(type_id, BuildType(sem_ir_->types().GetInstId(type_id)));
  92. }
  93. }
  94. // Lower function declarations.
  95. for (auto [id, _] : sem_ir_->functions().enumerate()) {
  96. functions_.Set(id, BuildFunctionDecl(id));
  97. }
  98. // TODO: Split vtable declaration creation from definition creation to avoid
  99. // redundant vtable definitions for imported vtables.
  100. for (const auto& [id, vtable] : sem_ir_->vtables().enumerate()) {
  101. const auto& class_info = sem_ir().classes().Get(vtable.class_id);
  102. // Vtables can't be generated for generics, only for their specifics - and
  103. // must be done lazily based on the use of those specifics.
  104. if (!class_info.generic_id.has_value()) {
  105. vtables_.Set(id, BuildVtable(vtable, SemIR::SpecificId::None));
  106. }
  107. }
  108. // Lower constants.
  109. LowerConstants(*this, constants_);
  110. }
  111. // TODO: Move this to lower.cpp.
  112. auto FileContext::LowerDefinitions() -> void {
  113. // Lower global variable definitions.
  114. // TODO: Storing both a `constants_` array and a separate `global_variables_`
  115. // map is redundant.
  116. for (auto inst_id :
  117. sem_ir().inst_blocks().Get(sem_ir().top_inst_block_id())) {
  118. // Only `VarStorage` indicates a global variable declaration in the
  119. // top instruction block.
  120. if (auto var = sem_ir().insts().TryGetAs<SemIR::VarStorage>(inst_id)) {
  121. // Get the global variable declaration. We created this when lowering the
  122. // constant unless the variable is unnamed, in which case we need to
  123. // create it now.
  124. llvm::GlobalVariable* llvm_var = nullptr;
  125. if (auto const_id = sem_ir().constant_values().Get(inst_id);
  126. const_id.is_constant()) {
  127. llvm_var = cast<llvm::GlobalVariable>(GetConstant(const_id, inst_id));
  128. } else {
  129. llvm_var = BuildGlobalVariableDecl(*var);
  130. }
  131. // Convert the declaration of this variable into a definition by adding an
  132. // initializer.
  133. global_variables_.Insert(inst_id, llvm_var);
  134. llvm_var->setInitializer(
  135. llvm::Constant::getNullValue(llvm_var->getValueType()));
  136. }
  137. }
  138. // Lower function definitions.
  139. for (auto [id, fn_info] : sem_ir_->functions().enumerate()) {
  140. // If we created a declaration and the function definition is not imported,
  141. // build a definition.
  142. if (functions_.Get(id) && fn_info.definition_id.has_value() &&
  143. !sem_ir().insts().GetImportSource(fn_info.definition_id).has_value()) {
  144. BuildFunctionDefinition(id);
  145. }
  146. }
  147. // Append `__global_init` to `llvm::global_ctors` to initialize global
  148. // variables.
  149. if (auto global_ctor_id = sem_ir().global_ctor_id();
  150. global_ctor_id.has_value()) {
  151. const auto& global_ctor = sem_ir().functions().Get(global_ctor_id);
  152. BuildFunctionBody(global_ctor_id, SemIR::SpecificId::None, global_ctor,
  153. *this, global_ctor);
  154. llvm::appendToGlobalCtors(llvm_module(),
  155. GetFunction(sem_ir().global_ctor_id()),
  156. /*Priority=*/0);
  157. }
  158. }
  159. auto FileContext::Finalize() -> void {
  160. if (cpp_code_generator_) {
  161. // Clang code generation should not actually modify the AST, but isn't
  162. // const-correct.
  163. cpp_code_generator_->HandleTranslationUnit(
  164. const_cast<clang::ASTContext&>(clang_ast_unit()->getASTContext()));
  165. bool link_error = llvm::Linker::linkModules(
  166. /*Dest=*/llvm_module(),
  167. /*Src=*/std::unique_ptr<llvm::Module>(
  168. cpp_code_generator_->ReleaseModule()));
  169. CARBON_CHECK(!link_error);
  170. }
  171. // Find equivalent specifics (from the same generic), replace all uses and
  172. // remove duplicately lowered function definitions.
  173. coalescer_.CoalesceEquivalentSpecifics(lowered_specifics_,
  174. specific_functions_);
  175. }
  176. auto FileContext::CreateCppCodeGenerator()
  177. -> std::unique_ptr<clang::CodeGenerator> {
  178. if (!clang_ast_unit()) {
  179. return nullptr;
  180. }
  181. RawStringOstream clang_module_name_stream;
  182. clang_module_name_stream << llvm_module().getName() << ".clang";
  183. // Do not emit Clang's name and version as the creator of the output file.
  184. cpp_code_gen_options_.EmitVersionIdentMetadata = false;
  185. return std::unique_ptr<clang::CodeGenerator>(clang::CreateLLVMCodeGen(
  186. clang_ast_unit()->getASTContext().getDiagnostics(),
  187. clang_module_name_stream.TakeStr(), context().file_system(),
  188. cpp_header_search_options_, cpp_preprocessor_options_,
  189. cpp_code_gen_options_, llvm_context()));
  190. }
  191. auto FileContext::GetConstant(SemIR::ConstantId const_id,
  192. SemIR::InstId use_inst_id) -> llvm::Value* {
  193. auto const_inst_id = sem_ir().constant_values().GetInstId(const_id);
  194. auto* const_value = constants_.Get(const_inst_id);
  195. // For value expressions and initializing expressions, the value produced by
  196. // a constant instruction is a value representation of the constant. For
  197. // initializing expressions, `FinishInit` will perform a copy if needed.
  198. switch (auto cat = SemIR::GetExprCategory(sem_ir(), const_inst_id)) {
  199. case SemIR::ExprCategory::Value:
  200. case SemIR::ExprCategory::Initializing:
  201. break;
  202. case SemIR::ExprCategory::DurableRef:
  203. case SemIR::ExprCategory::EphemeralRef:
  204. // Constant reference expressions lower to an address.
  205. return const_value;
  206. case SemIR::ExprCategory::NotExpr:
  207. case SemIR::ExprCategory::Error:
  208. case SemIR::ExprCategory::Mixed:
  209. CARBON_FATAL("Unexpected category {0} for lowered constant {1}", cat,
  210. sem_ir().insts().Get(const_inst_id));
  211. };
  212. auto value_rep = SemIR::ValueRepr::ForType(
  213. sem_ir(), sem_ir().insts().Get(const_inst_id).type_id());
  214. if (value_rep.kind != SemIR::ValueRepr::Pointer) {
  215. return const_value;
  216. }
  217. // The value representation is a pointer. Generate a variable to hold the
  218. // value, or find and reuse an existing one.
  219. if (auto result = global_variables().Lookup(const_inst_id)) {
  220. return result.value();
  221. }
  222. // Include both the name of the constant, if any, and the point of use in
  223. // the name of the variable.
  224. llvm::StringRef const_name;
  225. llvm::StringRef use_name;
  226. if (inst_namer_) {
  227. const_name = inst_namer_->GetUnscopedNameFor(const_inst_id);
  228. if (use_inst_id.has_value()) {
  229. use_name = inst_namer_->GetUnscopedNameFor(use_inst_id);
  230. }
  231. }
  232. // We always need to give the global a name even if the instruction namer
  233. // doesn't have one to use.
  234. if (const_name.empty()) {
  235. const_name = "const";
  236. }
  237. if (use_name.empty()) {
  238. use_name = "anon";
  239. }
  240. llvm::StringRef sep = (use_name[0] == '.') ? "" : ".";
  241. auto* global_variable = new llvm::GlobalVariable(
  242. llvm_module(), GetType(sem_ir().GetPointeeType(value_rep.type_id)),
  243. /*isConstant=*/true, llvm::GlobalVariable::InternalLinkage, const_value,
  244. const_name + sep + use_name);
  245. global_variables_.Insert(const_inst_id, global_variable);
  246. return global_variable;
  247. }
  248. auto FileContext::GetOrCreateFunction(SemIR::FunctionId function_id,
  249. SemIR::SpecificId specific_id)
  250. -> llvm::Function* {
  251. // If we have already lowered a declaration of this function, just return it.
  252. auto** result = GetFunctionAddr(function_id, specific_id);
  253. if (!*result) {
  254. *result = BuildFunctionDecl(function_id, specific_id);
  255. }
  256. return *result;
  257. }
  258. auto FileContext::BuildFunctionTypeInfo(const SemIR::Function& function,
  259. SemIR::SpecificId specific_id)
  260. -> FunctionTypeInfo {
  261. const auto return_info =
  262. SemIR::ReturnTypeInfo::ForFunction(sem_ir(), function, specific_id);
  263. if (!return_info.is_valid()) {
  264. // The return type has not been completed, create a trivial type instead.
  265. return {.type =
  266. llvm::FunctionType::get(llvm::Type::getVoidTy(llvm_context()),
  267. /*isVarArg=*/false)};
  268. }
  269. auto get_llvm_type = [&](SemIR::TypeId type_id) -> llvm::Type* {
  270. if (!type_id.has_value()) {
  271. return nullptr;
  272. }
  273. return GetType(type_id);
  274. };
  275. // TODO: expose the `Call` parameter patterns in `Function`, and use them here
  276. // instead of reconstructing them via the syntactic parameter lists.
  277. auto implicit_param_patterns =
  278. sem_ir().inst_blocks().GetOrEmpty(function.implicit_param_patterns_id);
  279. auto param_patterns =
  280. sem_ir().inst_blocks().GetOrEmpty(function.param_patterns_id);
  281. auto* return_type = get_llvm_type(return_info.type_id);
  282. llvm::SmallVector<llvm::Type*> param_types;
  283. // Compute the return type to use for the LLVM function. If the initializing
  284. // representation doesn't produce a value, set the return type to void.
  285. // TODO: For the `Run` entry point, remap return type to i32 if it doesn't
  286. // return a value.
  287. llvm::Type* function_return_type =
  288. (return_info.is_valid() &&
  289. return_info.init_repr.kind == SemIR::InitRepr::ByCopy)
  290. ? return_type
  291. : llvm::Type::getVoidTy(llvm_context());
  292. // TODO: Consider either storing `param_inst_ids` somewhere so that we can
  293. // reuse it from `BuildFunctionDefinition` and when building calls, or factor
  294. // out a mechanism to compute the mapping between parameters and arguments on
  295. // demand.
  296. llvm::SmallVector<SemIR::InstId> param_inst_ids;
  297. auto max_llvm_params = (return_info.has_return_slot() ? 1 : 0) +
  298. implicit_param_patterns.size() + param_patterns.size();
  299. param_types.reserve(max_llvm_params);
  300. param_inst_ids.reserve(max_llvm_params);
  301. auto return_param_id = SemIR::InstId::None;
  302. if (return_info.has_return_slot()) {
  303. param_types.push_back(
  304. llvm::PointerType::get(llvm_context(), /*AddressSpace=*/0));
  305. return_param_id = function.return_slot_pattern_id;
  306. param_inst_ids.push_back(return_param_id);
  307. }
  308. for (auto param_pattern_id : llvm::concat<const SemIR::InstId>(
  309. implicit_param_patterns, param_patterns)) {
  310. // TODO: Handle a general pattern here, rather than assuming that each
  311. // parameter pattern contains at most one binding.
  312. auto param_pattern_info = SemIR::Function::GetParamPatternInfoFromPatternId(
  313. sem_ir(), param_pattern_id);
  314. if (!param_pattern_info) {
  315. continue;
  316. }
  317. // TODO: Use a more general mechanism to determine if the binding is a
  318. // reference binding.
  319. if (param_pattern_info->var_pattern_id.has_value()) {
  320. param_types.push_back(
  321. llvm::PointerType::get(llvm_context(), /*AddressSpace=*/0));
  322. param_inst_ids.push_back(param_pattern_id);
  323. continue;
  324. }
  325. auto param_type_id = ExtractScrutineeType(
  326. sem_ir(), SemIR::GetTypeOfInstInSpecific(sem_ir(), specific_id,
  327. param_pattern_info->inst_id));
  328. CARBON_CHECK(
  329. !param_type_id.AsConstantId().is_symbolic(),
  330. "Found symbolic type id after resolution when lowering type {0}.",
  331. param_pattern_info->inst.type_id);
  332. switch (auto value_rep = SemIR::ValueRepr::ForType(sem_ir(), param_type_id);
  333. value_rep.kind) {
  334. case SemIR::ValueRepr::Unknown:
  335. // This parameter type is incomplete. Fallback to describing the
  336. // function type as `void()`.
  337. return {.type = llvm::FunctionType::get(
  338. llvm::Type::getVoidTy(llvm_context()),
  339. /*isVarArg=*/false)};
  340. case SemIR::ValueRepr::Dependent:
  341. CARBON_FATAL("Lowering function with dependent parameter type");
  342. case SemIR::ValueRepr::None:
  343. break;
  344. case SemIR::ValueRepr::Copy:
  345. case SemIR::ValueRepr::Custom:
  346. case SemIR::ValueRepr::Pointer:
  347. auto* param_types_to_add = get_llvm_type(value_rep.type_id);
  348. param_types.push_back(param_types_to_add);
  349. param_inst_ids.push_back(param_pattern_id);
  350. break;
  351. }
  352. }
  353. return {.type = llvm::FunctionType::get(function_return_type, param_types,
  354. /*isVarArg=*/false),
  355. .param_inst_ids = std::move(param_inst_ids),
  356. .return_type = return_type,
  357. .return_param_id = return_param_id};
  358. }
  359. auto FileContext::HandleReferencedCppFunction(clang::FunctionDecl* cpp_decl)
  360. -> void {
  361. // TODO: To support recursive inline functions, collect all calls to
  362. // `HandleTopLevelDecl()` in a custom `ASTConsumer` configured in the
  363. // `ASTUnit`, and replay them in lowering in the `CodeGenerator`. See
  364. // https://discord.com/channels/655572317891461132/768530752592805919/1370509111585935443
  365. clang::FunctionDecl* cpp_def = cpp_decl->getDefinition();
  366. if (!cpp_def) {
  367. return;
  368. }
  369. // Create the LLVM function (`CodeGenModule::GetOrCreateLLVMFunction()`)
  370. // so that code generation (`CodeGenModule::EmitGlobal()`) would see this
  371. // function name (`CodeGenModule::getMangledName()`), and will generate
  372. // its definition.
  373. llvm::Constant* function_address =
  374. cpp_code_generator_->GetAddrOfGlobal(CreateGlobalDecl(cpp_def),
  375. /*isForDefinition=*/false);
  376. CARBON_CHECK(function_address);
  377. }
  378. auto FileContext::HandleReferencedSpecificFunction(
  379. SemIR::FunctionId function_id, SemIR::SpecificId specific_id,
  380. llvm::Type* llvm_type) -> void {
  381. CARBON_CHECK(specific_id.has_value());
  382. // Add this specific function to a list of specific functions whose
  383. // definitions we need to emit.
  384. // TODO: Don't do this if we know this function is emitted as a
  385. // non-discardable symbol in the IR for some other file.
  386. context().AddPendingSpecificFunctionDefinition({.context = this,
  387. .function_id = function_id,
  388. .specific_id = specific_id});
  389. // Create a unique fingerprint for the function type.
  390. // For now, we compute the function type fingerprint only for specifics,
  391. // though we might need it for all functions in order to create a canonical
  392. // fingerprint across translation units.
  393. coalescer_.CreateTypeFingerprint(specific_id, llvm_type);
  394. }
  395. auto FileContext::BuildFunctionDecl(SemIR::FunctionId function_id,
  396. SemIR::SpecificId specific_id)
  397. -> llvm::Function* {
  398. const auto& function = sem_ir().functions().Get(function_id);
  399. // Don't lower generic functions. Note that associated functions in interfaces
  400. // have `Self` in scope, so are implicitly generic functions.
  401. if (function.generic_id.has_value() && !specific_id.has_value()) {
  402. return nullptr;
  403. }
  404. // Don't lower builtins.
  405. if (function.builtin_function_kind() != SemIR::BuiltinFunctionKind::None) {
  406. return nullptr;
  407. }
  408. // Don't lower C++ functions that use a thunk. We will never reference them
  409. // directly, and their signatures would not be expected to match the
  410. // corresponding C++ function anyway.
  411. if (function.special_function_kind ==
  412. SemIR::Function::SpecialFunctionKind::HasCppThunk) {
  413. return nullptr;
  414. }
  415. // TODO: Consider tracking whether the function has been used, and only
  416. // lowering it if it's needed.
  417. auto function_type_info = BuildFunctionTypeInfo(function, specific_id);
  418. // TODO: For an imported inline function, consider generating an
  419. // `available_externally` definition.
  420. auto linkage = specific_id.has_value() ? llvm::Function::LinkOnceODRLinkage
  421. : llvm::Function::ExternalLinkage;
  422. Mangler m(*this);
  423. std::string mangled_name = m.Mangle(function_id, specific_id);
  424. if (auto* existing = llvm_module().getFunction(mangled_name)) {
  425. // We might have already lowered this function while lowering a different
  426. // file. That's OK.
  427. // TODO: Check-fail or maybe diagnose if the two LLVM functions are not
  428. // produced by declarations of the same Carbon function. Name collisions
  429. // between non-private members of the same library should have been
  430. // diagnosed by check if detected, but it's not clear that check will always
  431. // be able to see this problem. In theory, name collisions could also occur
  432. // due to fingerprint collision.
  433. return existing;
  434. }
  435. // If this is a C++ function, tell Clang that we referenced it.
  436. if (auto clang_decl_id = sem_ir().functions().Get(function_id).clang_decl_id;
  437. clang_decl_id.has_value()) {
  438. CARBON_CHECK(!specific_id.has_value(),
  439. "Specific functions cannot have C++ definitions");
  440. HandleReferencedCppFunction(
  441. sem_ir().clang_decls().Get(clang_decl_id).key.decl->getAsFunction());
  442. // TODO: Check that the signature and mangling generated by Clang and the
  443. // one we generated are the same.
  444. }
  445. // If this is a specific function, we may need to do additional work to emit
  446. // its definition.
  447. if (specific_id.has_value()) {
  448. HandleReferencedSpecificFunction(function_id, specific_id,
  449. function_type_info.type);
  450. }
  451. auto* llvm_function = llvm::Function::Create(function_type_info.type, linkage,
  452. mangled_name, llvm_module());
  453. CARBON_CHECK(llvm_function->getName() == mangled_name,
  454. "Mangled name collision: {0}", mangled_name);
  455. // Set up parameters and the return slot.
  456. for (auto [inst_id, arg] : llvm::zip_equal(function_type_info.param_inst_ids,
  457. llvm_function->args())) {
  458. auto name_id = SemIR::NameId::None;
  459. if (inst_id == function_type_info.return_param_id) {
  460. name_id = SemIR::NameId::ReturnSlot;
  461. arg.addAttr(llvm::Attribute::getWithStructRetType(
  462. llvm_context(), function_type_info.return_type));
  463. } else {
  464. name_id = SemIR::GetPrettyNameFromPatternId(sem_ir(), inst_id);
  465. }
  466. arg.setName(sem_ir().names().GetIRBaseName(name_id));
  467. }
  468. return llvm_function;
  469. }
  470. // Find the file and function ID describing the definition of a function.
  471. static auto GetFunctionDefinition(const SemIR::File* decl_ir,
  472. SemIR::FunctionId function_id)
  473. -> std::pair<const SemIR::File*, SemIR::FunctionId> {
  474. // Find the file containing the definition.
  475. auto decl_id = decl_ir->functions().Get(function_id).definition_id;
  476. if (!decl_id.has_value()) {
  477. // Function is not defined.
  478. return {nullptr, SemIR::FunctionId::None};
  479. }
  480. // Find the function declaration this function was originally imported from.
  481. while (true) {
  482. auto import_inst_id = decl_ir->insts().GetImportSource(decl_id);
  483. if (!import_inst_id.has_value()) {
  484. break;
  485. }
  486. auto import_inst = decl_ir->import_ir_insts().Get(import_inst_id);
  487. decl_ir = decl_ir->import_irs().Get(import_inst.ir_id()).sem_ir;
  488. decl_id = import_inst.inst_id();
  489. }
  490. auto decl_ir_function_id =
  491. decl_ir->insts().GetAs<SemIR::FunctionDecl>(decl_id).function_id;
  492. return {decl_ir, decl_ir_function_id};
  493. }
  494. auto FileContext::BuildFunctionDefinition(SemIR::FunctionId function_id,
  495. SemIR::SpecificId specific_id)
  496. -> void {
  497. auto [definition_ir, definition_ir_function_id] =
  498. GetFunctionDefinition(&sem_ir(), function_id);
  499. if (!definition_ir) {
  500. // Function is probably defined in another file; not an error.
  501. return;
  502. }
  503. const auto& definition_function =
  504. definition_ir->functions().Get(definition_ir_function_id);
  505. BuildFunctionBody(
  506. function_id, specific_id, sem_ir().functions().Get(function_id),
  507. context().GetFileContext(definition_ir), definition_function);
  508. }
  509. auto FileContext::BuildFunctionBody(SemIR::FunctionId function_id,
  510. SemIR::SpecificId specific_id,
  511. const SemIR::Function& declaration_function,
  512. FileContext& definition_context,
  513. const SemIR::Function& definition_function)
  514. -> void {
  515. // On crash, report the function we were lowering.
  516. PrettyStackTraceFunction stack_trace_entry([&](llvm::raw_ostream& output) {
  517. SemIR::DiagnosticLocConverter converter(
  518. &context().tree_and_subtrees_getters(), &sem_ir());
  519. auto converted =
  520. converter.Convert(SemIR::LocId(declaration_function.definition_id),
  521. /*token_only=*/false);
  522. converted.loc.FormatLocation(output);
  523. output << "Lowering function ";
  524. if (specific_id.has_value()) {
  525. output << SemIR::StringifySpecific(sem_ir(), specific_id);
  526. } else {
  527. output << SemIR::StringifyConstantInst(
  528. sem_ir(), declaration_function.definition_id);
  529. }
  530. output << "\n";
  531. // Crash output has a tab indent; try to indent slightly past that.
  532. converted.loc.FormatSnippet(output, /*indent=*/10);
  533. });
  534. // Note that `definition_function` is potentially from a different SemIR::File
  535. // than the one that this file context represents. Any lowering done for
  536. // values derived from `definition_function` should use `definition_context`
  537. // instead of our context.
  538. const auto& definition_ir = definition_context.sem_ir();
  539. auto* llvm_function = GetFunction(function_id, specific_id);
  540. CARBON_CHECK(llvm_function,
  541. "Attempting to define function that was not declared");
  542. const auto& body_block_ids = definition_function.body_block_ids;
  543. CARBON_DCHECK(!body_block_ids.empty(),
  544. "No function body blocks found during lowering.");
  545. // Store which specifics were already lowered (with definitions) for each
  546. // generic.
  547. if (declaration_function.generic_id.has_value() && specific_id.has_value()) {
  548. // TODO: We should track this in the definition context instead so that we
  549. // can deduplicate specifics from different files.
  550. AddLoweredSpecificForGeneric(declaration_function.generic_id, specific_id);
  551. }
  552. FunctionContext function_lowering(
  553. definition_context, llvm_function, *this, specific_id,
  554. coalescer_.InitializeFingerprintForSpecific(specific_id),
  555. definition_context.BuildDISubprogram(definition_function, llvm_function),
  556. vlog_stream_);
  557. // Add parameters to locals.
  558. // TODO: This duplicates the mapping between sem_ir instructions and LLVM
  559. // function parameters that was already computed in BuildFunctionDecl.
  560. // We should only do that once.
  561. auto call_param_ids = definition_ir.inst_blocks().GetOrEmpty(
  562. definition_function.call_params_id);
  563. int param_index = 0;
  564. // TODO: Find a way to ensure this code and the function-call lowering use
  565. // the same parameter ordering.
  566. // Lowers the given parameter. Must be called in LLVM calling convention
  567. // parameter order.
  568. auto lower_param = [&](SemIR::InstId param_id) {
  569. // Get the value of the parameter from the function argument.
  570. llvm::Value* param_value;
  571. // The `type_id` of a parameter tracks the parameter's type.
  572. CARBON_CHECK(definition_ir.insts().Is<SemIR::AnyParam>(param_id));
  573. auto param_type = function_lowering.GetTypeIdOfInst(param_id);
  574. if (function_lowering.GetValueRepr(param_type).repr.kind !=
  575. SemIR::ValueRepr::None) {
  576. param_value = llvm_function->getArg(param_index);
  577. ++param_index;
  578. } else {
  579. param_value =
  580. llvm::PoisonValue::get(function_lowering.GetType(param_type));
  581. }
  582. // The value of the parameter is the value of the argument.
  583. function_lowering.SetLocal(param_id, param_value);
  584. };
  585. // Lower the return slot parameter.
  586. if (declaration_function.return_slot_pattern_id.has_value()) {
  587. auto call_param_id = call_param_ids.consume_back();
  588. // The LLVM calling convention has the return slot first rather than last.
  589. // Note that this queries whether there is a return slot at the LLVM level,
  590. // whereas `function.return_slot_pattern_id.has_value()` queries whether
  591. // there is a return slot at the SemIR level.
  592. if (SemIR::ReturnTypeInfo::ForFunction(sem_ir(), declaration_function,
  593. specific_id)
  594. .has_return_slot()) {
  595. lower_param(call_param_id);
  596. } else {
  597. // The return slot might still be mentioned as a destination location, but
  598. // shouldn't actually be used for anything, so we can use a poison value
  599. // for it.
  600. function_lowering.SetLocal(call_param_id,
  601. llvm::PoisonValue::get(llvm::PointerType::get(
  602. llvm_context(), /*AddressSpace=*/0)));
  603. }
  604. }
  605. // Lower the remaining call parameters.
  606. for (auto param_id : call_param_ids) {
  607. lower_param(param_id);
  608. }
  609. auto decl_block_id = SemIR::InstBlockId::None;
  610. if (function_id == sem_ir().global_ctor_id()) {
  611. decl_block_id = SemIR::InstBlockId::Empty;
  612. } else {
  613. decl_block_id =
  614. definition_ir.insts()
  615. .GetAs<SemIR::FunctionDecl>(definition_function.latest_decl_id())
  616. .decl_block_id;
  617. }
  // Lowers the contents of `block_id` into the corresponding LLVM block,
  // creating it if it doesn't already exist.
  620. auto lower_block = [&](SemIR::InstBlockId block_id) {
  621. CARBON_VLOG("Lowering {0}\n", block_id);
  622. auto* llvm_block = function_lowering.GetBlock(block_id);
  623. // Keep the LLVM blocks in lexical order.
  624. llvm_block->moveBefore(llvm_function->end());
  625. function_lowering.builder().SetInsertPoint(llvm_block);
  626. function_lowering.LowerBlockContents(block_id);
  627. };
  628. lower_block(decl_block_id);
  629. // If the decl block is empty, reuse it as the first body block. We don't do
  630. // this when the decl block is non-empty so that any branches back to the
  631. // first body block don't also re-execute the decl.
  632. llvm::BasicBlock* block = function_lowering.builder().GetInsertBlock();
  633. if (block->empty() &&
  634. function_lowering.TryToReuseBlock(body_block_ids.front(), block)) {
  635. // Reuse this block as the first block of the function body.
  636. } else {
  637. function_lowering.builder().CreateBr(
  638. function_lowering.GetBlock(body_block_ids.front()));
  639. }
  640. // Lower all blocks.
  641. for (auto block_id : body_block_ids) {
  642. lower_block(block_id);
  643. }
  644. // LLVM requires that the entry block has no predecessors.
  645. auto* entry_block = &llvm_function->getEntryBlock();
  646. if (entry_block->hasNPredecessorsOrMore(1)) {
  647. auto* new_entry_block = llvm::BasicBlock::Create(
  648. llvm_context(), "entry", llvm_function, entry_block);
  649. llvm::BranchInst::Create(entry_block, new_entry_block);
  650. }
  651. // Emit fingerprint accumulated inside the function context.
  652. function_lowering.EmitFinalFingerprint();
  653. }
  654. auto FileContext::BuildDISubprogram(const SemIR::Function& function,
  655. const llvm::Function* llvm_function)
  656. -> llvm::DISubprogram* {
  657. if (!context().di_compile_unit()) {
  658. return nullptr;
  659. }
  660. auto name = sem_ir().names().GetAsStringIfIdentifier(function.name_id);
  661. CARBON_CHECK(name, "Unexpected special name for function: {0}",
  662. function.name_id);
  663. auto loc = GetLocForDI(function.definition_id);
  664. // TODO: Add more details here, including real subroutine type (once type
  665. // information is built), etc.
  666. return context().di_builder().createFunction(
  667. context().di_compile_unit(), *name, llvm_function->getName(),
  668. /*File=*/context().di_builder().createFile(loc.filename, ""),
  669. /*LineNo=*/loc.line_number,
  670. context().di_builder().createSubroutineType(
  671. context().di_builder().getOrCreateTypeArray({})),
  672. /*ScopeLine=*/0, llvm::DINode::FlagZero,
  673. llvm::DISubprogram::SPFlagDefinition);
  674. }
  675. // BuildTypeForInst is used to construct types for FileContext::BuildType below.
  676. // Implementations return the LLVM type for the instruction. This first overload
  677. // is the fallback handler for non-type instructions.
  678. template <typename InstT>
  679. requires(InstT::Kind.is_type() == SemIR::InstIsType::Never)
  680. static auto BuildTypeForInst(FileContext& /*context*/, InstT inst)
  681. -> llvm::Type* {
  682. CARBON_FATAL("Cannot use inst as type: {0}", inst);
  683. }
  684. template <typename InstT>
  685. requires(InstT::Kind.is_symbolic_when_type())
  686. static auto BuildTypeForInst(FileContext& context, InstT /*inst*/)
  687. -> llvm::Type* {
  688. // Treat non-monomorphized symbolic types as opaque.
  689. return llvm::StructType::get(context.llvm_context());
  690. }
  691. static auto BuildTypeForInst(FileContext& context, SemIR::ArrayType inst)
  692. -> llvm::Type* {
  693. return llvm::ArrayType::get(
  694. context.GetType(context.sem_ir().types().GetTypeIdForTypeInstId(
  695. inst.element_type_inst_id)),
  696. *context.sem_ir().GetArrayBoundValue(inst.bound_id));
  697. }
  698. static auto BuildTypeForInst(FileContext& /*context*/, SemIR::AutoType inst)
  699. -> llvm::Type* {
  700. CARBON_FATAL("Unexpected builtin type in lowering: {0}", inst);
  701. }
  702. static auto BuildTypeForInst(FileContext& context, SemIR::BoolType /*inst*/)
  703. -> llvm::Type* {
  704. // TODO: We may want to have different representations for `bool` storage
  705. // (`i8`) versus for `bool` values (`i1`).
  706. return llvm::Type::getInt1Ty(context.llvm_context());
  707. }
  708. static auto BuildTypeForInst(FileContext& context, SemIR::ClassType inst)
  709. -> llvm::Type* {
  710. auto object_repr_id = context.sem_ir()
  711. .classes()
  712. .Get(inst.class_id)
  713. .GetObjectRepr(context.sem_ir(), inst.specific_id);
  714. return context.GetType(object_repr_id);
  715. }
  716. template <typename InstT>
  717. requires(SemIR::Internal::HasInstCategory<SemIR::AnyQualifiedType, InstT>)
  718. static auto BuildTypeForInst(FileContext& context, InstT inst) -> llvm::Type* {
  719. return context.GetType(
  720. context.sem_ir().types().GetTypeIdForTypeInstId(inst.inner_id));
  721. }
  722. static auto BuildTypeForInst(FileContext& context, SemIR::CustomLayoutType inst)
  723. -> llvm::Type* {
  724. auto layout = context.sem_ir().custom_layouts().Get(inst.layout_id);
  725. return llvm::ArrayType::get(llvm::Type::getInt8Ty(context.llvm_context()),
  726. layout[SemIR::CustomLayoutId::SizeIndex]);
  727. }
  728. static auto BuildTypeForInst(FileContext& context,
  729. SemIR::ImplWitnessAssociatedConstant inst)
  730. -> llvm::Type* {
  731. return context.GetType(inst.type_id);
  732. }
  733. static auto BuildTypeForInst(FileContext& /*context*/,
  734. SemIR::ErrorInst /*inst*/) -> llvm::Type* {
  735. // This is a complete type but uses of it should never be lowered.
  736. return nullptr;
  737. }
  738. static auto BuildTypeForInst(FileContext& context, SemIR::FloatType inst)
  739. -> llvm::Type* {
  740. return llvm::Type::getFloatingPointTy(context.llvm_context(),
  741. inst.float_kind.Semantics());
  742. }
  743. static auto BuildTypeForInst(FileContext& context, SemIR::IntType inst)
  744. -> llvm::Type* {
  745. auto width =
  746. context.sem_ir().insts().TryGetAs<SemIR::IntValue>(inst.bit_width_id);
  747. CARBON_CHECK(width, "Can't lower int type with symbolic width");
  748. return llvm::IntegerType::get(
  749. context.llvm_context(),
  750. context.sem_ir().ints().Get(width->int_id).getZExtValue());
  751. }
  752. static auto BuildTypeForInst(FileContext& context, SemIR::PointerType /*inst*/)
  753. -> llvm::Type* {
  754. return llvm::PointerType::get(context.llvm_context(), /*AddressSpace=*/0);
  755. }
  756. static auto BuildTypeForInst(FileContext& /*context*/,
  757. SemIR::PatternType /*inst*/) -> llvm::Type* {
  758. CARBON_FATAL("Unexpected pattern type in lowering");
  759. }
  760. static auto BuildTypeForInst(FileContext& context, SemIR::StructType inst)
  761. -> llvm::Type* {
  762. auto fields = context.sem_ir().struct_type_fields().Get(inst.fields_id);
  763. llvm::SmallVector<llvm::Type*> subtypes;
  764. subtypes.reserve(fields.size());
  765. for (auto field : fields) {
  766. subtypes.push_back(context.GetType(
  767. context.sem_ir().types().GetTypeIdForTypeInstId(field.type_inst_id)));
  768. }
  769. return llvm::StructType::get(context.llvm_context(), subtypes);
  770. }
  771. static auto BuildTypeForInst(FileContext& context, SemIR::TupleType inst)
  772. -> llvm::Type* {
  773. // TODO: Investigate special-casing handling of empty tuples so that they
  774. // can be collectively replaced with LLVM's void, particularly around
  775. // function returns. LLVM doesn't allow declaring variables with a void
  776. // type, so that may require significant special casing.
  777. auto elements = context.sem_ir().inst_blocks().Get(inst.type_elements_id);
  778. llvm::SmallVector<llvm::Type*> subtypes;
  779. subtypes.reserve(elements.size());
  780. for (auto type_id : context.sem_ir().types().GetBlockAsTypeIds(elements)) {
  781. subtypes.push_back(context.GetType(type_id));
  782. }
  783. return llvm::StructType::get(context.llvm_context(), subtypes);
  784. }
  785. static auto BuildTypeForInst(FileContext& context, SemIR::TypeType /*inst*/)
  786. -> llvm::Type* {
  787. return context.GetTypeType();
  788. }
  789. static auto BuildTypeForInst(FileContext& context, SemIR::VtableType /*inst*/)
  790. -> llvm::Type* {
  791. return llvm::Type::getVoidTy(context.llvm_context());
  792. }
  793. static auto BuildTypeForInst(FileContext& context,
  794. SemIR::SpecificFunctionType /*inst*/)
  795. -> llvm::Type* {
  796. return llvm::PointerType::get(context.llvm_context(), 0);
  797. }
  798. template <typename InstT>
  799. requires(InstT::Kind
  800. .template IsAnyOf<SemIR::BoundMethodType, SemIR::CharLiteralType,
  801. SemIR::FloatLiteralType, SemIR::IntLiteralType,
  802. SemIR::NamespaceType, SemIR::WitnessType>())
  803. static auto BuildTypeForInst(FileContext& context, InstT /*inst*/)
  804. -> llvm::Type* {
  805. // Return an empty struct as a placeholder.
  806. return llvm::StructType::get(context.llvm_context());
  807. }
  808. template <typename InstT>
  809. requires(InstT::Kind.template IsAnyOf<
  810. SemIR::AssociatedEntityType, SemIR::CppOverloadSetType,
  811. SemIR::FacetType, SemIR::FunctionType,
  812. SemIR::FunctionTypeWithSelfType, SemIR::GenericClassType,
  813. SemIR::GenericInterfaceType, SemIR::GenericNamedConstraintType,
  814. SemIR::InstType, SemIR::UnboundElementType, SemIR::WhereExpr>())
  815. static auto BuildTypeForInst(FileContext& context, InstT /*inst*/)
  816. -> llvm::Type* {
  817. // Return an empty struct as a placeholder.
  818. // TODO: Should we model an interface as a witness table, or an associated
  819. // entity as an index?
  820. return llvm::StructType::get(context.llvm_context());
  821. }
  822. auto FileContext::BuildType(SemIR::InstId inst_id) -> llvm::Type* {
  823. // Use overload resolution to select the implementation, producing compile
  824. // errors when BuildTypeForInst isn't defined for a given instruction.
  825. CARBON_KIND_SWITCH(sem_ir_->insts().Get(inst_id)) {
  826. #define CARBON_SEM_IR_INST_KIND(Name) \
  827. case CARBON_KIND(SemIR::Name inst): { \
  828. return BuildTypeForInst(*this, inst); \
  829. }
  830. #include "toolchain/sem_ir/inst_kind.def"
  831. }
  832. }
  833. auto FileContext::BuildGlobalVariableDecl(SemIR::VarStorage var_storage)
  834. -> llvm::GlobalVariable* {
  835. Mangler m(*this);
  836. auto mangled_name = m.MangleGlobalVariable(var_storage.pattern_id);
  837. auto linkage = llvm::GlobalVariable::ExternalLinkage;
  838. // If the variable doesn't have an externally-visible name, demote it to
  839. // internal linkage and invent a plausible name that shouldn't collide with
  840. // any of our real manglings.
  841. if (mangled_name.empty()) {
  842. linkage = llvm::GlobalVariable::InternalLinkage;
  843. if (inst_namer_) {
  844. mangled_name =
  845. ("var.anon" + inst_namer_->GetUnscopedNameFor(var_storage.pattern_id))
  846. .str();
  847. }
  848. }
  849. auto* type = GetType(var_storage.type_id);
  850. return new llvm::GlobalVariable(llvm_module(), type,
  851. /*isConstant=*/false, linkage,
  852. /*Initializer=*/nullptr, mangled_name);
  853. }
  854. auto FileContext::GetLocForDI(SemIR::InstId inst_id) -> Context::LocForDI {
  855. return context().GetLocForDI(
  856. GetAbsoluteNodeId(sem_ir_, SemIR::LocId(inst_id)).back());
  857. }
  858. auto FileContext::BuildVtable(const SemIR::Vtable& vtable,
  859. SemIR::SpecificId specific_id)
  860. -> llvm::GlobalVariable* {
  861. const auto& class_info = sem_ir().classes().Get(vtable.class_id);
  862. Mangler m(*this);
  863. std::string mangled_name = m.MangleVTable(class_info, specific_id);
  864. if (sem_ir()
  865. .insts()
  866. .GetImportSource(class_info.first_owning_decl_id)
  867. .has_value()) {
  868. // Emit a declaration of an imported vtable using a(n opaque) pointer type.
  869. // This doesn't have to match the definition that appears elsewhere, it'll
  870. // still get merged correctly.
  871. auto* gv = new llvm::GlobalVariable(
  872. llvm_module(),
  873. llvm::PointerType::get(llvm_context(), /*AddressSpace=*/0),
  874. /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr,
  875. mangled_name);
  876. gv->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
  877. return gv;
  878. }
  879. auto vtable_inst_block =
  880. sem_ir().inst_blocks().Get(vtable.virtual_functions_id);
  881. auto* entry_type = llvm::IntegerType::getInt32Ty(llvm_context());
  882. auto* table_type = llvm::ArrayType::get(entry_type, vtable_inst_block.size());
  883. auto* llvm_vtable = new llvm::GlobalVariable(
  884. llvm_module(), table_type, /*isConstant=*/true,
  885. llvm::GlobalValue::ExternalLinkage, nullptr, mangled_name);
  886. auto* i32_type = llvm::IntegerType::getInt32Ty(llvm_context());
  887. auto* i64_type = llvm::IntegerType::getInt64Ty(llvm_context());
  888. auto* vtable_const_int =
  889. llvm::ConstantExpr::getPtrToInt(llvm_vtable, i64_type);
  890. llvm::SmallVector<llvm::Constant*> vfuncs;
  891. vfuncs.reserve(vtable_inst_block.size());
  892. for (auto fn_decl_id : vtable_inst_block) {
  893. auto [_1, _2, fn_id, fn_specific_id] =
  894. DecomposeVirtualFunction(sem_ir(), fn_decl_id, specific_id);
  895. vfuncs.push_back(llvm::ConstantExpr::getTrunc(
  896. llvm::ConstantExpr::getSub(
  897. llvm::ConstantExpr::getPtrToInt(
  898. GetOrCreateFunction(fn_id, fn_specific_id), i64_type),
  899. vtable_const_int),
  900. i32_type));
  901. }
  902. llvm_vtable->setInitializer(llvm::ConstantArray::get(table_type, vfuncs));
  903. llvm_vtable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
  904. return llvm_vtable;
  905. }
  906. } // namespace Carbon::Lower