file_context.cpp 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/lower/file_context.h"
  5. #include <memory>
  6. #include <optional>
  7. #include <string>
  8. #include <utility>
  9. #include "clang/CodeGen/ModuleBuilder.h"
  10. #include "common/check.h"
  11. #include "common/pretty_stack_trace_function.h"
  12. #include "common/vlog.h"
  13. #include "llvm/ADT/STLExtras.h"
  14. #include "llvm/ADT/Sequence.h"
  15. #include "llvm/Linker/Linker.h"
  16. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  17. #include "llvm/Transforms/Utils/ModuleUtils.h"
  18. #include "toolchain/base/kind_switch.h"
  19. #include "toolchain/lower/clang_global_decl.h"
  20. #include "toolchain/lower/constant.h"
  21. #include "toolchain/lower/function_context.h"
  22. #include "toolchain/lower/mangler.h"
  23. #include "toolchain/lower/specific_coalescer.h"
  24. #include "toolchain/sem_ir/absolute_node_id.h"
  25. #include "toolchain/sem_ir/diagnostic_loc_converter.h"
  26. #include "toolchain/sem_ir/entry_point.h"
  27. #include "toolchain/sem_ir/expr_info.h"
  28. #include "toolchain/sem_ir/file.h"
  29. #include "toolchain/sem_ir/function.h"
  30. #include "toolchain/sem_ir/generic.h"
  31. #include "toolchain/sem_ir/ids.h"
  32. #include "toolchain/sem_ir/inst.h"
  33. #include "toolchain/sem_ir/inst_categories.h"
  34. #include "toolchain/sem_ir/inst_kind.h"
  35. #include "toolchain/sem_ir/pattern.h"
  36. #include "toolchain/sem_ir/stringify.h"
  37. #include "toolchain/sem_ir/typed_insts.h"
  38. namespace Carbon::Lower {
  39. FileContext::FileContext(Context& context, const SemIR::File& sem_ir,
  40. const SemIR::InstNamer* inst_namer,
  41. llvm::raw_ostream* vlog_stream)
  42. : context_(&context),
  43. sem_ir_(&sem_ir),
  44. inst_namer_(inst_namer),
  45. vlog_stream_(vlog_stream),
  46. functions_(LoweredFunctionStore::MakeForOverwrite(sem_ir.functions())),
  47. specific_functions_(sem_ir.specifics(), nullptr),
  48. types_(LoweredTypeStore::MakeWithExplicitSize(sem_ir.insts().size(),
  49. nullptr)),
  50. constants_(LoweredConstantStore::MakeWithExplicitSize(
  51. sem_ir.insts().size(), nullptr)),
  52. lowered_specifics_(sem_ir.generics(),
  53. llvm::SmallVector<SemIR::SpecificId>()),
  54. coalescer_(vlog_stream_, sem_ir.specifics()),
  55. vtables_(decltype(vtables_)::MakeForOverwrite(sem_ir.vtables())),
  56. specific_vtables_(sem_ir.specifics(), nullptr) {
  57. // Initialization that relies on invariants of the class.
  58. cpp_code_generator_ = CreateCppCodeGenerator();
  59. CARBON_CHECK(!sem_ir.has_errors(),
  60. "Generating LLVM IR from invalid SemIR::File is unsupported.");
  61. }
  62. // TODO: Move this to lower.cpp.
  63. auto FileContext::PrepareToLower() -> void {
  64. if (cpp_code_generator_) {
  65. // Clang code generation should not actually modify the AST, but isn't
  66. // const-correct.
  67. cpp_code_generator_->Initialize(
  68. const_cast<clang::ASTContext&>(clang_ast_unit()->getASTContext()));
  69. }
  70. // Lower all types that were required to be complete.
  71. for (auto type_id : sem_ir_->types().complete_types()) {
  72. if (type_id.index >= 0) {
  73. types_.Set(type_id, BuildType(sem_ir_->types().GetInstId(type_id)));
  74. }
  75. }
  76. // Lower function declarations.
  77. for (auto [id, _] : sem_ir_->functions().enumerate()) {
  78. functions_.Set(id, BuildFunctionDecl(id));
  79. }
  80. // TODO: Split vtable declaration creation from definition creation to avoid
  81. // redundant vtable definitions for imported vtables.
  82. for (const auto& [id, vtable] : sem_ir_->vtables().enumerate()) {
  83. const auto& class_info = sem_ir().classes().Get(vtable.class_id);
  84. // Vtables can't be generated for generics, only for their specifics - and
  85. // must be done lazily based on the use of those specifics.
  86. if (!class_info.generic_id.has_value()) {
  87. vtables_.Set(id, BuildVtable(vtable, SemIR::SpecificId::None));
  88. }
  89. }
  90. // Lower constants.
  91. LowerConstants(*this, constants_);
  92. }
  93. // TODO: Move this to lower.cpp.
  94. auto FileContext::LowerDefinitions() -> void {
  95. // Lower global variable definitions.
  96. // TODO: Storing both a `constants_` array and a separate `global_variables_`
  97. // map is redundant.
  98. for (auto inst_id :
  99. sem_ir().inst_blocks().Get(sem_ir().top_inst_block_id())) {
  100. // Only `VarStorage` indicates a global variable declaration in the
  101. // top instruction block.
  102. if (auto var = sem_ir().insts().TryGetAs<SemIR::VarStorage>(inst_id)) {
  103. // Get the global variable declaration. We created this when lowering the
  104. // constant unless the variable is unnamed, in which case we need to
  105. // create it now.
  106. llvm::GlobalVariable* llvm_var = nullptr;
  107. if (auto const_id = sem_ir().constant_values().Get(inst_id);
  108. const_id.is_constant()) {
  109. llvm_var = cast<llvm::GlobalVariable>(GetConstant(const_id, inst_id));
  110. } else {
  111. llvm_var = BuildGlobalVariableDecl(*var);
  112. }
  113. // Convert the declaration of this variable into a definition by adding an
  114. // initializer.
  115. global_variables_.Insert(inst_id, llvm_var);
  116. llvm_var->setInitializer(
  117. llvm::Constant::getNullValue(llvm_var->getValueType()));
  118. }
  119. }
  120. // Lower function definitions.
  121. for (auto [id, fn_info] : sem_ir_->functions().enumerate()) {
  122. // If we created a declaration and the function definition is not imported,
  123. // build a definition.
  124. if (functions_.Get(id) && fn_info.definition_id.has_value() &&
  125. !sem_ir().insts().GetImportSource(fn_info.definition_id).has_value()) {
  126. BuildFunctionDefinition(id);
  127. }
  128. }
  129. // Append `__global_init` to `llvm::global_ctors` to initialize global
  130. // variables.
  131. if (auto global_ctor_id = sem_ir().global_ctor_id();
  132. global_ctor_id.has_value()) {
  133. const auto& global_ctor = sem_ir().functions().Get(global_ctor_id);
  134. BuildFunctionBody(global_ctor_id, SemIR::SpecificId::None, global_ctor,
  135. *this, global_ctor);
  136. llvm::appendToGlobalCtors(llvm_module(),
  137. GetFunction(sem_ir().global_ctor_id()),
  138. /*Priority=*/0);
  139. }
  140. }
  141. auto FileContext::Finalize() -> void {
  142. if (cpp_code_generator_) {
  143. // Clang code generation should not actually modify the AST, but isn't
  144. // const-correct.
  145. cpp_code_generator_->HandleTranslationUnit(
  146. const_cast<clang::ASTContext&>(clang_ast_unit()->getASTContext()));
  147. bool link_error = llvm::Linker::linkModules(
  148. /*Dest=*/llvm_module(),
  149. /*Src=*/std::unique_ptr<llvm::Module>(
  150. cpp_code_generator_->ReleaseModule()));
  151. CARBON_CHECK(!link_error);
  152. }
  153. // Find equivalent specifics (from the same generic), replace all uses and
  154. // remove duplicately lowered function definitions.
  155. coalescer_.CoalesceEquivalentSpecifics(lowered_specifics_,
  156. specific_functions_);
  157. }
  158. auto FileContext::CreateCppCodeGenerator()
  159. -> std::unique_ptr<clang::CodeGenerator> {
  160. if (!clang_ast_unit()) {
  161. return nullptr;
  162. }
  163. RawStringOstream clang_module_name_stream;
  164. clang_module_name_stream << llvm_module().getName() << ".clang";
  165. // Do not emit Clang's name and version as the creator of the output file.
  166. cpp_code_gen_options_.EmitVersionIdentMetadata = false;
  167. return std::unique_ptr<clang::CodeGenerator>(clang::CreateLLVMCodeGen(
  168. clang_ast_unit()->getASTContext().getDiagnostics(),
  169. clang_module_name_stream.TakeStr(), context().file_system(),
  170. cpp_header_search_options_, cpp_preprocessor_options_,
  171. cpp_code_gen_options_, llvm_context()));
  172. }
  173. auto FileContext::GetConstant(SemIR::ConstantId const_id,
  174. SemIR::InstId use_inst_id) -> llvm::Value* {
  175. auto const_inst_id = sem_ir().constant_values().GetInstId(const_id);
  176. auto* const_value = constants_.Get(const_inst_id);
  177. // For value expressions and initializing expressions, the value produced by
  178. // a constant instruction is a value representation of the constant. For
  179. // initializing expressions, `FinishInit` will perform a copy if needed.
  180. switch (auto cat = SemIR::GetExprCategory(sem_ir(), const_inst_id)) {
  181. case SemIR::ExprCategory::Value:
  182. case SemIR::ExprCategory::Initializing:
  183. break;
  184. case SemIR::ExprCategory::DurableRef:
  185. case SemIR::ExprCategory::EphemeralRef:
  186. // Constant reference expressions lower to an address.
  187. return const_value;
  188. case SemIR::ExprCategory::NotExpr:
  189. case SemIR::ExprCategory::Error:
  190. case SemIR::ExprCategory::Mixed:
  191. CARBON_FATAL("Unexpected category {0} for lowered constant {1}", cat,
  192. sem_ir().insts().Get(const_inst_id));
  193. };
  194. auto value_rep = SemIR::ValueRepr::ForType(
  195. sem_ir(), sem_ir().insts().Get(const_inst_id).type_id());
  196. if (value_rep.kind != SemIR::ValueRepr::Pointer) {
  197. return const_value;
  198. }
  199. // The value representation is a pointer. Generate a variable to hold the
  200. // value, or find and reuse an existing one.
  201. if (auto result = global_variables().Lookup(const_inst_id)) {
  202. return result.value();
  203. }
  204. // Include both the name of the constant, if any, and the point of use in
  205. // the name of the variable.
  206. llvm::StringRef const_name;
  207. llvm::StringRef use_name;
  208. if (inst_namer_) {
  209. const_name = inst_namer_->GetUnscopedNameFor(const_inst_id);
  210. if (use_inst_id.has_value()) {
  211. use_name = inst_namer_->GetUnscopedNameFor(use_inst_id);
  212. }
  213. }
  214. // We always need to give the global a name even if the instruction namer
  215. // doesn't have one to use.
  216. if (const_name.empty()) {
  217. const_name = "const";
  218. }
  219. if (use_name.empty()) {
  220. use_name = "anon";
  221. }
  222. llvm::StringRef sep = (use_name[0] == '.') ? "" : ".";
  223. auto* global_variable = new llvm::GlobalVariable(
  224. llvm_module(), GetType(sem_ir().GetPointeeType(value_rep.type_id)),
  225. /*isConstant=*/true, llvm::GlobalVariable::InternalLinkage, const_value,
  226. const_name + sep + use_name);
  227. global_variables_.Insert(const_inst_id, global_variable);
  228. return global_variable;
  229. }
  230. auto FileContext::GetOrCreateFunction(SemIR::FunctionId function_id,
  231. SemIR::SpecificId specific_id)
  232. -> llvm::Function* {
  233. // If we have already lowered a declaration of this function, just return it.
  234. auto** result = GetFunctionAddr(function_id, specific_id);
  235. if (!*result) {
  236. *result = BuildFunctionDecl(function_id, specific_id);
  237. }
  238. return *result;
  239. }
  240. auto FileContext::BuildFunctionTypeInfo(const SemIR::Function& function,
  241. SemIR::SpecificId specific_id)
  242. -> FunctionTypeInfo {
  243. const auto return_info =
  244. SemIR::ReturnTypeInfo::ForFunction(sem_ir(), function, specific_id);
  245. if (!return_info.is_valid()) {
  246. // The return type has not been completed, create a trivial type instead.
  247. return {.type =
  248. llvm::FunctionType::get(llvm::Type::getVoidTy(llvm_context()),
  249. /*isVarArg=*/false)};
  250. }
  251. auto get_llvm_type = [&](SemIR::TypeId type_id) -> llvm::Type* {
  252. if (!type_id.has_value()) {
  253. return nullptr;
  254. }
  255. return GetType(type_id);
  256. };
  257. // TODO: expose the `Call` parameter patterns in `Function`, and use them here
  258. // instead of reconstructing them via the syntactic parameter lists.
  259. auto implicit_param_patterns =
  260. sem_ir().inst_blocks().GetOrEmpty(function.implicit_param_patterns_id);
  261. auto param_patterns =
  262. sem_ir().inst_blocks().GetOrEmpty(function.param_patterns_id);
  263. auto* return_type = get_llvm_type(return_info.type_id);
  264. llvm::SmallVector<llvm::Type*> param_types;
  265. // Compute the return type to use for the LLVM function. If the initializing
  266. // representation doesn't produce a value, set the return type to void.
  267. // TODO: For the `Run` entry point, remap return type to i32 if it doesn't
  268. // return a value.
  269. llvm::Type* function_return_type =
  270. (return_info.is_valid() &&
  271. return_info.init_repr.kind == SemIR::InitRepr::ByCopy)
  272. ? return_type
  273. : llvm::Type::getVoidTy(llvm_context());
  274. // TODO: Consider either storing `param_inst_ids` somewhere so that we can
  275. // reuse it from `BuildFunctionDefinition` and when building calls, or factor
  276. // out a mechanism to compute the mapping between parameters and arguments on
  277. // demand.
  278. llvm::SmallVector<SemIR::InstId> param_inst_ids;
  279. auto max_llvm_params = (return_info.has_return_slot() ? 1 : 0) +
  280. implicit_param_patterns.size() + param_patterns.size();
  281. param_types.reserve(max_llvm_params);
  282. param_inst_ids.reserve(max_llvm_params);
  283. auto return_param_id = SemIR::InstId::None;
  284. if (return_info.has_return_slot()) {
  285. param_types.push_back(
  286. llvm::PointerType::get(llvm_context(), /*AddressSpace=*/0));
  287. return_param_id = function.return_slot_pattern_id;
  288. param_inst_ids.push_back(return_param_id);
  289. }
  290. for (auto param_pattern_id : llvm::concat<const SemIR::InstId>(
  291. implicit_param_patterns, param_patterns)) {
  292. // TODO: Handle a general pattern here, rather than assuming that each
  293. // parameter pattern contains at most one binding.
  294. auto param_pattern_info = SemIR::Function::GetParamPatternInfoFromPatternId(
  295. sem_ir(), param_pattern_id);
  296. if (!param_pattern_info) {
  297. continue;
  298. }
  299. // TODO: Use a more general mechanism to determine if the binding is a
  300. // reference binding.
  301. if (param_pattern_info->var_pattern_id.has_value()) {
  302. param_types.push_back(
  303. llvm::PointerType::get(llvm_context(), /*AddressSpace=*/0));
  304. param_inst_ids.push_back(param_pattern_id);
  305. continue;
  306. }
  307. auto param_type_id = ExtractScrutineeType(
  308. sem_ir(), SemIR::GetTypeOfInstInSpecific(sem_ir(), specific_id,
  309. param_pattern_info->inst_id));
  310. CARBON_CHECK(
  311. !param_type_id.AsConstantId().is_symbolic(),
  312. "Found symbolic type id after resolution when lowering type {0}.",
  313. param_pattern_info->inst.type_id);
  314. switch (auto value_rep = SemIR::ValueRepr::ForType(sem_ir(), param_type_id);
  315. value_rep.kind) {
  316. case SemIR::ValueRepr::Unknown:
  317. // This parameter type is incomplete. Fallback to describing the
  318. // function type as `void()`.
  319. return {.type = llvm::FunctionType::get(
  320. llvm::Type::getVoidTy(llvm_context()),
  321. /*isVarArg=*/false)};
  322. case SemIR::ValueRepr::Dependent:
  323. CARBON_FATAL("Lowering function with dependent parameter type");
  324. case SemIR::ValueRepr::None:
  325. break;
  326. case SemIR::ValueRepr::Copy:
  327. case SemIR::ValueRepr::Custom:
  328. case SemIR::ValueRepr::Pointer:
  329. auto* param_types_to_add = get_llvm_type(value_rep.type_id);
  330. param_types.push_back(param_types_to_add);
  331. param_inst_ids.push_back(param_pattern_id);
  332. break;
  333. }
  334. }
  335. return {.type = llvm::FunctionType::get(function_return_type, param_types,
  336. /*isVarArg=*/false),
  337. .param_inst_ids = std::move(param_inst_ids),
  338. .return_type = return_type,
  339. .return_param_id = return_param_id};
  340. }
  341. auto FileContext::HandleReferencedCppFunction(clang::FunctionDecl* cpp_decl)
  342. -> void {
  343. // TODO: To support recursive inline functions, collect all calls to
  344. // `HandleTopLevelDecl()` in a custom `ASTConsumer` configured in the
  345. // `ASTUnit`, and replay them in lowering in the `CodeGenerator`. See
  346. // https://discord.com/channels/655572317891461132/768530752592805919/1370509111585935443
  347. clang::FunctionDecl* cpp_def = cpp_decl->getDefinition();
  348. if (!cpp_def) {
  349. return;
  350. }
  351. // Create the LLVM function (`CodeGenModule::GetOrCreateLLVMFunction()`)
  352. // so that code generation (`CodeGenModule::EmitGlobal()`) would see this
  353. // function name (`CodeGenModule::getMangledName()`), and will generate
  354. // its definition.
  355. llvm::Constant* function_address =
  356. cpp_code_generator_->GetAddrOfGlobal(CreateGlobalDecl(cpp_def),
  357. /*isForDefinition=*/false);
  358. CARBON_CHECK(function_address);
  359. // Emit the function code.
  360. cpp_code_generator_->HandleTopLevelDecl(clang::DeclGroupRef(cpp_def));
  361. }
  362. auto FileContext::HandleReferencedSpecificFunction(
  363. SemIR::FunctionId function_id, SemIR::SpecificId specific_id,
  364. llvm::Type* llvm_type) -> void {
  365. CARBON_CHECK(specific_id.has_value());
  366. // Add this specific function to a list of specific functions whose
  367. // definitions we need to emit.
  368. // TODO: Don't do this if we know this function is emitted as a
  369. // non-discardable symbol in the IR for some other file.
  370. context().AddPendingSpecificFunctionDefinition({.context = this,
  371. .function_id = function_id,
  372. .specific_id = specific_id});
  373. // Create a unique fingerprint for the function type.
  374. // For now, we compute the function type fingerprint only for specifics,
  375. // though we might need it for all functions in order to create a canonical
  376. // fingerprint across translation units.
  377. coalescer_.CreateTypeFingerprint(specific_id, llvm_type);
  378. }
  379. auto FileContext::BuildFunctionDecl(SemIR::FunctionId function_id,
  380. SemIR::SpecificId specific_id)
  381. -> llvm::Function* {
  382. const auto& function = sem_ir().functions().Get(function_id);
  383. // Don't lower generic functions. Note that associated functions in interfaces
  384. // have `Self` in scope, so are implicitly generic functions.
  385. if (function.generic_id.has_value() && !specific_id.has_value()) {
  386. return nullptr;
  387. }
  388. // Don't lower builtins.
  389. if (function.builtin_function_kind() != SemIR::BuiltinFunctionKind::None) {
  390. return nullptr;
  391. }
  392. // Don't lower C++ functions that use a thunk. We will never reference them
  393. // directly, and their signatures would not be expected to match the
  394. // corresponding C++ function anyway.
  395. if (function.special_function_kind ==
  396. SemIR::Function::SpecialFunctionKind::HasCppThunk) {
  397. // Make sure Clang emits this function.
  398. // TODO: This shouldn't be necessary: Clang should emit definitions of
  399. // functions that it emits calls to. But this doesn't currently work.
  400. auto clang_decl_id = sem_ir().functions().Get(function_id).clang_decl_id;
  401. HandleReferencedCppFunction(cast<clang::FunctionDecl>(
  402. sem_ir().clang_decls().Get(clang_decl_id).key.decl));
  403. return nullptr;
  404. }
  405. // TODO: Consider tracking whether the function has been used, and only
  406. // lowering it if it's needed.
  407. auto function_type_info = BuildFunctionTypeInfo(function, specific_id);
  408. // TODO: For an imported inline function, consider generating an
  409. // `available_externally` definition.
  410. auto linkage = specific_id.has_value() ? llvm::Function::LinkOnceODRLinkage
  411. : llvm::Function::ExternalLinkage;
  412. Mangler m(*this);
  413. std::string mangled_name = m.Mangle(function_id, specific_id);
  414. if (auto* existing = llvm_module().getFunction(mangled_name)) {
  415. // We might have already lowered this function while lowering a different
  416. // file. That's OK.
  417. // TODO: Check-fail or maybe diagnose if the two LLVM functions are not
  418. // produced by declarations of the same Carbon function. Name collisions
  419. // between non-private members of the same library should have been
  420. // diagnosed by check if detected, but it's not clear that check will always
  421. // be able to see this problem. In theory, name collisions could also occur
  422. // due to fingerprint collision.
  423. return existing;
  424. }
  425. // If this is a C++ function, tell Clang that we referenced it.
  426. if (auto clang_decl_id = sem_ir().functions().Get(function_id).clang_decl_id;
  427. clang_decl_id.has_value()) {
  428. CARBON_CHECK(!specific_id.has_value(),
  429. "Specific functions cannot have C++ definitions");
  430. HandleReferencedCppFunction(
  431. sem_ir().clang_decls().Get(clang_decl_id).key.decl->getAsFunction());
  432. // TODO: Check that the signature and mangling generated by Clang and the
  433. // one we generated are the same.
  434. }
  435. // If this is a specific function, we may need to do additional work to emit
  436. // its definition.
  437. if (specific_id.has_value()) {
  438. HandleReferencedSpecificFunction(function_id, specific_id,
  439. function_type_info.type);
  440. }
  441. auto* llvm_function = llvm::Function::Create(function_type_info.type, linkage,
  442. mangled_name, llvm_module());
  443. CARBON_CHECK(llvm_function->getName() == mangled_name,
  444. "Mangled name collision: {0}", mangled_name);
  445. // Set up parameters and the return slot.
  446. for (auto [inst_id, arg] : llvm::zip_equal(function_type_info.param_inst_ids,
  447. llvm_function->args())) {
  448. auto name_id = SemIR::NameId::None;
  449. if (inst_id == function_type_info.return_param_id) {
  450. name_id = SemIR::NameId::ReturnSlot;
  451. arg.addAttr(llvm::Attribute::getWithStructRetType(
  452. llvm_context(), function_type_info.return_type));
  453. } else {
  454. name_id = SemIR::GetPrettyNameFromPatternId(sem_ir(), inst_id);
  455. }
  456. arg.setName(sem_ir().names().GetIRBaseName(name_id));
  457. }
  458. return llvm_function;
  459. }
  460. // Find the file and function ID describing the definition of a function.
  461. static auto GetFunctionDefinition(const SemIR::File* decl_ir,
  462. SemIR::FunctionId function_id)
  463. -> std::pair<const SemIR::File*, SemIR::FunctionId> {
  464. // Find the file containing the definition.
  465. auto decl_id = decl_ir->functions().Get(function_id).definition_id;
  466. if (!decl_id.has_value()) {
  467. // Function is not defined.
  468. return {nullptr, SemIR::FunctionId::None};
  469. }
  470. // Find the function declaration this function was originally imported from.
  471. while (true) {
  472. auto import_inst_id = decl_ir->insts().GetImportSource(decl_id);
  473. if (!import_inst_id.has_value()) {
  474. break;
  475. }
  476. auto import_inst = decl_ir->import_ir_insts().Get(import_inst_id);
  477. decl_ir = decl_ir->import_irs().Get(import_inst.ir_id()).sem_ir;
  478. decl_id = import_inst.inst_id();
  479. }
  480. auto decl_ir_function_id =
  481. decl_ir->insts().GetAs<SemIR::FunctionDecl>(decl_id).function_id;
  482. return {decl_ir, decl_ir_function_id};
  483. }
  484. auto FileContext::BuildFunctionDefinition(SemIR::FunctionId function_id,
  485. SemIR::SpecificId specific_id)
  486. -> void {
  487. auto [definition_ir, definition_ir_function_id] =
  488. GetFunctionDefinition(&sem_ir(), function_id);
  489. if (!definition_ir) {
  490. // Function is probably defined in another file; not an error.
  491. return;
  492. }
  493. const auto& definition_function =
  494. definition_ir->functions().Get(definition_ir_function_id);
  495. BuildFunctionBody(
  496. function_id, specific_id, sem_ir().functions().Get(function_id),
  497. context().GetFileContext(definition_ir), definition_function);
  498. }
  499. auto FileContext::BuildFunctionBody(SemIR::FunctionId function_id,
  500. SemIR::SpecificId specific_id,
  501. const SemIR::Function& declaration_function,
  502. FileContext& definition_context,
  503. const SemIR::Function& definition_function)
  504. -> void {
  505. // On crash, report the function we were lowering.
  506. PrettyStackTraceFunction stack_trace_entry([&](llvm::raw_ostream& output) {
  507. SemIR::DiagnosticLocConverter converter(
  508. &context().tree_and_subtrees_getters(), &sem_ir());
  509. auto converted =
  510. converter.Convert(SemIR::LocId(declaration_function.definition_id),
  511. /*token_only=*/false);
  512. converted.loc.FormatLocation(output);
  513. output << "Lowering function ";
  514. if (specific_id.has_value()) {
  515. output << SemIR::StringifySpecific(sem_ir(), specific_id);
  516. } else {
  517. output << SemIR::StringifyConstantInst(
  518. sem_ir(), declaration_function.definition_id);
  519. }
  520. output << "\n";
  521. // Crash output has a tab indent; try to indent slightly past that.
  522. converted.loc.FormatSnippet(output, /*indent=*/10);
  523. });
  524. // Note that `definition_function` is potentially from a different SemIR::File
  525. // than the one that this file context represents. Any lowering done for
  526. // values derived from `definition_function` should use `definition_context`
  527. // instead of our context.
  528. const auto& definition_ir = definition_context.sem_ir();
  529. auto* llvm_function = GetFunction(function_id, specific_id);
  530. CARBON_CHECK(llvm_function,
  531. "Attempting to define function that was not declared");
  532. const auto& body_block_ids = definition_function.body_block_ids;
  533. CARBON_DCHECK(!body_block_ids.empty(),
  534. "No function body blocks found during lowering.");
  535. // Store which specifics were already lowered (with definitions) for each
  536. // generic.
  537. if (declaration_function.generic_id.has_value() && specific_id.has_value()) {
  538. // TODO: We should track this in the definition context instead so that we
  539. // can deduplicate specifics from different files.
  540. AddLoweredSpecificForGeneric(declaration_function.generic_id, specific_id);
  541. }
  542. FunctionContext function_lowering(
  543. definition_context, llvm_function, *this, specific_id,
  544. coalescer_.InitializeFingerprintForSpecific(specific_id),
  545. definition_context.BuildDISubprogram(definition_function, llvm_function),
  546. vlog_stream_);
  547. // Add parameters to locals.
  548. // TODO: This duplicates the mapping between sem_ir instructions and LLVM
  549. // function parameters that was already computed in BuildFunctionDecl.
  550. // We should only do that once.
  551. auto call_param_ids = definition_ir.inst_blocks().GetOrEmpty(
  552. definition_function.call_params_id);
  553. int param_index = 0;
  554. // TODO: Find a way to ensure this code and the function-call lowering use
  555. // the same parameter ordering.
  556. // Lowers the given parameter. Must be called in LLVM calling convention
  557. // parameter order.
  558. auto lower_param = [&](SemIR::InstId param_id) {
  559. // Get the value of the parameter from the function argument.
  560. llvm::Value* param_value;
  561. // The `type_id` of a parameter tracks the parameter's type.
  562. CARBON_CHECK(definition_ir.insts().Is<SemIR::AnyParam>(param_id));
  563. auto param_type = function_lowering.GetTypeIdOfInst(param_id);
  564. if (function_lowering.GetValueRepr(param_type).repr.kind !=
  565. SemIR::ValueRepr::None) {
  566. param_value = llvm_function->getArg(param_index);
  567. ++param_index;
  568. } else {
  569. param_value =
  570. llvm::PoisonValue::get(function_lowering.GetType(param_type));
  571. }
  572. // The value of the parameter is the value of the argument.
  573. function_lowering.SetLocal(param_id, param_value);
  574. };
  575. // Lower the return slot parameter.
  576. if (declaration_function.return_slot_pattern_id.has_value()) {
  577. auto call_param_id = call_param_ids.consume_back();
  578. // The LLVM calling convention has the return slot first rather than last.
  579. // Note that this queries whether there is a return slot at the LLVM level,
  580. // whereas `function.return_slot_pattern_id.has_value()` queries whether
  581. // there is a return slot at the SemIR level.
  582. if (SemIR::ReturnTypeInfo::ForFunction(sem_ir(), declaration_function,
  583. specific_id)
  584. .has_return_slot()) {
  585. lower_param(call_param_id);
  586. } else {
  587. // The return slot might still be mentioned as a destination location, but
  588. // shouldn't actually be used for anything, so we can use a poison value
  589. // for it.
  590. function_lowering.SetLocal(call_param_id,
  591. llvm::PoisonValue::get(llvm::PointerType::get(
  592. llvm_context(), /*AddressSpace=*/0)));
  593. }
  594. }
  595. // Lower the remaining call parameters.
  596. for (auto param_id : call_param_ids) {
  597. lower_param(param_id);
  598. }
  599. auto decl_block_id = SemIR::InstBlockId::None;
  600. if (function_id == sem_ir().global_ctor_id()) {
  601. decl_block_id = SemIR::InstBlockId::Empty;
  602. } else {
  603. decl_block_id =
  604. definition_ir.insts()
  605. .GetAs<SemIR::FunctionDecl>(definition_function.latest_decl_id())
  606. .decl_block_id;
  607. }
  // Lowers the contents of `block_id` into the corresponding LLVM block,
  // creating that block if it doesn't already exist.
  610. auto lower_block = [&](SemIR::InstBlockId block_id) {
  611. CARBON_VLOG("Lowering {0}\n", block_id);
  612. auto* llvm_block = function_lowering.GetBlock(block_id);
  613. // Keep the LLVM blocks in lexical order.
  614. llvm_block->moveBefore(llvm_function->end());
  615. function_lowering.builder().SetInsertPoint(llvm_block);
  616. function_lowering.LowerBlockContents(block_id);
  617. };
  618. lower_block(decl_block_id);
  619. // If the decl block is empty, reuse it as the first body block. We don't do
  620. // this when the decl block is non-empty so that any branches back to the
  621. // first body block don't also re-execute the decl.
  622. llvm::BasicBlock* block = function_lowering.builder().GetInsertBlock();
  623. if (block->empty() &&
  624. function_lowering.TryToReuseBlock(body_block_ids.front(), block)) {
  625. // Reuse this block as the first block of the function body.
  626. } else {
  627. function_lowering.builder().CreateBr(
  628. function_lowering.GetBlock(body_block_ids.front()));
  629. }
  630. // Lower all blocks.
  631. for (auto block_id : body_block_ids) {
  632. lower_block(block_id);
  633. }
  634. // LLVM requires that the entry block has no predecessors.
  635. auto* entry_block = &llvm_function->getEntryBlock();
  636. if (entry_block->hasNPredecessorsOrMore(1)) {
  637. auto* new_entry_block = llvm::BasicBlock::Create(
  638. llvm_context(), "entry", llvm_function, entry_block);
  639. llvm::BranchInst::Create(entry_block, new_entry_block);
  640. }
  641. // Emit fingerprint accumulated inside the function context.
  642. function_lowering.EmitFinalFingerprint();
  643. }
  644. auto FileContext::BuildDISubprogram(const SemIR::Function& function,
  645. const llvm::Function* llvm_function)
  646. -> llvm::DISubprogram* {
  647. if (!context().di_compile_unit()) {
  648. return nullptr;
  649. }
  650. auto name = sem_ir().names().GetAsStringIfIdentifier(function.name_id);
  651. CARBON_CHECK(name, "Unexpected special name for function: {0}",
  652. function.name_id);
  653. auto loc = GetLocForDI(function.definition_id);
  654. // TODO: Add more details here, including real subroutine type (once type
  655. // information is built), etc.
  656. return context().di_builder().createFunction(
  657. context().di_compile_unit(), *name, llvm_function->getName(),
  658. /*File=*/context().di_builder().createFile(loc.filename, ""),
  659. /*LineNo=*/loc.line_number,
  660. context().di_builder().createSubroutineType(
  661. context().di_builder().getOrCreateTypeArray({})),
  662. /*ScopeLine=*/0, llvm::DINode::FlagZero,
  663. llvm::DISubprogram::SPFlagDefinition);
  664. }
  665. // BuildTypeForInst is used to construct types for FileContext::BuildType below.
  666. // Implementations return the LLVM type for the instruction. This first overload
  667. // is the fallback handler for non-type instructions.
  668. template <typename InstT>
  669. requires(InstT::Kind.is_type() == SemIR::InstIsType::Never)
  670. static auto BuildTypeForInst(FileContext& /*context*/, InstT inst)
  671. -> llvm::Type* {
  672. CARBON_FATAL("Cannot use inst as type: {0}", inst);
  673. }
  674. template <typename InstT>
  675. requires(InstT::Kind.is_symbolic_when_type())
  676. static auto BuildTypeForInst(FileContext& context, InstT /*inst*/)
  677. -> llvm::Type* {
  678. // Treat non-monomorphized symbolic types as opaque.
  679. return llvm::StructType::get(context.llvm_context());
  680. }
  681. static auto BuildTypeForInst(FileContext& context, SemIR::ArrayType inst)
  682. -> llvm::Type* {
  683. return llvm::ArrayType::get(
  684. context.GetType(context.sem_ir().types().GetTypeIdForTypeInstId(
  685. inst.element_type_inst_id)),
  686. *context.sem_ir().GetArrayBoundValue(inst.bound_id));
  687. }
  688. static auto BuildTypeForInst(FileContext& /*context*/, SemIR::AutoType inst)
  689. -> llvm::Type* {
  690. CARBON_FATAL("Unexpected builtin type in lowering: {0}", inst);
  691. }
  692. static auto BuildTypeForInst(FileContext& context, SemIR::BoolType /*inst*/)
  693. -> llvm::Type* {
  694. // TODO: We may want to have different representations for `bool` storage
  695. // (`i8`) versus for `bool` values (`i1`).
  696. return llvm::Type::getInt1Ty(context.llvm_context());
  697. }
  698. static auto BuildTypeForInst(FileContext& context, SemIR::ClassType inst)
  699. -> llvm::Type* {
  700. auto object_repr_id = context.sem_ir()
  701. .classes()
  702. .Get(inst.class_id)
  703. .GetObjectRepr(context.sem_ir(), inst.specific_id);
  704. return context.GetType(object_repr_id);
  705. }
  706. template <typename InstT>
  707. requires(SemIR::Internal::HasInstCategory<SemIR::AnyQualifiedType, InstT>)
  708. static auto BuildTypeForInst(FileContext& context, InstT inst) -> llvm::Type* {
  709. return context.GetType(
  710. context.sem_ir().types().GetTypeIdForTypeInstId(inst.inner_id));
  711. }
  712. static auto BuildTypeForInst(FileContext& context, SemIR::CustomLayoutType inst)
  713. -> llvm::Type* {
  714. auto layout = context.sem_ir().custom_layouts().Get(inst.layout_id);
  715. return llvm::ArrayType::get(llvm::Type::getInt8Ty(context.llvm_context()),
  716. layout[SemIR::CustomLayoutId::SizeIndex]);
  717. }
  718. static auto BuildTypeForInst(FileContext& context,
  719. SemIR::ImplWitnessAssociatedConstant inst)
  720. -> llvm::Type* {
  721. return context.GetType(inst.type_id);
  722. }
  723. static auto BuildTypeForInst(FileContext& /*context*/,
  724. SemIR::ErrorInst /*inst*/) -> llvm::Type* {
  725. // This is a complete type but uses of it should never be lowered.
  726. return nullptr;
  727. }
  728. static auto BuildTypeForInst(FileContext& context, SemIR::FloatType inst)
  729. -> llvm::Type* {
  730. return llvm::Type::getFloatingPointTy(context.llvm_context(),
  731. inst.float_kind.Semantics());
  732. }
  733. static auto BuildTypeForInst(FileContext& context, SemIR::IntType inst)
  734. -> llvm::Type* {
  735. auto width =
  736. context.sem_ir().insts().TryGetAs<SemIR::IntValue>(inst.bit_width_id);
  737. CARBON_CHECK(width, "Can't lower int type with symbolic width");
  738. return llvm::IntegerType::get(
  739. context.llvm_context(),
  740. context.sem_ir().ints().Get(width->int_id).getZExtValue());
  741. }
  742. static auto BuildTypeForInst(FileContext& context, SemIR::PointerType /*inst*/)
  743. -> llvm::Type* {
  744. return llvm::PointerType::get(context.llvm_context(), /*AddressSpace=*/0);
  745. }
  746. static auto BuildTypeForInst(FileContext& /*context*/,
  747. SemIR::PatternType /*inst*/) -> llvm::Type* {
  748. CARBON_FATAL("Unexpected pattern type in lowering");
  749. }
  750. static auto BuildTypeForInst(FileContext& context, SemIR::StructType inst)
  751. -> llvm::Type* {
  752. auto fields = context.sem_ir().struct_type_fields().Get(inst.fields_id);
  753. llvm::SmallVector<llvm::Type*> subtypes;
  754. subtypes.reserve(fields.size());
  755. for (auto field : fields) {
  756. subtypes.push_back(context.GetType(
  757. context.sem_ir().types().GetTypeIdForTypeInstId(field.type_inst_id)));
  758. }
  759. return llvm::StructType::get(context.llvm_context(), subtypes);
  760. }
  761. static auto BuildTypeForInst(FileContext& context, SemIR::TupleType inst)
  762. -> llvm::Type* {
  763. // TODO: Investigate special-casing handling of empty tuples so that they
  764. // can be collectively replaced with LLVM's void, particularly around
  765. // function returns. LLVM doesn't allow declaring variables with a void
  766. // type, so that may require significant special casing.
  767. auto elements = context.sem_ir().inst_blocks().Get(inst.type_elements_id);
  768. llvm::SmallVector<llvm::Type*> subtypes;
  769. subtypes.reserve(elements.size());
  770. for (auto type_id : context.sem_ir().types().GetBlockAsTypeIds(elements)) {
  771. subtypes.push_back(context.GetType(type_id));
  772. }
  773. return llvm::StructType::get(context.llvm_context(), subtypes);
  774. }
  775. static auto BuildTypeForInst(FileContext& context, SemIR::TypeType /*inst*/)
  776. -> llvm::Type* {
  777. return context.GetTypeType();
  778. }
  779. static auto BuildTypeForInst(FileContext& context, SemIR::VtableType /*inst*/)
  780. -> llvm::Type* {
  781. return llvm::Type::getVoidTy(context.llvm_context());
  782. }
  783. static auto BuildTypeForInst(FileContext& context,
  784. SemIR::SpecificFunctionType /*inst*/)
  785. -> llvm::Type* {
  786. return llvm::PointerType::get(context.llvm_context(), 0);
  787. }
  788. template <typename InstT>
  789. requires(InstT::Kind
  790. .template IsAnyOf<SemIR::BoundMethodType, SemIR::CharLiteralType,
  791. SemIR::FloatLiteralType, SemIR::IntLiteralType,
  792. SemIR::NamespaceType, SemIR::WitnessType>())
  793. static auto BuildTypeForInst(FileContext& context, InstT /*inst*/)
  794. -> llvm::Type* {
  795. // Return an empty struct as a placeholder.
  796. return llvm::StructType::get(context.llvm_context());
  797. }
  798. template <typename InstT>
  799. requires(InstT::Kind.template IsAnyOf<
  800. SemIR::AssociatedEntityType, SemIR::CppOverloadSetType,
  801. SemIR::FacetType, SemIR::FunctionType,
  802. SemIR::FunctionTypeWithSelfType, SemIR::GenericClassType,
  803. SemIR::GenericInterfaceType, SemIR::InstType,
  804. SemIR::UnboundElementType, SemIR::WhereExpr>())
  805. static auto BuildTypeForInst(FileContext& context, InstT /*inst*/)
  806. -> llvm::Type* {
  807. // Return an empty struct as a placeholder.
  808. // TODO: Should we model an interface as a witness table, or an associated
  809. // entity as an index?
  810. return llvm::StructType::get(context.llvm_context());
  811. }
  812. auto FileContext::BuildType(SemIR::InstId inst_id) -> llvm::Type* {
  813. // Use overload resolution to select the implementation, producing compile
  814. // errors when BuildTypeForInst isn't defined for a given instruction.
  815. CARBON_KIND_SWITCH(sem_ir_->insts().Get(inst_id)) {
  816. #define CARBON_SEM_IR_INST_KIND(Name) \
  817. case CARBON_KIND(SemIR::Name inst): { \
  818. return BuildTypeForInst(*this, inst); \
  819. }
  820. #include "toolchain/sem_ir/inst_kind.def"
  821. }
  822. }
  823. auto FileContext::BuildGlobalVariableDecl(SemIR::VarStorage var_storage)
  824. -> llvm::GlobalVariable* {
  825. Mangler m(*this);
  826. auto mangled_name = m.MangleGlobalVariable(var_storage.pattern_id);
  827. auto linkage = llvm::GlobalVariable::ExternalLinkage;
  828. // If the variable doesn't have an externally-visible name, demote it to
  829. // internal linkage and invent a plausible name that shouldn't collide with
  830. // any of our real manglings.
  831. if (mangled_name.empty()) {
  832. linkage = llvm::GlobalVariable::InternalLinkage;
  833. if (inst_namer_) {
  834. mangled_name =
  835. ("var.anon" + inst_namer_->GetUnscopedNameFor(var_storage.pattern_id))
  836. .str();
  837. }
  838. }
  839. auto* type = GetType(var_storage.type_id);
  840. return new llvm::GlobalVariable(llvm_module(), type,
  841. /*isConstant=*/false, linkage,
  842. /*Initializer=*/nullptr, mangled_name);
  843. }
  844. auto FileContext::GetLocForDI(SemIR::InstId inst_id) -> Context::LocForDI {
  845. return context().GetLocForDI(
  846. GetAbsoluteNodeId(sem_ir_, SemIR::LocId(inst_id)).back());
  847. }
  848. auto FileContext::BuildVtable(const SemIR::Vtable& vtable,
  849. SemIR::SpecificId specific_id)
  850. -> llvm::GlobalVariable* {
  851. const auto& class_info = sem_ir().classes().Get(vtable.class_id);
  852. Mangler m(*this);
  853. std::string mangled_name = m.MangleVTable(class_info, specific_id);
  854. if (sem_ir()
  855. .insts()
  856. .GetImportSource(class_info.first_owning_decl_id)
  857. .has_value()) {
  858. // Emit a declaration of an imported vtable using a(n opaque) pointer type.
  859. // This doesn't have to match the definition that appears elsewhere, it'll
  860. // still get merged correctly.
  861. auto* gv = new llvm::GlobalVariable(
  862. llvm_module(),
  863. llvm::PointerType::get(llvm_context(), /*AddressSpace=*/0),
  864. /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr,
  865. mangled_name);
  866. gv->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
  867. return gv;
  868. }
  869. auto vtable_inst_block =
  870. sem_ir().inst_blocks().Get(vtable.virtual_functions_id);
  871. auto* entry_type = llvm::IntegerType::getInt32Ty(llvm_context());
  872. auto* table_type = llvm::ArrayType::get(entry_type, vtable_inst_block.size());
  873. auto* llvm_vtable = new llvm::GlobalVariable(
  874. llvm_module(), table_type, /*isConstant=*/true,
  875. llvm::GlobalValue::ExternalLinkage, nullptr, mangled_name);
  876. auto* i32_type = llvm::IntegerType::getInt32Ty(llvm_context());
  877. auto* i64_type = llvm::IntegerType::getInt64Ty(llvm_context());
  878. auto* vtable_const_int =
  879. llvm::ConstantExpr::getPtrToInt(llvm_vtable, i64_type);
  880. llvm::SmallVector<llvm::Constant*> vfuncs;
  881. vfuncs.reserve(vtable_inst_block.size());
  882. for (auto fn_decl_id : vtable_inst_block) {
  883. auto [_1, _2, fn_id, fn_specific_id] =
  884. DecomposeVirtualFunction(sem_ir(), fn_decl_id, specific_id);
  885. vfuncs.push_back(llvm::ConstantExpr::getTrunc(
  886. llvm::ConstantExpr::getSub(
  887. llvm::ConstantExpr::getPtrToInt(
  888. GetOrCreateFunction(fn_id, fn_specific_id), i64_type),
  889. vtable_const_int),
  890. i32_type));
  891. }
  892. llvm_vtable->setInitializer(llvm::ConstantArray::get(table_type, vfuncs));
  893. llvm_vtable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
  894. return llvm_vtable;
  895. }
  896. } // namespace Carbon::Lower