context.h 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_TOOLCHAIN_CHECK_CONTEXT_H_
  5. #define CARBON_TOOLCHAIN_CHECK_CONTEXT_H_
  6. #include "llvm/ADT/DenseMap.h"
  7. #include "llvm/ADT/DenseSet.h"
  8. #include "llvm/ADT/FoldingSet.h"
  9. #include "llvm/ADT/SmallVector.h"
  10. #include "toolchain/check/decl_name_stack.h"
  11. #include "toolchain/check/decl_state.h"
  12. #include "toolchain/check/inst_block_stack.h"
  13. #include "toolchain/check/node_stack.h"
  14. #include "toolchain/parse/tree.h"
  15. #include "toolchain/sem_ir/file.h"
  16. #include "toolchain/sem_ir/ids.h"
  17. #include "toolchain/sem_ir/inst.h"
  18. namespace Carbon::Check {
  19. // Context and shared functionality for semantics handlers.
  20. class Context {
  21. public:
  22. using DiagnosticEmitter = Carbon::DiagnosticEmitter<Parse::NodeId>;
  23. using DiagnosticBuilder = DiagnosticEmitter::DiagnosticBuilder;
  24. // A scope in which `break` and `continue` can be used.
  25. struct BreakContinueScope {
  26. SemIR::InstBlockId break_target;
  27. SemIR::InstBlockId continue_target;
  28. };
  29. // A scope in which `return` can be used.
  30. struct ReturnScope {
  31. // The declaration from which we can return. Inside a function, this will
  32. // be a `FunctionDecl`.
  33. SemIR::InstId decl_id;
  34. // The value corresponding to the current `returned var`, if any. Will be
  35. // set and unset as `returned var`s are declared and go out of scope.
  36. SemIR::InstId returned_var = SemIR::InstId::Invalid;
  37. };
  38. // Stores references for work.
  39. explicit Context(const Lex::TokenizedBuffer& tokens,
  40. DiagnosticEmitter& emitter, const Parse::Tree& parse_tree,
  41. SemIR::File& sem_ir, llvm::raw_ostream* vlog_stream);
  42. // Marks an implementation TODO. Always returns false.
  43. auto TODO(Parse::NodeId parse_node, std::string label) -> bool;
  44. // Runs verification that the processing cleanly finished.
  45. auto VerifyOnFinish() -> void;
  46. // Adds an instruction to the current block, returning the produced ID.
  47. auto AddInst(SemIR::Inst inst) -> SemIR::InstId;
  48. // Adds an instruction to the constants block, returning the produced ID.
  49. auto AddConstantInst(SemIR::Inst inst) -> SemIR::InstId;
  50. // Pushes a parse tree node onto the stack, storing the SemIR::Inst as the
  51. // result.
  52. auto AddInstAndPush(Parse::NodeId parse_node, SemIR::Inst inst) -> void;
  53. // Adds a package's imports to name lookup, with all libraries together.
  54. // sem_irs will all be non-null; has_load_error must be used for any errors.
  55. auto AddPackageImports(Parse::NodeId import_node, IdentifierId package_id,
  56. llvm::ArrayRef<const SemIR::File*> sem_irs,
  57. bool has_load_error) -> void;
  58. // Adds a name to name lookup. Prints a diagnostic for name conflicts.
  59. auto AddNameToLookup(Parse::NodeId name_node, SemIR::NameId name_id,
  60. SemIR::InstId target_id) -> void;
  61. // Performs name lookup in a specified scope for a name appearing in a
  62. // declaration, returning the referenced instruction. If scope_id is invalid,
  63. // uses the current contextual scope.
  64. auto LookupNameInDecl(Parse::NodeId parse_node, SemIR::NameId name_id,
  65. SemIR::NameScopeId scope_id) -> SemIR::InstId;
  66. // Performs an unqualified name lookup, returning the referenced instruction.
  67. auto LookupUnqualifiedName(Parse::NodeId parse_node, SemIR::NameId name_id)
  68. -> SemIR::InstId;
  69. // Performs a qualified name lookup in a specified scope and in scopes that
  70. // it extends, returning the referenced instruction.
  71. auto LookupQualifiedName(Parse::NodeId parse_node, SemIR::NameId name_id,
  72. SemIR::NameScopeId scope_id, bool required = true)
  73. -> SemIR::InstId;
  74. // Prints a diagnostic for a duplicate name.
  75. auto DiagnoseDuplicateName(Parse::NodeId parse_node,
  76. SemIR::InstId prev_def_id) -> void;
  77. // Prints a diagnostic for a missing name.
  78. auto DiagnoseNameNotFound(Parse::NodeId parse_node, SemIR::NameId name_id)
  79. -> void;
  80. // Adds a note to a diagnostic explaining that a class is incomplete.
  81. auto NoteIncompleteClass(SemIR::ClassId class_id, DiagnosticBuilder& builder)
  82. -> void;
  83. // Pushes a new scope onto scope_stack_.
  84. auto PushScope(SemIR::InstId scope_inst_id = SemIR::InstId::Invalid,
  85. SemIR::NameScopeId scope_id = SemIR::NameScopeId::Invalid)
  86. -> void;
  87. // Pops the top scope from scope_stack_, cleaning up names from name_lookup_.
  88. auto PopScope() -> void;
  89. // Pops scopes until we return to the specified scope index.
  90. auto PopToScope(ScopeIndex index) -> void;
  91. // Returns the scope index associated with the current scope.
  92. auto current_scope_index() const -> ScopeIndex {
  93. return current_scope().index;
  94. }
  95. // Returns the name scope associated with the current lexical scope, if any.
  96. auto current_scope_id() const -> SemIR::NameScopeId {
  97. return current_scope().scope_id;
  98. }
  99. // Returns true if currently at file scope.
  100. auto at_file_scope() const -> bool { return scope_stack_.size() == 1; }
  101. // Returns true if the current scope is of the specified kind.
  102. template <typename InstT>
  103. auto CurrentScopeIs() -> bool {
  104. auto current_scope_inst_id = current_scope().scope_inst_id;
  105. if (!current_scope_inst_id.is_valid()) {
  106. return false;
  107. }
  108. return sem_ir_->insts().Get(current_scope_inst_id).kind() == InstT::Kind;
  109. }
  110. // Returns the current scope, if it is of the specified kind. Otherwise,
  111. // returns nullopt.
  112. template <typename InstT>
  113. auto GetCurrentScopeAs() -> std::optional<InstT> {
  114. auto current_scope_inst_id = current_scope().scope_inst_id;
  115. if (!current_scope_inst_id.is_valid()) {
  116. return std::nullopt;
  117. }
  118. return insts().Get(current_scope_inst_id).TryAs<InstT>();
  119. }
  120. // If there is no `returned var` in scope, sets the given instruction to be
  121. // the current `returned var` and returns an invalid instruction ID. If there
  122. // is already a `returned var`, returns it instead.
  123. auto SetReturnedVarOrGetExisting(SemIR::InstId inst_id) -> SemIR::InstId;
  124. // Follows NameRef instructions to find the value named by a given
  125. // instruction.
  126. auto FollowNameRefs(SemIR::InstId inst_id) -> SemIR::InstId;
  127. // Gets the constant value of the given instruction, if it has one.
  128. auto GetConstantValue(SemIR::InstId inst_id) -> SemIR::InstId;
  129. // Adds a `Branch` instruction branching to a new instruction block, and
  130. // returns the ID of the new block. All paths to the branch target must go
  131. // through the current block, though not necessarily through this branch.
  132. auto AddDominatedBlockAndBranch(Parse::NodeId parse_node)
  133. -> SemIR::InstBlockId;
  134. // Adds a `Branch` instruction branching to a new instruction block with a
  135. // value, and returns the ID of the new block. All paths to the branch target
  136. // must go through the current block.
  137. auto AddDominatedBlockAndBranchWithArg(Parse::NodeId parse_node,
  138. SemIR::InstId arg_id)
  139. -> SemIR::InstBlockId;
  140. // Adds a `BranchIf` instruction branching to a new instruction block, and
  141. // returns the ID of the new block. All paths to the branch target must go
  142. // through the current block.
  143. auto AddDominatedBlockAndBranchIf(Parse::NodeId parse_node,
  144. SemIR::InstId cond_id)
  145. -> SemIR::InstBlockId;
  146. // Handles recovergence of control flow. Adds branches from the top
  147. // `num_blocks` on the instruction block stack to a new block, pops the
  148. // existing blocks, and pushes the new block onto the instruction block stack.
  149. auto AddConvergenceBlockAndPush(Parse::NodeId parse_node, int num_blocks)
  150. -> void;
  151. // Handles recovergence of control flow with a result value. Adds branches
  152. // from the top few blocks on the instruction block stack to a new block, pops
  153. // the existing blocks, and pushes the new block onto the instruction block
  154. // stack. The number of blocks popped is the size of `block_args`, and the
  155. // corresponding result values are the elements of `block_args`. Returns an
  156. // instruction referring to the result value.
  157. auto AddConvergenceBlockWithArgAndPush(
  158. Parse::NodeId parse_node, std::initializer_list<SemIR::InstId> block_args)
  159. -> SemIR::InstId;
  160. // Add the current code block to the enclosing function.
  161. // TODO: The parse_node is taken for expressions, which can occur in
  162. // non-function contexts. This should be refactored to support non-function
  163. // contexts, and parse_node removed.
  164. auto AddCurrentCodeBlockToFunction(
  165. Parse::NodeId parse_node = Parse::NodeId::Invalid) -> void;
  166. // Returns whether the current position in the current block is reachable.
  167. auto is_current_position_reachable() -> bool;
  168. // Canonicalizes a type which is tracked as a single instruction.
  169. auto CanonicalizeType(SemIR::InstId inst_id) -> SemIR::TypeId;
  170. // Handles canonicalization of struct types. This may create a new struct type
  171. // when it has a new structure, or reference an existing struct type when it
  172. // duplicates a prior type.
  173. //
  174. // Individual struct type fields aren't canonicalized because they may have
  175. // name conflicts or other diagnostics during creation, which can use the
  176. // parse node.
  177. auto CanonicalizeStructType(Parse::NodeId parse_node,
  178. SemIR::InstBlockId refs_id) -> SemIR::TypeId;
  179. // Handles canonicalization of tuple types. This may create a new tuple type
  180. // if the `type_ids` doesn't match an existing tuple type.
  181. auto CanonicalizeTupleType(Parse::NodeId parse_node,
  182. llvm::ArrayRef<SemIR::TypeId> type_ids)
  183. -> SemIR::TypeId;
  184. // Attempts to complete the type `type_id`. Returns `true` if the type is
  185. // complete, or `false` if it could not be completed. A complete type has
  186. // known object and value representations.
  187. //
  188. // If the type is not complete, `diagnoser` is invoked to diagnose the issue,
  189. // if a `diagnoser` is provided. The builder it returns will be annotated to
  190. // describe the reason why the type is not complete.
  191. auto TryToCompleteType(
  192. SemIR::TypeId type_id,
  193. std::optional<llvm::function_ref<auto()->DiagnosticBuilder>> diagnoser =
  194. std::nullopt) -> bool;
  195. // Returns the type `type_id` as a complete type, or produces an incomplete
  196. // type error and returns an error type. This is a convenience wrapper around
  197. // TryToCompleteType.
  198. auto AsCompleteType(SemIR::TypeId type_id,
  199. llvm::function_ref<auto()->DiagnosticBuilder> diagnoser)
  200. -> SemIR::TypeId {
  201. return TryToCompleteType(type_id, diagnoser) ? type_id
  202. : SemIR::TypeId::Error;
  203. }
  204. // Gets a builtin type. The returned type will be complete.
  205. auto GetBuiltinType(SemIR::BuiltinKind kind) -> SemIR::TypeId;
  206. // Returns a pointer type whose pointee type is `pointee_type_id`.
  207. auto GetPointerType(Parse::NodeId parse_node, SemIR::TypeId pointee_type_id)
  208. -> SemIR::TypeId;
  209. // Removes any top-level `const` qualifiers from a type.
  210. auto GetUnqualifiedType(SemIR::TypeId type_id) -> SemIR::TypeId;
  211. // Starts handling parameters or arguments.
  212. auto ParamOrArgStart() -> void;
  213. // On a comma, pushes the entry. On return, the top of node_stack_ will be
  214. // start_kind.
  215. auto ParamOrArgComma() -> void;
  216. // Detects whether there's an entry to push from the end of a parameter or
  217. // argument list, and if so, moves it to the current parameter or argument
  218. // list. Does not pop the list. `start_kind` is the node kind at the start
  219. // of the parameter or argument list, and will be at the top of the parse node
  220. // stack when this function returns.
  221. auto ParamOrArgEndNoPop(Parse::NodeKind start_kind) -> void;
  222. // Pops the current parameter or argument list. Should only be called after
  223. // `ParamOrArgEndNoPop`.
  224. auto ParamOrArgPop() -> SemIR::InstBlockId;
  225. // Detects whether there's an entry to push. Pops and returns the argument
  226. // list. This is the same as `ParamOrArgEndNoPop` followed by `ParamOrArgPop`.
  227. auto ParamOrArgEnd(Parse::NodeKind start_kind) -> SemIR::InstBlockId;
  228. // Saves a parameter from the top block in node_stack_ to the top block in
  229. // params_or_args_stack_.
  230. auto ParamOrArgSave(SemIR::InstId inst_id) -> void {
  231. params_or_args_stack_.AddInstId(inst_id);
  232. }
  233. // Prints information for a stack dump.
  234. auto PrintForStackDump(llvm::raw_ostream& output) const -> void;
  235. // Get the Lex::TokenKind of a node for diagnostics.
  236. auto token_kind(Parse::NodeId parse_node) -> Lex::TokenKind {
  237. return tokens().GetKind(parse_tree().node_token(parse_node));
  238. }
  239. auto tokens() -> const Lex::TokenizedBuffer& { return *tokens_; }
  240. auto emitter() -> DiagnosticEmitter& { return *emitter_; }
  241. auto parse_tree() -> const Parse::Tree& { return *parse_tree_; }
  242. auto sem_ir() -> SemIR::File& { return *sem_ir_; }
  243. auto node_stack() -> NodeStack& { return node_stack_; }
  244. auto inst_block_stack() -> InstBlockStack& { return inst_block_stack_; }
  245. auto params_or_args_stack() -> InstBlockStack& {
  246. return params_or_args_stack_;
  247. }
  248. auto args_type_info_stack() -> InstBlockStack& {
  249. return args_type_info_stack_;
  250. }
  251. auto return_scope_stack() -> llvm::SmallVector<ReturnScope>& {
  252. return return_scope_stack_;
  253. }
  254. auto break_continue_stack() -> llvm::SmallVector<BreakContinueScope>& {
  255. return break_continue_stack_;
  256. }
  257. auto decl_name_stack() -> DeclNameStack& { return decl_name_stack_; }
  258. auto decl_state_stack() -> DeclStateStack& { return decl_state_stack_; }
  259. // Directly expose SemIR::File data accessors for brevity in calls.
  260. auto identifiers() -> StringStoreWrapper<IdentifierId>& {
  261. return sem_ir().identifiers();
  262. }
  263. auto ints() -> ValueStore<IntId>& { return sem_ir().ints(); }
  264. auto reals() -> ValueStore<RealId>& { return sem_ir().reals(); }
  265. auto string_literals() -> StringStoreWrapper<StringLiteralId>& {
  266. return sem_ir().string_literals();
  267. }
  268. auto functions() -> ValueStore<SemIR::FunctionId>& {
  269. return sem_ir().functions();
  270. }
  271. auto classes() -> ValueStore<SemIR::ClassId>& { return sem_ir().classes(); }
  272. auto interfaces() -> ValueStore<SemIR::InterfaceId>& {
  273. return sem_ir().interfaces();
  274. }
  275. auto cross_ref_irs() -> ValueStore<SemIR::CrossRefIRId>& {
  276. return sem_ir().cross_ref_irs();
  277. }
  278. auto names() -> SemIR::NameStoreWrapper { return sem_ir().names(); }
  279. auto name_scopes() -> SemIR::NameScopeStore& {
  280. return sem_ir().name_scopes();
  281. }
  282. auto types() -> ValueStore<SemIR::TypeId>& { return sem_ir().types(); }
  283. auto type_blocks() -> SemIR::BlockValueStore<SemIR::TypeBlockId>& {
  284. return sem_ir().type_blocks();
  285. }
  286. auto insts() -> SemIR::InstStore& { return sem_ir().insts(); }
  287. auto inst_blocks() -> SemIR::InstBlockStore& {
  288. return sem_ir().inst_blocks();
  289. }
  290. auto constants() -> SemIR::ConstantStore& { return sem_ir().constants(); }
  291. private:
  292. // A FoldingSet node for a type.
  293. class TypeNode : public llvm::FastFoldingSetNode {
  294. public:
  295. explicit TypeNode(const llvm::FoldingSetNodeID& node_id,
  296. SemIR::TypeId type_id)
  297. : llvm::FastFoldingSetNode(node_id), type_id_(type_id) {}
  298. auto type_id() -> SemIR::TypeId { return type_id_; }
  299. private:
  300. SemIR::TypeId type_id_;
  301. };
  302. // An entry in scope_stack_.
  303. struct ScopeStackEntry {
  304. // The sequential index of this scope entry within the file.
  305. ScopeIndex index;
  306. // The instruction associated with this entry, if any. This can be one of:
  307. //
  308. // - A `ClassDecl`, for a class definition scope.
  309. // - A `FunctionDecl`, for the outermost scope in a function
  310. // definition.
  311. // - Invalid, for any other scope.
  312. SemIR::InstId scope_inst_id;
  313. // The name scope associated with this entry, if any.
  314. SemIR::NameScopeId scope_id;
  315. // Names which are registered with name_lookup_, and will need to be
  316. // unregistered when the scope ends.
  317. llvm::DenseSet<SemIR::NameId> names;
  318. // Whether a `returned var` was introduced in this scope, and needs to be
  319. // unregistered when the scope ends.
  320. bool has_returned_var = false;
  321. // TODO: This likely needs to track things which need to be destructed.
  322. };
  323. // A lookup result in the lexical lookup table `name_lookup_`.
  324. struct LexicalLookupResult {
  325. // The instruction that was added to lookup.
  326. SemIR::InstId inst_id;
  327. // The scope in which the instruction was added.
  328. ScopeIndex scope_index;
  329. };
  330. // Forms a canonical type ID for a type. This function is given two
  331. // callbacks:
  332. //
  333. // `profile_type(canonical_id)` is called to build a fingerprint for this
  334. // type. The ID should be distinct for all distinct type values with the same
  335. // `kind`.
  336. //
  337. // `make_inst()` is called to obtain a `SemIR::InstId` that describes the
  338. // type. It is only called if the type does not already exist, so can be used
  339. // to lazily build the `SemIR::Inst`. `make_inst()` is not permitted to
  340. // directly or indirectly canonicalize any types.
  341. auto CanonicalizeTypeImpl(
  342. SemIR::InstKind kind,
  343. llvm::function_ref<bool(llvm::FoldingSetNodeID& canonical_id)>
  344. profile_type,
  345. llvm::function_ref<SemIR::InstId()> make_inst) -> SemIR::TypeId;
  346. // Forms a canonical type ID for a type. If the type is new, adds the
  347. // instruction to the current block.
  348. auto CanonicalizeTypeAndAddInstIfNew(SemIR::Inst inst) -> SemIR::TypeId;
  349. auto current_scope() -> ScopeStackEntry& { return scope_stack_.back(); }
  350. auto current_scope() const -> const ScopeStackEntry& {
  351. return scope_stack_.back();
  352. }
  353. // Tokens for getting data on literals.
  354. const Lex::TokenizedBuffer* tokens_;
  355. // Handles diagnostics.
  356. DiagnosticEmitter* emitter_;
  357. // The file's parse tree.
  358. const Parse::Tree* parse_tree_;
  359. // The SemIR::File being added to.
  360. SemIR::File* sem_ir_;
  361. // Whether to print verbose output.
  362. llvm::raw_ostream* vlog_stream_;
  363. // The stack during Build. Will contain file-level parse nodes on return.
  364. NodeStack node_stack_;
  365. // The stack of instruction blocks being used for general IR generation.
  366. InstBlockStack inst_block_stack_;
  367. // The stack of instruction blocks being used for per-element tracking of
  368. // instructions in parameter and argument instruction blocks. Versus
  369. // inst_block_stack_, an element will have 1 or more instructions in blocks in
  370. // inst_block_stack_, but only ever 1 instruction in blocks here.
  371. InstBlockStack params_or_args_stack_;
  372. // The stack of instruction blocks being used for type information while
  373. // processing arguments. This is used in parallel with params_or_args_stack_.
  374. // It's currently only used for struct literals, where we need to track names
  375. // for a type separate from the literal arguments.
  376. InstBlockStack args_type_info_stack_;
  377. // A stack of scopes from which we can `return`.
  378. llvm::SmallVector<ReturnScope> return_scope_stack_;
  379. // A stack of `break` and `continue` targets.
  380. llvm::SmallVector<BreakContinueScope> break_continue_stack_;
  381. // A stack for scope context.
  382. llvm::SmallVector<ScopeStackEntry> scope_stack_;
  383. // Information about non-lexical scopes. This is a subset of the entries and
  384. // the information in scope_stack_.
  385. llvm::SmallVector<std::pair<ScopeIndex, SemIR::NameScopeId>>
  386. non_lexical_scope_stack_;
  387. // The index of the next scope that will be pushed onto scope_stack_.
  388. ScopeIndex next_scope_index_ = ScopeIndex(0);
  389. // The stack used for qualified declaration name construction.
  390. DeclNameStack decl_name_stack_;
  391. // The stack of declarations that could have modifiers.
  392. DeclStateStack decl_state_stack_;
  393. // Maps identifiers to name lookup results. Values are a stack of name lookup
  394. // results in the ancestor scopes. This offers constant-time lookup of names,
  395. // regardless of how many scopes exist between the name declaration and
  396. // reference. The corresponding scope for each lookup result is tracked, so
  397. // that lexical lookup results can be interleaved with lookup results from
  398. // non-lexical scopes such as classes.
  399. //
  400. // Names which no longer have lookup results are erased.
  401. llvm::DenseMap<SemIR::NameId, llvm::SmallVector<LexicalLookupResult>>
  402. name_lookup_;
  403. // Cache of the mapping from instructions to types, to avoid recomputing the
  404. // folding set ID.
  405. llvm::DenseMap<SemIR::InstId, SemIR::TypeId> canonical_types_;
  406. // Tracks the canonical representation of types that have been defined.
  407. llvm::FoldingSet<TypeNode> canonical_type_nodes_;
  408. // Storage for the nodes in canonical_type_nodes_. This stores in pointers so
  409. // that FoldingSet can have stable pointers.
  410. llvm::SmallVector<std::unique_ptr<TypeNode>> type_node_storage_;
  411. };
  412. // Parse node handlers. Returns false for unrecoverable errors.
  413. #define CARBON_PARSE_NODE_KIND(Name) \
  414. auto Handle##Name(Context& context, Parse::NodeId parse_node) -> bool;
  415. #include "toolchain/parse/node_kind.def"
  416. } // namespace Carbon::Check
  417. #endif // CARBON_TOOLCHAIN_CHECK_CONTEXT_H_