file.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_TOOLCHAIN_SEM_IR_FILE_H_
  5. #define CARBON_TOOLCHAIN_SEM_IR_FILE_H_
  #include <cstdint>
  #include <string>
  #include <utility>

  #include "llvm/ADT/APInt.h"
  #include "llvm/ADT/DenseMap.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/StringMap.h"
  #include "llvm/ADT/iterator_range.h"
  #include "llvm/Support/FormatVariadic.h"
  #include "toolchain/sem_ir/node.h"
  11. namespace Carbon::SemIR {
  12. // A function.
  13. struct Function : public Printable<Function> {
  14. auto Print(llvm::raw_ostream& out) const -> void {
  15. out << "{name: " << name_id << ", "
  16. << "param_refs: " << param_refs_id;
  17. if (return_type_id.is_valid()) {
  18. out << ", return_type: " << return_type_id;
  19. }
  20. if (return_slot_id.is_valid()) {
  21. out << ", return_slot: " << return_slot_id;
  22. }
  23. if (!body_block_ids.empty()) {
  24. out << llvm::formatv(
  25. ", body: [{0}]",
  26. llvm::make_range(body_block_ids.begin(), body_block_ids.end()));
  27. }
  28. out << "}";
  29. }
  30. // The function name.
  31. StringId name_id;
  32. // A block containing a single reference node per parameter.
  33. NodeBlockId param_refs_id;
  34. // The return type. This will be invalid if the return type wasn't specified.
  35. TypeId return_type_id;
  36. // The storage for the return value, which is a reference expression whose
  37. // type is the return type of the function. Will be invalid if the function
  38. // doesn't have a return slot. If this is valid, a call to the function is
  39. // expected to have an additional final argument corresponding to the return
  40. // slot.
  41. NodeId return_slot_id;
  42. // A list of the statically reachable code blocks in the body of the
  43. // function, in lexical order. The first block is the entry block. This will
  44. // be empty for declarations that don't have a visible definition.
  45. llvm::SmallVector<NodeBlockId> body_block_ids;
  46. };
  47. struct RealLiteral : public Printable<RealLiteral> {
  48. auto Print(llvm::raw_ostream& out) const -> void {
  49. out << "{mantissa: " << mantissa << ", exponent: " << exponent
  50. << ", is_decimal: " << is_decimal << "}";
  51. }
  52. llvm::APInt mantissa;
  53. llvm::APInt exponent;
  54. // If false, the value is mantissa * 2^exponent.
  55. // If true, the value is mantissa * 10^exponent.
  56. bool is_decimal;
  57. };
  58. // Provides semantic analysis on a Parse::Tree.
  59. class File : public Printable<File> {
  60. public:
  61. // Produces a file for the builtins.
  62. explicit File();
  63. // Starts a new file for Check::CheckParseTree. Builtins are required.
  64. explicit File(const File* builtins);
  65. // Verifies that invariants of the semantics IR hold.
  66. auto Verify() const -> ErrorOr<Success>;
  67. // Prints the full IR. Allow omitting builtins so that unrelated changes are
  68. // less likely to alternate test golden files.
  69. // TODO: In the future, the things to print may change, for example by adding
  70. // preludes. We may then want the ability to omit other things similar to
  71. // builtins.
  72. auto Print(llvm::raw_ostream& out, bool include_builtins) const -> void;
  73. auto Print(llvm::raw_ostream& out) const -> void {
  74. Print(out, /*include_builtins=*/false);
  75. }
  76. // Returns array bound value from the bound node.
  77. auto GetArrayBoundValue(NodeId bound_id) const -> uint64_t {
  78. return GetIntegerLiteral(GetNode(bound_id).GetAsIntegerLiteral())
  79. .getZExtValue();
  80. }
  81. // Returns the requested IR.
  82. auto GetCrossReferenceIR(CrossReferenceIRId xref_id) const -> const File& {
  83. return *cross_reference_irs_[xref_id.index];
  84. }
  85. // Adds a callable, returning an ID to reference it.
  86. auto AddFunction(Function function) -> FunctionId {
  87. FunctionId id(functions_.size());
  88. functions_.push_back(function);
  89. return id;
  90. }
  91. // Returns the requested callable.
  92. auto GetFunction(FunctionId function_id) const -> const Function& {
  93. return functions_[function_id.index];
  94. }
  95. // Returns the requested callable.
  96. auto GetFunction(FunctionId function_id) -> Function& {
  97. return functions_[function_id.index];
  98. }
  99. // Adds an integer literal, returning an ID to reference it.
  100. auto AddIntegerLiteral(llvm::APInt integer_literal) -> IntegerLiteralId {
  101. IntegerLiteralId id(integer_literals_.size());
  102. integer_literals_.push_back(integer_literal);
  103. return id;
  104. }
  105. // Returns the requested integer literal.
  106. auto GetIntegerLiteral(IntegerLiteralId int_id) const -> const llvm::APInt& {
  107. return integer_literals_[int_id.index];
  108. }
  109. // Adds a name scope, returning an ID to reference it.
  110. auto AddNameScope() -> NameScopeId {
  111. NameScopeId name_scopes_id(name_scopes_.size());
  112. name_scopes_.resize(name_scopes_id.index + 1);
  113. return name_scopes_id;
  114. }
  115. // Adds an entry to a name scope. Returns true on success, false on
  116. // duplicates.
  117. auto AddNameScopeEntry(NameScopeId scope_id, StringId name_id,
  118. NodeId target_id) -> bool {
  119. return name_scopes_[scope_id.index].insert({name_id, target_id}).second;
  120. }
  121. // Returns the requested name scope.
  122. auto GetNameScope(NameScopeId scope_id) const
  123. -> const llvm::DenseMap<StringId, NodeId>& {
  124. return name_scopes_[scope_id.index];
  125. }
  126. // Adds a node to a specified block, returning an ID to reference the node.
  127. auto AddNode(NodeBlockId block_id, Node node) -> NodeId {
  128. NodeId node_id(nodes_.size());
  129. nodes_.push_back(node);
  130. if (block_id != NodeBlockId::Unreachable) {
  131. node_blocks_[block_id.index].push_back(node_id);
  132. }
  133. return node_id;
  134. }
  135. // Overwrites a given node with a new value.
  136. auto ReplaceNode(NodeId node_id, Node node) -> void {
  137. nodes_[node_id.index] = node;
  138. }
  139. // Returns the requested node.
  140. auto GetNode(NodeId node_id) const -> Node { return nodes_[node_id.index]; }
  141. // Adds an empty node block, returning an ID to reference it.
  142. auto AddNodeBlock() -> NodeBlockId {
  143. NodeBlockId id(node_blocks_.size());
  144. node_blocks_.push_back({});
  145. return id;
  146. }
  147. // Returns the requested node block.
  148. auto GetNodeBlock(NodeBlockId block_id) const
  149. -> const llvm::SmallVector<NodeId>& {
  150. CARBON_CHECK(block_id != NodeBlockId::Unreachable);
  151. return node_blocks_[block_id.index];
  152. }
  153. // Returns the requested node block.
  154. auto GetNodeBlock(NodeBlockId block_id) -> llvm::SmallVector<NodeId>& {
  155. CARBON_CHECK(block_id != NodeBlockId::Unreachable);
  156. return node_blocks_[block_id.index];
  157. }
  158. // Adds a real literal, returning an ID to reference it.
  159. auto AddRealLiteral(RealLiteral real_literal) -> RealLiteralId {
  160. RealLiteralId id(real_literals_.size());
  161. real_literals_.push_back(real_literal);
  162. return id;
  163. }
  164. // Returns the requested real literal.
  165. auto GetRealLiteral(RealLiteralId int_id) const -> const RealLiteral& {
  166. return real_literals_[int_id.index];
  167. }
  168. // Adds an string, returning an ID to reference it.
  169. auto AddString(llvm::StringRef str) -> StringId {
  170. // Look up the string, or add it if it's new.
  171. StringId next_id(strings_.size());
  172. auto [it, added] = string_to_id_.insert({str, next_id});
  173. if (added) {
  174. // Update the reverse mapping from IDs to strings.
  175. CARBON_CHECK(it->second == next_id);
  176. strings_.push_back(it->first());
  177. }
  178. return it->second;
  179. }
  180. // Returns the requested string.
  181. auto GetString(StringId string_id) const -> llvm::StringRef {
  182. return strings_[string_id.index];
  183. }
  184. // Adds a type, returning an ID to reference it.
  185. auto AddType(NodeId node_id) -> TypeId {
  186. TypeId type_id(types_.size());
  187. types_.push_back(node_id);
  188. return type_id;
  189. }
  190. // Gets the node ID for a type. This doesn't handle TypeType or InvalidType in
  191. // order to avoid a check; callers that need that should use
  192. // GetTypeAllowBuiltinTypes.
  193. auto GetType(TypeId type_id) const -> NodeId {
  194. // Double-check it's not called with TypeType or InvalidType.
  195. CARBON_CHECK(type_id.index >= 0)
  196. << "Invalid argument for GetType: " << type_id;
  197. return types_[type_id.index];
  198. }
  199. auto GetTypeAllowBuiltinTypes(TypeId type_id) const -> NodeId {
  200. if (type_id == TypeId::TypeType) {
  201. return NodeId::BuiltinTypeType;
  202. } else if (type_id == TypeId::Error) {
  203. return NodeId::BuiltinError;
  204. } else {
  205. return GetType(type_id);
  206. }
  207. }
  208. // Adds an empty type block, returning an ID to reference it.
  209. auto AddTypeBlock() -> TypeBlockId {
  210. TypeBlockId id(type_blocks_.size());
  211. type_blocks_.push_back({});
  212. return id;
  213. }
  214. // Returns the requested type block.
  215. auto GetTypeBlock(TypeBlockId block_id) const
  216. -> const llvm::SmallVector<TypeId>& {
  217. return type_blocks_[block_id.index];
  218. }
  219. // Returns the requested type block.
  220. auto GetTypeBlock(TypeBlockId block_id) -> llvm::SmallVector<TypeId>& {
  221. return type_blocks_[block_id.index];
  222. }
  223. // Produces a string version of a type. If `in_type_context` is false, an
  224. // explicit conversion to type `type` will be added in cases where the type
  225. // expression would otherwise have a different type, such as a tuple or
  226. // struct type.
  227. auto StringifyType(TypeId type_id, bool in_type_context = false) const
  228. -> std::string;
  229. auto functions_size() const -> int { return functions_.size(); }
  230. auto nodes_size() const -> int { return nodes_.size(); }
  231. auto node_blocks_size() const -> int { return node_blocks_.size(); }
  232. auto types() const -> const llvm::SmallVector<NodeId>& { return types_; }
  233. // The node blocks, for direct mutation.
  234. auto node_blocks() -> llvm::SmallVector<llvm::SmallVector<NodeId>>& {
  235. return node_blocks_;
  236. }
  237. auto top_node_block_id() const -> NodeBlockId { return top_node_block_id_; }
  238. auto set_top_node_block_id(NodeBlockId block_id) -> void {
  239. top_node_block_id_ = block_id;
  240. }
  241. // Returns true if there were errors creating the semantics IR.
  242. auto has_errors() const -> bool { return has_errors_; }
  243. auto set_has_errors(bool has_errors) -> void { has_errors_ = has_errors; }
  244. private:
  245. bool has_errors_ = false;
  246. // Storage for callable objects.
  247. llvm::SmallVector<Function> functions_;
  248. // Related IRs. There will always be at least 2 entries, the builtin IR (used
  249. // for references of builtins) followed by the current IR (used for references
  250. // crossing node blocks).
  251. llvm::SmallVector<const File*> cross_reference_irs_;
  252. // Storage for integer literals.
  253. llvm::SmallVector<llvm::APInt> integer_literals_;
  254. // Storage for name scopes.
  255. llvm::SmallVector<llvm::DenseMap<StringId, NodeId>> name_scopes_;
  256. // Storage for real literals.
  257. llvm::SmallVector<RealLiteral> real_literals_;
  258. // Storage for strings. strings_ provides a list of allocated strings, while
  259. // string_to_id_ provides a mapping to identify strings.
  260. llvm::StringMap<StringId> string_to_id_;
  261. llvm::SmallVector<llvm::StringRef> strings_;
  262. // Nodes which correspond to in-use types. Stored separately for easy access
  263. // by lowering.
  264. llvm::SmallVector<NodeId> types_;
  265. // Storage for blocks within the IR. These reference entries in types_.
  266. llvm::SmallVector<llvm::SmallVector<TypeId>> type_blocks_;
  267. // All nodes. The first entries will always be cross-references to builtins,
  268. // at indices matching BuiltinKind ordering.
  269. llvm::SmallVector<Node> nodes_;
  270. // Storage for blocks within the IR. These reference entries in nodes_.
  271. llvm::SmallVector<llvm::SmallVector<NodeId>> node_blocks_;
  272. // The top node block ID.
  273. NodeBlockId top_node_block_id_ = NodeBlockId::Invalid;
  274. };
  // Classifies a semantics node by the kind of expression it represents, if
  // any. See /docs/design/values.md for details.
  enum class ExpressionCategory : int8_t {
    // Not an expression at all, so the node has no category.
    NotExpression,
    // A value expression.
    Value,
    // A durable reference expression: it denotes an object that outlives the
    // current full expression context.
    DurableReference,
    // An ephemeral reference expression: it denotes an object that does not
    // outlive the current full expression context.
    EphemeralReference,
    // An initializing expression: it describes how to initialize an object.
    Initializing,
  };
  293. // Returns the expression category for a node.
  294. auto GetExpressionCategory(const File& file, NodeId node_id)
  295. -> ExpressionCategory;
  296. // The value representation to use when passing by value.
  297. struct ValueRepresentation {
  298. enum Kind : int8_t {
  299. // The type has no value representation. This is used for empty types, such
  300. // as `()`, where there is no value.
  301. None,
  302. // The value representation is a copy of the value. On call boundaries, the
  303. // value itself will be passed. `type` is the value type.
  304. // TODO: `type` should be `const`-qualified, but is currently not.
  305. Copy,
  306. // The value representation is a pointer to an object. When used as a
  307. // parameter, the argument is a reference expression. `type` is the pointee
  308. // type.
  309. // TODO: `type` should be `const`-qualified, but is currently not.
  310. Pointer,
  311. // The value representation has been customized, and has the same behavior
  312. // as the value representation of some other type.
  313. // TODO: This is not implemented or used yet.
  314. Custom,
  315. };
  316. // The kind of value representation used by this type.
  317. Kind kind;
  318. // The type used to model the value representation.
  319. TypeId type;
  320. };
  321. // Returns information about the value representation to use for a type.
  322. auto GetValueRepresentation(const File& file, TypeId type_id)
  323. -> ValueRepresentation;
  // Describes how an object of some type is initialized when it is returned by
  // value.
  struct InitializingRepresentation {
    enum Kind : int8_t {
      // There is no initializing representation. Used for empty types, which
      // need no initialization.
      None,
      // The initializing expression yields a value, and that value is copied
      // into the object being initialized.
      ByCopy,
      // The initializing expression is given a location and initializes it as
      // a side effect of being evaluated.
      InPlace,
      // TODO: Consider adding a kind where the expression takes an advisory
      // location and returns a value plus an indicator of whether the location
      // was actually initialized.
    };

    // Which kind of initializing representation this type uses.
    Kind kind;

    // Returns whether a return slot is used when returning this type, which is
    // the case exactly when the kind is `InPlace`.
    auto has_return_slot() const -> bool { return kind == Kind::InPlace; }
  };
  345. // Returns information about the initializing representation to use for a type.
  346. auto GetInitializingRepresentation(const File& file, TypeId type_id)
  347. -> InitializingRepresentation;
  348. } // namespace Carbon::SemIR
  349. #endif // CARBON_TOOLCHAIN_SEM_IR_FILE_H_