parser.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_TOOLCHAIN_PARSER_PARSER_H_
  5. #define CARBON_TOOLCHAIN_PARSER_PARSER_H_
  6. #include "common/check.h"
  7. #include "llvm/ADT/Optional.h"
  8. #include "toolchain/lexer/token_kind.h"
  9. #include "toolchain/lexer/tokenized_buffer.h"
  10. #include "toolchain/parser/parse_node_kind.h"
  11. #include "toolchain/parser/parse_tree.h"
  12. #include "toolchain/parser/parser_state.h"
  13. #include "toolchain/parser/precedence.h"
  14. namespace Carbon {
  15. // This parser uses a stack for state transitions. See parser_state.def for
  16. // state documentation.
  17. class Parser {
  18. public:
  19. // Parses the tokens into a parse tree, emitting any errors encountered.
  20. //
  21. // This is the entry point to the parser implementation.
  22. static auto Parse(TokenizedBuffer& tokens, TokenDiagnosticEmitter& emitter)
  23. -> ParseTree {
  24. ParseTree tree(tokens);
  25. Parser parser(tree, tokens, emitter);
  26. parser.Parse();
  27. return tree;
  28. }
  29. private:
  30. // Possible operator fixities for errors.
  31. enum class OperatorFixity { Prefix, Infix, Postfix };
  32. // Possible return values for FindListToken.
  33. enum class ListTokenKind { Comma, Close, CommaClose };
  34. // Supported kinds for HandleBraceExpression.
  35. enum class BraceExpressionKind { Unknown, Value, Type };
  36. // Supported kinds for HandlePattern.
  37. enum class PatternKind { Parameter, Variable };
  38. // Helper class for tracing state_stack_ on crashes.
  39. class PrettyStackTraceParseState;
  40. // Used to track state on state_stack_.
  41. struct StateStackEntry {
  42. StateStackEntry(ParserState state, PrecedenceGroup ambient_precedence,
  43. PrecedenceGroup lhs_precedence,
  44. TokenizedBuffer::Token token, int32_t subtree_start)
  45. : state(state),
  46. ambient_precedence(ambient_precedence),
  47. lhs_precedence(lhs_precedence),
  48. token(token),
  49. subtree_start(subtree_start) {}
  50. // The state.
  51. ParserState state;
  52. // Set to true to indicate that an error was found, and that contextual
  53. // error recovery may be needed.
  54. bool has_error = false;
  55. // Precedence information used by expression states in order to determine
  56. // operator precedence. The ambient_precedence deals with how the expression
  57. // should interact with outside context, while the lhs_precedence is
  58. // specific to the lhs of an operator expression.
  59. PrecedenceGroup ambient_precedence;
  60. PrecedenceGroup lhs_precedence;
  61. // A token providing context based on the subtree. This will typically be
  62. // the first token in the subtree, but may sometimes be a token within. It
  63. // will typically be used for the subtree's root node.
  64. TokenizedBuffer::Token token;
  65. // The offset within the ParseTree of the subtree start.
  66. int32_t subtree_start;
  67. };
  68. // We expect StateStackEntry to fit into 12 bytes:
  69. // state = 1 byte
  70. // has_error = 1 byte
  71. // ambient_precedence = 1 byte
  72. // lhs_precedence = 1 byte
  73. // token = 4 bytes
  74. // subtree_start = 4 bytes
  75. // If it becomes bigger, it'd be worth examining better packing; it should be
  76. // feasible to pack the 1-byte entries more tightly.
  77. static_assert(sizeof(StateStackEntry) == 12,
  78. "StateStackEntry has unexpected size!");
  79. Parser(ParseTree& tree, TokenizedBuffer& tokens,
  80. TokenDiagnosticEmitter& emitter);
  81. auto Parse() -> void;
  82. // Adds a node to the parse tree that has no children (a leaf).
  83. auto AddLeafNode(ParseNodeKind kind, TokenizedBuffer::Token token,
  84. bool has_error = false) -> void;
  85. // Adds a node to the parse tree that has children.
  86. auto AddNode(ParseNodeKind kind, TokenizedBuffer::Token token,
  87. int subtree_start, bool has_error) -> void;
  88. // Returns the current position and moves past it.
  89. auto Consume() -> TokenizedBuffer::Token { return *(position_++); }
  90. // Parses an open paren token, possibly diagnosing if necessary. Creates a
  91. // leaf parse node of the specified start kind. The default_token is used when
  92. // there's no open paren.
  93. auto ConsumeAndAddOpenParen(TokenizedBuffer::Token default_token,
  94. ParseNodeKind start_kind) -> void;
  95. // Parses a close paren token corresponding to the given open paren token,
  96. // possibly skipping forward and diagnosing if necessary. Creates a parse node
  97. // of the specified close kind.
  98. auto ConsumeAndAddCloseParen(StateStackEntry state, ParseNodeKind close_kind)
  99. -> void;
  100. // Composes `ConsumeIf` and `AddLeafNode`, returning false when ConsumeIf
  101. // fails.
  102. auto ConsumeAndAddLeafNodeIf(TokenKind token_kind, ParseNodeKind node_kind)
  103. -> bool;
  104. // Returns the current position and moves past it. Requires the token is the
  105. // expected kind.
  106. auto ConsumeChecked(TokenKind kind) -> TokenizedBuffer::Token;
  107. // If the current position's token matches this `Kind`, returns it and
  108. // advances to the next position. Otherwise returns an empty optional.
  109. auto ConsumeIf(TokenKind kind) -> llvm::Optional<TokenizedBuffer::Token>;
  110. // Find the next token of any of the given kinds at the current bracketing
  111. // level.
  112. auto FindNextOf(std::initializer_list<TokenKind> desired_kinds)
  113. -> llvm::Optional<TokenizedBuffer::Token>;
  114. // If the token is an opening symbol for a matched group, skips to the matched
  115. // closing symbol and returns true. Otherwise, returns false.
  116. auto SkipMatchingGroup() -> bool;
  117. // Skips forward to move past the likely end of a declaration or statement.
  118. //
  119. // Looks forward, skipping over any matched symbol groups, to find the next
  120. // position that is likely past the end of a declaration or statement. This
  121. // is a heuristic and should only be called when skipping past parse errors.
  122. //
  123. // The strategy for recognizing when we have likely passed the end of a
  124. // declaration or statement:
  125. // - If we get to a close curly brace, we likely ended the entire context.
  126. // - If we get to a semicolon, that should have ended the declaration or
  127. // statement.
  128. // - If we get to a new line from the `SkipRoot` token, but with the same or
  129. // less indentation, there is likely a missing semicolon. Continued
  130. // declarations or statements across multiple lines should be indented.
  131. //
  132. // Returns a semicolon token if one is the likely end.
  133. auto SkipPastLikelyEnd(TokenizedBuffer::Token skip_root)
  134. -> llvm::Optional<TokenizedBuffer::Token>;
  135. // Skip forward to the given token. Verifies that it is actually forward.
  136. auto SkipTo(TokenizedBuffer::Token t) -> void;
  137. // Returns true if the current token satisfies the lexical validity rules
  138. // for an infix operator.
  139. auto IsLexicallyValidInfixOperator() -> bool;
  140. // Determines whether the current trailing operator should be treated as
  141. // infix.
  142. auto IsTrailingOperatorInfix() -> bool;
  143. // Diagnoses whether the current token is not written properly for the given
  144. // fixity. For example, because mandatory whitespace is missing. Regardless of
  145. // whether there's an error, it's expected that parsing continues.
  146. auto DiagnoseOperatorFixity(OperatorFixity fixity) -> void;
  147. // If the current position is a `,`, consumes it, adds the provided token, and
  148. // returns `Comma`. Returns `Close` if the current position is close_token
  149. // (for example, `)`). `CommaClose` indicates it found both (for example,
  150. // `,)`). Handles cases where invalid tokens are present by advancing the
  151. // position, and may emit errors. Pass already_has_error in order to suppress
  152. // duplicate errors.
  153. auto ConsumeListToken(ParseNodeKind comma_kind, TokenKind close_kind,
  154. bool already_has_error) -> ListTokenKind;
  155. // Gets the kind of the next token to be consumed.
  156. auto PositionKind() const -> TokenKind {
  157. return tokens_->GetKind(*position_);
  158. }
  159. // Tests whether the next token to be consumed is of the specified kind.
  160. auto PositionIs(TokenKind kind) const -> bool {
  161. return PositionKind() == kind;
  162. }
  163. // Pops the state and keeps the value for inspection.
  164. auto PopState() -> StateStackEntry { return state_stack_.pop_back_val(); }
  165. // Pops the state and discards it.
  166. auto PopAndDiscardState() -> void { state_stack_.pop_back(); }
  167. // Pushes a new state with the current position for context.
  168. auto PushState(ParserState state) -> void {
  169. PushState(StateStackEntry(state, PrecedenceGroup::ForTopLevelExpression(),
  170. PrecedenceGroup::ForTopLevelExpression(),
  171. *position_, tree_->size()));
  172. }
  173. // Pushes a new expression state with specific precedence.
  174. auto PushStateForExpression(PrecedenceGroup ambient_precedence) -> void {
  175. PushState(StateStackEntry(ParserState::Expression(), ambient_precedence,
  176. PrecedenceGroup::ForTopLevelExpression(),
  177. *position_, tree_->size()));
  178. }
  179. // Pushes a new state with detailed precedence for expression resume states.
  180. auto PushStateForExpressionLoop(ParserState state,
  181. PrecedenceGroup ambient_precedence,
  182. PrecedenceGroup lhs_precedence) -> void {
  183. PushState(StateStackEntry(state, ambient_precedence, lhs_precedence,
  184. *position_, tree_->size()));
  185. }
  186. // Pushes a constructed state onto the stack.
  187. auto PushState(StateStackEntry state) -> void {
  188. state_stack_.push_back(state);
  189. CARBON_CHECK(state_stack_.size() < (1 << 20))
  190. << "Excessive stack size: likely infinite loop";
  191. }
  192. // Propagates an error up the state stack, to the parent state.
  193. auto ReturnErrorOnState() -> void { state_stack_.back().has_error = true; }
  194. // Returns the appropriate ParserState for the input kind.
  195. static auto BraceExpressionKindToParserState(BraceExpressionKind kind,
  196. ParserState type,
  197. ParserState value,
  198. ParserState unknown)
  199. -> ParserState;
  200. // Prints a diagnostic for brace expression syntax errors.
  201. auto HandleBraceExpressionParameterError(StateStackEntry state,
  202. BraceExpressionKind kind) -> void;
  203. // Handles BraceExpressionParameterAs(Type|Value|Unknown).
  204. auto HandleBraceExpressionParameter(BraceExpressionKind kind) -> void;
  205. // Handles BraceExpressionParameterAfterDesignatorAs(Type|Value|Unknown).
  206. auto HandleBraceExpressionParameterAfterDesignator(BraceExpressionKind kind)
  207. -> void;
  208. // Handles BraceExpressionParameterFinishAs(Type|Value|Unknown).
  209. auto HandleBraceExpressionParameterFinish(BraceExpressionKind kind) -> void;
  210. // Handles BraceExpressionFinishAs(Type|Value|Unknown).
  211. auto HandleBraceExpressionFinish(BraceExpressionKind kind) -> void;
  212. // Handles DesignatorAs.
  213. auto HandleDesignator(bool as_struct) -> void;
  214. // When handling errors before the start of the definition, treat it as a
  215. // declaration. Recover to a semicolon when it makes sense as a possible
  216. // function end, otherwise use the fn token for the error.
  217. auto HandleFunctionError(StateStackEntry state, bool skip_past_likely_end)
  218. -> void;
  219. // Handles ParenConditionAs(If|While)
  220. auto HandleParenCondition(ParseNodeKind start_kind, ParserState finish_state)
  221. -> void;
  222. // Handles ParenExpressionParameterFinishAs(Unknown|Tuple).
  223. auto HandleParenExpressionParameterFinish(bool as_tuple) -> void;
  224. // Handles PatternAs(FunctionParameter|Variable).
  225. auto HandlePattern(PatternKind pattern_kind) -> void;
  226. // Handles the `;` after a keyword statement.
  227. auto HandleStatementKeywordFinish(ParseNodeKind node_kind) -> void;
  228. // Handles VarAs(Semicolon|For).
  229. auto HandleVar(ParserState finish_state) -> void;
  230. // `clang-format` has a bug with spacing around `->` returns in macros. See
  231. // https://bugs.llvm.org/show_bug.cgi?id=48320 for details.
  232. #define CARBON_PARSER_STATE(Name) auto Handle##Name##State()->void;
  233. #include "toolchain/parser/parser_state.def"
  234. ParseTree* tree_;
  235. TokenizedBuffer* tokens_;
  236. TokenDiagnosticEmitter* emitter_;
  237. // The current position within the token buffer.
  238. TokenizedBuffer::TokenIterator position_;
  239. // The EndOfFile token.
  240. TokenizedBuffer::TokenIterator end_;
  241. llvm::SmallVector<StateStackEntry> state_stack_;
  242. };
  243. } // namespace Carbon
  244. #endif // CARBON_TOOLCHAIN_PARSER_PARSER_H_