parser2.h 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_TOOLCHAIN_PARSER_PARSER2_H_
  5. #define CARBON_TOOLCHAIN_PARSER_PARSER2_H_
  6. #include "llvm/ADT/Optional.h"
  7. #include "toolchain/lexer/token_kind.h"
  8. #include "toolchain/lexer/tokenized_buffer.h"
  9. #include "toolchain/parser/parse_node_kind.h"
  10. #include "toolchain/parser/parse_tree.h"
  11. #include "toolchain/parser/parser_state.h"
  12. namespace Carbon {
  13. class Parser2 {
  14. public:
  15. // Parses the tokens into a parse tree, emitting any errors encountered.
  16. //
  17. // This is the entry point to the parser implementation.
  18. static auto Parse(TokenizedBuffer& tokens, TokenDiagnosticEmitter& emitter)
  19. -> ParseTree {
  20. ParseTree tree(tokens);
  21. Parser2 parser(tree, tokens, emitter);
  22. parser.Parse();
  23. return tree;
  24. }
  25. private:
  26. // Supported kinds of patterns for HandlePattern.
  27. enum class PatternKind { Parameter, Variable };
  28. // Helper class for tracing state_stack_ on crashes.
  29. class PrettyStackTraceParseState;
  30. // Used to track state on state_stack_.
  31. struct StateStackEntry {
  32. StateStackEntry(ParserState state, TokenizedBuffer::Token token,
  33. int32_t subtree_start)
  34. : state(state), token(token), subtree_start(subtree_start) {}
  35. // The state.
  36. ParserState state;
  37. // A token providing context based on the subtree. This will typically be
  38. // the first token in the subtree, but may sometimes be a token within. It
  39. // will typically be used for the subtree's root node.
  40. TokenizedBuffer::Token token;
  41. // The offset within the ParseTree of the subtree start.
  42. int32_t subtree_start;
  43. // Set to true to indicate that an error was found, and that contextual
  44. // error recovery may be needed.
  45. bool has_error = false;
  46. };
  47. Parser2(ParseTree& tree, TokenizedBuffer& tokens,
  48. TokenDiagnosticEmitter& emitter);
  49. auto Parse() -> void;
  50. // Adds a node to the parse tree that is fully parsed, has no children
  51. // ("leaf"), and has a subsequent sibling.
  52. //
  53. // This sets up the next sibling of the node to be the next node in the parse
  54. // tree's preorder sequence.
  55. auto AddLeafNode(ParseNodeKind kind, TokenizedBuffer::Token token,
  56. bool has_error = false) -> void;
  57. auto AddNode(ParseNodeKind kind, TokenizedBuffer::Token token,
  58. int subtree_start, bool has_error) -> void;
  59. // Parses a close paren token corresponding to the given open paren token,
  60. // possibly skipping forward and diagnosing if necessary. Creates a parse node
  61. // of the specified kind if successful.
  62. auto ConsumeAndAddCloseParen(TokenizedBuffer::Token open_paren,
  63. ParseNodeKind close_kind) -> bool;
  64. // Composes `ConsumeIf` and `AddLeafNode`, returning false when ConsumeIf
  65. // fails.
  66. auto ConsumeAndAddLeafNodeIf(TokenKind token_kind, ParseNodeKind node_kind)
  67. -> bool;
  68. // If the current position's token matches this `Kind`, returns it and
  69. // advances to the next position. Otherwise returns an empty optional.
  70. auto ConsumeIf(TokenKind kind) -> llvm::Optional<TokenizedBuffer::Token>;
  71. // Find the next token of any of the given kinds at the current bracketing
  72. // level.
  73. auto FindNextOf(std::initializer_list<TokenKind> desired_kinds)
  74. -> llvm::Optional<TokenizedBuffer::Token>;
  75. // Gets the kind of the next token to be consumed.
  76. auto PositionKind() const -> TokenKind { return tokens_.GetKind(*position_); }
  77. // Tests whether the next token to be consumed is of the specified kind.
  78. auto PositionIs(TokenKind kind) const -> bool {
  79. return PositionKind() == kind;
  80. }
  81. // If the token is an opening symbol for a matched group, skips to the matched
  82. // closing symbol and returns true. Otherwise, returns false.
  83. auto SkipMatchingGroup() -> bool;
  84. // Skips forward to move past the likely end of a declaration or statement.
  85. //
  86. // Looks forward, skipping over any matched symbol groups, to find the next
  87. // position that is likely past the end of a declaration or statement. This
  88. // is a heuristic and should only be called when skipping past parse errors.
  89. //
  90. // The strategy for recognizing when we have likely passed the end of a
  91. // declaration or statement:
  92. // - If we get to a close curly brace, we likely ended the entire context.
  93. // - If we get to a semicolon, that should have ended the declaration or
  94. // statement.
  95. // - If we get to a new line from the `SkipRoot` token, but with the same or
  96. // less indentation, there is likely a missing semicolon. Continued
  97. // declarations or statements across multiple lines should be indented.
  98. //
  99. // Returns a semicolon token if one is the likely end.
  100. auto SkipPastLikelyEnd(TokenizedBuffer::Token skip_root)
  101. -> llvm::Optional<TokenizedBuffer::Token>;
  102. // Skip forward to the given token. Verifies that it is actually forward.
  103. auto SkipTo(TokenizedBuffer::Token t) -> void;
  104. // Pushes a new state with the current position for context.
  105. auto PushState(ParserState state) -> void {
  106. PushState(StateStackEntry(state, *position_, tree_.size()));
  107. }
  108. // Pushes a new state with the token for context.
  109. auto PushState(ParserState state, TokenizedBuffer::Token token) -> void {
  110. PushState(StateStackEntry(state, token, tree_.size()));
  111. }
  112. // Pushes a constructed state onto the stack.
  113. auto PushState(StateStackEntry state) -> void {
  114. state_stack_.push_back(state);
  115. }
  116. // Pops the state and keeps the value for inspection.
  117. auto PopState() -> StateStackEntry { return state_stack_.pop_back_val(); }
  118. // Pops the state and discards it.
  119. auto PopAndDiscardState() -> void { state_stack_.pop_back(); }
  120. // Propagates an error up the state stack, to the parent state.
  121. auto ReturnErrorOnState() -> void { state_stack_.back().has_error = true; }
  122. // Parses a primary expression, which is either a terminal portion of an
  123. // expression tree, such as an identifier or literal, or a parenthesized
  124. // expression.
  125. auto HandleExpressionFormPrimary() -> void;
  126. // When handling errors before the start of the definition, treat it as a
  127. // declaration. Recover to a semicolon when it makes sense as a possible
  128. // function end, otherwise use the fn token for the error.
  129. auto HandleFunctionError(StateStackEntry state, bool skip_past_likely_end)
  130. -> void;
  131. // Handles a code block in the context of a statement scope.
  132. auto HandleCodeBlock() -> void;
  133. // Handles parsing of a function parameter list, including commas and the
  134. // close paren.
  135. auto HandleFunctionParameterList(bool is_start) -> void;
  136. // Handles the `;` after a keyword statement.
  137. auto HandleKeywordStatementFinish(TokenKind token_kind,
  138. ParseNodeKind node_kind) -> void;
  139. // Handles the start of a pattern.
  140. // If the start of the pattern is invalid, it's the responsibility of the
  141. // outside context to advance past the pattern.
  142. auto HandlePatternStart(PatternKind pattern_kind) -> void;
  143. // Handles a single statement. While typically within a statement block, this
  144. // can also be used for error recovery where we expect a statement block and
  145. // are missing braces.
  146. auto HandleStatement(TokenKind token_kind) -> void;
  147. // Handles a `if` statement at the start `if` token.
  148. auto HandleStatementIf() -> void;
  149. // `clang-format` has a bug with spacing around `->` returns in macros. See
  150. // https://bugs.llvm.org/show_bug.cgi?id=48320 for details.
  151. #define CARBON_PARSER_STATE(Name) auto Handle##Name##State()->void;
  152. #include "toolchain/parser/parser_state.def"
  153. ParseTree& tree_;
  154. TokenizedBuffer& tokens_;
  155. TokenDiagnosticEmitter& emitter_;
  156. // The current position within the token buffer.
  157. TokenizedBuffer::TokenIterator position_;
  158. // The EndOfFile token.
  159. TokenizedBuffer::TokenIterator end_;
  160. llvm::SmallVector<StateStackEntry> state_stack_;
  161. };
  162. } // namespace Carbon
  163. #endif // CARBON_TOOLCHAIN_PARSER_PARSER2_H_