// parser2.h (4.6 KB)
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_TOOLCHAIN_PARSER_PARSER2_H_
  5. #define CARBON_TOOLCHAIN_PARSER_PARSER2_H_
  6. #include "llvm/ADT/Optional.h"
  7. #include "toolchain/lexer/token_kind.h"
  8. #include "toolchain/lexer/tokenized_buffer.h"
  9. #include "toolchain/parser/parse_node_kind.h"
  10. #include "toolchain/parser/parse_tree.h"
  11. #include "toolchain/parser/parser_state.h"
  12. namespace Carbon {
  13. class Parser2 {
  14. public:
  15. // Parses the tokens into a parse tree, emitting any errors encountered.
  16. //
  17. // This is the entry point to the parser implementation.
  18. static auto Parse(TokenizedBuffer& tokens, TokenDiagnosticEmitter& emitter)
  19. -> ParseTree {
  20. ParseTree tree(tokens);
  21. Parser2 parser(tree, tokens, emitter);
  22. parser.Parse();
  23. return tree;
  24. }
  25. private:
  26. // Used to track state on state_stack_.
  27. struct StateStackEntry {
  28. // The state.
  29. ParserState state;
  30. // The token indicating the start of a tracked subtree.
  31. TokenizedBuffer::Token start_token;
  32. // The offset within the ParseTree of the subtree start.
  33. int32_t subtree_start;
  34. };
  35. Parser2(ParseTree& tree, TokenizedBuffer& tokens,
  36. TokenDiagnosticEmitter& emitter);
  37. auto Parse() -> void;
  38. // Adds a node to the parse tree that is fully parsed, has no children
  39. // ("leaf"), and has a subsequent sibling.
  40. //
  41. // This sets up the next sibling of the node to be the next node in the parse
  42. // tree's preorder sequence.
  43. auto AddLeafNode(ParseNodeKind kind, TokenizedBuffer::Token token,
  44. bool has_error = false) -> void;
  45. auto AddNode(ParseNodeKind kind, TokenizedBuffer::Token token,
  46. int subtree_start, bool has_error = false) -> void;
  47. // Composes `ConsumeIf` and `AddLeafNode`, returning false when ConsumeIf
  48. // fails.
  49. auto ConsumeAndAddLeafNodeIf(TokenKind token_kind, ParseNodeKind node_kind)
  50. -> bool;
  51. // If the current position's token matches this `Kind`, returns it and
  52. // advances to the next position. Otherwise returns an empty optional.
  53. auto ConsumeIf(TokenKind kind) -> llvm::Optional<TokenizedBuffer::Token>;
  54. // Gets the kind of the next token to be consumed.
  55. auto PositionKind() const -> TokenKind { return tokens_.GetKind(*position_); }
  56. // Tests whether the next token to be consumed is of the specified kind.
  57. auto PositionIs(TokenKind kind) const -> bool {
  58. return PositionKind() == kind;
  59. }
  60. // If the token is an opening symbol for a matched group, skips to the matched
  61. // closing symbol and returns true. Otherwise, returns false.
  62. auto SkipMatchingGroup() -> bool;
  63. // Skips forward to move past the likely end of a declaration or statement.
  64. //
  65. // Looks forward, skipping over any matched symbol groups, to find the next
  66. // position that is likely past the end of a declaration or statement. This
  67. // is a heuristic and should only be called when skipping past parse errors.
  68. //
  69. // The strategy for recognizing when we have likely passed the end of a
  70. // declaration or statement:
  71. // - If we get to a close curly brace, we likely ended the entire context.
  72. // - If we get to a semicolon, that should have ended the declaration or
  73. // statement.
  74. // - If we get to a new line from the `SkipRoot` token, but with the same or
  75. // less indentation, there is likely a missing semicolon. Continued
  76. // declarations or statements across multiple lines should be indented.
  77. //
  78. // Returns a semicolon token if one is the likely end.
  79. auto SkipPastLikelyEnd(TokenizedBuffer::Token skip_root)
  80. -> llvm::Optional<TokenizedBuffer::Token>;
  81. // Skip forward to the given token. Verifies that it is actually forward.
  82. auto SkipTo(TokenizedBuffer::Token t) -> void;
  83. auto PushState(ParserState state) -> void {
  84. state_stack_.push_back({state, *position_, tree_.size()});
  85. }
  86. // When handling errors before the start of the definition, treat it as a
  87. // declaration. Recover to a semicolon when it makes sense as a possible
  88. // function end, otherwise use the fn token for the error.
  89. auto HandleFunctionError(bool skip_past_likely_end) -> void;
  90. #define CARBON_PARSER_STATE(Name) auto Handle##Name##State()->void;
  91. #include "toolchain/parser/parser_state.def"
  92. ParseTree& tree_;
  93. TokenizedBuffer& tokens_;
  94. TokenDiagnosticEmitter& emitter_;
  95. // The current position within the token buffer.
  96. TokenizedBuffer::TokenIterator position_;
  97. // The EndOfFile token.
  98. TokenizedBuffer::TokenIterator end_;
  99. llvm::SmallVector<StateStackEntry> state_stack_;
  100. };
  101. } // namespace Carbon
  102. #endif // CARBON_TOOLCHAIN_PARSER_PARSER2_H_