parser2.cpp 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/parser/parser2.h"
  5. #include <cstdlib>
  6. #include <memory>
  7. #include "common/check.h"
  8. #include "llvm/ADT/Optional.h"
  9. #include "llvm/Support/PrettyStackTrace.h"
  10. #include "toolchain/lexer/token_kind.h"
  11. #include "toolchain/lexer/tokenized_buffer.h"
  12. #include "toolchain/parser/parse_node_kind.h"
  13. #include "toolchain/parser/parse_tree.h"
  14. namespace Carbon {
  15. class Parser2::PrettyStackTraceParseState : public llvm::PrettyStackTraceEntry {
  16. public:
  17. explicit PrettyStackTraceParseState(const Parser2* parser)
  18. : parser_(parser) {}
  19. ~PrettyStackTraceParseState() override = default;
  20. auto print(llvm::raw_ostream& output) const -> void override {
  21. output << "Parser stack:\n";
  22. for (int i = 0; i < static_cast<int>(parser_->state_stack_.size()); ++i) {
  23. const auto& entry = parser_->state_stack_[i];
  24. output << "\t" << i << ".\t" << entry.state << " @ " << entry.start_token
  25. << ":" << parser_->tokens_.GetKind(entry.start_token).Name()
  26. << "\n";
  27. }
  28. }
  29. private:
  30. const Parser2* parser_;
  31. };
  32. Parser2::Parser2(ParseTree& tree_arg, TokenizedBuffer& tokens_arg,
  33. TokenDiagnosticEmitter& emitter)
  34. : tree_(tree_arg),
  35. tokens_(tokens_arg),
  36. emitter_(emitter),
  37. position_(tokens_.tokens().begin()),
  38. end_(tokens_.tokens().end()) {
  39. CARBON_CHECK(position_ != end_) << "Empty TokenizedBuffer";
  40. --end_;
  41. CARBON_CHECK(tokens_.GetKind(*end_) == TokenKind::EndOfFile())
  42. << "TokenizedBuffer should end with EndOfFile, ended with "
  43. << tokens_.GetKind(*end_).Name();
  44. }
  45. auto Parser2::AddLeafNode(ParseNodeKind kind, TokenizedBuffer::Token token,
  46. bool has_error) -> void {
  47. tree_.node_impls_.push_back(
  48. ParseTree::NodeImpl(kind, has_error, token, /*subtree_size=*/1));
  49. if (has_error) {
  50. tree_.has_errors_ = true;
  51. }
  52. }
  53. auto Parser2::AddNode(ParseNodeKind kind, TokenizedBuffer::Token token,
  54. int subtree_start, bool has_error) -> void {
  55. int subtree_size = tree_.size() - subtree_start + 1;
  56. tree_.node_impls_.push_back(
  57. ParseTree::NodeImpl(kind, has_error, token, subtree_size));
  58. if (has_error) {
  59. tree_.has_errors_ = true;
  60. }
  61. }
  62. auto Parser2::ConsumeAndAddLeafNodeIf(TokenKind token_kind,
  63. ParseNodeKind node_kind) -> bool {
  64. auto token = ConsumeIf(token_kind);
  65. if (!token) {
  66. return false;
  67. }
  68. AddLeafNode(node_kind, *token);
  69. return true;
  70. }
  71. auto Parser2::ConsumeIf(TokenKind kind)
  72. -> llvm::Optional<TokenizedBuffer::Token> {
  73. if (!PositionIs(kind)) {
  74. return llvm::None;
  75. }
  76. auto token = *position_;
  77. ++position_;
  78. return token;
  79. }
  80. auto Parser2::Parse() -> void {
  81. #ifndef NDEBUG
  82. PrettyStackTraceParseState pretty_stack(this);
  83. #endif
  84. PushState(ParserState::Declaration());
  85. while (!state_stack_.empty()) {
  86. switch (state_stack_.back().state) {
  87. #define CARBON_PARSER_STATE(Name) \
  88. case ParserState::Name(): \
  89. Handle##Name##State(); \
  90. break;
  91. #include "toolchain/parser/parser_state.def"
  92. }
  93. }
  94. AddLeafNode(ParseNodeKind::FileEnd(), *position_);
  95. }
  96. auto Parser2::SkipMatchingGroup() -> bool {
  97. if (!PositionKind().IsOpeningSymbol()) {
  98. return false;
  99. }
  100. SkipTo(tokens_.GetMatchedClosingToken(*position_));
  101. ++position_;
  102. return true;
  103. }
  104. auto Parser2::SkipPastLikelyEnd(TokenizedBuffer::Token skip_root)
  105. -> llvm::Optional<TokenizedBuffer::Token> {
  106. CARBON_CHECK(position_ < end_);
  107. TokenizedBuffer::Line root_line = tokens_.GetLine(skip_root);
  108. int root_line_indent = tokens_.GetIndentColumnNumber(root_line);
  109. // We will keep scanning through tokens on the same line as the root or
  110. // lines with greater indentation than root's line.
  111. auto is_same_line_or_indent_greater_than_root =
  112. [&](TokenizedBuffer::Token t) {
  113. TokenizedBuffer::Line l = tokens_.GetLine(t);
  114. if (l == root_line) {
  115. return true;
  116. }
  117. return tokens_.GetIndentColumnNumber(l) > root_line_indent;
  118. };
  119. do {
  120. if (PositionIs(TokenKind::CloseCurlyBrace())) {
  121. // Immediately bail out if we hit an unmatched close curly, this will
  122. // pop us up a level of the syntax grouping.
  123. return llvm::None;
  124. }
  125. // We assume that a semicolon is always intended to be the end of the
  126. // current construct.
  127. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  128. return semi;
  129. }
  130. // Skip over any matching group of tokens_.
  131. if (SkipMatchingGroup()) {
  132. continue;
  133. }
  134. // Otherwise just step forward one token.
  135. ++position_;
  136. } while (position_ != end_ &&
  137. is_same_line_or_indent_greater_than_root(*position_));
  138. return llvm::None;
  139. }
  140. auto Parser2::SkipTo(TokenizedBuffer::Token t) -> void {
  141. CARBON_CHECK(t > *position_) << "Tried to skip backwards.";
  142. position_ = TokenizedBuffer::TokenIterator(t);
  143. CARBON_CHECK(position_ != end_) << "Skipped past EOF.";
  144. }
  145. auto Parser2::HandleDeclarationState() -> void {
  146. do {
  147. switch (auto token_kind = PositionKind()) {
  148. case TokenKind::EndOfFile(): {
  149. state_stack_.pop_back();
  150. return;
  151. }
  152. case TokenKind::Fn(): {
  153. PushState(ParserState::FunctionIntroducer());
  154. AddLeafNode(ParseNodeKind::FunctionIntroducer(), *position_);
  155. ++position_;
  156. return;
  157. }
  158. case TokenKind::Semi(): {
  159. AddLeafNode(ParseNodeKind::EmptyDeclaration(), *position_);
  160. ++position_;
  161. break;
  162. }
  163. default: {
  164. CARBON_DIAGNOSTIC(UnrecognizedDeclaration, Error,
  165. "Unrecognized declaration introducer.");
  166. emitter_.Emit(*position_, UnrecognizedDeclaration);
  167. tree_.has_errors_ = true;
  168. if (auto semi = SkipPastLikelyEnd(*position_)) {
  169. AddLeafNode(ParseNodeKind::EmptyDeclaration(), *semi,
  170. /*has_error=*/true);
  171. }
  172. break;
  173. }
  174. }
  175. } while (position_ < end_);
  176. }
  177. auto Parser2::HandleFunctionError(bool skip_past_likely_end) -> void {
  178. auto token = state_stack_.back().start_token;
  179. if (skip_past_likely_end && SkipPastLikelyEnd(token)) {
  180. token = *position_;
  181. }
  182. AddNode(ParseNodeKind::FunctionDeclaration(), token,
  183. state_stack_.back().subtree_start,
  184. /*has_error=*/true);
  185. state_stack_.pop_back();
  186. }
  187. auto Parser2::HandleFunctionIntroducerState() -> void {
  188. if (!ConsumeAndAddLeafNodeIf(TokenKind::Identifier(),
  189. ParseNodeKind::DeclaredName())) {
  190. CARBON_DIAGNOSTIC(ExpectedFunctionName, Error,
  191. "Expected function name after `fn` keyword.");
  192. emitter_.Emit(*position_, ExpectedFunctionName);
  193. // TODO: We could change the lexer to allow us to synthesize certain
  194. // kinds of tokens and try to "recover" here, but unclear that this is
  195. // really useful.
  196. HandleFunctionError(true);
  197. return;
  198. }
  199. if (!PositionIs(TokenKind::OpenParen())) {
  200. CARBON_DIAGNOSTIC(ExpectedFunctionParams, Error,
  201. "Expected `(` after function name.");
  202. emitter_.Emit(*position_, ExpectedFunctionParams);
  203. HandleFunctionError(true);
  204. return;
  205. }
  206. // Parse the parameter list as its own subtree; once that pops, resume
  207. // function parsing.
  208. state_stack_.back().state = ParserState::FunctionParameterListDone();
  209. PushState(ParserState::FunctionParameterList());
  210. // Advance past the open parenthesis before continuing.
  211. // TODO: When swapping () start/end, this should AddNode the open before
  212. // continuing.
  213. ++position_;
  214. }
  215. auto Parser2::HandleFunctionParameterListState() -> void {
  216. // TODO: Handle non-empty lists.
  217. if (!PositionIs(TokenKind::CloseParen())) {
  218. CARBON_DIAGNOSTIC(ExpectedFunctionParams, Error,
  219. "Expected `(` after function name.");
  220. emitter_.Emit(*position_, ExpectedFunctionParams);
  221. SkipTo(tokens_.GetMatchedClosingToken(state_stack_.back().start_token));
  222. AddLeafNode(ParseNodeKind::ParameterListEnd(), *position_,
  223. /*has_error=*/true);
  224. AddNode(ParseNodeKind::ParameterList(), state_stack_.back().start_token,
  225. state_stack_.back().subtree_start);
  226. ++position_;
  227. return;
  228. }
  229. AddLeafNode(ParseNodeKind::ParameterListEnd(), *position_);
  230. AddNode(ParseNodeKind::ParameterList(), state_stack_.back().start_token,
  231. state_stack_.back().subtree_start);
  232. ++position_;
  233. state_stack_.pop_back();
  234. }
  235. auto Parser2::HandleFunctionParameterListDoneState() -> void {
  236. switch (auto token_kind = PositionKind()) {
  237. case TokenKind::Semi(): {
  238. AddNode(ParseNodeKind::FunctionDeclaration(), *position_,
  239. state_stack_.back().subtree_start);
  240. ++position_;
  241. state_stack_.pop_back();
  242. break;
  243. }
  244. // TODO: OpenCurlyBrace is a definition.
  245. case TokenKind::OpenCurlyBrace(): {
  246. CARBON_DIAGNOSTIC(
  247. ExpectedFunctionBodyOrSemi, Error,
  248. "Expected function definition or `;` after function declaration.");
  249. emitter_.Emit(*position_, ExpectedFunctionBodyOrSemi);
  250. HandleFunctionError(true);
  251. break;
  252. }
  253. default: {
  254. CARBON_DIAGNOSTIC(
  255. ExpectedFunctionBodyOrSemi, Error,
  256. "Expected function definition or `;` after function declaration.");
  257. emitter_.Emit(*position_, ExpectedFunctionBodyOrSemi);
  258. // Only need to skip if we've not already found a new line.
  259. HandleFunctionError(tokens_.GetLine(*position_) ==
  260. tokens_.GetLine(state_stack_.back().start_token));
  261. break;
  262. }
  263. }
  264. }
  265. } // namespace Carbon