// parser_impl.cpp (24 KB)
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "parser/parser_impl.h"
  5. #include <cstdlib>
  6. #include "lexer/token_kind.h"
  7. #include "lexer/tokenized_buffer.h"
  8. #include "llvm/ADT/Optional.h"
  9. #include "llvm/Support/raw_ostream.h"
  10. #include "parser/parse_node_kind.h"
  11. #include "parser/parse_tree.h"
  12. namespace Carbon {
  13. struct UnexpectedTokenInFunctionParams
  14. : SimpleDiagnostic<UnexpectedTokenInFunctionParams> {
  15. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  16. static constexpr llvm::StringLiteral Message =
  17. "Unexpected token in function parameter list.";
  18. };
  19. struct UnexpectedTokenInCodeBlock
  20. : SimpleDiagnostic<UnexpectedTokenInCodeBlock> {
  21. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  22. static constexpr llvm::StringLiteral Message =
  23. "Unexpected token in code block.";
  24. };
  25. struct ExpectedFunctionName : SimpleDiagnostic<ExpectedFunctionName> {
  26. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  27. static constexpr llvm::StringLiteral Message =
  28. "Expected function name after `fn` keyword.";
  29. };
  30. struct ExpectedFunctionParams : SimpleDiagnostic<ExpectedFunctionParams> {
  31. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  32. static constexpr llvm::StringLiteral Message =
  33. "Expected `(` after function name.";
  34. };
  35. struct ExpectedFunctionBodyOrSemi
  36. : SimpleDiagnostic<ExpectedFunctionBodyOrSemi> {
  37. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  38. static constexpr llvm::StringLiteral Message =
  39. "Expected function definition or `;` after function declaration.";
  40. };
  41. struct ExpectedVariableName : SimpleDiagnostic<ExpectedVariableName> {
  42. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  43. static constexpr llvm::StringLiteral Message =
  44. "Expected variable name after type in `var` declaration.";
  45. };
  46. struct UnrecognizedDeclaration : SimpleDiagnostic<UnrecognizedDeclaration> {
  47. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  48. static constexpr llvm::StringLiteral Message =
  49. "Unrecognized declaration introducer.";
  50. };
  51. struct ExpectedExpression : SimpleDiagnostic<ExpectedExpression> {
  52. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  53. static constexpr llvm::StringLiteral Message = "Expected expression.";
  54. };
  55. struct ExpectedCloseParen : SimpleDiagnostic<ExpectedCloseParen> {
  56. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  57. static constexpr llvm::StringLiteral Message =
  58. "Unexpected tokens before `)`.";
  59. };
  60. struct ExpectedSemiAfterExpression
  61. : SimpleDiagnostic<ExpectedSemiAfterExpression> {
  62. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  63. static constexpr llvm::StringLiteral Message =
  64. "Expected `;` after expression.";
  65. };
  66. struct ExpectedIdentifierAfterDot
  67. : SimpleDiagnostic<ExpectedIdentifierAfterDot> {
  68. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  69. static constexpr llvm::StringLiteral Message =
  70. "Expected identifier after `.`.";
  71. };
  72. struct UnexpectedTokenInFunctionArgs
  73. : SimpleDiagnostic<UnexpectedTokenInFunctionArgs> {
  74. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  75. static constexpr llvm::StringLiteral Message =
  76. "Unexpected token in function argument list.";
  77. };
  78. struct OperatorRequiresParentheses
  79. : SimpleDiagnostic<OperatorRequiresParentheses> {
  80. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  81. static constexpr llvm::StringLiteral Message =
  82. "Parentheses are required to disambiguate operator precedence.";
  83. };
  84. ParseTree::Parser::Parser(ParseTree& tree_arg, TokenizedBuffer& tokens_arg,
  85. TokenDiagnosticEmitter& emitter)
  86. : tree(tree_arg),
  87. tokens(tokens_arg),
  88. emitter(emitter),
  89. position(tokens.Tokens().begin()),
  90. end(tokens.Tokens().end()) {
  91. assert(std::find_if(position, end,
  92. [&](TokenizedBuffer::Token t) {
  93. return tokens.GetKind(t) == TokenKind::EndOfFile();
  94. }) != end &&
  95. "No EndOfFileToken in token buffer.");
  96. }
  97. auto ParseTree::Parser::Parse(TokenizedBuffer& tokens,
  98. TokenDiagnosticEmitter& emitter) -> ParseTree {
  99. ParseTree tree(tokens);
  100. // We expect to have a 1:1 correspondence between tokens and tree nodes, so
  101. // reserve the space we expect to need here to avoid allocation and copying
  102. // overhead.
  103. tree.node_impls.reserve(tokens.Size());
  104. Parser parser(tree, tokens, emitter);
  105. while (!parser.AtEndOfFile()) {
  106. if (!parser.ParseDeclaration()) {
  107. // We don't have an enclosing parse tree node to mark as erroneous, so
  108. // just mark the tree as a whole.
  109. tree.has_errors = true;
  110. }
  111. }
  112. parser.AddLeafNode(ParseNodeKind::FileEnd(), *parser.position);
  113. assert(tree.Verify() && "Parse tree built but does not verify!");
  114. return tree;
  115. }
  116. auto ParseTree::Parser::Consume(TokenKind kind) -> TokenizedBuffer::Token {
  117. TokenizedBuffer::Token t = *position;
  118. assert(kind != TokenKind::EndOfFile() && "Cannot consume the EOF token!");
  119. assert(tokens.GetKind(t) == kind && "The current token is the wrong kind!");
  120. ++position;
  121. assert(position != end && "Reached end of tokens without finding EOF token.");
  122. return t;
  123. }
  124. auto ParseTree::Parser::ConsumeIf(TokenKind kind)
  125. -> llvm::Optional<TokenizedBuffer::Token> {
  126. if (tokens.GetKind(*position) != kind) {
  127. return {};
  128. }
  129. return Consume(kind);
  130. }
  131. auto ParseTree::Parser::AddLeafNode(ParseNodeKind kind,
  132. TokenizedBuffer::Token token) -> Node {
  133. Node n(tree.node_impls.size());
  134. tree.node_impls.push_back(NodeImpl(kind, token, /*subtree_size_arg=*/1));
  135. return n;
  136. }
  137. auto ParseTree::Parser::ConsumeAndAddLeafNodeIf(TokenKind t_kind,
  138. ParseNodeKind n_kind)
  139. -> llvm::Optional<Node> {
  140. auto t = ConsumeIf(t_kind);
  141. if (!t) {
  142. return {};
  143. }
  144. return AddLeafNode(n_kind, *t);
  145. }
  146. auto ParseTree::Parser::MarkNodeError(Node n) -> void {
  147. tree.node_impls[n.index].has_error = true;
  148. tree.has_errors = true;
  149. }
  150. // A marker for the start of a node's subtree.
  151. //
  152. // This is used to track the size of the node's subtree. It can be used
  153. // repeatedly if multiple subtrees start at the same position.
  154. struct ParseTree::Parser::SubtreeStart {
  155. int tree_size;
  156. };
  157. auto ParseTree::Parser::StartSubtree() -> SubtreeStart {
  158. return {static_cast<int>(tree.node_impls.size())};
  159. }
  160. auto ParseTree::Parser::AddNode(ParseNodeKind n_kind, TokenizedBuffer::Token t,
  161. SubtreeStart start, bool has_error) -> Node {
  162. // The size of the subtree is the change in size from when we started this
  163. // subtree to now, but including the node we're about to add.
  164. int tree_stop_size = static_cast<int>(tree.node_impls.size()) + 1;
  165. int subtree_size = tree_stop_size - start.tree_size;
  166. Node n(tree.node_impls.size());
  167. tree.node_impls.push_back(NodeImpl(n_kind, t, subtree_size));
  168. if (has_error) {
  169. MarkNodeError(n);
  170. }
  171. return n;
  172. }
  173. auto ParseTree::Parser::SkipMatchingGroup() -> bool {
  174. TokenizedBuffer::Token t = *position;
  175. TokenKind t_kind = tokens.GetKind(t);
  176. if (!t_kind.IsOpeningSymbol()) {
  177. return false;
  178. }
  179. SkipTo(tokens.GetMatchedClosingToken(t));
  180. Consume(t_kind.GetClosingSymbol());
  181. return true;
  182. }
  183. auto ParseTree::Parser::SkipTo(TokenizedBuffer::Token t) -> void {
  184. assert(t >= *position && "Tried to skip backwards.");
  185. position = TokenizedBuffer::TokenIterator(t);
  186. assert(position != end && "Skipped past EOF.");
  187. }
  188. auto ParseTree::Parser::FindNextOf(
  189. std::initializer_list<TokenKind> desired_kinds)
  190. -> llvm::Optional<TokenizedBuffer::Token> {
  191. auto new_position = position;
  192. while (true) {
  193. TokenizedBuffer::Token token = *new_position;
  194. TokenKind kind = tokens.GetKind(token);
  195. for (TokenKind desired_kind : desired_kinds) {
  196. if (kind == desired_kind) {
  197. return token;
  198. }
  199. }
  200. // Step to the next token at the current bracketing level.
  201. if (kind.IsClosingSymbol() || kind == TokenKind::EndOfFile()) {
  202. // There are no more tokens at this level.
  203. return llvm::None;
  204. } else if (kind.IsOpeningSymbol()) {
  205. new_position =
  206. TokenizedBuffer::TokenIterator(tokens.GetMatchedClosingToken(token));
  207. } else {
  208. ++new_position;
  209. }
  210. }
  211. }
  212. auto ParseTree::Parser::SkipPastLikelyEnd(TokenizedBuffer::Token skip_root,
  213. SemiHandler on_semi)
  214. -> llvm::Optional<Node> {
  215. if (AtEndOfFile()) {
  216. return llvm::None;
  217. }
  218. TokenizedBuffer::Line root_line = tokens.GetLine(skip_root);
  219. int root_line_indent = tokens.GetIndentColumnNumber(root_line);
  220. // We will keep scanning through tokens on the same line as the root or
  221. // lines with greater indentation than root's line.
  222. auto is_same_line_or_indent_greater_than_root =
  223. [&](TokenizedBuffer::Token t) {
  224. TokenizedBuffer::Line l = tokens.GetLine(t);
  225. if (l == root_line) {
  226. return true;
  227. }
  228. return tokens.GetIndentColumnNumber(l) > root_line_indent;
  229. };
  230. do {
  231. TokenKind current_kind = tokens.GetKind(*position);
  232. if (current_kind == TokenKind::CloseCurlyBrace()) {
  233. // Immediately bail out if we hit an unmatched close curly, this will
  234. // pop us up a level of the syntax grouping.
  235. return llvm::None;
  236. }
  237. // We assume that a semicolon is always intended to be the end of the
  238. // current construct.
  239. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  240. return on_semi(*semi);
  241. }
  242. // Skip over any matching group of tokens.
  243. if (SkipMatchingGroup()) {
  244. continue;
  245. }
  246. // Otherwise just step forward one token.
  247. Consume(current_kind);
  248. } while (!AtEndOfFile() &&
  249. is_same_line_or_indent_greater_than_root(*position));
  250. return llvm::None;
  251. }
  252. auto ParseTree::Parser::ParseFunctionSignature() -> Node {
  253. TokenizedBuffer::Token open_paren = Consume(TokenKind::OpenParen());
  254. auto start = StartSubtree();
  255. // FIXME: Add support for parsing parameters.
  256. bool has_errors = false;
  257. if (tokens.GetKind(*position) != TokenKind::CloseParen()) {
  258. emitter.EmitError<UnexpectedTokenInFunctionParams>(*position);
  259. has_errors = true;
  260. // We can trivially skip to the actual close parenthesis from here.
  261. SkipTo(tokens.GetMatchedClosingToken(open_paren));
  262. }
  263. AddLeafNode(ParseNodeKind::ParameterListEnd(),
  264. Consume(TokenKind::CloseParen()));
  265. // FIXME: Implement parsing of a return type.
  266. return AddNode(ParseNodeKind::ParameterList(), open_paren, start, has_errors);
  267. }
  268. auto ParseTree::Parser::ParseCodeBlock() -> Node {
  269. TokenizedBuffer::Token open_curly = Consume(TokenKind::OpenCurlyBrace());
  270. auto start = StartSubtree();
  271. bool has_errors = false;
  272. // Loop over all the different possibly nested elements in the code block.
  273. while (tokens.GetKind(*position) != TokenKind::CloseCurlyBrace()) {
  274. if (!ParseStatement()) {
  275. // We detected and diagnosed an error of some kind. We can trivially skip
  276. // to the actual close curly brace from here.
  277. // FIXME: It would be better to skip to the next semicolon, or the next
  278. // token at the start of a line with the same indent as this one.
  279. SkipTo(tokens.GetMatchedClosingToken(open_curly));
  280. has_errors = true;
  281. break;
  282. }
  283. }
  284. // We always reach here having set our position in the token stream to the
  285. // close curly brace.
  286. AddLeafNode(ParseNodeKind::CodeBlockEnd(),
  287. Consume(TokenKind::CloseCurlyBrace()));
  288. return AddNode(ParseNodeKind::CodeBlock(), open_curly, start, has_errors);
  289. }
  290. auto ParseTree::Parser::ParseFunctionDeclaration() -> Node {
  291. TokenizedBuffer::Token function_intro_token = Consume(TokenKind::FnKeyword());
  292. auto start = StartSubtree();
  293. auto add_error_function_node = [&] {
  294. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  295. start, /*has_error=*/true);
  296. };
  297. auto handle_semi_in_error_recovery = [&](TokenizedBuffer::Token semi) {
  298. return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
  299. };
  300. auto name_n = ConsumeAndAddLeafNodeIf(TokenKind::Identifier(),
  301. ParseNodeKind::DeclaredName());
  302. if (!name_n) {
  303. emitter.EmitError<ExpectedFunctionName>(*position);
  304. // FIXME: We could change the lexer to allow us to synthesize certain
  305. // kinds of tokens and try to "recover" here, but unclear that this is
  306. // really useful.
  307. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  308. return add_error_function_node();
  309. }
  310. TokenizedBuffer::Token open_paren = *position;
  311. if (tokens.GetKind(open_paren) != TokenKind::OpenParen()) {
  312. emitter.EmitError<ExpectedFunctionParams>(open_paren);
  313. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  314. return add_error_function_node();
  315. }
  316. TokenizedBuffer::Token close_paren =
  317. tokens.GetMatchedClosingToken(open_paren);
  318. Node signature_n = ParseFunctionSignature();
  319. assert(*std::prev(position) == close_paren &&
  320. "Should have parsed through the close paren, whether successfully "
  321. "or with errors.");
  322. if (tree.node_impls[signature_n.index].has_error) {
  323. // Don't try to parse more of the function declaration, but consume a
  324. // declaration ending semicolon if found (without going to a new line).
  325. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  326. return add_error_function_node();
  327. }
  328. // See if we should parse a definition which is represented as a code block.
  329. if (tokens.GetKind(*position) == TokenKind::OpenCurlyBrace()) {
  330. ParseCodeBlock();
  331. } else if (!ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  332. ParseNodeKind::DeclarationEnd())) {
  333. emitter.EmitError<ExpectedFunctionBodyOrSemi>(*position);
  334. if (tokens.GetLine(*position) == tokens.GetLine(close_paren)) {
  335. // Only need to skip if we've not already found a new line.
  336. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  337. }
  338. return add_error_function_node();
  339. }
  340. // Successfully parsed the function, add that node.
  341. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  342. start);
  343. }
  344. auto ParseTree::Parser::ParseVariableDeclaration() -> Node {
  345. // `var` expression identifier [= expression] `;`
  346. TokenizedBuffer::Token var_token = Consume(TokenKind::VarKeyword());
  347. auto start = StartSubtree();
  348. auto type = ParseExpression();
  349. auto name = ConsumeAndAddLeafNodeIf(TokenKind::Identifier(),
  350. ParseNodeKind::DeclaredName());
  351. if (!name) {
  352. emitter.EmitError<ExpectedVariableName>(*position);
  353. if (auto after_name = FindNextOf({TokenKind::Equal(), TokenKind::Semi()})) {
  354. SkipTo(*after_name);
  355. }
  356. }
  357. auto start_init = StartSubtree();
  358. if (auto equal_token = ConsumeIf(TokenKind::Equal())) {
  359. auto init = ParseExpression();
  360. AddNode(ParseNodeKind::VariableInitializer(), *equal_token, start_init,
  361. /*has_error=*/!init);
  362. }
  363. auto semi = ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  364. ParseNodeKind::DeclarationEnd());
  365. if (!semi) {
  366. SkipPastLikelyEnd(var_token, [&](TokenizedBuffer::Token semi) {
  367. return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
  368. });
  369. }
  370. return AddNode(ParseNodeKind::VariableDeclaration(), var_token, start,
  371. /*has_error=*/!type || !name || !semi);
  372. }
  373. auto ParseTree::Parser::ParseEmptyDeclaration() -> Node {
  374. return AddLeafNode(ParseNodeKind::EmptyDeclaration(),
  375. Consume(TokenKind::Semi()));
  376. }
  377. auto ParseTree::Parser::ParseDeclaration() -> llvm::Optional<Node> {
  378. TokenizedBuffer::Token t = *position;
  379. switch (tokens.GetKind(t)) {
  380. case TokenKind::FnKeyword():
  381. return ParseFunctionDeclaration();
  382. case TokenKind::VarKeyword():
  383. return ParseVariableDeclaration();
  384. case TokenKind::Semi():
  385. return ParseEmptyDeclaration();
  386. case TokenKind::EndOfFile():
  387. return llvm::None;
  388. default:
  389. // Errors are handled outside the switch.
  390. break;
  391. }
  392. // We didn't recognize an introducer for a valid declaration.
  393. emitter.EmitError<UnrecognizedDeclaration>(t);
  394. // Skip forward past any end of a declaration we simply didn't understand so
  395. // that we can find the start of the next declaration or the end of a scope.
  396. if (auto found_semi_n =
  397. SkipPastLikelyEnd(t, [&](TokenizedBuffer::Token semi) {
  398. return AddLeafNode(ParseNodeKind::EmptyDeclaration(), semi);
  399. })) {
  400. MarkNodeError(*found_semi_n);
  401. return *found_semi_n;
  402. }
  403. // Nothing, not even a semicolon found.
  404. return llvm::None;
  405. }
  406. auto ParseTree::Parser::ParseParenExpression() -> llvm::Optional<Node> {
  407. // `(` expression `)`
  408. auto start = StartSubtree();
  409. TokenizedBuffer::Token open_paren = Consume(TokenKind::OpenParen());
  410. // TODO: If the next token is a close paren, build an empty tuple literal.
  411. bool has_errors = !ParseExpression();
  412. // TODO: If the next token is a comma, build a tuple literal.
  413. if (tokens.GetKind(*position) != TokenKind::CloseParen()) {
  414. if (!has_errors) {
  415. emitter.EmitError<ExpectedCloseParen>(*position);
  416. has_errors = true;
  417. }
  418. SkipTo(tokens.GetMatchedClosingToken(open_paren));
  419. }
  420. AddLeafNode(ParseNodeKind::ParenExpressionEnd(),
  421. Consume(TokenKind::CloseParen()));
  422. return AddNode(ParseNodeKind::ParenExpression(), open_paren, start,
  423. has_errors);
  424. }
  425. auto ParseTree::Parser::ParsePrimaryExpression() -> llvm::Optional<Node> {
  426. TokenizedBuffer::Token t = *position;
  427. TokenKind token_kind = tokens.GetKind(t);
  428. llvm::Optional<ParseNodeKind> kind;
  429. switch (token_kind) {
  430. case TokenKind::Identifier():
  431. kind = ParseNodeKind::NameReference();
  432. break;
  433. case TokenKind::IntegerLiteral():
  434. case TokenKind::RealLiteral():
  435. case TokenKind::StringLiteral():
  436. kind = ParseNodeKind::Literal();
  437. break;
  438. case TokenKind::OpenParen():
  439. return ParseParenExpression();
  440. default:
  441. emitter.EmitError<ExpectedExpression>(t);
  442. return llvm::None;
  443. }
  444. return AddLeafNode(*kind, Consume(token_kind));
  445. }
  446. auto ParseTree::Parser::ParseDesignatorExpression(SubtreeStart start,
  447. bool has_errors)
  448. -> llvm::Optional<Node> {
  449. // `.` identifier
  450. auto dot = Consume(TokenKind::Period());
  451. auto name = ConsumeIf(TokenKind::Identifier());
  452. if (name) {
  453. AddLeafNode(ParseNodeKind::DesignatedName(), *name);
  454. } else {
  455. // If we see a keyword, assume it was intended to be the designated name.
  456. // TODO: Should keywords be valid in designators?
  457. if (tokens.GetKind(*position).IsKeyword()) {
  458. Consume(tokens.GetKind(*position));
  459. }
  460. emitter.EmitError<ExpectedIdentifierAfterDot>(*position);
  461. has_errors = true;
  462. }
  463. return AddNode(ParseNodeKind::DesignatorExpression(), dot, start, has_errors);
  464. }
  465. auto ParseTree::Parser::ParseCallExpression(SubtreeStart start, bool has_errors)
  466. -> llvm::Optional<Node> {
  467. // `(` expression-list[opt] `)`
  468. //
  469. // expression-list ::= expression
  470. // ::= expression `,` expression-list
  471. TokenizedBuffer::Token open_paren = Consume(TokenKind::OpenParen());
  472. // Parse arguments, if any are specified.
  473. if (tokens.GetKind(*position) != TokenKind::CloseParen()) {
  474. while (true) {
  475. bool argument_error = !ParseExpression();
  476. has_errors |= argument_error;
  477. if (tokens.GetKind(*position) == TokenKind::CloseParen()) {
  478. break;
  479. }
  480. if (tokens.GetKind(*position) != TokenKind::Comma()) {
  481. if (!argument_error) {
  482. emitter.EmitError<UnexpectedTokenInFunctionArgs>(*position);
  483. }
  484. has_errors = true;
  485. auto comma_position = FindNextOf({TokenKind::Comma()});
  486. if (!comma_position) {
  487. SkipTo(tokens.GetMatchedClosingToken(open_paren));
  488. break;
  489. }
  490. SkipTo(*comma_position);
  491. }
  492. AddLeafNode(ParseNodeKind::CallExpressionComma(),
  493. Consume(TokenKind::Comma()));
  494. }
  495. }
  496. AddLeafNode(ParseNodeKind::CallExpressionEnd(),
  497. Consume(TokenKind::CloseParen()));
  498. return AddNode(ParseNodeKind::CallExpression(), open_paren, start,
  499. has_errors);
  500. }
  501. auto ParseTree::Parser::ParsePostfixExpression() -> llvm::Optional<Node> {
  502. auto start = StartSubtree();
  503. llvm::Optional<Node> expression = ParsePrimaryExpression();
  504. while (true) {
  505. switch (tokens.GetKind(*position)) {
  506. case TokenKind::Period():
  507. expression = ParseDesignatorExpression(start, !expression);
  508. break;
  509. case TokenKind::OpenParen():
  510. expression = ParseCallExpression(start, !expression);
  511. break;
  512. default: {
  513. return expression;
  514. }
  515. }
  516. }
  517. }
  518. auto ParseTree::Parser::ParseOperatorExpression(
  519. PrecedenceGroup ambient_precedence) -> llvm::Optional<Node> {
  520. auto start = StartSubtree();
  521. llvm::Optional<Node> lhs;
  522. PrecedenceGroup lhs_precedence = PrecedenceGroup::ForPostfixExpression();
  523. // Check for a prefix operator.
  524. if (auto operator_precedence =
  525. PrecedenceGroup::ForLeading(tokens.GetKind(*position));
  526. !operator_precedence) {
  527. lhs = ParsePostfixExpression();
  528. } else {
  529. if (PrecedenceGroup::GetPriority(ambient_precedence,
  530. *operator_precedence) !=
  531. OperatorPriority::RightFirst) {
  532. // The precedence rules don't permit this prefix operator in this
  533. // context. Diagnose this, but carry on and parse it anyway.
  534. emitter.EmitError<OperatorRequiresParentheses>(*position);
  535. }
  536. auto operator_token = Consume(tokens.GetKind(*position));
  537. bool has_errors = !ParseOperatorExpression(*operator_precedence);
  538. lhs = AddNode(ParseNodeKind::PrefixOperator(), operator_token, start,
  539. has_errors);
  540. lhs_precedence = *operator_precedence;
  541. }
  542. // Consume a sequence of infix and postfix operators.
  543. while (auto trailing_operator =
  544. PrecedenceGroup::ForTrailing(tokens.GetKind(*position))) {
  545. auto [operator_precedence, is_binary] = *trailing_operator;
  546. if (PrecedenceGroup::GetPriority(ambient_precedence, operator_precedence) !=
  547. OperatorPriority::RightFirst) {
  548. // The precedence rules don't permit this operator in this context. Try
  549. // again in the enclosing expression context.
  550. return lhs;
  551. }
  552. if (PrecedenceGroup::GetPriority(lhs_precedence, operator_precedence) !=
  553. OperatorPriority::LeftFirst) {
  554. // Either the LHS operator and this operator are ambiguous, or the
  555. // LHS operaor is a unary operator that can't be nested within
  556. // this operator. Either way, parentheses are required.
  557. emitter.EmitError<OperatorRequiresParentheses>(*position);
  558. lhs = llvm::None;
  559. }
  560. auto operator_token = Consume(tokens.GetKind(*position));
  561. if (is_binary) {
  562. auto rhs = ParseOperatorExpression(operator_precedence);
  563. lhs = AddNode(ParseNodeKind::InfixOperator(), operator_token, start,
  564. /*has_error=*/!lhs || !rhs);
  565. } else {
  566. lhs = AddNode(ParseNodeKind::PostfixOperator(), operator_token, start,
  567. /*has_error=*/!lhs);
  568. }
  569. lhs_precedence = operator_precedence;
  570. }
  571. return lhs;
  572. }
  573. auto ParseTree::Parser::ParseExpression() -> llvm::Optional<Node> {
  574. return ParseOperatorExpression(PrecedenceGroup::ForTopLevelExpression());
  575. }
  576. auto ParseTree::Parser::ParseExpressionStatement() -> llvm::Optional<Node> {
  577. TokenizedBuffer::Token start_token = *position;
  578. auto start = StartSubtree();
  579. bool has_errors = !ParseExpression();
  580. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  581. return AddNode(ParseNodeKind::ExpressionStatement(), *semi, start,
  582. has_errors);
  583. }
  584. if (!has_errors) {
  585. emitter.EmitError<ExpectedSemiAfterExpression>(*position);
  586. }
  587. if (auto recovery_node =
  588. SkipPastLikelyEnd(start_token, [&](TokenizedBuffer::Token semi) {
  589. return AddNode(ParseNodeKind::ExpressionStatement(), semi, start,
  590. true);
  591. })) {
  592. return recovery_node;
  593. }
  594. // Found junk not even followed by a `;`.
  595. return llvm::None;
  596. }
  597. auto ParseTree::Parser::ParseStatement() -> llvm::Optional<Node> {
  598. switch (tokens.GetKind(*position)) {
  599. case TokenKind::VarKeyword():
  600. return ParseVariableDeclaration();
  601. case TokenKind::OpenCurlyBrace():
  602. return ParseCodeBlock();
  603. default:
  604. // A statement with no introducer token can only be an expression
  605. // statement.
  606. return ParseExpressionStatement();
  607. }
  608. }
  609. } // namespace Carbon