parser_impl.cpp 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "parser/parser_impl.h"
  5. #include <cstdlib>
  6. #include "lexer/token_kind.h"
  7. #include "lexer/tokenized_buffer.h"
  8. #include "llvm/ADT/Optional.h"
  9. #include "llvm/Support/FormatVariadic.h"
  10. #include "llvm/Support/raw_ostream.h"
  11. #include "parser/parse_node_kind.h"
  12. #include "parser/parse_tree.h"
  13. namespace Carbon {
  14. struct UnexpectedTokenInCodeBlock
  15. : SimpleDiagnostic<UnexpectedTokenInCodeBlock> {
  16. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  17. static constexpr llvm::StringLiteral Message =
  18. "Unexpected token in code block.";
  19. };
  20. struct ExpectedFunctionName : SimpleDiagnostic<ExpectedFunctionName> {
  21. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  22. static constexpr llvm::StringLiteral Message =
  23. "Expected function name after `fn` keyword.";
  24. };
  25. struct ExpectedFunctionParams : SimpleDiagnostic<ExpectedFunctionParams> {
  26. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  27. static constexpr llvm::StringLiteral Message =
  28. "Expected `(` after function name.";
  29. };
  30. struct ExpectedFunctionBodyOrSemi
  31. : SimpleDiagnostic<ExpectedFunctionBodyOrSemi> {
  32. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  33. static constexpr llvm::StringLiteral Message =
  34. "Expected function definition or `;` after function declaration.";
  35. };
  36. struct ExpectedVariableName : SimpleDiagnostic<ExpectedVariableName> {
  37. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  38. static constexpr llvm::StringLiteral Message =
  39. "Expected variable name after type in `var` declaration.";
  40. };
  41. struct UnrecognizedDeclaration : SimpleDiagnostic<UnrecognizedDeclaration> {
  42. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  43. static constexpr llvm::StringLiteral Message =
  44. "Unrecognized declaration introducer.";
  45. };
  46. struct ExpectedExpression : SimpleDiagnostic<ExpectedExpression> {
  47. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  48. static constexpr llvm::StringLiteral Message = "Expected expression.";
  49. };
  50. struct ExpectedParenAfter : SimpleDiagnostic<ExpectedParenAfter> {
  51. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  52. static constexpr const char* Message = "Expected `(` after `{0}`.";
  53. TokenKind introducer;
  54. auto Format() -> std::string {
  55. return llvm::formatv(Message, introducer.GetFixedSpelling()).str();
  56. }
  57. };
  58. struct ExpectedCloseParen : SimpleDiagnostic<ExpectedCloseParen> {
  59. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  60. static constexpr llvm::StringLiteral Message =
  61. "Unexpected tokens before `)`.";
  62. // TODO: Include the location of the matching open paren in the diagnostic.
  63. TokenizedBuffer::Token open_paren;
  64. };
  65. struct ExpectedSemiAfterExpression
  66. : SimpleDiagnostic<ExpectedSemiAfterExpression> {
  67. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  68. static constexpr llvm::StringLiteral Message =
  69. "Expected `;` after expression.";
  70. };
  71. struct ExpectedSemiAfter : SimpleDiagnostic<ExpectedSemiAfter> {
  72. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  73. static constexpr const char* Message = "Expected `;` after `{0}`.";
  74. TokenKind preceding;
  75. auto Format() -> std::string {
  76. return llvm::formatv(Message, preceding.GetFixedSpelling()).str();
  77. }
  78. };
  79. struct ExpectedIdentifierAfterDot
  80. : SimpleDiagnostic<ExpectedIdentifierAfterDot> {
  81. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  82. static constexpr llvm::StringLiteral Message =
  83. "Expected identifier after `.`.";
  84. };
  85. struct UnexpectedTokenInFunctionArgs
  86. : SimpleDiagnostic<UnexpectedTokenInFunctionArgs> {
  87. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  88. static constexpr llvm::StringLiteral Message =
  89. "Unexpected token in function argument list.";
  90. };
  91. struct OperatorRequiresParentheses
  92. : SimpleDiagnostic<OperatorRequiresParentheses> {
  93. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  94. static constexpr llvm::StringLiteral Message =
  95. "Parentheses are required to disambiguate operator precedence.";
  96. };
  97. ParseTree::Parser::Parser(ParseTree& tree_arg, TokenizedBuffer& tokens_arg,
  98. TokenDiagnosticEmitter& emitter)
  99. : tree(tree_arg),
  100. tokens(tokens_arg),
  101. emitter(emitter),
  102. position(tokens.Tokens().begin()),
  103. end(tokens.Tokens().end()) {
  104. assert(std::find_if(position, end,
  105. [&](TokenizedBuffer::Token t) {
  106. return tokens.GetKind(t) == TokenKind::EndOfFile();
  107. }) != end &&
  108. "No EndOfFileToken in token buffer.");
  109. }
  110. auto ParseTree::Parser::Parse(TokenizedBuffer& tokens,
  111. TokenDiagnosticEmitter& emitter) -> ParseTree {
  112. ParseTree tree(tokens);
  113. // We expect to have a 1:1 correspondence between tokens and tree nodes, so
  114. // reserve the space we expect to need here to avoid allocation and copying
  115. // overhead.
  116. tree.node_impls.reserve(tokens.Size());
  117. Parser parser(tree, tokens, emitter);
  118. while (!parser.AtEndOfFile()) {
  119. if (!parser.ParseDeclaration()) {
  120. // We don't have an enclosing parse tree node to mark as erroneous, so
  121. // just mark the tree as a whole.
  122. tree.has_errors = true;
  123. }
  124. }
  125. parser.AddLeafNode(ParseNodeKind::FileEnd(), *parser.position);
  126. assert(tree.Verify() && "Parse tree built but does not verify!");
  127. return tree;
  128. }
  129. auto ParseTree::Parser::Consume(TokenKind kind) -> TokenizedBuffer::Token {
  130. TokenizedBuffer::Token t = *position;
  131. assert(kind != TokenKind::EndOfFile() && "Cannot consume the EOF token!");
  132. assert(tokens.GetKind(t) == kind && "The current token is the wrong kind!");
  133. ++position;
  134. assert(position != end && "Reached end of tokens without finding EOF token.");
  135. return t;
  136. }
  137. auto ParseTree::Parser::ConsumeIf(TokenKind kind)
  138. -> llvm::Optional<TokenizedBuffer::Token> {
  139. if (tokens.GetKind(*position) != kind) {
  140. return {};
  141. }
  142. return Consume(kind);
  143. }
  144. auto ParseTree::Parser::AddLeafNode(ParseNodeKind kind,
  145. TokenizedBuffer::Token token) -> Node {
  146. Node n(tree.node_impls.size());
  147. tree.node_impls.push_back(NodeImpl(kind, token, /*subtree_size_arg=*/1));
  148. return n;
  149. }
  150. auto ParseTree::Parser::ConsumeAndAddLeafNodeIf(TokenKind t_kind,
  151. ParseNodeKind n_kind)
  152. -> llvm::Optional<Node> {
  153. auto t = ConsumeIf(t_kind);
  154. if (!t) {
  155. return {};
  156. }
  157. return AddLeafNode(n_kind, *t);
  158. }
  159. auto ParseTree::Parser::MarkNodeError(Node n) -> void {
  160. tree.node_impls[n.index].has_error = true;
  161. tree.has_errors = true;
  162. }
  163. // A marker for the start of a node's subtree.
  164. //
  165. // This is used to track the size of the node's subtree. It can be used
  166. // repeatedly if multiple subtrees start at the same position.
  167. struct ParseTree::Parser::SubtreeStart {
  168. int tree_size;
  169. };
  170. auto ParseTree::Parser::StartSubtree() -> SubtreeStart {
  171. return {static_cast<int>(tree.node_impls.size())};
  172. }
  173. auto ParseTree::Parser::AddNode(ParseNodeKind n_kind, TokenizedBuffer::Token t,
  174. SubtreeStart start, bool has_error) -> Node {
  175. // The size of the subtree is the change in size from when we started this
  176. // subtree to now, but including the node we're about to add.
  177. int tree_stop_size = static_cast<int>(tree.node_impls.size()) + 1;
  178. int subtree_size = tree_stop_size - start.tree_size;
  179. Node n(tree.node_impls.size());
  180. tree.node_impls.push_back(NodeImpl(n_kind, t, subtree_size));
  181. if (has_error) {
  182. MarkNodeError(n);
  183. }
  184. return n;
  185. }
  186. auto ParseTree::Parser::SkipMatchingGroup() -> bool {
  187. TokenizedBuffer::Token t = *position;
  188. TokenKind t_kind = tokens.GetKind(t);
  189. if (!t_kind.IsOpeningSymbol()) {
  190. return false;
  191. }
  192. SkipTo(tokens.GetMatchedClosingToken(t));
  193. Consume(t_kind.GetClosingSymbol());
  194. return true;
  195. }
  196. auto ParseTree::Parser::SkipTo(TokenizedBuffer::Token t) -> void {
  197. assert(t >= *position && "Tried to skip backwards.");
  198. position = TokenizedBuffer::TokenIterator(t);
  199. assert(position != end && "Skipped past EOF.");
  200. }
  201. auto ParseTree::Parser::FindNextOf(
  202. std::initializer_list<TokenKind> desired_kinds)
  203. -> llvm::Optional<TokenizedBuffer::Token> {
  204. auto new_position = position;
  205. while (true) {
  206. TokenizedBuffer::Token token = *new_position;
  207. TokenKind kind = tokens.GetKind(token);
  208. for (TokenKind desired_kind : desired_kinds) {
  209. if (kind == desired_kind) {
  210. return token;
  211. }
  212. }
  213. // Step to the next token at the current bracketing level.
  214. if (kind.IsClosingSymbol() || kind == TokenKind::EndOfFile()) {
  215. // There are no more tokens at this level.
  216. return llvm::None;
  217. } else if (kind.IsOpeningSymbol()) {
  218. new_position =
  219. TokenizedBuffer::TokenIterator(tokens.GetMatchedClosingToken(token));
  220. } else {
  221. ++new_position;
  222. }
  223. }
  224. }
  225. auto ParseTree::Parser::SkipPastLikelyEnd(TokenizedBuffer::Token skip_root,
  226. SemiHandler on_semi)
  227. -> llvm::Optional<Node> {
  228. if (AtEndOfFile()) {
  229. return llvm::None;
  230. }
  231. TokenizedBuffer::Line root_line = tokens.GetLine(skip_root);
  232. int root_line_indent = tokens.GetIndentColumnNumber(root_line);
  233. // We will keep scanning through tokens on the same line as the root or
  234. // lines with greater indentation than root's line.
  235. auto is_same_line_or_indent_greater_than_root =
  236. [&](TokenizedBuffer::Token t) {
  237. TokenizedBuffer::Line l = tokens.GetLine(t);
  238. if (l == root_line) {
  239. return true;
  240. }
  241. return tokens.GetIndentColumnNumber(l) > root_line_indent;
  242. };
  243. do {
  244. TokenKind current_kind = tokens.GetKind(*position);
  245. if (current_kind == TokenKind::CloseCurlyBrace()) {
  246. // Immediately bail out if we hit an unmatched close curly, this will
  247. // pop us up a level of the syntax grouping.
  248. return llvm::None;
  249. }
  250. // We assume that a semicolon is always intended to be the end of the
  251. // current construct.
  252. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  253. return on_semi(*semi);
  254. }
  255. // Skip over any matching group of tokens.
  256. if (SkipMatchingGroup()) {
  257. continue;
  258. }
  259. // Otherwise just step forward one token.
  260. Consume(current_kind);
  261. } while (!AtEndOfFile() &&
  262. is_same_line_or_indent_greater_than_root(*position));
  263. return llvm::None;
  264. }
  265. auto ParseTree::Parser::ParseCloseParen(TokenizedBuffer::Token open_paren,
  266. ParseNodeKind kind)
  267. -> llvm::Optional<Node> {
  268. if (auto close_paren =
  269. ConsumeAndAddLeafNodeIf(TokenKind::CloseParen(), kind)) {
  270. return close_paren;
  271. }
  272. emitter.EmitError<ExpectedCloseParen>(*position, {.open_paren = open_paren});
  273. SkipTo(tokens.GetMatchedClosingToken(open_paren));
  274. AddLeafNode(kind, Consume(TokenKind::CloseParen()));
  275. return llvm::None;
  276. }
  277. auto ParseTree::Parser::ParseFunctionSignature() -> Node {
  278. TokenizedBuffer::Token open_paren = Consume(TokenKind::OpenParen());
  279. auto start = StartSubtree();
  280. // FIXME: Add support for parsing parameters.
  281. auto close_paren =
  282. ParseCloseParen(open_paren, ParseNodeKind::ParameterListEnd());
  283. // FIXME: Implement parsing of a return type.
  284. return AddNode(ParseNodeKind::ParameterList(), open_paren, start,
  285. /*has_errors=*/!close_paren);
  286. }
  287. auto ParseTree::Parser::ParseCodeBlock() -> Node {
  288. TokenizedBuffer::Token open_curly = Consume(TokenKind::OpenCurlyBrace());
  289. auto start = StartSubtree();
  290. bool has_errors = false;
  291. // Loop over all the different possibly nested elements in the code block.
  292. while (tokens.GetKind(*position) != TokenKind::CloseCurlyBrace()) {
  293. if (!ParseStatement()) {
  294. // We detected and diagnosed an error of some kind. We can trivially skip
  295. // to the actual close curly brace from here.
  296. // FIXME: It would be better to skip to the next semicolon, or the next
  297. // token at the start of a line with the same indent as this one.
  298. SkipTo(tokens.GetMatchedClosingToken(open_curly));
  299. has_errors = true;
  300. break;
  301. }
  302. }
  303. // We always reach here having set our position in the token stream to the
  304. // close curly brace.
  305. AddLeafNode(ParseNodeKind::CodeBlockEnd(),
  306. Consume(TokenKind::CloseCurlyBrace()));
  307. return AddNode(ParseNodeKind::CodeBlock(), open_curly, start, has_errors);
  308. }
  309. auto ParseTree::Parser::ParseFunctionDeclaration() -> Node {
  310. TokenizedBuffer::Token function_intro_token = Consume(TokenKind::FnKeyword());
  311. auto start = StartSubtree();
  312. auto add_error_function_node = [&] {
  313. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  314. start, /*has_error=*/true);
  315. };
  316. auto handle_semi_in_error_recovery = [&](TokenizedBuffer::Token semi) {
  317. return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
  318. };
  319. auto name_n = ConsumeAndAddLeafNodeIf(TokenKind::Identifier(),
  320. ParseNodeKind::DeclaredName());
  321. if (!name_n) {
  322. emitter.EmitError<ExpectedFunctionName>(*position);
  323. // FIXME: We could change the lexer to allow us to synthesize certain
  324. // kinds of tokens and try to "recover" here, but unclear that this is
  325. // really useful.
  326. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  327. return add_error_function_node();
  328. }
  329. TokenizedBuffer::Token open_paren = *position;
  330. if (tokens.GetKind(open_paren) != TokenKind::OpenParen()) {
  331. emitter.EmitError<ExpectedFunctionParams>(open_paren);
  332. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  333. return add_error_function_node();
  334. }
  335. TokenizedBuffer::Token close_paren =
  336. tokens.GetMatchedClosingToken(open_paren);
  337. Node signature_n = ParseFunctionSignature();
  338. assert(*std::prev(position) == close_paren &&
  339. "Should have parsed through the close paren, whether successfully "
  340. "or with errors.");
  341. if (tree.node_impls[signature_n.index].has_error) {
  342. // Don't try to parse more of the function declaration, but consume a
  343. // declaration ending semicolon if found (without going to a new line).
  344. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  345. return add_error_function_node();
  346. }
  347. // See if we should parse a definition which is represented as a code block.
  348. if (tokens.GetKind(*position) == TokenKind::OpenCurlyBrace()) {
  349. ParseCodeBlock();
  350. } else if (!ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  351. ParseNodeKind::DeclarationEnd())) {
  352. emitter.EmitError<ExpectedFunctionBodyOrSemi>(*position);
  353. if (tokens.GetLine(*position) == tokens.GetLine(close_paren)) {
  354. // Only need to skip if we've not already found a new line.
  355. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  356. }
  357. return add_error_function_node();
  358. }
  359. // Successfully parsed the function, add that node.
  360. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  361. start);
  362. }
  363. auto ParseTree::Parser::ParseVariableDeclaration() -> Node {
  364. // `var` expression identifier [= expression] `;`
  365. TokenizedBuffer::Token var_token = Consume(TokenKind::VarKeyword());
  366. auto start = StartSubtree();
  367. auto type = ParseExpression();
  368. auto name = ConsumeAndAddLeafNodeIf(TokenKind::Identifier(),
  369. ParseNodeKind::DeclaredName());
  370. if (!name) {
  371. emitter.EmitError<ExpectedVariableName>(*position);
  372. if (auto after_name = FindNextOf({TokenKind::Equal(), TokenKind::Semi()})) {
  373. SkipTo(*after_name);
  374. }
  375. }
  376. auto start_init = StartSubtree();
  377. if (auto equal_token = ConsumeIf(TokenKind::Equal())) {
  378. auto init = ParseExpression();
  379. AddNode(ParseNodeKind::VariableInitializer(), *equal_token, start_init,
  380. /*has_error=*/!init);
  381. }
  382. auto semi = ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  383. ParseNodeKind::DeclarationEnd());
  384. if (!semi) {
  385. SkipPastLikelyEnd(var_token, [&](TokenizedBuffer::Token semi) {
  386. return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
  387. });
  388. }
  389. return AddNode(ParseNodeKind::VariableDeclaration(), var_token, start,
  390. /*has_error=*/!type || !name || !semi);
  391. }
  392. auto ParseTree::Parser::ParseEmptyDeclaration() -> Node {
  393. return AddLeafNode(ParseNodeKind::EmptyDeclaration(),
  394. Consume(TokenKind::Semi()));
  395. }
  396. auto ParseTree::Parser::ParseDeclaration() -> llvm::Optional<Node> {
  397. TokenizedBuffer::Token t = *position;
  398. switch (tokens.GetKind(t)) {
  399. case TokenKind::FnKeyword():
  400. return ParseFunctionDeclaration();
  401. case TokenKind::VarKeyword():
  402. return ParseVariableDeclaration();
  403. case TokenKind::Semi():
  404. return ParseEmptyDeclaration();
  405. case TokenKind::EndOfFile():
  406. return llvm::None;
  407. default:
  408. // Errors are handled outside the switch.
  409. break;
  410. }
  411. // We didn't recognize an introducer for a valid declaration.
  412. emitter.EmitError<UnrecognizedDeclaration>(t);
  413. // Skip forward past any end of a declaration we simply didn't understand so
  414. // that we can find the start of the next declaration or the end of a scope.
  415. if (auto found_semi_n =
  416. SkipPastLikelyEnd(t, [&](TokenizedBuffer::Token semi) {
  417. return AddLeafNode(ParseNodeKind::EmptyDeclaration(), semi);
  418. })) {
  419. MarkNodeError(*found_semi_n);
  420. return *found_semi_n;
  421. }
  422. // Nothing, not even a semicolon found.
  423. return llvm::None;
  424. }
  425. auto ParseTree::Parser::ParseParenExpression() -> llvm::Optional<Node> {
  426. // `(` expression `)`
  427. auto start = StartSubtree();
  428. TokenizedBuffer::Token open_paren = Consume(TokenKind::OpenParen());
  429. // TODO: If the next token is a close paren, build an empty tuple literal.
  430. auto expr = ParseExpression();
  431. // TODO: If the next token is a comma, build a tuple literal.
  432. auto close_paren =
  433. ParseCloseParen(open_paren, ParseNodeKind::ParenExpressionEnd());
  434. return AddNode(ParseNodeKind::ParenExpression(), open_paren, start,
  435. /*has_errors=*/!expr || !close_paren);
  436. }
  437. auto ParseTree::Parser::ParsePrimaryExpression() -> llvm::Optional<Node> {
  438. TokenizedBuffer::Token t = *position;
  439. TokenKind token_kind = tokens.GetKind(t);
  440. llvm::Optional<ParseNodeKind> kind;
  441. switch (token_kind) {
  442. case TokenKind::Identifier():
  443. kind = ParseNodeKind::NameReference();
  444. break;
  445. case TokenKind::IntegerLiteral():
  446. case TokenKind::RealLiteral():
  447. case TokenKind::StringLiteral():
  448. kind = ParseNodeKind::Literal();
  449. break;
  450. case TokenKind::OpenParen():
  451. return ParseParenExpression();
  452. default:
  453. emitter.EmitError<ExpectedExpression>(t);
  454. return llvm::None;
  455. }
  456. return AddLeafNode(*kind, Consume(token_kind));
  457. }
  458. auto ParseTree::Parser::ParseDesignatorExpression(SubtreeStart start,
  459. bool has_errors)
  460. -> llvm::Optional<Node> {
  461. // `.` identifier
  462. auto dot = Consume(TokenKind::Period());
  463. auto name = ConsumeIf(TokenKind::Identifier());
  464. if (name) {
  465. AddLeafNode(ParseNodeKind::DesignatedName(), *name);
  466. } else {
  467. // If we see a keyword, assume it was intended to be the designated name.
  468. // TODO: Should keywords be valid in designators?
  469. if (tokens.GetKind(*position).IsKeyword()) {
  470. Consume(tokens.GetKind(*position));
  471. }
  472. emitter.EmitError<ExpectedIdentifierAfterDot>(*position);
  473. has_errors = true;
  474. }
  475. return AddNode(ParseNodeKind::DesignatorExpression(), dot, start, has_errors);
  476. }
  477. auto ParseTree::Parser::ParseCallExpression(SubtreeStart start, bool has_errors)
  478. -> llvm::Optional<Node> {
  479. // `(` expression-list[opt] `)`
  480. //
  481. // expression-list ::= expression
  482. // ::= expression `,` expression-list
  483. TokenizedBuffer::Token open_paren = Consume(TokenKind::OpenParen());
  484. // Parse arguments, if any are specified.
  485. if (tokens.GetKind(*position) != TokenKind::CloseParen()) {
  486. while (true) {
  487. bool argument_error = !ParseExpression();
  488. has_errors |= argument_error;
  489. if (tokens.GetKind(*position) == TokenKind::CloseParen()) {
  490. break;
  491. }
  492. if (tokens.GetKind(*position) != TokenKind::Comma()) {
  493. if (!argument_error) {
  494. emitter.EmitError<UnexpectedTokenInFunctionArgs>(*position);
  495. }
  496. has_errors = true;
  497. auto comma_position = FindNextOf({TokenKind::Comma()});
  498. if (!comma_position) {
  499. SkipTo(tokens.GetMatchedClosingToken(open_paren));
  500. break;
  501. }
  502. SkipTo(*comma_position);
  503. }
  504. AddLeafNode(ParseNodeKind::CallExpressionComma(),
  505. Consume(TokenKind::Comma()));
  506. }
  507. }
  508. AddLeafNode(ParseNodeKind::CallExpressionEnd(),
  509. Consume(TokenKind::CloseParen()));
  510. return AddNode(ParseNodeKind::CallExpression(), open_paren, start,
  511. has_errors);
  512. }
  513. auto ParseTree::Parser::ParsePostfixExpression() -> llvm::Optional<Node> {
  514. auto start = StartSubtree();
  515. llvm::Optional<Node> expression = ParsePrimaryExpression();
  516. while (true) {
  517. switch (tokens.GetKind(*position)) {
  518. case TokenKind::Period():
  519. expression = ParseDesignatorExpression(start, !expression);
  520. break;
  521. case TokenKind::OpenParen():
  522. expression = ParseCallExpression(start, !expression);
  523. break;
  524. default: {
  525. return expression;
  526. }
  527. }
  528. }
  529. }
  530. auto ParseTree::Parser::ParseOperatorExpression(
  531. PrecedenceGroup ambient_precedence) -> llvm::Optional<Node> {
  532. auto start = StartSubtree();
  533. llvm::Optional<Node> lhs;
  534. PrecedenceGroup lhs_precedence = PrecedenceGroup::ForPostfixExpression();
  535. // Check for a prefix operator.
  536. if (auto operator_precedence =
  537. PrecedenceGroup::ForLeading(tokens.GetKind(*position));
  538. !operator_precedence) {
  539. lhs = ParsePostfixExpression();
  540. } else {
  541. if (PrecedenceGroup::GetPriority(ambient_precedence,
  542. *operator_precedence) !=
  543. OperatorPriority::RightFirst) {
  544. // The precedence rules don't permit this prefix operator in this
  545. // context. Diagnose this, but carry on and parse it anyway.
  546. emitter.EmitError<OperatorRequiresParentheses>(*position);
  547. }
  548. auto operator_token = Consume(tokens.GetKind(*position));
  549. bool has_errors = !ParseOperatorExpression(*operator_precedence);
  550. lhs = AddNode(ParseNodeKind::PrefixOperator(), operator_token, start,
  551. has_errors);
  552. lhs_precedence = *operator_precedence;
  553. }
  554. // Consume a sequence of infix and postfix operators.
  555. while (auto trailing_operator =
  556. PrecedenceGroup::ForTrailing(tokens.GetKind(*position))) {
  557. auto [operator_precedence, is_binary] = *trailing_operator;
  558. if (PrecedenceGroup::GetPriority(ambient_precedence, operator_precedence) !=
  559. OperatorPriority::RightFirst) {
  560. // The precedence rules don't permit this operator in this context. Try
  561. // again in the enclosing expression context.
  562. return lhs;
  563. }
  564. if (PrecedenceGroup::GetPriority(lhs_precedence, operator_precedence) !=
  565. OperatorPriority::LeftFirst) {
  566. // Either the LHS operator and this operator are ambiguous, or the
  567. // LHS operaor is a unary operator that can't be nested within
  568. // this operator. Either way, parentheses are required.
  569. emitter.EmitError<OperatorRequiresParentheses>(*position);
  570. lhs = llvm::None;
  571. }
  572. auto operator_token = Consume(tokens.GetKind(*position));
  573. if (is_binary) {
  574. auto rhs = ParseOperatorExpression(operator_precedence);
  575. lhs = AddNode(ParseNodeKind::InfixOperator(), operator_token, start,
  576. /*has_error=*/!lhs || !rhs);
  577. } else {
  578. lhs = AddNode(ParseNodeKind::PostfixOperator(), operator_token, start,
  579. /*has_error=*/!lhs);
  580. }
  581. lhs_precedence = operator_precedence;
  582. }
  583. return lhs;
  584. }
  585. auto ParseTree::Parser::ParseExpression() -> llvm::Optional<Node> {
  586. return ParseOperatorExpression(PrecedenceGroup::ForTopLevelExpression());
  587. }
  588. auto ParseTree::Parser::ParseExpressionStatement() -> llvm::Optional<Node> {
  589. TokenizedBuffer::Token start_token = *position;
  590. auto start = StartSubtree();
  591. bool has_errors = !ParseExpression();
  592. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  593. return AddNode(ParseNodeKind::ExpressionStatement(), *semi, start,
  594. has_errors);
  595. }
  596. if (!has_errors) {
  597. emitter.EmitError<ExpectedSemiAfterExpression>(*position);
  598. }
  599. if (auto recovery_node =
  600. SkipPastLikelyEnd(start_token, [&](TokenizedBuffer::Token semi) {
  601. return AddNode(ParseNodeKind::ExpressionStatement(), semi, start,
  602. true);
  603. })) {
  604. return recovery_node;
  605. }
  606. // Found junk not even followed by a `;`.
  607. return llvm::None;
  608. }
  609. auto ParseTree::Parser::ParseParenCondition(TokenKind introducer)
  610. -> llvm::Optional<Node> {
  611. // `(` expression `)`
  612. auto start = StartSubtree();
  613. auto open_paren = ConsumeIf(TokenKind::OpenParen());
  614. if (!open_paren) {
  615. emitter.EmitError<ExpectedParenAfter>(*position,
  616. {.introducer = introducer});
  617. }
  618. auto expr = ParseExpression();
  619. if (!open_paren) {
  620. // Don't expect a matching closing paren if there wasn't an opening paren.
  621. return llvm::None;
  622. }
  623. auto close_paren =
  624. ParseCloseParen(*open_paren, ParseNodeKind::ConditionEnd());
  625. return AddNode(ParseNodeKind::Condition(), *open_paren, start,
  626. /*has_errors=*/!expr || !close_paren);
  627. }
  628. auto ParseTree::Parser::ParseIfStatement() -> llvm::Optional<Node> {
  629. auto start = StartSubtree();
  630. auto if_token = Consume(TokenKind::IfKeyword());
  631. auto cond = ParseParenCondition(TokenKind::IfKeyword());
  632. auto then_case = ParseStatement();
  633. bool else_has_errors = false;
  634. if (ConsumeAndAddLeafNodeIf(TokenKind::ElseKeyword(),
  635. ParseNodeKind::IfStatementElse())) {
  636. else_has_errors = !ParseStatement();
  637. }
  638. return AddNode(ParseNodeKind::IfStatement(), if_token, start,
  639. /*has_errors=*/!cond || !then_case || else_has_errors);
  640. }
  641. auto ParseTree::Parser::ParseWhileStatement() -> llvm::Optional<Node> {
  642. auto start = StartSubtree();
  643. auto while_token = Consume(TokenKind::WhileKeyword());
  644. auto cond = ParseParenCondition(TokenKind::WhileKeyword());
  645. auto body = ParseStatement();
  646. return AddNode(ParseNodeKind::WhileStatement(), while_token, start,
  647. /*has_errors=*/!cond || !body);
  648. }
  649. auto ParseTree::Parser::ParseKeywordStatement(ParseNodeKind kind)
  650. -> llvm::Optional<Node> {
  651. auto keyword_kind = tokens.GetKind(*position);
  652. assert(keyword_kind.IsKeyword());
  653. auto start = StartSubtree();
  654. auto keyword = Consume(keyword_kind);
  655. auto semi =
  656. ConsumeAndAddLeafNodeIf(TokenKind::Semi(), ParseNodeKind::StatementEnd());
  657. if (!semi) {
  658. emitter.EmitError<ExpectedSemiAfter>(*position,
  659. {.preceding = keyword_kind});
  660. }
  661. return AddNode(kind, keyword, start, /*has_errors=*/!semi);
  662. }
  663. auto ParseTree::Parser::ParseStatement() -> llvm::Optional<Node> {
  664. switch (tokens.GetKind(*position)) {
  665. case TokenKind::VarKeyword():
  666. return ParseVariableDeclaration();
  667. case TokenKind::IfKeyword():
  668. return ParseIfStatement();
  669. case TokenKind::WhileKeyword():
  670. return ParseWhileStatement();
  671. case TokenKind::ContinueKeyword():
  672. return ParseKeywordStatement(ParseNodeKind::ContinueStatement());
  673. case TokenKind::BreakKeyword():
  674. return ParseKeywordStatement(ParseNodeKind::BreakStatement());
  675. case TokenKind::OpenCurlyBrace():
  676. return ParseCodeBlock();
  677. default:
  678. // A statement with no introducer token can only be an expression
  679. // statement.
  680. return ParseExpressionStatement();
  681. }
  682. }
  683. } // namespace Carbon