parser_impl.cpp 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/parser/parser_impl.h"
  5. #include <cstdlib>
  6. #include "common/check.h"
  7. #include "llvm/ADT/Optional.h"
  8. #include "llvm/Support/FormatVariadic.h"
  9. #include "llvm/Support/raw_ostream.h"
  10. #include "toolchain/lexer/token_kind.h"
  11. #include "toolchain/lexer/tokenized_buffer.h"
  12. #include "toolchain/parser/parse_node_kind.h"
  13. #include "toolchain/parser/parse_tree.h"
  14. namespace Carbon {
  15. struct UnexpectedTokenInCodeBlock
  16. : SimpleDiagnostic<UnexpectedTokenInCodeBlock> {
  17. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  18. static constexpr llvm::StringLiteral Message =
  19. "Unexpected token in code block.";
  20. };
  21. struct ExpectedFunctionName : SimpleDiagnostic<ExpectedFunctionName> {
  22. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  23. static constexpr llvm::StringLiteral Message =
  24. "Expected function name after `fn` keyword.";
  25. };
  26. struct ExpectedFunctionParams : SimpleDiagnostic<ExpectedFunctionParams> {
  27. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  28. static constexpr llvm::StringLiteral Message =
  29. "Expected `(` after function name.";
  30. };
  31. struct ExpectedFunctionBodyOrSemi
  32. : SimpleDiagnostic<ExpectedFunctionBodyOrSemi> {
  33. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  34. static constexpr llvm::StringLiteral Message =
  35. "Expected function definition or `;` after function declaration.";
  36. };
  37. struct ExpectedVariableName : SimpleDiagnostic<ExpectedVariableName> {
  38. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  39. static constexpr llvm::StringLiteral Message =
  40. "Expected pattern in `var` declaration.";
  41. };
  42. struct ExpectedParameterName : SimpleDiagnostic<ExpectedParameterName> {
  43. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  44. static constexpr llvm::StringLiteral Message =
  45. "Expected parameter declaration.";
  46. };
  47. struct ExpectedStructLiteralField
  48. : SimpleDiagnostic<ExpectedStructLiteralField> {
  49. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  50. auto Format() -> std::string {
  51. std::string result = "Expected ";
  52. if (can_be_type) {
  53. result += "`.field: type`";
  54. }
  55. if (can_be_type && can_be_value) {
  56. result += " or ";
  57. }
  58. if (can_be_value) {
  59. result += "`.field = value`";
  60. }
  61. result += ".";
  62. return result;
  63. }
  64. bool can_be_type;
  65. bool can_be_value;
  66. };
  67. struct UnrecognizedDeclaration : SimpleDiagnostic<UnrecognizedDeclaration> {
  68. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  69. static constexpr llvm::StringLiteral Message =
  70. "Unrecognized declaration introducer.";
  71. };
  72. struct ExpectedCodeBlock : SimpleDiagnostic<ExpectedCodeBlock> {
  73. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  74. static constexpr llvm::StringLiteral Message = "Expected braced code block.";
  75. };
  76. struct ExpectedExpression : SimpleDiagnostic<ExpectedExpression> {
  77. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  78. static constexpr llvm::StringLiteral Message = "Expected expression.";
  79. };
  80. struct ExpectedParenAfter : SimpleDiagnostic<ExpectedParenAfter> {
  81. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  82. static constexpr const char* Message = "Expected `(` after `{0}`.";
  83. auto Format() -> std::string {
  84. return llvm::formatv(Message, introducer.GetFixedSpelling()).str();
  85. }
  86. TokenKind introducer;
  87. };
  88. struct ExpectedCloseParen : SimpleDiagnostic<ExpectedCloseParen> {
  89. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  90. static constexpr llvm::StringLiteral Message =
  91. "Unexpected tokens before `)`.";
  92. // TODO: Include the location of the matching open paren in the diagnostic.
  93. TokenizedBuffer::Token open_paren;
  94. };
  95. struct ExpectedSemiAfterExpression
  96. : SimpleDiagnostic<ExpectedSemiAfterExpression> {
  97. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  98. static constexpr llvm::StringLiteral Message =
  99. "Expected `;` after expression.";
  100. };
  101. struct ExpectedSemiAfter : SimpleDiagnostic<ExpectedSemiAfter> {
  102. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  103. static constexpr const char* Message = "Expected `;` after `{0}`.";
  104. auto Format() -> std::string {
  105. return llvm::formatv(Message, preceding.GetFixedSpelling()).str();
  106. }
  107. TokenKind preceding;
  108. };
  109. struct ExpectedIdentifierAfterDot
  110. : SimpleDiagnostic<ExpectedIdentifierAfterDot> {
  111. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  112. static constexpr llvm::StringLiteral Message =
  113. "Expected identifier after `.`.";
  114. };
  115. struct UnexpectedTokenAfterListElement
  116. : SimpleDiagnostic<UnexpectedTokenAfterListElement> {
  117. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  118. static constexpr const char* Message = "Expected `,` or `{0}`.";
  119. auto Format() -> std::string {
  120. return llvm::formatv(Message, close.GetFixedSpelling()).str();
  121. }
  122. TokenKind close;
  123. };
  124. struct BinaryOperatorRequiresWhitespace
  125. : SimpleDiagnostic<BinaryOperatorRequiresWhitespace> {
  126. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  127. static constexpr const char* Message =
  128. "Whitespace missing {0} binary operator.";
  129. auto Format() -> std::string {
  130. const char* position = "around";
  131. if (has_leading_space) {
  132. position = "after";
  133. } else if (has_trailing_space) {
  134. position = "before";
  135. }
  136. return llvm::formatv(Message, position);
  137. }
  138. bool has_leading_space;
  139. bool has_trailing_space;
  140. };
  141. struct UnaryOperatorHasWhitespace
  142. : SimpleDiagnostic<UnaryOperatorHasWhitespace> {
  143. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  144. static constexpr const char* Message =
  145. "Whitespace is not allowed {0} this unary operator.";
  146. auto Format() -> std::string {
  147. return llvm::formatv(Message, prefix ? "after" : "before");
  148. }
  149. bool prefix;
  150. };
  151. struct UnaryOperatorRequiresWhitespace
  152. : SimpleDiagnostic<UnaryOperatorRequiresWhitespace> {
  153. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  154. static constexpr const char* Message =
  155. "Whitespace is required {0} this unary operator.";
  156. auto Format() -> std::string {
  157. return llvm::formatv(Message, prefix ? "before" : "after");
  158. }
  159. bool prefix;
  160. };
  161. struct OperatorRequiresParentheses
  162. : SimpleDiagnostic<OperatorRequiresParentheses> {
  163. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  164. static constexpr llvm::StringLiteral Message =
  165. "Parentheses are required to disambiguate operator precedence.";
  166. };
  167. ParseTree::Parser::Parser(ParseTree& tree_arg, TokenizedBuffer& tokens_arg,
  168. TokenDiagnosticEmitter& emitter)
  169. : tree_(tree_arg),
  170. tokens_(tokens_arg),
  171. emitter_(emitter),
  172. position_(tokens_.Tokens().begin()),
  173. end_(tokens_.Tokens().end()) {
  174. CHECK(std::find_if(position_, end_,
  175. [&](TokenizedBuffer::Token t) {
  176. return tokens_.GetKind(t) == TokenKind::EndOfFile();
  177. }) != end_)
  178. << "No EndOfFileToken in token buffer.";
  179. }
  180. auto ParseTree::Parser::Parse(TokenizedBuffer& tokens,
  181. TokenDiagnosticEmitter& emitter) -> ParseTree {
  182. ParseTree tree(tokens);
  183. // We expect to have a 1:1 correspondence between tokens and tree nodes, so
  184. // reserve the space we expect to need here to avoid allocation and copying
  185. // overhead.
  186. tree.node_impls_.reserve(tokens.Size());
  187. Parser parser(tree, tokens, emitter);
  188. while (!parser.AtEndOfFile()) {
  189. if (!parser.ParseDeclaration()) {
  190. // We don't have an enclosing parse tree node to mark as erroneous, so
  191. // just mark the tree as a whole.
  192. tree.has_errors_ = true;
  193. }
  194. }
  195. parser.AddLeafNode(ParseNodeKind::FileEnd(), *parser.position_);
  196. CHECK(tree.Verify()) << "Parse tree built but does not verify!";
  197. return tree;
  198. }
  199. auto ParseTree::Parser::Consume(TokenKind kind) -> TokenizedBuffer::Token {
  200. CHECK(kind != TokenKind::EndOfFile()) << "Cannot consume the EOF token!";
  201. CHECK(NextTokenIs(kind)) << "The current token is the wrong kind!";
  202. TokenizedBuffer::Token t = *position_;
  203. ++position_;
  204. CHECK(position_ != end_)
  205. << "Reached end of tokens without finding EOF token.";
  206. return t;
  207. }
  208. auto ParseTree::Parser::ConsumeIf(TokenKind kind)
  209. -> llvm::Optional<TokenizedBuffer::Token> {
  210. if (!NextTokenIs(kind)) {
  211. return {};
  212. }
  213. return Consume(kind);
  214. }
  215. auto ParseTree::Parser::AddLeafNode(ParseNodeKind kind,
  216. TokenizedBuffer::Token token) -> Node {
  217. Node n(tree_.node_impls_.size());
  218. tree_.node_impls_.push_back(NodeImpl(kind, token, /*subtree_size_arg=*/1));
  219. return n;
  220. }
  221. auto ParseTree::Parser::ConsumeAndAddLeafNodeIf(TokenKind t_kind,
  222. ParseNodeKind n_kind)
  223. -> llvm::Optional<Node> {
  224. auto t = ConsumeIf(t_kind);
  225. if (!t) {
  226. return {};
  227. }
  228. return AddLeafNode(n_kind, *t);
  229. }
  230. auto ParseTree::Parser::MarkNodeError(Node n) -> void {
  231. tree_.node_impls_[n.index_].has_error = true;
  232. tree_.has_errors_ = true;
  233. }
  234. // A marker for the start of a node's subtree.
  235. //
  236. // This is used to track the size of the node's subtree. It can be used
  237. // repeatedly if multiple subtrees start at the same position.
  238. struct ParseTree::Parser::SubtreeStart {
  239. int tree_size;
  240. };
  241. auto ParseTree::Parser::GetSubtreeStartPosition() -> SubtreeStart {
  242. return {static_cast<int>(tree_.node_impls_.size())};
  243. }
  244. auto ParseTree::Parser::AddNode(ParseNodeKind n_kind, TokenizedBuffer::Token t,
  245. SubtreeStart start, bool has_error) -> Node {
  246. // The size of the subtree is the change in size from when we started this
  247. // subtree to now, but including the node we're about to add.
  248. int tree_stop_size = static_cast<int>(tree_.node_impls_.size()) + 1;
  249. int subtree_size = tree_stop_size - start.tree_size;
  250. Node n(tree_.node_impls_.size());
  251. tree_.node_impls_.push_back(NodeImpl(n_kind, t, subtree_size));
  252. if (has_error) {
  253. MarkNodeError(n);
  254. }
  255. return n;
  256. }
  257. auto ParseTree::Parser::SkipMatchingGroup() -> bool {
  258. TokenizedBuffer::Token t = *position_;
  259. TokenKind t_kind = tokens_.GetKind(t);
  260. if (!t_kind.IsOpeningSymbol()) {
  261. return false;
  262. }
  263. SkipTo(tokens_.GetMatchedClosingToken(t));
  264. Consume(t_kind.GetClosingSymbol());
  265. return true;
  266. }
  267. auto ParseTree::Parser::SkipTo(TokenizedBuffer::Token t) -> void {
  268. CHECK(t >= *position_) << "Tried to skip backwards.";
  269. position_ = TokenizedBuffer::TokenIterator(t);
  270. CHECK(position_ != end_) << "Skipped past EOF.";
  271. }
  272. auto ParseTree::Parser::FindNextOf(
  273. std::initializer_list<TokenKind> desired_kinds)
  274. -> llvm::Optional<TokenizedBuffer::Token> {
  275. auto new_position = position_;
  276. while (true) {
  277. TokenizedBuffer::Token token = *new_position;
  278. TokenKind kind = tokens_.GetKind(token);
  279. if (kind.IsOneOf(desired_kinds)) {
  280. return token;
  281. }
  282. // Step to the next token at the current bracketing level.
  283. if (kind.IsClosingSymbol() || kind == TokenKind::EndOfFile()) {
  284. // There are no more tokens at this level.
  285. return llvm::None;
  286. } else if (kind.IsOpeningSymbol()) {
  287. new_position =
  288. TokenizedBuffer::TokenIterator(tokens_.GetMatchedClosingToken(token));
  289. } else {
  290. ++new_position;
  291. }
  292. }
  293. }
  294. auto ParseTree::Parser::SkipPastLikelyEnd(TokenizedBuffer::Token skip_root,
  295. SemiHandler on_semi)
  296. -> llvm::Optional<Node> {
  297. if (AtEndOfFile()) {
  298. return llvm::None;
  299. }
  300. TokenizedBuffer::Line root_line = tokens_.GetLine(skip_root);
  301. int root_line_indent = tokens_.GetIndentColumnNumber(root_line);
  302. // We will keep scanning through tokens on the same line as the root or
  303. // lines with greater indentation than root's line.
  304. auto is_same_line_or_indent_greater_than_root =
  305. [&](TokenizedBuffer::Token t) {
  306. TokenizedBuffer::Line l = tokens_.GetLine(t);
  307. if (l == root_line) {
  308. return true;
  309. }
  310. return tokens_.GetIndentColumnNumber(l) > root_line_indent;
  311. };
  312. do {
  313. if (NextTokenKind() == TokenKind::CloseCurlyBrace()) {
  314. // Immediately bail out if we hit an unmatched close curly, this will
  315. // pop us up a level of the syntax grouping.
  316. return llvm::None;
  317. }
  318. // We assume that a semicolon is always intended to be the end of the
  319. // current construct.
  320. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  321. return on_semi(*semi);
  322. }
  323. // Skip over any matching group of tokens_.
  324. if (SkipMatchingGroup()) {
  325. continue;
  326. }
  327. // Otherwise just step forward one token.
  328. Consume(NextTokenKind());
  329. } while (!AtEndOfFile() &&
  330. is_same_line_or_indent_greater_than_root(*position_));
  331. return llvm::None;
  332. }
  333. auto ParseTree::Parser::ParseCloseParen(TokenizedBuffer::Token open_paren,
  334. ParseNodeKind kind)
  335. -> llvm::Optional<Node> {
  336. if (auto close_paren =
  337. ConsumeAndAddLeafNodeIf(TokenKind::CloseParen(), kind)) {
  338. return close_paren;
  339. }
  340. emitter_.EmitError<ExpectedCloseParen>(*position_,
  341. {.open_paren = open_paren});
  342. SkipTo(tokens_.GetMatchedClosingToken(open_paren));
  343. AddLeafNode(kind, Consume(TokenKind::CloseParen()));
  344. return llvm::None;
  345. }
  346. template <typename ListElementParser, typename ListCompletionHandler>
  347. auto ParseTree::Parser::ParseList(TokenKind open, TokenKind close,
  348. ListElementParser list_element_parser,
  349. ParseNodeKind comma_kind,
  350. ListCompletionHandler list_handler,
  351. bool allow_trailing_comma)
  352. -> llvm::Optional<Node> {
  353. // `(` element-list[opt] `)`
  354. //
  355. // element-list ::= element
  356. // ::= element `,` element-list
  357. TokenizedBuffer::Token open_paren = Consume(open);
  358. bool has_errors = false;
  359. bool any_commas = false;
  360. int64_t num_elements = 0;
  361. // Parse elements, if any are specified.
  362. if (!NextTokenIs(close)) {
  363. while (true) {
  364. bool element_error = !list_element_parser();
  365. has_errors |= element_error;
  366. ++num_elements;
  367. if (!NextTokenIsOneOf({close, TokenKind::Comma()})) {
  368. if (!element_error) {
  369. emitter_.EmitError<UnexpectedTokenAfterListElement>(*position_,
  370. {.close = close});
  371. }
  372. has_errors = true;
  373. auto end_of_element = FindNextOf({TokenKind::Comma(), close});
  374. // The lexer guarantees that parentheses are balanced.
  375. CHECK(end_of_element) << "missing matching `)` for `(`";
  376. SkipTo(*end_of_element);
  377. }
  378. if (NextTokenIs(close)) {
  379. break;
  380. }
  381. AddLeafNode(comma_kind, Consume(TokenKind::Comma()));
  382. any_commas = true;
  383. if (allow_trailing_comma && NextTokenIs(close)) {
  384. break;
  385. }
  386. }
  387. }
  388. bool is_single_item = num_elements == 1 && !any_commas;
  389. return list_handler(open_paren, is_single_item, Consume(close), has_errors);
  390. }
  391. auto ParseTree::Parser::ParsePattern(PatternKind kind) -> llvm::Optional<Node> {
  392. if (NextTokenIs(TokenKind::Identifier()) &&
  393. tokens_.GetKind(*(position_ + 1)) == TokenKind::Colon()) {
  394. // identifier `:` type
  395. auto start = GetSubtreeStartPosition();
  396. AddLeafNode(ParseNodeKind::DeclaredName(),
  397. Consume(TokenKind::Identifier()));
  398. auto colon = Consume(TokenKind::Colon());
  399. auto type = ParseType();
  400. return AddNode(ParseNodeKind::PatternBinding(), colon, start,
  401. /*has_error=*/!type);
  402. }
  403. switch (kind) {
  404. case PatternKind::Parameter:
  405. emitter_.EmitError<ExpectedParameterName>(*position_);
  406. break;
  407. case PatternKind::Variable:
  408. emitter_.EmitError<ExpectedVariableName>(*position_);
  409. break;
  410. }
  411. return llvm::None;
  412. }
  413. auto ParseTree::Parser::ParseFunctionParameter() -> llvm::Optional<Node> {
  414. return ParsePattern(PatternKind::Parameter);
  415. }
  416. auto ParseTree::Parser::ParseFunctionSignature() -> bool {
  417. auto start = GetSubtreeStartPosition();
  418. auto params = ParseParenList(
  419. [&] { return ParseFunctionParameter(); },
  420. ParseNodeKind::ParameterListComma(),
  421. [&](TokenizedBuffer::Token open_paren, bool /*is_single_item*/,
  422. TokenizedBuffer::Token close_paren, bool has_errors) {
  423. AddLeafNode(ParseNodeKind::ParameterListEnd(), close_paren);
  424. return AddNode(ParseNodeKind::ParameterList(), open_paren, start,
  425. has_errors);
  426. });
  427. auto start_return_type = GetSubtreeStartPosition();
  428. if (auto arrow = ConsumeIf(TokenKind::MinusGreater())) {
  429. auto return_type = ParseType();
  430. AddNode(ParseNodeKind::ReturnType(), *arrow, start_return_type,
  431. /*has_error=*/!return_type);
  432. if (!return_type) {
  433. return false;
  434. }
  435. }
  436. return params.hasValue();
  437. }
  438. auto ParseTree::Parser::ParseCodeBlock() -> llvm::Optional<Node> {
  439. llvm::Optional<TokenizedBuffer::Token> maybe_open_curly =
  440. ConsumeIf(TokenKind::OpenCurlyBrace());
  441. if (!maybe_open_curly) {
  442. // Recover by parsing a single statement.
  443. emitter_.EmitError<ExpectedCodeBlock>(*position_);
  444. return ParseStatement();
  445. }
  446. TokenizedBuffer::Token open_curly = *maybe_open_curly;
  447. auto start = GetSubtreeStartPosition();
  448. bool has_errors = false;
  449. // Loop over all the different possibly nested elements in the code block.
  450. while (!NextTokenIs(TokenKind::CloseCurlyBrace())) {
  451. if (!ParseStatement()) {
  452. // We detected and diagnosed an error of some kind. We can trivially skip
  453. // to the actual close curly brace from here.
  454. // FIXME: It would be better to skip to the next semicolon, or the next
  455. // token at the start of a line with the same indent as this one.
  456. SkipTo(tokens_.GetMatchedClosingToken(open_curly));
  457. has_errors = true;
  458. break;
  459. }
  460. }
  461. // We always reach here having set our position in the token stream to the
  462. // close curly brace.
  463. AddLeafNode(ParseNodeKind::CodeBlockEnd(),
  464. Consume(TokenKind::CloseCurlyBrace()));
  465. return AddNode(ParseNodeKind::CodeBlock(), open_curly, start, has_errors);
  466. }
  467. auto ParseTree::Parser::ParseFunctionDeclaration() -> Node {
  468. TokenizedBuffer::Token function_intro_token = Consume(TokenKind::FnKeyword());
  469. auto start = GetSubtreeStartPosition();
  470. auto add_error_function_node = [&] {
  471. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  472. start, /*has_error=*/true);
  473. };
  474. auto handle_semi_in_error_recovery = [&](TokenizedBuffer::Token semi) {
  475. return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
  476. };
  477. auto name_n = ConsumeAndAddLeafNodeIf(TokenKind::Identifier(),
  478. ParseNodeKind::DeclaredName());
  479. if (!name_n) {
  480. emitter_.EmitError<ExpectedFunctionName>(*position_);
  481. // FIXME: We could change the lexer to allow us to synthesize certain
  482. // kinds of tokens and try to "recover" here, but unclear that this is
  483. // really useful.
  484. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  485. return add_error_function_node();
  486. }
  487. TokenizedBuffer::Token open_paren = *position_;
  488. if (tokens_.GetKind(open_paren) != TokenKind::OpenParen()) {
  489. emitter_.EmitError<ExpectedFunctionParams>(open_paren);
  490. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  491. return add_error_function_node();
  492. }
  493. TokenizedBuffer::Token close_paren =
  494. tokens_.GetMatchedClosingToken(open_paren);
  495. if (!ParseFunctionSignature()) {
  496. // Don't try to parse more of the function declaration, but consume a
  497. // declaration ending semicolon if found (without going to a new line).
  498. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  499. return add_error_function_node();
  500. }
  501. // See if we should parse a definition which is represented as a code block.
  502. if (NextTokenIs(TokenKind::OpenCurlyBrace())) {
  503. if (!ParseCodeBlock()) {
  504. return add_error_function_node();
  505. }
  506. } else if (!ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  507. ParseNodeKind::DeclarationEnd())) {
  508. emitter_.EmitError<ExpectedFunctionBodyOrSemi>(*position_);
  509. if (tokens_.GetLine(*position_) == tokens_.GetLine(close_paren)) {
  510. // Only need to skip if we've not already found a new line.
  511. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  512. }
  513. return add_error_function_node();
  514. }
  515. // Successfully parsed the function, add that node.
  516. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  517. start);
  518. }
  519. auto ParseTree::Parser::ParseVariableDeclaration() -> Node {
  520. // `var` pattern [= expression] `;`
  521. TokenizedBuffer::Token var_token = Consume(TokenKind::VarKeyword());
  522. auto start = GetSubtreeStartPosition();
  523. auto pattern = ParsePattern(PatternKind::Variable);
  524. if (!pattern) {
  525. if (auto after_pattern =
  526. FindNextOf({TokenKind::Equal(), TokenKind::Semi()})) {
  527. SkipTo(*after_pattern);
  528. }
  529. }
  530. auto start_init = GetSubtreeStartPosition();
  531. if (auto equal_token = ConsumeIf(TokenKind::Equal())) {
  532. auto init = ParseExpression();
  533. AddNode(ParseNodeKind::VariableInitializer(), *equal_token, start_init,
  534. /*has_error=*/!init);
  535. }
  536. auto semi = ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  537. ParseNodeKind::DeclarationEnd());
  538. if (!semi) {
  539. emitter_.EmitError<ExpectedSemiAfterExpression>(*position_);
  540. SkipPastLikelyEnd(var_token, [&](TokenizedBuffer::Token semi) {
  541. return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
  542. });
  543. }
  544. return AddNode(ParseNodeKind::VariableDeclaration(), var_token, start,
  545. /*has_error=*/!pattern || !semi);
  546. }
  547. auto ParseTree::Parser::ParseEmptyDeclaration() -> Node {
  548. return AddLeafNode(ParseNodeKind::EmptyDeclaration(),
  549. Consume(TokenKind::Semi()));
  550. }
  551. auto ParseTree::Parser::ParseDeclaration() -> llvm::Optional<Node> {
  552. switch (NextTokenKind()) {
  553. case TokenKind::FnKeyword():
  554. return ParseFunctionDeclaration();
  555. case TokenKind::VarKeyword():
  556. return ParseVariableDeclaration();
  557. case TokenKind::Semi():
  558. return ParseEmptyDeclaration();
  559. case TokenKind::EndOfFile():
  560. return llvm::None;
  561. default:
  562. // Errors are handled outside the switch.
  563. break;
  564. }
  565. // We didn't recognize an introducer for a valid declaration.
  566. emitter_.EmitError<UnrecognizedDeclaration>(*position_);
  567. // Skip forward past any end of a declaration we simply didn't understand so
  568. // that we can find the start of the next declaration or the end of a scope.
  569. if (auto found_semi_n =
  570. SkipPastLikelyEnd(*position_, [&](TokenizedBuffer::Token semi) {
  571. return AddLeafNode(ParseNodeKind::EmptyDeclaration(), semi);
  572. })) {
  573. MarkNodeError(*found_semi_n);
  574. return *found_semi_n;
  575. }
  576. // Nothing, not even a semicolon found.
  577. return llvm::None;
  578. }
  579. auto ParseTree::Parser::ParseParenExpression() -> llvm::Optional<Node> {
  580. // parenthesized-expression ::= `(` expression `)`
  581. // tuple-literal ::= `(` `)`
  582. // ::= `(` expression `,` [expression-list [`,`]] `)`
  583. //
  584. // Parse the union of these, `(` [expression-list [`,`]] `)`, and work out
  585. // whether it's a tuple or a parenthesized expression afterwards.
  586. auto start = GetSubtreeStartPosition();
  587. return ParseParenList(
  588. [&] { return ParseExpression(); }, ParseNodeKind::TupleLiteralComma(),
  589. [&](TokenizedBuffer::Token open_paren, bool is_single_item,
  590. TokenizedBuffer::Token close_paren, bool has_arg_errors) {
  591. AddLeafNode(is_single_item ? ParseNodeKind::ParenExpressionEnd()
  592. : ParseNodeKind::TupleLiteralEnd(),
  593. close_paren);
  594. return AddNode(is_single_item ? ParseNodeKind::ParenExpression()
  595. : ParseNodeKind::TupleLiteral(),
  596. open_paren, start, has_arg_errors);
  597. },
  598. /*allow_trailing_comma=*/true);
  599. }
  600. auto ParseTree::Parser::ParseBraceExpression() -> llvm::Optional<Node> {
  601. // braced-expression ::= `{` [field-value-list] `}`
  602. // ::= `{` field-type-list `}`
  603. // field-value-list ::= field-value [`,`]
  604. // ::= field-value `,` field-value-list
  605. // field-value ::= `.` identifier `=` expression
  606. // field-type-list ::= field-type [`,`]
  607. // ::= field-type `,` field-type-list
  608. // field-type ::= `.` identifier `:` type
  609. //
  610. // Note that `{` `}` is the first form (an empty struct), but that an empty
  611. // struct value also behaves as an empty struct type.
  612. auto start = GetSubtreeStartPosition();
  613. enum Kind { Unknown, Value, Type };
  614. Kind kind = Unknown;
  615. return ParseList(
  616. TokenKind::OpenCurlyBrace(), TokenKind::CloseCurlyBrace(),
  617. [&]() -> llvm::Optional<Node> {
  618. auto start_elem = GetSubtreeStartPosition();
  619. auto diagnose_invalid_syntax = [&] {
  620. emitter_.EmitError<ExpectedStructLiteralField>(
  621. *position_,
  622. {.can_be_type = kind != Value, .can_be_value = kind != Type});
  623. return llvm::None;
  624. };
  625. if (!NextTokenIs(TokenKind::Period())) {
  626. return diagnose_invalid_syntax();
  627. }
  628. auto designator = ParseDesignatorExpression(
  629. start_elem, ParseNodeKind::StructFieldDesignator(),
  630. /*has_errors=*/false);
  631. if (!designator) {
  632. auto recovery_pos = FindNextOf(
  633. {TokenKind::Equal(), TokenKind::Colon(), TokenKind::Comma()});
  634. if (!recovery_pos ||
  635. tokens_.GetKind(*recovery_pos) == TokenKind::Comma()) {
  636. return llvm::None;
  637. }
  638. SkipTo(*recovery_pos);
  639. }
  640. // Work out the kind of this element
  641. Kind elem_kind =
  642. (NextTokenIs(TokenKind::Equal())
  643. ? Value
  644. : NextTokenIs(TokenKind::Colon()) ? Type : Unknown);
  645. if (elem_kind == Unknown || (kind != Unknown && elem_kind != kind)) {
  646. return diagnose_invalid_syntax();
  647. }
  648. kind = elem_kind;
  649. // Struct type fields and value fields use the same grammar except that
  650. // one has a `:` separator and the other has an `=` separator.
  651. auto equal_or_colon_token =
  652. Consume(kind == Type ? TokenKind::Colon() : TokenKind::Equal());
  653. auto type_or_value = ParseExpression();
  654. return AddNode(kind == Type ? ParseNodeKind::StructFieldType()
  655. : ParseNodeKind::StructFieldValue(),
  656. equal_or_colon_token, start_elem,
  657. /*has_error=*/!designator || !type_or_value);
  658. },
  659. ParseNodeKind::StructComma(),
  660. [&](TokenizedBuffer::Token open_brace, bool /*is_single_item*/,
  661. TokenizedBuffer::Token close_brace, bool has_errors) {
  662. AddLeafNode(ParseNodeKind::StructEnd(), close_brace);
  663. return AddNode(kind == Type ? ParseNodeKind::StructTypeLiteral()
  664. : ParseNodeKind::StructLiteral(),
  665. open_brace, start, has_errors);
  666. },
  667. /*allow_trailing_comma=*/true);
  668. }
  669. auto ParseTree::Parser::ParsePrimaryExpression() -> llvm::Optional<Node> {
  670. llvm::Optional<ParseNodeKind> kind;
  671. switch (NextTokenKind()) {
  672. case TokenKind::Identifier():
  673. kind = ParseNodeKind::NameReference();
  674. break;
  675. case TokenKind::IntegerLiteral():
  676. case TokenKind::RealLiteral():
  677. case TokenKind::StringLiteral():
  678. case TokenKind::IntegerTypeLiteral():
  679. case TokenKind::UnsignedIntegerTypeLiteral():
  680. case TokenKind::FloatingPointTypeLiteral():
  681. kind = ParseNodeKind::Literal();
  682. break;
  683. case TokenKind::OpenParen():
  684. return ParseParenExpression();
  685. case TokenKind::OpenCurlyBrace():
  686. return ParseBraceExpression();
  687. default:
  688. emitter_.EmitError<ExpectedExpression>(*position_);
  689. return llvm::None;
  690. }
  691. return AddLeafNode(*kind, Consume(NextTokenKind()));
  692. }
  693. auto ParseTree::Parser::ParseDesignatorExpression(SubtreeStart start,
  694. ParseNodeKind kind,
  695. bool has_errors)
  696. -> llvm::Optional<Node> {
  697. // `.` identifier
  698. auto dot = Consume(TokenKind::Period());
  699. auto name = ConsumeIf(TokenKind::Identifier());
  700. if (name) {
  701. AddLeafNode(ParseNodeKind::DesignatedName(), *name);
  702. } else {
  703. emitter_.EmitError<ExpectedIdentifierAfterDot>(*position_);
  704. // If we see a keyword, assume it was intended to be the designated name.
  705. // TODO: Should keywords be valid in designators?
  706. if (NextTokenKind().IsKeyword()) {
  707. name = Consume(NextTokenKind());
  708. auto name_node = AddLeafNode(ParseNodeKind::DesignatedName(), *name);
  709. MarkNodeError(name_node);
  710. } else {
  711. has_errors = true;
  712. }
  713. }
  714. Node result = AddNode(kind, dot, start, has_errors);
  715. return name ? result : llvm::Optional<Node>();
  716. }
  717. auto ParseTree::Parser::ParseCallExpression(SubtreeStart start, bool has_errors)
  718. -> llvm::Optional<Node> {
  719. // `(` expression-list[opt] `)`
  720. //
  721. // expression-list ::= expression
  722. // ::= expression `,` expression-list
  723. return ParseParenList(
  724. [&] { return ParseExpression(); }, ParseNodeKind::CallExpressionComma(),
  725. [&](TokenizedBuffer::Token open_paren, bool /*is_single_item*/,
  726. TokenizedBuffer::Token close_paren, bool has_arg_errors) {
  727. AddLeafNode(ParseNodeKind::CallExpressionEnd(), close_paren);
  728. return AddNode(ParseNodeKind::CallExpression(), open_paren, start,
  729. has_errors || has_arg_errors);
  730. });
  731. }
  732. auto ParseTree::Parser::ParsePostfixExpression() -> llvm::Optional<Node> {
  733. auto start = GetSubtreeStartPosition();
  734. llvm::Optional<Node> expression = ParsePrimaryExpression();
  735. while (true) {
  736. switch (NextTokenKind()) {
  737. case TokenKind::Period():
  738. expression = ParseDesignatorExpression(
  739. start, ParseNodeKind::DesignatorExpression(), !expression);
  740. break;
  741. case TokenKind::OpenParen():
  742. expression = ParseCallExpression(start, !expression);
  743. break;
  744. default: {
  745. return expression;
  746. }
  747. }
  748. }
  749. }
  750. // Determines whether the given token is considered to be the start of an
  751. // operand according to the rules for infix operator parsing.
  752. static auto IsAssumedStartOfOperand(TokenKind kind) -> bool {
  753. return kind.IsOneOf({TokenKind::OpenParen(), TokenKind::Identifier(),
  754. TokenKind::IntegerLiteral(), TokenKind::RealLiteral(),
  755. TokenKind::StringLiteral()});
  756. }
  757. // Determines whether the given token is considered to be the end of an operand
  758. // according to the rules for infix operator parsing.
  759. static auto IsAssumedEndOfOperand(TokenKind kind) -> bool {
  760. return kind.IsOneOf({TokenKind::CloseParen(), TokenKind::CloseCurlyBrace(),
  761. TokenKind::CloseSquareBracket(), TokenKind::Identifier(),
  762. TokenKind::IntegerLiteral(), TokenKind::RealLiteral(),
  763. TokenKind::StringLiteral()});
  764. }
  765. // Determines whether the given token could possibly be the start of an operand.
  766. // This is conservatively correct, and will never incorrectly return `false`,
  767. // but can incorrectly return `true`.
  768. static auto IsPossibleStartOfOperand(TokenKind kind) -> bool {
  769. return !kind.IsOneOf({TokenKind::CloseParen(), TokenKind::CloseCurlyBrace(),
  770. TokenKind::CloseSquareBracket(), TokenKind::Comma(),
  771. TokenKind::Semi(), TokenKind::Colon()});
  772. }
  773. auto ParseTree::Parser::IsLexicallyValidInfixOperator() -> bool {
  774. CHECK(!AtEndOfFile()) << "Expected an operator token.";
  775. bool leading_space = tokens_.HasLeadingWhitespace(*position_);
  776. bool trailing_space = tokens_.HasTrailingWhitespace(*position_);
  777. // If there's whitespace on both sides, it's an infix operator.
  778. if (leading_space && trailing_space) {
  779. return true;
  780. }
  781. // If there's whitespace on exactly one side, it's not an infix operator.
  782. if (leading_space || trailing_space) {
  783. return false;
  784. }
  785. // Otherwise, for an infix operator, the preceding token must be any close
  786. // bracket, identifier, or literal and the next token must be an open paren,
  787. // identifier, or literal.
  788. if (position_ == tokens_.Tokens().begin() ||
  789. !IsAssumedEndOfOperand(tokens_.GetKind(*(position_ - 1))) ||
  790. !IsAssumedStartOfOperand(tokens_.GetKind(*(position_ + 1)))) {
  791. return false;
  792. }
  793. return true;
  794. }
  795. auto ParseTree::Parser::DiagnoseOperatorFixity(OperatorFixity fixity) -> void {
  796. bool is_valid_as_infix = IsLexicallyValidInfixOperator();
  797. if (fixity == OperatorFixity::Infix) {
  798. // Infix operators must satisfy the infix operator rules.
  799. if (!is_valid_as_infix) {
  800. emitter_.EmitError<BinaryOperatorRequiresWhitespace>(
  801. *position_,
  802. {.has_leading_space = tokens_.HasLeadingWhitespace(*position_),
  803. .has_trailing_space = tokens_.HasTrailingWhitespace(*position_)});
  804. }
  805. } else {
  806. bool prefix = fixity == OperatorFixity::Prefix;
  807. // Whitespace is not permitted between a symbolic pre/postfix operator and
  808. // its operand.
  809. if (NextTokenKind().IsSymbol() &&
  810. (prefix ? tokens_.HasTrailingWhitespace(*position_)
  811. : tokens_.HasLeadingWhitespace(*position_))) {
  812. emitter_.EmitError<UnaryOperatorHasWhitespace>(*position_,
  813. {.prefix = prefix});
  814. }
  815. // Pre/postfix operators must not satisfy the infix operator rules.
  816. if (is_valid_as_infix) {
  817. emitter_.EmitError<UnaryOperatorRequiresWhitespace>(*position_,
  818. {.prefix = prefix});
  819. }
  820. }
  821. }
  822. auto ParseTree::Parser::IsTrailingOperatorInfix() -> bool {
  823. if (AtEndOfFile()) {
  824. return false;
  825. }
  826. // An operator that follows the infix operator rules is parsed as
  827. // infix, unless the next token means that it can't possibly be.
  828. if (IsLexicallyValidInfixOperator() &&
  829. IsPossibleStartOfOperand(tokens_.GetKind(*(position_ + 1)))) {
  830. return true;
  831. }
  832. // A trailing operator with leading whitespace that's not valid as infix is
  833. // not valid at all. If the next token looks like the start of an operand,
  834. // then parse as infix, otherwise as postfix. Either way we'll produce a
  835. // diagnostic later on.
  836. if (tokens_.HasLeadingWhitespace(*position_) &&
  837. IsAssumedStartOfOperand(tokens_.GetKind(*(position_ + 1)))) {
  838. return true;
  839. }
  840. return false;
  841. }
  842. auto ParseTree::Parser::ParseOperatorExpression(
  843. PrecedenceGroup ambient_precedence) -> llvm::Optional<Node> {
  844. auto start = GetSubtreeStartPosition();
  845. llvm::Optional<Node> lhs;
  846. PrecedenceGroup lhs_precedence = PrecedenceGroup::ForPostfixExpression();
  847. // Check for a prefix operator.
  848. if (auto operator_precedence = PrecedenceGroup::ForLeading(NextTokenKind());
  849. !operator_precedence) {
  850. lhs = ParsePostfixExpression();
  851. } else {
  852. if (PrecedenceGroup::GetPriority(ambient_precedence,
  853. *operator_precedence) !=
  854. OperatorPriority::RightFirst) {
  855. // The precedence rules don't permit this prefix operator in this
  856. // context. Diagnose this, but carry on and parse it anyway.
  857. emitter_.EmitError<OperatorRequiresParentheses>(*position_);
  858. } else {
  859. // Check that this operator follows the proper whitespace rules.
  860. DiagnoseOperatorFixity(OperatorFixity::Prefix);
  861. }
  862. auto operator_token = Consume(NextTokenKind());
  863. bool has_errors = !ParseOperatorExpression(*operator_precedence);
  864. lhs = AddNode(ParseNodeKind::PrefixOperator(), operator_token, start,
  865. has_errors);
  866. lhs_precedence = *operator_precedence;
  867. }
  868. // Consume a sequence of infix and postfix operators.
  869. while (auto trailing_operator = PrecedenceGroup::ForTrailing(
  870. NextTokenKind(), IsTrailingOperatorInfix())) {
  871. auto [operator_precedence, is_binary] = *trailing_operator;
  872. // FIXME: If this operator is ambiguous with either the ambient precedence
  873. // or the LHS precedence, and there's a variant with a different fixity
  874. // that would work, use that one instead for error recovery.
  875. if (PrecedenceGroup::GetPriority(ambient_precedence, operator_precedence) !=
  876. OperatorPriority::RightFirst) {
  877. // The precedence rules don't permit this operator in this context. Try
  878. // again in the enclosing expression context.
  879. return lhs;
  880. }
  881. if (PrecedenceGroup::GetPriority(lhs_precedence, operator_precedence) !=
  882. OperatorPriority::LeftFirst) {
  883. // Either the LHS operator and this operator are ambiguous, or the
  884. // LHS operaor is a unary operator that can't be nested within
  885. // this operator. Either way, parentheses are required.
  886. emitter_.EmitError<OperatorRequiresParentheses>(*position_);
  887. lhs = llvm::None;
  888. } else {
  889. DiagnoseOperatorFixity(is_binary ? OperatorFixity::Infix
  890. : OperatorFixity::Postfix);
  891. }
  892. auto operator_token = Consume(NextTokenKind());
  893. if (is_binary) {
  894. auto rhs = ParseOperatorExpression(operator_precedence);
  895. lhs = AddNode(ParseNodeKind::InfixOperator(), operator_token, start,
  896. /*has_error=*/!lhs || !rhs);
  897. } else {
  898. lhs = AddNode(ParseNodeKind::PostfixOperator(), operator_token, start,
  899. /*has_error=*/!lhs);
  900. }
  901. lhs_precedence = operator_precedence;
  902. }
  903. return lhs;
  904. }
  905. auto ParseTree::Parser::ParseExpression() -> llvm::Optional<Node> {
  906. return ParseOperatorExpression(PrecedenceGroup::ForTopLevelExpression());
  907. }
  908. auto ParseTree::Parser::ParseType() -> llvm::Optional<Node> {
  909. return ParseOperatorExpression(PrecedenceGroup::ForType());
  910. }
  911. auto ParseTree::Parser::ParseExpressionStatement() -> llvm::Optional<Node> {
  912. TokenizedBuffer::Token start_token = *position_;
  913. auto start = GetSubtreeStartPosition();
  914. bool has_errors = !ParseExpression();
  915. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  916. return AddNode(ParseNodeKind::ExpressionStatement(), *semi, start,
  917. has_errors);
  918. }
  919. if (!has_errors) {
  920. emitter_.EmitError<ExpectedSemiAfterExpression>(*position_);
  921. }
  922. if (auto recovery_node =
  923. SkipPastLikelyEnd(start_token, [&](TokenizedBuffer::Token semi) {
  924. return AddNode(ParseNodeKind::ExpressionStatement(), semi, start,
  925. true);
  926. })) {
  927. return recovery_node;
  928. }
  929. // Found junk not even followed by a `;`.
  930. return llvm::None;
  931. }
  932. auto ParseTree::Parser::ParseParenCondition(TokenKind introducer)
  933. -> llvm::Optional<Node> {
  934. // `(` expression `)`
  935. auto start = GetSubtreeStartPosition();
  936. auto open_paren = ConsumeIf(TokenKind::OpenParen());
  937. if (!open_paren) {
  938. emitter_.EmitError<ExpectedParenAfter>(*position_,
  939. {.introducer = introducer});
  940. }
  941. auto expr = ParseExpression();
  942. if (!open_paren) {
  943. // Don't expect a matching closing paren if there wasn't an opening paren.
  944. return llvm::None;
  945. }
  946. auto close_paren =
  947. ParseCloseParen(*open_paren, ParseNodeKind::ConditionEnd());
  948. return AddNode(ParseNodeKind::Condition(), *open_paren, start,
  949. /*has_error=*/!expr || !close_paren);
  950. }
  951. auto ParseTree::Parser::ParseIfStatement() -> llvm::Optional<Node> {
  952. auto start = GetSubtreeStartPosition();
  953. auto if_token = Consume(TokenKind::IfKeyword());
  954. auto cond = ParseParenCondition(TokenKind::IfKeyword());
  955. auto then_case = ParseCodeBlock();
  956. bool else_has_errors = false;
  957. if (ConsumeAndAddLeafNodeIf(TokenKind::ElseKeyword(),
  958. ParseNodeKind::IfStatementElse())) {
  959. // 'else if' is permitted as a special case.
  960. if (NextTokenIs(TokenKind::IfKeyword())) {
  961. else_has_errors = !ParseIfStatement();
  962. } else {
  963. else_has_errors = !ParseCodeBlock();
  964. }
  965. }
  966. return AddNode(ParseNodeKind::IfStatement(), if_token, start,
  967. /*has_error=*/!cond || !then_case || else_has_errors);
  968. }
  969. auto ParseTree::Parser::ParseWhileStatement() -> llvm::Optional<Node> {
  970. auto start = GetSubtreeStartPosition();
  971. auto while_token = Consume(TokenKind::WhileKeyword());
  972. auto cond = ParseParenCondition(TokenKind::WhileKeyword());
  973. auto body = ParseCodeBlock();
  974. return AddNode(ParseNodeKind::WhileStatement(), while_token, start,
  975. /*has_error=*/!cond || !body);
  976. }
  977. auto ParseTree::Parser::ParseKeywordStatement(ParseNodeKind kind,
  978. KeywordStatementArgument argument)
  979. -> llvm::Optional<Node> {
  980. auto keyword_kind = NextTokenKind();
  981. assert(keyword_kind.IsKeyword());
  982. auto start = GetSubtreeStartPosition();
  983. auto keyword = Consume(keyword_kind);
  984. bool arg_error = false;
  985. if ((argument == KeywordStatementArgument::Optional &&
  986. NextTokenKind() != TokenKind::Semi()) ||
  987. argument == KeywordStatementArgument::Mandatory) {
  988. arg_error = !ParseExpression();
  989. }
  990. auto semi =
  991. ConsumeAndAddLeafNodeIf(TokenKind::Semi(), ParseNodeKind::StatementEnd());
  992. if (!semi) {
  993. emitter_.EmitError<ExpectedSemiAfter>(*position_,
  994. {.preceding = keyword_kind});
  995. // FIXME: Try to skip to a semicolon to recover.
  996. }
  997. return AddNode(kind, keyword, start, /*has_error=*/!semi || arg_error);
  998. }
  999. auto ParseTree::Parser::ParseStatement() -> llvm::Optional<Node> {
  1000. switch (NextTokenKind()) {
  1001. case TokenKind::VarKeyword():
  1002. return ParseVariableDeclaration();
  1003. case TokenKind::IfKeyword():
  1004. return ParseIfStatement();
  1005. case TokenKind::WhileKeyword():
  1006. return ParseWhileStatement();
  1007. case TokenKind::ContinueKeyword():
  1008. return ParseKeywordStatement(ParseNodeKind::ContinueStatement(),
  1009. KeywordStatementArgument::None);
  1010. case TokenKind::BreakKeyword():
  1011. return ParseKeywordStatement(ParseNodeKind::BreakStatement(),
  1012. KeywordStatementArgument::None);
  1013. case TokenKind::ReturnKeyword():
  1014. return ParseKeywordStatement(ParseNodeKind::ReturnStatement(),
  1015. KeywordStatementArgument::Optional);
  1016. default:
  1017. // A statement with no introducer token can only be an expression
  1018. // statement.
  1019. return ParseExpressionStatement();
  1020. }
  1021. }
  1022. } // namespace Carbon