parser_context.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/parser/parser_context.h"
  5. #include <optional>
  6. #include "common/check.h"
  7. #include "llvm/ADT/STLExtras.h"
  8. #include "toolchain/lexer/token_kind.h"
  9. #include "toolchain/lexer/tokenized_buffer.h"
  10. #include "toolchain/parser/parse_node_kind.h"
  11. #include "toolchain/parser/parse_tree.h"
  12. namespace Carbon {
  13. // A relative location for characters in errors.
  14. enum class RelativeLocation : int8_t {
  15. Around,
  16. After,
  17. Before,
  18. };
  19. // Adapts RelativeLocation for use with formatv.
  20. static auto operator<<(llvm::raw_ostream& out, RelativeLocation loc)
  21. -> llvm::raw_ostream& {
  22. switch (loc) {
  23. case RelativeLocation::Around:
  24. out << "around";
  25. break;
  26. case RelativeLocation::After:
  27. out << "after";
  28. break;
  29. case RelativeLocation::Before:
  30. out << "before";
  31. break;
  32. }
  33. return out;
  34. }
  35. ParserContext::ParserContext(ParseTree& tree, TokenizedBuffer& tokens,
  36. TokenDiagnosticEmitter& emitter,
  37. llvm::raw_ostream* vlog_stream)
  38. : tree_(&tree),
  39. tokens_(&tokens),
  40. emitter_(&emitter),
  41. vlog_stream_(vlog_stream),
  42. position_(tokens_->tokens().begin()),
  43. end_(tokens_->tokens().end()) {
  44. CARBON_CHECK(position_ != end_) << "Empty TokenizedBuffer";
  45. --end_;
  46. CARBON_CHECK(tokens_->GetKind(*end_) == TokenKind::EndOfFile)
  47. << "TokenizedBuffer should end with EndOfFile, ended with "
  48. << tokens_->GetKind(*end_);
  49. }
  50. auto ParserContext::AddLeafNode(ParseNodeKind kind,
  51. TokenizedBuffer::Token token, bool has_error)
  52. -> void {
  53. tree_->node_impls_.push_back(
  54. ParseTree::NodeImpl(kind, has_error, token, /*subtree_size=*/1));
  55. if (has_error) {
  56. tree_->has_errors_ = true;
  57. }
  58. }
  59. auto ParserContext::AddNode(ParseNodeKind kind, TokenizedBuffer::Token token,
  60. int subtree_start, bool has_error) -> void {
  61. int subtree_size = tree_->size() - subtree_start + 1;
  62. tree_->node_impls_.push_back(
  63. ParseTree::NodeImpl(kind, has_error, token, subtree_size));
  64. if (has_error) {
  65. tree_->has_errors_ = true;
  66. }
  67. }
  68. auto ParserContext::ConsumeAndAddOpenParen(TokenizedBuffer::Token default_token,
  69. ParseNodeKind start_kind)
  70. -> std::optional<TokenizedBuffer::Token> {
  71. if (auto open_paren = ConsumeIf(TokenKind::OpenParen)) {
  72. AddLeafNode(start_kind, *open_paren, /*has_error=*/false);
  73. return open_paren;
  74. } else {
  75. CARBON_DIAGNOSTIC(ExpectedParenAfter, Error, "Expected `(` after `{0}`.",
  76. TokenKind);
  77. emitter_->Emit(*position_, ExpectedParenAfter,
  78. tokens().GetKind(default_token));
  79. AddLeafNode(start_kind, default_token, /*has_error=*/true);
  80. return std::nullopt;
  81. }
  82. }
  83. auto ParserContext::ConsumeAndAddCloseSymbol(
  84. TokenizedBuffer::Token expected_open, StateStackEntry state,
  85. ParseNodeKind close_kind) -> void {
  86. TokenKind open_token_kind = tokens().GetKind(expected_open);
  87. if (!open_token_kind.is_opening_symbol()) {
  88. AddNode(close_kind, state.token, state.subtree_start, /*has_error=*/true);
  89. } else if (auto close_token = ConsumeIf(open_token_kind.closing_symbol())) {
  90. AddNode(close_kind, *close_token, state.subtree_start, state.has_error);
  91. } else {
  92. // TODO: Include the location of the matching opening delimiter in the
  93. // diagnostic.
  94. CARBON_DIAGNOSTIC(ExpectedCloseSymbol, Error,
  95. "Unexpected tokens before `{0}`.", llvm::StringRef);
  96. emitter_->Emit(*position_, ExpectedCloseSymbol,
  97. open_token_kind.closing_symbol().fixed_spelling());
  98. SkipTo(tokens().GetMatchedClosingToken(expected_open));
  99. AddNode(close_kind, Consume(), state.subtree_start, /*has_error=*/true);
  100. }
  101. }
  102. auto ParserContext::ConsumeAndAddLeafNodeIf(TokenKind token_kind,
  103. ParseNodeKind node_kind) -> bool {
  104. auto token = ConsumeIf(token_kind);
  105. if (!token) {
  106. return false;
  107. }
  108. AddLeafNode(node_kind, *token);
  109. return true;
  110. }
  111. auto ParserContext::ConsumeChecked(TokenKind kind) -> TokenizedBuffer::Token {
  112. CARBON_CHECK(PositionIs(kind))
  113. << "Required " << kind << ", found " << PositionKind();
  114. return Consume();
  115. }
  116. auto ParserContext::ConsumeIf(TokenKind kind)
  117. -> std::optional<TokenizedBuffer::Token> {
  118. if (!PositionIs(kind)) {
  119. return std::nullopt;
  120. }
  121. return Consume();
  122. }
  123. auto ParserContext::ConsumeIfPatternKeyword(TokenKind keyword_token,
  124. ParserState keyword_state,
  125. int subtree_start) -> void {
  126. if (auto token = ConsumeIf(keyword_token)) {
  127. PushState(ParserContext::StateStackEntry(
  128. keyword_state, PrecedenceGroup::ForTopLevelExpression(),
  129. PrecedenceGroup::ForTopLevelExpression(), *token, subtree_start));
  130. }
  131. }
  132. auto ParserContext::FindNextOf(std::initializer_list<TokenKind> desired_kinds)
  133. -> std::optional<TokenizedBuffer::Token> {
  134. auto new_position = position_;
  135. while (true) {
  136. TokenizedBuffer::Token token = *new_position;
  137. TokenKind kind = tokens().GetKind(token);
  138. if (kind.IsOneOf(desired_kinds)) {
  139. return token;
  140. }
  141. // Step to the next token at the current bracketing level.
  142. if (kind.is_closing_symbol() || kind == TokenKind::EndOfFile) {
  143. // There are no more tokens at this level.
  144. return std::nullopt;
  145. } else if (kind.is_opening_symbol()) {
  146. new_position = TokenizedBuffer::TokenIterator(
  147. tokens().GetMatchedClosingToken(token));
  148. // Advance past the closing token.
  149. ++new_position;
  150. } else {
  151. ++new_position;
  152. }
  153. }
  154. }
  155. auto ParserContext::SkipMatchingGroup() -> bool {
  156. if (!PositionKind().is_opening_symbol()) {
  157. return false;
  158. }
  159. SkipTo(tokens().GetMatchedClosingToken(*position_));
  160. ++position_;
  161. return true;
  162. }
  163. auto ParserContext::SkipPastLikelyEnd(TokenizedBuffer::Token skip_root)
  164. -> std::optional<TokenizedBuffer::Token> {
  165. if (position_ == end_) {
  166. return std::nullopt;
  167. }
  168. TokenizedBuffer::Line root_line = tokens().GetLine(skip_root);
  169. int root_line_indent = tokens().GetIndentColumnNumber(root_line);
  170. // We will keep scanning through tokens on the same line as the root or
  171. // lines with greater indentation than root's line.
  172. auto is_same_line_or_indent_greater_than_root =
  173. [&](TokenizedBuffer::Token t) {
  174. TokenizedBuffer::Line l = tokens().GetLine(t);
  175. if (l == root_line) {
  176. return true;
  177. }
  178. return tokens().GetIndentColumnNumber(l) > root_line_indent;
  179. };
  180. do {
  181. if (PositionIs(TokenKind::CloseCurlyBrace)) {
  182. // Immediately bail out if we hit an unmatched close curly, this will
  183. // pop us up a level of the syntax grouping.
  184. return std::nullopt;
  185. }
  186. // We assume that a semicolon is always intended to be the end of the
  187. // current construct.
  188. if (auto semi = ConsumeIf(TokenKind::Semi)) {
  189. return semi;
  190. }
  191. // Skip over any matching group of tokens().
  192. if (SkipMatchingGroup()) {
  193. continue;
  194. }
  195. // Otherwise just step forward one token.
  196. ++position_;
  197. } while (position_ != end_ &&
  198. is_same_line_or_indent_greater_than_root(*position_));
  199. return std::nullopt;
  200. }
  201. auto ParserContext::SkipTo(TokenizedBuffer::Token t) -> void {
  202. CARBON_CHECK(t >= *position_) << "Tried to skip backwards from " << position_
  203. << " to " << TokenizedBuffer::TokenIterator(t);
  204. position_ = TokenizedBuffer::TokenIterator(t);
  205. CARBON_CHECK(position_ != end_) << "Skipped past EOF.";
  206. }
  207. // Determines whether the given token is considered to be the start of an
  208. // operand according to the rules for infix operator parsing.
  209. static auto IsAssumedStartOfOperand(TokenKind kind) -> bool {
  210. return kind.IsOneOf({TokenKind::OpenParen, TokenKind::Identifier,
  211. TokenKind::IntegerLiteral, TokenKind::RealLiteral,
  212. TokenKind::StringLiteral});
  213. }
  214. // Determines whether the given token is considered to be the end of an
  215. // operand according to the rules for infix operator parsing.
  216. static auto IsAssumedEndOfOperand(TokenKind kind) -> bool {
  217. return kind.IsOneOf({TokenKind::CloseParen, TokenKind::CloseCurlyBrace,
  218. TokenKind::CloseSquareBracket, TokenKind::Identifier,
  219. TokenKind::IntegerLiteral, TokenKind::RealLiteral,
  220. TokenKind::StringLiteral});
  221. }
  222. // Determines whether the given token could possibly be the start of an
  223. // operand. This is conservatively correct, and will never incorrectly return
  224. // `false`, but can incorrectly return `true`.
  225. static auto IsPossibleStartOfOperand(TokenKind kind) -> bool {
  226. return !kind.IsOneOf({TokenKind::CloseParen, TokenKind::CloseCurlyBrace,
  227. TokenKind::CloseSquareBracket, TokenKind::Comma,
  228. TokenKind::Semi, TokenKind::Colon});
  229. }
  230. auto ParserContext::IsLexicallyValidInfixOperator() -> bool {
  231. CARBON_CHECK(position_ != end_) << "Expected an operator token.";
  232. bool leading_space = tokens().HasLeadingWhitespace(*position_);
  233. bool trailing_space = tokens().HasTrailingWhitespace(*position_);
  234. // If there's whitespace on both sides, it's an infix operator.
  235. if (leading_space && trailing_space) {
  236. return true;
  237. }
  238. // If there's whitespace on exactly one side, it's not an infix operator.
  239. if (leading_space || trailing_space) {
  240. return false;
  241. }
  242. // Otherwise, for an infix operator, the preceding token must be any close
  243. // bracket, identifier, or literal and the next token must be an open paren,
  244. // identifier, or literal.
  245. if (position_ == tokens().tokens().begin() ||
  246. !IsAssumedEndOfOperand(tokens().GetKind(*(position_ - 1))) ||
  247. !IsAssumedStartOfOperand(tokens().GetKind(*(position_ + 1)))) {
  248. return false;
  249. }
  250. return true;
  251. }
  252. auto ParserContext::IsTrailingOperatorInfix() -> bool {
  253. if (position_ == end_) {
  254. return false;
  255. }
  256. // An operator that follows the infix operator rules is parsed as
  257. // infix, unless the next token means that it can't possibly be.
  258. if (IsLexicallyValidInfixOperator() &&
  259. IsPossibleStartOfOperand(tokens().GetKind(*(position_ + 1)))) {
  260. return true;
  261. }
  262. // A trailing operator with leading whitespace that's not valid as infix is
  263. // not valid at all. If the next token looks like the start of an operand,
  264. // then parse as infix, otherwise as postfix. Either way we'll produce a
  265. // diagnostic later on.
  266. if (tokens().HasLeadingWhitespace(*position_) &&
  267. IsAssumedStartOfOperand(tokens().GetKind(*(position_ + 1)))) {
  268. return true;
  269. }
  270. return false;
  271. }
  272. auto ParserContext::DiagnoseOperatorFixity(OperatorFixity fixity) -> void {
  273. if (!PositionKind().is_symbol()) {
  274. // Whitespace-based fixity rules only apply to symbolic operators.
  275. return;
  276. }
  277. if (fixity == OperatorFixity::Infix) {
  278. // Infix operators must satisfy the infix operator rules.
  279. if (!IsLexicallyValidInfixOperator()) {
  280. CARBON_DIAGNOSTIC(BinaryOperatorRequiresWhitespace, Error,
  281. "Whitespace missing {0} binary operator.",
  282. RelativeLocation);
  283. emitter_->Emit(*position_, BinaryOperatorRequiresWhitespace,
  284. tokens().HasLeadingWhitespace(*position_)
  285. ? RelativeLocation::After
  286. : (tokens().HasTrailingWhitespace(*position_)
  287. ? RelativeLocation::Before
  288. : RelativeLocation::Around));
  289. }
  290. } else {
  291. bool prefix = fixity == OperatorFixity::Prefix;
  292. // Whitespace is not permitted between a symbolic pre/postfix operator and
  293. // its operand.
  294. if ((prefix ? tokens().HasTrailingWhitespace(*position_)
  295. : tokens().HasLeadingWhitespace(*position_))) {
  296. CARBON_DIAGNOSTIC(UnaryOperatorHasWhitespace, Error,
  297. "Whitespace is not allowed {0} this unary operator.",
  298. RelativeLocation);
  299. emitter_->Emit(
  300. *position_, UnaryOperatorHasWhitespace,
  301. prefix ? RelativeLocation::After : RelativeLocation::Before);
  302. } else if (IsLexicallyValidInfixOperator()) {
  303. // Pre/postfix operators must not satisfy the infix operator rules.
  304. CARBON_DIAGNOSTIC(UnaryOperatorRequiresWhitespace, Error,
  305. "Whitespace is required {0} this unary operator.",
  306. RelativeLocation);
  307. emitter_->Emit(
  308. *position_, UnaryOperatorRequiresWhitespace,
  309. prefix ? RelativeLocation::Before : RelativeLocation::After);
  310. }
  311. }
  312. }
  313. auto ParserContext::ConsumeListToken(ParseNodeKind comma_kind,
  314. TokenKind close_kind,
  315. bool already_has_error) -> ListTokenKind {
  316. if (!PositionIs(TokenKind::Comma) && !PositionIs(close_kind)) {
  317. // Don't error a second time on the same element.
  318. if (!already_has_error) {
  319. CARBON_DIAGNOSTIC(UnexpectedTokenAfterListElement, Error,
  320. "Expected `,` or `{0}`.", TokenKind);
  321. emitter_->Emit(*position_, UnexpectedTokenAfterListElement, close_kind);
  322. ReturnErrorOnState();
  323. }
  324. // Recover from the invalid token.
  325. auto end_of_element = FindNextOf({TokenKind::Comma, close_kind});
  326. // The lexer guarantees that parentheses are balanced.
  327. CARBON_CHECK(end_of_element)
  328. << "missing matching `" << close_kind.opening_symbol() << "` for `"
  329. << close_kind << "`";
  330. SkipTo(*end_of_element);
  331. }
  332. if (PositionIs(close_kind)) {
  333. return ListTokenKind::Close;
  334. } else {
  335. AddLeafNode(comma_kind, Consume());
  336. return PositionIs(close_kind) ? ListTokenKind::CommaClose
  337. : ListTokenKind::Comma;
  338. }
  339. }
  340. auto ParserContext::GetDeclarationContext() -> DeclarationContext {
  341. // i == 0 is the file-level DeclarationScopeLoop. Additionally, i == 1 can be
  342. // skipped because it will never be a DeclarationScopeLoop.
  343. for (int i = state_stack_.size() - 1; i > 1; --i) {
  344. // The declaration context is always the state _above_ a
  345. // DeclarationScopeLoop.
  346. if (state_stack_[i].state == ParserState::DeclarationScopeLoop) {
  347. switch (state_stack_[i - 1].state) {
  348. case ParserState::TypeDefinitionFinishAsClass:
  349. return DeclarationContext::Class;
  350. case ParserState::TypeDefinitionFinishAsInterface:
  351. return DeclarationContext::Interface;
  352. case ParserState::TypeDefinitionFinishAsNamedConstraint:
  353. return DeclarationContext::NamedConstraint;
  354. default:
  355. llvm_unreachable("Missing handling for a declaration scope");
  356. }
  357. }
  358. }
  359. CARBON_CHECK(!state_stack_.empty() &&
  360. state_stack_[0].state == ParserState::DeclarationScopeLoop);
  361. return DeclarationContext::File;
  362. }
  363. auto ParserContext::RecoverFromDeclarationError(StateStackEntry state,
  364. ParseNodeKind parse_node_kind,
  365. bool skip_past_likely_end)
  366. -> void {
  367. auto token = state.token;
  368. if (skip_past_likely_end) {
  369. if (auto semi = SkipPastLikelyEnd(token)) {
  370. token = *semi;
  371. }
  372. }
  373. AddNode(parse_node_kind, token, state.subtree_start,
  374. /*has_error=*/true);
  375. }
  376. auto ParserContext::EmitExpectedDeclarationSemi(TokenKind expected_kind)
  377. -> void {
  378. CARBON_DIAGNOSTIC(ExpectedDeclarationSemi, Error,
  379. "`{0}` declarations must end with a `;`.", TokenKind);
  380. emitter().Emit(*position(), ExpectedDeclarationSemi, expected_kind);
  381. }
  382. auto ParserContext::EmitExpectedDeclarationSemiOrDefinition(
  383. TokenKind expected_kind) -> void {
  384. CARBON_DIAGNOSTIC(ExpectedDeclarationSemiOrDefinition, Error,
  385. "`{0}` declarations must either end with a `;` or "
  386. "have a `{{ ... }` block for a definition.",
  387. TokenKind);
  388. emitter().Emit(*position(), ExpectedDeclarationSemiOrDefinition,
  389. expected_kind);
  390. }
  391. auto ParserContext::PrintForStackDump(llvm::raw_ostream& output) const -> void {
  392. output << "Parser stack:\n";
  393. for (auto [i, entry] : llvm::enumerate(state_stack_)) {
  394. output << "\t" << i << ".\t" << entry.state;
  395. PrintTokenForStackDump(output, entry.token);
  396. }
  397. output << "\tcursor\tposition_";
  398. PrintTokenForStackDump(output, *position_);
  399. }
  400. auto ParserContext::PrintTokenForStackDump(llvm::raw_ostream& output,
  401. TokenizedBuffer::Token token) const
  402. -> void {
  403. output << " @ " << tokens_->GetLineNumber(tokens_->GetLine(token)) << ":"
  404. << tokens_->GetColumnNumber(token) << ": token " << token << " : "
  405. << tokens_->GetKind(token) << "\n";
  406. }
  407. } // namespace Carbon