Procházet zdrojové kódy

[toolchain][parser] Parse for statement. (#2180)

Summary:

Add support for parsing `for` loops.

Co-authored-by: ergawy <kareem.ergawy@guardsquare.com>
Co-authored-by: Richard Smith <richard@metafoo.co.uk>
Kareem Ergawy před 3 roky
rodič
revize
8d5d48a4a7

+ 4 - 0
toolchain/diagnostics/diagnostic_registry.def

@@ -58,6 +58,7 @@ CARBON_DIAGNOSTIC_KIND(ExpectedParenAfter)
 CARBON_DIAGNOSTIC_KIND(ExpectedSemiAfter)
 CARBON_DIAGNOSTIC_KIND(ExpectedSemiAfterExpression)
 CARBON_DIAGNOSTIC_KIND(ExpectedStructLiteralField)
+CARBON_DIAGNOSTIC_KIND(ExpectedVariableDeclaration)
 CARBON_DIAGNOSTIC_KIND(ExpectedVariableName)
 CARBON_DIAGNOSTIC_KIND(OperatorRequiresParentheses)
 CARBON_DIAGNOSTIC_KIND(StackLimitExceeded)
@@ -74,6 +75,9 @@ CARBON_DIAGNOSTIC_KIND(MissingLibraryKeyword)
 CARBON_DIAGNOSTIC_KIND(ExpectedApiOrImpl)
 CARBON_DIAGNOSTIC_KIND(ExpectedSemiToEndPackageDirective)
 
+// For-specific diagnostics
+CARBON_DIAGNOSTIC_KIND(ExpectedIn)
+
 // ============================================================================
 // Other diagnostics
 // ============================================================================

+ 1 - 0
toolchain/lexer/token_registry.def

@@ -133,6 +133,7 @@ CARBON_KEYWORD_TOKEN(External,   "external")
 CARBON_KEYWORD_TOKEN(Final,      "final")
 CARBON_KEYWORD_TOKEN(Fn,         "fn")
 CARBON_KEYWORD_TOKEN(For,        "for")
+CARBON_KEYWORD_TOKEN(In,         "in")
 CARBON_KEYWORD_TOKEN(Friend,     "friend")
 CARBON_KEYWORD_TOKEN(If,         "if")
 CARBON_KEYWORD_TOKEN(Impl,       "impl")

+ 4 - 0
toolchain/parser/parse_node_kind.def

@@ -47,6 +47,10 @@ CARBON_PARSE_NODE_KIND(ContinueStatement)
 CARBON_PARSE_NODE_KIND(BreakStatement)
 CARBON_PARSE_NODE_KIND(ReturnStatement)
 CARBON_PARSE_NODE_KIND(StatementEnd)
+CARBON_PARSE_NODE_KIND(ForStatement)
+CARBON_PARSE_NODE_KIND(ForHeader)
+CARBON_PARSE_NODE_KIND(ForHeaderEnd)
+CARBON_PARSE_NODE_KIND(ForIn)
 
 // Expressions.
 CARBON_PARSE_NODE_KIND(Literal)

+ 184 - 0
toolchain/parser/parse_tree_test.cpp

@@ -1356,5 +1356,189 @@ TEST_F(ParseTreeTest, PackageErrors) {
   }
 }
 
+TEST_F(ParseTreeTest, ForSimple) {  // Well-formed `for (var ... in ...)` loop.
+  TokenizedBuffer tokens = GetTokenizedBuffer(R"(
+    fn foo() {
+      for (var x : i32 in y) {
+        Print(x);
+      }
+    }
+  )");
+
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
+
+  EXPECT_THAT(
+      tree,
+      MatchParseTreeNodes(
+          {MatchFunctionDeclaration(
+               MatchDeclaredName("foo"), MatchParameters(),
+               MatchCodeBlock(
+                   MatchForStatement(
+                       MatchForHeader(  // Declaration, `in`, container, `)`.
+                           MatchVariableDeclaration(MatchPatternBinding(
+                               MatchDeclaredName("x"), MatchLiteral("i32"))),
+                           MatchForIn(), MatchNameReference("y"),
+                           MatchForHeaderEnd()),
+                       MatchCodeBlock(
+                           MatchExpressionStatement(
+                               MatchCallExpression(MatchNameReference("Print"),
+                                                   MatchNameReference("x"),
+                                                   MatchCallExpressionEnd())),
+                           MatchCodeBlockEnd())),
+                   MatchCodeBlockEnd())),
+           MatchFileEnd()}));
+}
+
+TEST_F(ParseTreeTest, ForNested) {  // A `for` loop inside another's body.
+  TokenizedBuffer tokens = GetTokenizedBuffer(R"(
+    fn foo() {
+      for (var y : i32 in x) {
+          for (var z : i32 in y) {
+              Print(z);
+          }
+      }
+    }
+  )");
+
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
+
+  EXPECT_THAT(
+      tree,
+      MatchParseTreeNodes(
+          {MatchFunctionDeclaration(
+               MatchDeclaredName("foo"), MatchParameters(),
+               MatchCodeBlock(
+                   MatchForStatement(
+                       MatchForHeader(
+                           MatchVariableDeclaration(MatchPatternBinding(
+                               MatchDeclaredName("y"), MatchLiteral("i32"))),
+                           MatchForIn(), MatchNameReference("x"),
+                           MatchForHeaderEnd()),
+                       MatchCodeBlock(
+                           MatchForStatement(  // The inner loop.
+                               MatchForHeader(
+                                   MatchVariableDeclaration(MatchPatternBinding(
+                                       MatchDeclaredName("z"),
+                                       MatchLiteral("i32"))),
+                                   MatchForIn(), MatchNameReference("y"),
+                                   MatchForHeaderEnd()),
+                               MatchCodeBlock(
+                                   MatchExpressionStatement(MatchCallExpression(
+                                       MatchNameReference("Print"),
+                                       MatchNameReference("z"),
+                                       MatchCallExpressionEnd())),
+                                   MatchCodeBlockEnd())),
+                           MatchCodeBlockEnd())),
+                   MatchCodeBlockEnd())),
+           MatchFileEnd()}));
+}
+
+TEST_F(ParseTreeTest, ForIterVarError) {  // Header lacks the `var` keyword.
+  TokenizedBuffer tokens = GetTokenizedBuffer(R"(
+    fn foo() {
+      for (x : i32 in y) {
+        Print(x);
+      }
+    }
+  )");
+
+  Testing::MockDiagnosticConsumer consumer;
+  EXPECT_CALL(
+      consumer,
+      HandleDiagnostic(IsDiagnosticMessage("Expected `var` declaration.")));
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
+
+  // The header is flagged as an error and holds no declaration node, but the
+  // `in`, the container expression and the loop body are still matched.
+  EXPECT_THAT(tree,
+              MatchParseTreeNodes(
+                  {MatchFunctionDeclaration(
+                       MatchDeclaredName("foo"), MatchParameters(),
+                       MatchCodeBlock(
+                           MatchForStatement(
+                               MatchForHeader(HasError, MatchForIn(),
+                                              MatchNameReference("y"),
+                                              MatchForHeaderEnd()),
+                               MatchCodeBlock(
+                                   MatchExpressionStatement(MatchCallExpression(
+                                       MatchNameReference("Print"),
+                                       MatchNameReference("x"),
+                                       MatchCallExpressionEnd())),
+                                   MatchCodeBlockEnd())),
+                           MatchCodeBlockEnd())),
+                   MatchFileEnd()}));
+}
+
+TEST_F(ParseTreeTest, ForColonInsteafOfIn) {  // `:` typed where `in` belongs.
+  TokenizedBuffer tokens = GetTokenizedBuffer(R"(
+    fn foo() {
+      for (var x : i32 : y) {
+        Print(x);
+      }
+    }
+  )");
+
+  Testing::MockDiagnosticConsumer consumer;
+  EXPECT_CALL(
+      consumer,
+      HandleDiagnostic(IsDiagnosticMessage("`:` should be replaced by `in`.")));
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
+
+  EXPECT_THAT(
+      tree,
+      MatchParseTreeNodes(
+          {MatchFunctionDeclaration(
+               MatchDeclaredName("foo"), MatchParameters(),
+               MatchCodeBlock(
+                   MatchForStatement(
+                       MatchForHeader(
+                           HasError,  // No `ForIn` node is expected for `:`.
+                           MatchVariableDeclaration(MatchPatternBinding(
+                               MatchDeclaredName("x"), MatchLiteral("i32"))),
+                           MatchNameReference("y"), MatchForHeaderEnd()),
+                       MatchCodeBlock(
+                           MatchExpressionStatement(
+                               MatchCallExpression(MatchNameReference("Print"),
+                                                   MatchNameReference("x"),
+                                                   MatchCallExpressionEnd())),
+                           MatchCodeBlockEnd())),
+                   MatchCodeBlockEnd())),
+           MatchFileEnd()}));
+}
+
+TEST_F(ParseTreeTest, ForMissingIn) {  // No separator after the declaration.
+  TokenizedBuffer tokens = GetTokenizedBuffer(R"(
+    fn foo() {
+      for (var x : i32 y) {
+        Print(x);
+      }
+    }
+  )");
+
+  Testing::MockDiagnosticConsumer consumer;
+  EXPECT_CALL(consumer, HandleDiagnostic(IsDiagnosticMessage(
+                            "Expected `in` after loop `var` declaration.")));
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
+
+  EXPECT_THAT(
+      tree,
+      MatchParseTreeNodes(
+          {MatchFunctionDeclaration(
+               MatchDeclaredName("foo"), MatchParameters(),
+               MatchCodeBlock(
+                   MatchForStatement(
+                       MatchForHeader(
+                           HasError,  // Neither `ForIn` nor a container node.
+                           MatchVariableDeclaration(MatchPatternBinding(
+                               MatchDeclaredName("x"), MatchLiteral("i32"))),
+                           MatchForHeaderEnd()),
+                       MatchCodeBlock(
+                           MatchExpressionStatement(
+                               MatchCallExpression(MatchNameReference("Print"),
+                                                   MatchNameReference("x"),
+                                                   MatchCallExpressionEnd())),
+                           MatchCodeBlockEnd())),
+                   MatchCodeBlockEnd())),
+           MatchFileEnd()}));
+}
+
 }  // namespace
 }  // namespace Carbon::Testing

+ 87 - 0
toolchain/parser/parser_impl.cpp

@@ -1180,6 +1180,90 @@ auto ParseTree::Parser::ParseWhileStatement() -> llvm::Optional<Node> {
                  /*has_error=*/!cond || !body);
 }
 
+auto ParseTree::Parser::ParseForStatement() -> llvm::Optional<Node> {
+  CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
+  auto for_start = GetSubtreeStartPosition();
+  auto for_token = Consume(TokenKind::For());
+
+  // Parse the parenthesized loop header: `(var <pattern> in <expression>)`.
+  auto header = [this]() -> llvm::Optional<Node> {
+    auto open_paren = ConsumeIf(TokenKind::OpenParen());
+    auto header_start = GetSubtreeStartPosition();
+
+    if (!open_paren) {
+      CARBON_DIAGNOSTIC(ExpectedParenAfter, Error,
+                        "Expected `(` after `{0}`. Recovering from missing `(` "
+                        "not implemented yet!",
+                        TokenKind);
+      emitter_.Emit(*position_, ExpectedParenAfter, TokenKind::For());
+      // TODO: Implement a proper recovery strategy here. Until then, the
+      // header is abandoned without skipping any tokens, which assumes that
+      // all brackets are properly balanced (i.e. each open bracket has a
+      // closing one). This is temporary until the recovery-token strategy is
+      // decided.
+      return llvm::None;
+    }
+
+    bool iter_var_parsed = false;
+
+    if (NextTokenIs(TokenKind::Var())) {
+      auto var_token = Consume(TokenKind::Var());
+      auto var_start = GetSubtreeStartPosition();
+      auto pattern = ParsePattern(PatternKind::Variable);
+      AddNode(ParseNodeKind::VariableDeclaration(), var_token, var_start,
+              !pattern);
+      iter_var_parsed = true;
+    } else {
+      CARBON_DIAGNOSTIC(ExpectedVariableDeclaration, Error,
+                        "Expected `var` declaration.");
+      emitter_.Emit(*position_, ExpectedVariableDeclaration);
+
+      if (auto next_in = FindNextOf({TokenKind::In()}); next_in) {
+        SkipTo(*next_in);
+      }
+    }
+
+    // A separator is either an `in` or a `:`. Although `:` is incorrect, a C++
+    // programmer may type it out of habit, so it is diagnosed specifically and
+    // then consumed like a separator; no `ForIn` node is added for it.
+    bool separator_parsed = false;
+    bool in_parsed = false;
+
+    if (NextTokenIs(TokenKind::In())) {
+      separator_parsed = true;
+      in_parsed = true;
+      AddLeafNode(ParseNodeKind::ForIn(), Consume(TokenKind::In()));
+    } else if (NextTokenIs(TokenKind::Colon())) {
+      separator_parsed = true;
+      CARBON_DIAGNOSTIC(ExpectedIn, Error, "`:` should be replaced by `in`.");
+      emitter_.Emit(*position_, ExpectedIn);
+      Consume(TokenKind::Colon());
+    } else {
+      CARBON_DIAGNOSTIC(ExpectedIn, Error,
+                        "Expected `in` after loop `var` declaration.");
+      emitter_.Emit(*position_, ExpectedIn);
+      SkipTo(tokens_.GetMatchedClosingToken(*open_paren));
+    }
+
+    // Only parse the container expression if a separator (`in` or `:`) was
+    // consumed. Skipping it otherwise avoids piling further diagnostics on top
+    // of the missing-separator error.
+    auto container_expr = separator_parsed ? ParseExpression() : llvm::None;
+
+    auto close_paren =
+        ParseCloseParen(*open_paren, ParseNodeKind::ForHeaderEnd());
+
+    return AddNode(
+        ParseNodeKind::ForHeader(), *open_paren, header_start,
+        !iter_var_parsed || !in_parsed || !container_expr || !close_paren);
+  }();
+
+  auto body = ParseCodeBlock();
+
+  return AddNode(ParseNodeKind::ForStatement(), for_token, for_start,
+                 !header || !body);
+}
+
 auto ParseTree::Parser::ParseKeywordStatement(ParseNodeKind kind,
                                               KeywordStatementArgument argument)
     -> llvm::Optional<Node> {
@@ -1220,6 +1304,9 @@ auto ParseTree::Parser::ParseStatement() -> llvm::Optional<Node> {
     case TokenKind::While():
       return ParseWhileStatement();
 
+    case TokenKind::For():
+      return ParseForStatement();
+
     case TokenKind::Continue():
       return ParseKeywordStatement(ParseNodeKind::ContinueStatement(),
                                    KeywordStatementArgument::None);

+ 3 - 0
toolchain/parser/parser_impl.h

@@ -250,6 +250,9 @@ class ParseTree::Parser {
   // Parses a while-statement.
   auto ParseWhileStatement() -> llvm::Optional<Node>;
 
+  // Parses a for-statement.
+  auto ParseForStatement() -> llvm::Optional<Node>;
+
   enum class KeywordStatementArgument {
     None,
     Optional,