Просмотр исходного кода

Add sentinel Eof token at the end of the tokenized buffer. (#388)

This has two goals:

1) It allows us to simplify and remove special cases from the parser:
   when we expect a particular token next, we can just check for it
   without needing a special case for end-of-file.

2) It gives us a token to use as a position when emitting diagnostics at
   the end of the file.

Centralize all updates of `position` in `Consume` and `SkipTo`, so that a single place can ensure that we never advance past the EOF token.
Richard Smith 5 лет назад
Родитель
Commit
1e6c7e3963

+ 1 - 0
lexer/token_registry.def

@@ -158,5 +158,6 @@ CARBON_TOKEN(IntegerLiteral)
 CARBON_TOKEN(RealLiteral)
 CARBON_TOKEN(StringLiteral)
 CARBON_TOKEN(Error)
+CARBON_TOKEN(EndOfFile)
 
 #undef CARBON_TOKEN

+ 11 - 0
lexer/tokenized_buffer.cpp

@@ -463,6 +463,12 @@ class TokenizedBuffer::Lexer {
     buffer.has_errors = true;
     return token;
   }
+
+  auto AddEndOfFileToken() -> void {
+    buffer.AddToken({.kind = TokenKind::EndOfFile(),
+                     .token_line = current_line,
+                     .column = current_column});
+  }
 };
 
 auto TokenizedBuffer::Lex(SourceBuffer& source, DiagnosticConsumer& consumer)
@@ -491,6 +497,7 @@ auto TokenizedBuffer::Lex(SourceBuffer& source, DiagnosticConsumer& consumer)
   }
 
   lexer.CloseInvalidOpenGroups(TokenKind::Error());
+  lexer.AddEndOfFileToken();
   return buffer;
 }
 
@@ -546,6 +553,10 @@ auto TokenizedBuffer::GetTokenText(Token token) const -> llvm::StringRef {
     return relexed_token->Text();
   }
 
+  if (token_info.kind == TokenKind::EndOfFile()) {
+    return llvm::StringRef();
+  }
+
   assert(token_info.kind == TokenKind::Identifier() &&
          "Only identifiers have stored text!");
   return GetIdentifierText(token_info.id);

+ 246 - 166
lexer/tokenized_buffer_test.cpp

@@ -51,103 +51,109 @@ struct LexerTest : ::testing::Test {
 TEST_F(LexerTest, HandlesEmptyBuffer) {
   auto buffer = Lex("");
   EXPECT_FALSE(buffer.HasErrors());
-  EXPECT_EQ(buffer.Tokens().begin(), buffer.Tokens().end());
+  EXPECT_THAT(
+      buffer,
+      HasTokens(llvm::ArrayRef<ExpectedToken>{{TokenKind::EndOfFile()}}));
 }
 
 TEST_F(LexerTest, TracksLinesAndColumns) {
   auto buffer = Lex("\n  ;;\n   ;;;\n   x\"foo\" \"\"\"baz\n  a\n \"\"\" y");
   EXPECT_FALSE(buffer.HasErrors());
-  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
-                          {.kind = TokenKind::Semi(),
-                           .line = 2,
-                           .column = 3,
-                           .indent_column = 3},
-                          {.kind = TokenKind::Semi(),
-                           .line = 2,
-                           .column = 4,
-                           .indent_column = 3},
-                          {.kind = TokenKind::Semi(),
-                           .line = 3,
-                           .column = 4,
-                           .indent_column = 4},
-                          {.kind = TokenKind::Semi(),
-                           .line = 3,
-                           .column = 5,
-                           .indent_column = 4},
-                          {.kind = TokenKind::Semi(),
-                           .line = 3,
-                           .column = 6,
-                           .indent_column = 4},
-                          {.kind = TokenKind::Identifier(),
-                           .line = 4,
-                           .column = 4,
-                           .indent_column = 4,
-                           .text = "x"},
-                          {.kind = TokenKind::StringLiteral(),
-                           .line = 4,
-                           .column = 5,
-                           .indent_column = 4},
-                          {.kind = TokenKind::StringLiteral(),
-                           .line = 4,
-                           .column = 11,
-                           .indent_column = 4},
-                          {.kind = TokenKind::Identifier(),
-                           .line = 6,
-                           .column = 6,
-                           .indent_column = 11,
-                           .text = "y"},
-                      }));
+  EXPECT_THAT(buffer,
+              HasTokens(llvm::ArrayRef<ExpectedToken>{
+                  {.kind = TokenKind::Semi(),
+                   .line = 2,
+                   .column = 3,
+                   .indent_column = 3},
+                  {.kind = TokenKind::Semi(),
+                   .line = 2,
+                   .column = 4,
+                   .indent_column = 3},
+                  {.kind = TokenKind::Semi(),
+                   .line = 3,
+                   .column = 4,
+                   .indent_column = 4},
+                  {.kind = TokenKind::Semi(),
+                   .line = 3,
+                   .column = 5,
+                   .indent_column = 4},
+                  {.kind = TokenKind::Semi(),
+                   .line = 3,
+                   .column = 6,
+                   .indent_column = 4},
+                  {.kind = TokenKind::Identifier(),
+                   .line = 4,
+                   .column = 4,
+                   .indent_column = 4,
+                   .text = "x"},
+                  {.kind = TokenKind::StringLiteral(),
+                   .line = 4,
+                   .column = 5,
+                   .indent_column = 4},
+                  {.kind = TokenKind::StringLiteral(),
+                   .line = 4,
+                   .column = 11,
+                   .indent_column = 4},
+                  {.kind = TokenKind::Identifier(),
+                   .line = 6,
+                   .column = 6,
+                   .indent_column = 11,
+                   .text = "y"},
+                  {.kind = TokenKind::EndOfFile(), .line = 6, .column = 7},
+              }));
 }
 
 TEST_F(LexerTest, HandlesNumericLiteral) {
   auto buffer = Lex("12-578\n  1  2\n0x12_3ABC\n0b10_10_11\n1_234_567\n1.5e9");
   EXPECT_FALSE(buffer.HasErrors());
-  ASSERT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
-                          {.kind = TokenKind::IntegerLiteral(),
-                           .line = 1,
-                           .column = 1,
-                           .indent_column = 1,
-                           .text = "12"},
-                          {.kind = TokenKind::Minus(),
-                           .line = 1,
-                           .column = 3,
-                           .indent_column = 1},
-                          {.kind = TokenKind::IntegerLiteral(),
-                           .line = 1,
-                           .column = 4,
-                           .indent_column = 1,
-                           .text = "578"},
-                          {.kind = TokenKind::IntegerLiteral(),
-                           .line = 2,
-                           .column = 3,
-                           .indent_column = 3,
-                           .text = "1"},
-                          {.kind = TokenKind::IntegerLiteral(),
-                           .line = 2,
-                           .column = 6,
-                           .indent_column = 3,
-                           .text = "2"},
-                          {.kind = TokenKind::IntegerLiteral(),
-                           .line = 3,
-                           .column = 1,
-                           .indent_column = 1,
-                           .text = "0x12_3ABC"},
-                          {.kind = TokenKind::IntegerLiteral(),
-                           .line = 4,
-                           .column = 1,
-                           .indent_column = 1,
-                           .text = "0b10_10_11"},
-                          {.kind = TokenKind::IntegerLiteral(),
-                           .line = 5,
-                           .column = 1,
-                           .indent_column = 1,
-                           .text = "1_234_567"},
-                          {.kind = TokenKind::RealLiteral(),
-                           .line = 6,
-                           .column = 1,
-                           .indent_column = 1,
-                           .text = "1.5e9"},
-                      }));
+  ASSERT_THAT(buffer,
+              HasTokens(llvm::ArrayRef<ExpectedToken>{
+                  {.kind = TokenKind::IntegerLiteral(),
+                   .line = 1,
+                   .column = 1,
+                   .indent_column = 1,
+                   .text = "12"},
+                  {.kind = TokenKind::Minus(),
+                   .line = 1,
+                   .column = 3,
+                   .indent_column = 1},
+                  {.kind = TokenKind::IntegerLiteral(),
+                   .line = 1,
+                   .column = 4,
+                   .indent_column = 1,
+                   .text = "578"},
+                  {.kind = TokenKind::IntegerLiteral(),
+                   .line = 2,
+                   .column = 3,
+                   .indent_column = 3,
+                   .text = "1"},
+                  {.kind = TokenKind::IntegerLiteral(),
+                   .line = 2,
+                   .column = 6,
+                   .indent_column = 3,
+                   .text = "2"},
+                  {.kind = TokenKind::IntegerLiteral(),
+                   .line = 3,
+                   .column = 1,
+                   .indent_column = 1,
+                   .text = "0x12_3ABC"},
+                  {.kind = TokenKind::IntegerLiteral(),
+                   .line = 4,
+                   .column = 1,
+                   .indent_column = 1,
+                   .text = "0b10_10_11"},
+                  {.kind = TokenKind::IntegerLiteral(),
+                   .line = 5,
+                   .column = 1,
+                   .indent_column = 1,
+                   .text = "1_234_567"},
+                  {.kind = TokenKind::RealLiteral(),
+                   .line = 6,
+                   .column = 1,
+                   .indent_column = 1,
+                   .text = "1.5e9"},
+                  {.kind = TokenKind::EndOfFile(), .line = 6, .column = 6},
+              }));
   auto token_12 = buffer.Tokens().begin();
   EXPECT_EQ(buffer.GetIntegerLiteral(*token_12), 12);
   auto token_578 = buffer.Tokens().begin() + 2;
@@ -172,33 +178,35 @@ TEST_F(LexerTest, HandlesNumericLiteral) {
 TEST_F(LexerTest, HandlesInvalidNumericLiterals) {
   auto buffer = Lex("14x 15_49 0x3.5q 0x3_4.5_6 0ops");
   EXPECT_TRUE(buffer.HasErrors());
-  ASSERT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
-                          {.kind = TokenKind::Error(),
-                           .line = 1,
-                           .column = 1,
-                           .indent_column = 1,
-                           .text = "14x"},
-                          {.kind = TokenKind::IntegerLiteral(),
-                           .line = 1,
-                           .column = 5,
-                           .indent_column = 1,
-                           .text = "15_49"},
-                          {.kind = TokenKind::Error(),
-                           .line = 1,
-                           .column = 11,
-                           .indent_column = 1,
-                           .text = "0x3.5q"},
-                          {.kind = TokenKind::RealLiteral(),
-                           .line = 1,
-                           .column = 18,
-                           .indent_column = 1,
-                           .text = "0x3_4.5_6"},
-                          {.kind = TokenKind::Error(),
-                           .line = 1,
-                           .column = 28,
-                           .indent_column = 1,
-                           .text = "0ops"},
-                      }));
+  ASSERT_THAT(buffer,
+              HasTokens(llvm::ArrayRef<ExpectedToken>{
+                  {.kind = TokenKind::Error(),
+                   .line = 1,
+                   .column = 1,
+                   .indent_column = 1,
+                   .text = "14x"},
+                  {.kind = TokenKind::IntegerLiteral(),
+                   .line = 1,
+                   .column = 5,
+                   .indent_column = 1,
+                   .text = "15_49"},
+                  {.kind = TokenKind::Error(),
+                   .line = 1,
+                   .column = 11,
+                   .indent_column = 1,
+                   .text = "0x3.5q"},
+                  {.kind = TokenKind::RealLiteral(),
+                   .line = 1,
+                   .column = 18,
+                   .indent_column = 1,
+                   .text = "0x3_4.5_6"},
+                  {.kind = TokenKind::Error(),
+                   .line = 1,
+                   .column = 28,
+                   .indent_column = 1,
+                   .text = "0ops"},
+                  {.kind = TokenKind::EndOfFile(), .line = 1, .column = 32},
+              }));
 }
 
 TEST_F(LexerTest, SplitsNumericLiteralsProperly) {
@@ -268,6 +276,8 @@ TEST_F(LexerTest, SplitsNumericLiteralsProperly) {
                   {.kind = TokenKind::IntegerLiteral(), .text = "13"},
                   {.kind = TokenKind::Period()},
                   {.kind = TokenKind::UnderscoreKeyword()},
+                  // newline
+                  {.kind = TokenKind::EndOfFile()},
               }));
 }
 
@@ -309,6 +319,7 @@ TEST_F(LexerTest, HandlesGarbageCharacters) {
            .line = 4,
            .column = 2,
            .text = llvm::StringRef("\\", 1)},
+          {.kind = TokenKind::EndOfFile(), .line = 4, .column = 3},
       }));
 }
 
@@ -321,6 +332,7 @@ TEST_F(LexerTest, Symbols) {
   EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                           {TokenKind::LessLess()},
                           {TokenKind::Less()},
+                          {TokenKind::EndOfFile()},
                       }));
 
   buffer = Lex("<<=>>");
@@ -328,6 +340,7 @@ TEST_F(LexerTest, Symbols) {
   EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                           {TokenKind::LessLessEqual()},
                           {TokenKind::GreaterGreater()},
+                          {TokenKind::EndOfFile()},
                       }));
 
   buffer = Lex("< <=> >");
@@ -336,6 +349,7 @@ TEST_F(LexerTest, Symbols) {
                           {TokenKind::Less()},
                           {TokenKind::LessEqualGreater()},
                           {TokenKind::Greater()},
+                          {TokenKind::EndOfFile()},
                       }));
 
   buffer = Lex("\\/?@&^!");
@@ -348,6 +362,7 @@ TEST_F(LexerTest, Symbols) {
                           {TokenKind::Amp()},
                           {TokenKind::Caret()},
                           {TokenKind::Exclaim()},
+                          {TokenKind::EndOfFile()},
                       }));
 }
 
@@ -357,6 +372,7 @@ TEST_F(LexerTest, Parens) {
   EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                           {TokenKind::OpenParen()},
                           {TokenKind::CloseParen()},
+                          {TokenKind::EndOfFile()},
                       }));
 
   buffer = Lex("((()()))");
@@ -370,6 +386,7 @@ TEST_F(LexerTest, Parens) {
                           {TokenKind::CloseParen()},
                           {TokenKind::CloseParen()},
                           {TokenKind::CloseParen()},
+                          {TokenKind::EndOfFile()},
                       }));
 }
 
@@ -379,6 +396,7 @@ TEST_F(LexerTest, CurlyBraces) {
   EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                           {TokenKind::OpenCurlyBrace()},
                           {TokenKind::CloseCurlyBrace()},
+                          {TokenKind::EndOfFile()},
                       }));
 
   buffer = Lex("{{{}{}}}");
@@ -392,6 +410,7 @@ TEST_F(LexerTest, CurlyBraces) {
                           {TokenKind::CloseCurlyBrace()},
                           {TokenKind::CloseCurlyBrace()},
                           {TokenKind::CloseCurlyBrace()},
+                          {TokenKind::EndOfFile()},
                       }));
 }
 
@@ -412,6 +431,8 @@ TEST_F(LexerTest, MatchingGroups) {
               buffer.GetMatchedClosingToken(open_curly_token));
     EXPECT_EQ(open_curly_token,
               buffer.GetMatchedOpeningToken(close_curly_token));
+    auto eof_token = *it++;
+    EXPECT_EQ(buffer.GetKind(eof_token), TokenKind::EndOfFile());
     EXPECT_EQ(buffer.Tokens().end(), it);
   }
 
@@ -473,6 +494,8 @@ TEST_F(LexerTest, MatchingGroups) {
     EXPECT_EQ(inner_open_paren_token,
               buffer.GetMatchedOpeningToken(inner_close_paren_token));
 
+    auto eof_token = *it++;
+    EXPECT_EQ(buffer.GetKind(eof_token), TokenKind::EndOfFile());
     EXPECT_EQ(buffer.Tokens().end(), it);
   }
 }
@@ -484,12 +507,14 @@ TEST_F(LexerTest, MismatchedGroups) {
               HasTokens(llvm::ArrayRef<ExpectedToken>{
                   {TokenKind::OpenCurlyBrace()},
                   {.kind = TokenKind::CloseCurlyBrace(), .recovery = true},
+                  {TokenKind::EndOfFile()},
               }));
 
   buffer = Lex("}");
   EXPECT_TRUE(buffer.HasErrors());
   EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                           {.kind = TokenKind::Error(), .text = "}"},
+                          {TokenKind::EndOfFile()},
                       }));
 
   buffer = Lex("{(}");
@@ -501,6 +526,7 @@ TEST_F(LexerTest, MismatchedGroups) {
           {.kind = TokenKind::OpenParen(), .column = 2},
           {.kind = TokenKind::CloseParen(), .column = 3, .recovery = true},
           {.kind = TokenKind::CloseCurlyBrace(), .column = 3},
+          {TokenKind::EndOfFile()},
       }));
 
   buffer = Lex(")({)");
@@ -513,6 +539,7 @@ TEST_F(LexerTest, MismatchedGroups) {
           {.kind = TokenKind::OpenCurlyBrace(), .column = 3},
           {.kind = TokenKind::CloseCurlyBrace(), .column = 4, .recovery = true},
           {.kind = TokenKind::CloseParen(), .column = 4},
+          {TokenKind::EndOfFile()},
       }));
 }
 
@@ -523,6 +550,7 @@ TEST_F(LexerTest, Keywords) {
       buffer,
       HasTokens(llvm::ArrayRef<ExpectedToken>{
           {.kind = TokenKind::FnKeyword(), .column = 4, .indent_column = 4},
+          {TokenKind::EndOfFile()},
       }));
 
   buffer = Lex("and or not if else for loop return var break continue _");
@@ -540,36 +568,45 @@ TEST_F(LexerTest, Keywords) {
                           {TokenKind::BreakKeyword()},
                           {TokenKind::ContinueKeyword()},
                           {TokenKind::UnderscoreKeyword()},
+                          {TokenKind::EndOfFile()},
                       }));
 }
 
 TEST_F(LexerTest, Comments) {
-  auto buffer = Lex(" ;\n  // foo\n  ;");
+  auto buffer = Lex(" ;\n  // foo\n  ;\n");
   EXPECT_FALSE(buffer.HasErrors());
-  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
-                          {.kind = TokenKind::Semi(),
-                           .line = 1,
-                           .column = 2,
-                           .indent_column = 2},
-                          {.kind = TokenKind::Semi(),
-                           .line = 3,
-                           .column = 3,
-                           .indent_column = 3},
-                      }));
+  EXPECT_THAT(buffer,
+              HasTokens(llvm::ArrayRef<ExpectedToken>{
+                  {.kind = TokenKind::Semi(),
+                   .line = 1,
+                   .column = 2,
+                   .indent_column = 2},
+                  {.kind = TokenKind::Semi(),
+                   .line = 3,
+                   .column = 3,
+                   .indent_column = 3},
+                  {.kind = TokenKind::EndOfFile(), .line = 3, .column = 4},
+              }));
 
   buffer = Lex("// foo\n//\n// bar");
   EXPECT_FALSE(buffer.HasErrors());
-  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{}));
+  EXPECT_THAT(
+      buffer,
+      HasTokens(llvm::ArrayRef<ExpectedToken>{{TokenKind::EndOfFile()}}));
 
   // Make sure weird characters aren't a problem.
   buffer = Lex("  // foo#$!^?@-_💩🍫⃠ [̲̅$̲̅(̲̅ ͡° ͜ʖ ͡°̲̅)̲̅$̲̅]");
   EXPECT_FALSE(buffer.HasErrors());
-  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{}));
+  EXPECT_THAT(
+      buffer,
+      HasTokens(llvm::ArrayRef<ExpectedToken>{{TokenKind::EndOfFile()}}));
 
   // Make sure we can lex a comment at the end of the input.
   buffer = Lex("//");
   EXPECT_FALSE(buffer.HasErrors());
-  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{}));
+  EXPECT_THAT(
+      buffer,
+      HasTokens(llvm::ArrayRef<ExpectedToken>{{TokenKind::EndOfFile()}}));
 }
 
 TEST_F(LexerTest, InvalidComments) {
@@ -593,6 +630,7 @@ TEST_F(LexerTest, Identifiers) {
                            .column = 4,
                            .indent_column = 4,
                            .text = "foobar"},
+                          {TokenKind::EndOfFile()},
                       }));
 
   // Check different kinds of identifier character sequences.
@@ -600,6 +638,7 @@ TEST_F(LexerTest, Identifiers) {
   EXPECT_FALSE(buffer.HasErrors());
   EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                           {.kind = TokenKind::Identifier(), .text = "_foo_bar"},
+                          {TokenKind::EndOfFile()},
                       }));
 
   buffer = Lex("foo2bar00");
@@ -607,6 +646,7 @@ TEST_F(LexerTest, Identifiers) {
   EXPECT_THAT(buffer,
               HasTokens(llvm::ArrayRef<ExpectedToken>{
                   {.kind = TokenKind::Identifier(), .text = "foo2bar00"},
+                  {TokenKind::EndOfFile()},
               }));
 
   // Check that we can parse identifiers that start with a keyword.
@@ -614,39 +654,42 @@ TEST_F(LexerTest, Identifiers) {
   EXPECT_FALSE(buffer.HasErrors());
   EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                           {.kind = TokenKind::Identifier(), .text = "fnord"},
+                          {TokenKind::EndOfFile()},
                       }));
 
   // Check multiple identifiers with indent and interning.
   buffer = Lex("   foo;bar\nbar \n  foo\tfoo");
   EXPECT_FALSE(buffer.HasErrors());
-  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
-                          {.kind = TokenKind::Identifier(),
-                           .line = 1,
-                           .column = 4,
-                           .indent_column = 4,
-                           .text = "foo"},
-                          {.kind = TokenKind::Semi()},
-                          {.kind = TokenKind::Identifier(),
-                           .line = 1,
-                           .column = 8,
-                           .indent_column = 4,
-                           .text = "bar"},
-                          {.kind = TokenKind::Identifier(),
-                           .line = 2,
-                           .column = 1,
-                           .indent_column = 1,
-                           .text = "bar"},
-                          {.kind = TokenKind::Identifier(),
-                           .line = 3,
-                           .column = 3,
-                           .indent_column = 3,
-                           .text = "foo"},
-                          {.kind = TokenKind::Identifier(),
-                           .line = 3,
-                           .column = 7,
-                           .indent_column = 3,
-                           .text = "foo"},
-                      }));
+  EXPECT_THAT(buffer,
+              HasTokens(llvm::ArrayRef<ExpectedToken>{
+                  {.kind = TokenKind::Identifier(),
+                   .line = 1,
+                   .column = 4,
+                   .indent_column = 4,
+                   .text = "foo"},
+                  {.kind = TokenKind::Semi()},
+                  {.kind = TokenKind::Identifier(),
+                   .line = 1,
+                   .column = 8,
+                   .indent_column = 4,
+                   .text = "bar"},
+                  {.kind = TokenKind::Identifier(),
+                   .line = 2,
+                   .column = 1,
+                   .indent_column = 1,
+                   .text = "bar"},
+                  {.kind = TokenKind::Identifier(),
+                   .line = 3,
+                   .column = 3,
+                   .indent_column = 3,
+                   .text = "foo"},
+                  {.kind = TokenKind::Identifier(),
+                   .line = 3,
+                   .column = 7,
+                   .indent_column = 3,
+                   .text = "foo"},
+                  {.kind = TokenKind::EndOfFile(), .line = 3, .column = 10},
+              }));
 }
 
 TEST_F(LexerTest, StringLiterals) {
@@ -718,6 +761,7 @@ TEST_F(LexerTest, StringLiterals) {
                    .column = 10,
                    .indent_column = 5,
                    .string_contents = {""}},
+                  {.kind = TokenKind::EndOfFile(), .line = 16, .column = 3},
               }));
 }
 
@@ -816,8 +860,11 @@ TEST_F(LexerTest, Printing) {
   buffer.Print(print_stream);
   llvm::StringRef print = print_stream.str();
   EXPECT_THAT(GetAndDropLine(print),
-              StrEq("token: { index: 0, kind: 'Semi', line: 1, column: 1, "
+              StrEq("token: { index: 0, kind:      'Semi', line: 1, column: 1, "
                     "indent: 1, spelling: ';' }"));
+  EXPECT_THAT(GetAndDropLine(print),
+              StrEq("token: { index: 1, kind: 'EndOfFile', line: 1, column: 2, "
+                    "indent: 1, spelling: '' }"));
   EXPECT_TRUE(print.empty()) << print;
 
   // Test kind padding.
@@ -841,6 +888,9 @@ TEST_F(LexerTest, Printing) {
   EXPECT_THAT(GetAndDropLine(print),
               StrEq("token: { index: 4, kind: 'CloseParen', line: 1, column: "
                     "7, indent: 1, spelling: ')', opening_token: 0 }"));
+  EXPECT_THAT(GetAndDropLine(print),
+              StrEq("token: { index: 5, kind:  'EndOfFile', line: 1, column: "
+                    "8, indent: 1, spelling: '' }"));
   EXPECT_TRUE(print.empty()) << print;
 
   // Test digit padding with max values of 9, 10, and 11.
@@ -849,15 +899,22 @@ TEST_F(LexerTest, Printing) {
   print_storage.clear();
   buffer.Print(print_stream);
   print = print_stream.str();
-  EXPECT_THAT(GetAndDropLine(print),
-              StrEq("token: { index: 0, kind: 'Semi', line:  1, column:  1, "
-                    "indent: 1, spelling: ';' }"));
-  EXPECT_THAT(GetAndDropLine(print),
-              StrEq("token: { index: 1, kind: 'Semi', line: 11, column:  9, "
-                    "indent: 9, spelling: ';' }"));
-  EXPECT_THAT(GetAndDropLine(print),
-              StrEq("token: { index: 2, kind: 'Semi', line: 11, column: 10, "
-                    "indent: 9, spelling: ';' }"));
+  EXPECT_THAT(
+      GetAndDropLine(print),
+      StrEq("token: { index: 0, kind:      'Semi', line:  1, column:  1, "
+            "indent: 1, spelling: ';' }"));
+  EXPECT_THAT(
+      GetAndDropLine(print),
+      StrEq("token: { index: 1, kind:      'Semi', line: 11, column:  9, "
+            "indent: 9, spelling: ';' }"));
+  EXPECT_THAT(
+      GetAndDropLine(print),
+      StrEq("token: { index: 2, kind:      'Semi', line: 11, column: 10, "
+            "indent: 9, spelling: ';' }"));
+  EXPECT_THAT(
+      GetAndDropLine(print),
+      StrEq("token: { index: 3, kind: 'EndOfFile', line: 11, column: 11, "
+            "indent: 9, spelling: '' }"));
   EXPECT_TRUE(print.empty()) << print;
 }
 
@@ -949,6 +1006,29 @@ TEST_F(LexerTest, PrintingAsYaml) {
   EXPECT_THAT(&*token_it, IsKeyValueScalars("spelling", ";"));
   EXPECT_THAT(++token_it, Eq(token_value_node->end()));
 
+  ++mapping_it;
+  token_node = llvm::dyn_cast<llvm::yaml::KeyValueNode>(&*mapping_it);
+  ASSERT_THAT(token_node, NotNull());
+  token_key_node = llvm::dyn_cast<llvm::yaml::ScalarNode>(token_node->getKey());
+  ASSERT_THAT(token_key_node, NotNull());
+  EXPECT_THAT(token_key_node->getRawValue(), StrEq("token"));
+  token_value_node =
+      llvm::dyn_cast<llvm::yaml::MappingNode>(token_node->getValue());
+  ASSERT_THAT(token_value_node, NotNull());
+  token_it = token_value_node->begin();
+  EXPECT_THAT(&*token_it, IsKeyValueScalars("index", "3"));
+  ++token_it;
+  EXPECT_THAT(&*token_it, IsKeyValueScalars("kind", "EndOfFile"));
+  ++token_it;
+  EXPECT_THAT(&*token_it, IsKeyValueScalars("line", "15"));
+  ++token_it;
+  EXPECT_THAT(&*token_it, IsKeyValueScalars("column", "1"));
+  ++token_it;
+  EXPECT_THAT(&*token_it, IsKeyValueScalars("indent", "1"));
+  ++token_it;
+  EXPECT_THAT(&*token_it, IsKeyValueScalars("spelling", ""));
+  EXPECT_THAT(++token_it, Eq(token_value_node->end()));
+
   ASSERT_THAT(++mapping_it, Eq(root_node->end()));
   ASSERT_THAT(++yaml_it, Eq(yaml_stream.end()));
 }

+ 1 - 0
parser/parse_node_kind.def

@@ -21,5 +21,6 @@ CARBON_PARSE_NODE_KIND(FunctionDeclaration)
 CARBON_PARSE_NODE_KIND(Identifier)
 CARBON_PARSE_NODE_KIND(ParameterListEnd)
 CARBON_PARSE_NODE_KIND(ParameterList)
+CARBON_PARSE_NODE_KIND(FileEnd)
 
 #undef CARBON_PARSE_NODE_KIND

+ 100 - 60
parser/parse_tree_test.cpp

@@ -48,7 +48,7 @@ TEST_F(ParseTreeTest, Empty) {
   TokenizedBuffer tokens = GetTokenizedBuffer("");
   ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_FALSE(tree.HasErrors());
-  EXPECT_THAT(tree.Postorder().begin(), Eq(tree.Postorder().end()));
+  EXPECT_THAT(tree, MatchParseTreeNodes({{.kind = ParseNodeKind::FileEnd()}}));
 }
 
 TEST_F(ParseTreeTest, EmptyDeclaration) {
@@ -59,20 +59,28 @@ TEST_F(ParseTreeTest, EmptyDeclaration) {
   auto end = tree.Postorder().end();
   ASSERT_THAT(it, Ne(end));
   ParseTree::Node n = *it++;
+  ASSERT_THAT(it, Ne(end));
+  ParseTree::Node eof = *it++;
   EXPECT_THAT(it, Eq(end));
 
   // Directly test the main API so that we get easier to understand errors in
   // simple cases than what the custom matcher will produce.
   EXPECT_FALSE(tree.HasErrorInNode(n));
+  EXPECT_FALSE(tree.HasErrorInNode(eof));
   EXPECT_THAT(tree.GetNodeKind(n), Eq(ParseNodeKind::EmptyDeclaration()));
+  EXPECT_THAT(tree.GetNodeKind(eof), Eq(ParseNodeKind::FileEnd()));
+
   auto t = tree.GetNodeToken(n);
   ASSERT_THAT(tokens.Tokens().begin(), Ne(tokens.Tokens().end()));
   EXPECT_THAT(t, Eq(*tokens.Tokens().begin()));
   EXPECT_THAT(tokens.GetTokenText(t), Eq(";"));
 
-  EXPECT_THAT(tree.Postorder(n).begin(), Eq(tree.Postorder().begin()));
-  EXPECT_THAT(tree.Postorder(n).end(), Eq(tree.Postorder().end()));
   EXPECT_THAT(tree.Children(n).begin(), Eq(tree.Children(n).end()));
+  EXPECT_THAT(tree.Children(eof).begin(), Eq(tree.Children(eof).end()));
+
+  EXPECT_THAT(tree.Postorder().begin(), Eq(tree.Postorder(n).begin()));
+  EXPECT_THAT(tree.Postorder(n).end(), Eq(tree.Postorder(eof).begin()));
+  EXPECT_THAT(tree.Postorder(eof).end(), Eq(tree.Postorder().end()));
 }
 
 TEST_F(ParseTreeTest, BasicFunctionDeclaration) {
@@ -83,19 +91,20 @@ TEST_F(ParseTreeTest, BasicFunctionDeclaration) {
       tree, MatchParseTreeNodes(
                 {{.kind = ParseNodeKind::FunctionDeclaration(),
                   .text = "fn",
-                  .children = {
-                      {ParseNodeKind::Identifier(), "F"},
-                      {.kind = ParseNodeKind::ParameterList(),
-                       .text = "(",
-                       .children = {{ParseNodeKind::ParameterListEnd(), ")"}}},
-                      {ParseNodeKind::DeclarationEnd(), ";"}}}}));
+                  .children = {{ParseNodeKind::Identifier(), "F"},
+                               {.kind = ParseNodeKind::ParameterList(),
+                                .text = "(",
+                                .children = {{ParseNodeKind::ParameterListEnd(),
+                                              ")"}}},
+                               {ParseNodeKind::DeclarationEnd(), ";"}}},
+                 {.kind = ParseNodeKind::FileEnd()}}));
 }
 
 TEST_F(ParseTreeTest, NoDeclarationIntroducerOrSemi) {
   TokenizedBuffer tokens = GetTokenizedBuffer("foo bar baz");
   ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_TRUE(tree.HasErrors());
-  EXPECT_THAT(tree.Postorder().begin(), Eq(tree.Postorder().end()));
+  EXPECT_THAT(tree, MatchParseTreeNodes({{.kind = ParseNodeKind::FileEnd()}}));
 }
 
 TEST_F(ParseTreeTest, NoDeclarationIntroducerWithSemi) {
@@ -105,7 +114,8 @@ TEST_F(ParseTreeTest, NoDeclarationIntroducerWithSemi) {
   EXPECT_THAT(tree,
               MatchParseTreeNodes({{.kind = ParseNodeKind::EmptyDeclaration(),
                                     .text = ";",
-                                    .has_error = true}}));
+                                    .has_error = true},
+                                   {.kind = ParseNodeKind::FileEnd()}}));
 }
 
 TEST_F(ParseTreeTest, JustFunctionIntroducerAndSemi) {
@@ -115,7 +125,8 @@ TEST_F(ParseTreeTest, JustFunctionIntroducerAndSemi) {
   EXPECT_THAT(tree, MatchParseTreeNodes(
                         {{.kind = ParseNodeKind::FunctionDeclaration(),
                           .has_error = true,
-                          .children = {{ParseNodeKind::DeclarationEnd()}}}}));
+                          .children = {{ParseNodeKind::DeclarationEnd()}}},
+                         {.kind = ParseNodeKind::FileEnd()}}));
 }
 
 TEST_F(ParseTreeTest, RepeatedFunctionIntroducerAndSemi) {
@@ -125,18 +136,19 @@ TEST_F(ParseTreeTest, RepeatedFunctionIntroducerAndSemi) {
   EXPECT_THAT(tree, MatchParseTreeNodes(
                         {{.kind = ParseNodeKind::FunctionDeclaration(),
                           .has_error = true,
-                          .children = {{ParseNodeKind::DeclarationEnd()}}}}));
+                          .children = {{ParseNodeKind::DeclarationEnd()}}},
+                         {.kind = ParseNodeKind::FileEnd()}}));
 }
 
 TEST_F(ParseTreeTest, FunctionDeclarationWithNoSignatureOrSemi) {
   TokenizedBuffer tokens = GetTokenizedBuffer("fn foo");
   ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_TRUE(tree.HasErrors());
-  EXPECT_THAT(tree,
-              MatchParseTreeNodes(
-                  {{.kind = ParseNodeKind::FunctionDeclaration(),
-                    .has_error = true,
-                    .children = {{ParseNodeKind::Identifier(), "foo"}}}}));
+  EXPECT_THAT(tree, MatchParseTreeNodes(
+                        {{.kind = ParseNodeKind::FunctionDeclaration(),
+                          .has_error = true,
+                          .children = {{ParseNodeKind::Identifier(), "foo"}}},
+                         {.kind = ParseNodeKind::FileEnd()}}));
 }
 
 TEST_F(ParseTreeTest,
@@ -148,7 +160,8 @@ TEST_F(ParseTreeTest,
                         {{.kind = ParseNodeKind::FunctionDeclaration(),
                           .has_error = true,
                           .children = {{ParseNodeKind::Identifier(), "foo"},
-                                       {ParseNodeKind::DeclarationEnd()}}}}));
+                                       {ParseNodeKind::DeclarationEnd()}}},
+                         {.kind = ParseNodeKind::FileEnd()}}));
 }
 
 TEST_F(ParseTreeTest, FunctionDeclarationWithSingleIdentifierParameterList) {
@@ -166,7 +179,8 @@ TEST_F(ParseTreeTest, FunctionDeclarationWithSingleIdentifierParameterList) {
                          {.kind = ParseNodeKind::ParameterList(),
                           .has_error = true,
                           .children = {{ParseNodeKind::ParameterListEnd()}}},
-                         {ParseNodeKind::DeclarationEnd()}}}}));
+                         {ParseNodeKind::DeclarationEnd()}}},
+           {.kind = ParseNodeKind::FileEnd()}}));
 }
 
 TEST_F(ParseTreeTest, FunctionDeclarationWithoutName) {
@@ -176,7 +190,8 @@ TEST_F(ParseTreeTest, FunctionDeclarationWithoutName) {
   EXPECT_THAT(tree, MatchParseTreeNodes(
                         {{.kind = ParseNodeKind::FunctionDeclaration(),
                           .has_error = true,
-                          .children = {{ParseNodeKind::DeclarationEnd()}}}}));
+                          .children = {{ParseNodeKind::DeclarationEnd()}}},
+                         {.kind = ParseNodeKind::FileEnd()}}));
 }
 
 TEST_F(ParseTreeTest,
@@ -188,7 +203,8 @@ TEST_F(ParseTreeTest,
   EXPECT_THAT(tree, MatchParseTreeNodes(
                         {{.kind = ParseNodeKind::FunctionDeclaration(),
                           .has_error = true,
-                          .children = {{ParseNodeKind::DeclarationEnd()}}}}));
+                          .children = {{ParseNodeKind::DeclarationEnd()}}},
+                         {.kind = ParseNodeKind::FileEnd()}}));
 }
 
 TEST_F(ParseTreeTest, FunctionDeclarationSkipToNewlineWithoutSemi) {
@@ -205,7 +221,8 @@ TEST_F(ParseTreeTest, FunctionDeclarationSkipToNewlineWithoutSemi) {
             .children = {{ParseNodeKind::Identifier(), "F"},
                          {.kind = ParseNodeKind::ParameterList(),
                           .children = {{ParseNodeKind::ParameterListEnd()}}},
-                         {ParseNodeKind::DeclarationEnd()}}}}));
+                         {ParseNodeKind::DeclarationEnd()}}},
+           {.kind = ParseNodeKind::FileEnd()}}));
 }
 
 TEST_F(ParseTreeTest, FunctionDeclarationSkipIndentedNewlineWithSemi) {
@@ -226,7 +243,8 @@ TEST_F(ParseTreeTest, FunctionDeclarationSkipIndentedNewlineWithSemi) {
             .children = {{ParseNodeKind::Identifier(), "F"},
                          {.kind = ParseNodeKind::ParameterList(),
                           .children = {{ParseNodeKind::ParameterListEnd()}}},
-                         {ParseNodeKind::DeclarationEnd()}}}}));
+                         {ParseNodeKind::DeclarationEnd()}}},
+           {.kind = ParseNodeKind::FileEnd()}}));
 }
 
 TEST_F(ParseTreeTest, FunctionDeclarationSkipIndentedNewlineWithoutSemi) {
@@ -245,7 +263,8 @@ TEST_F(ParseTreeTest, FunctionDeclarationSkipIndentedNewlineWithoutSemi) {
             .children = {{ParseNodeKind::Identifier(), "F"},
                          {.kind = ParseNodeKind::ParameterList(),
                           .children = {{ParseNodeKind::ParameterListEnd()}}},
-                         {ParseNodeKind::DeclarationEnd()}}}}));
+                         {ParseNodeKind::DeclarationEnd()}}},
+           {.kind = ParseNodeKind::FileEnd()}}));
 }
 
 TEST_F(ParseTreeTest, FunctionDeclarationSkipIndentedNewlineUntilOutdent) {
@@ -264,7 +283,8 @@ TEST_F(ParseTreeTest, FunctionDeclarationSkipIndentedNewlineUntilOutdent) {
             .children = {{ParseNodeKind::Identifier(), "F"},
                          {.kind = ParseNodeKind::ParameterList(),
                           .children = {{ParseNodeKind::ParameterListEnd()}}},
-                         {ParseNodeKind::DeclarationEnd()}}}}));
+                         {ParseNodeKind::DeclarationEnd()}}},
+           {.kind = ParseNodeKind::FileEnd()}}));
 }
 
 TEST_F(ParseTreeTest, FunctionDeclarationSkipWithoutSemiToCurly) {
@@ -287,15 +307,16 @@ TEST_F(ParseTreeTest, BasicFunctionDefinition) {
   ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_FALSE(tree.HasErrors());
   EXPECT_THAT(
-      tree, MatchParseTreeNodes(
-                {{.kind = ParseNodeKind::FunctionDeclaration(),
-                  .children = {
-                      {ParseNodeKind::Identifier(), "F"},
-                      {.kind = ParseNodeKind::ParameterList(),
-                       .children = {{ParseNodeKind::ParameterListEnd()}}},
-                      {.kind = ParseNodeKind::CodeBlock(),
-                       .text = "{",
-                       .children = {{ParseNodeKind::CodeBlockEnd(), "}"}}}}}}));
+      tree,
+      MatchParseTreeNodes(
+          {{.kind = ParseNodeKind::FunctionDeclaration(),
+            .children = {{ParseNodeKind::Identifier(), "F"},
+                         {.kind = ParseNodeKind::ParameterList(),
+                          .children = {{ParseNodeKind::ParameterListEnd()}}},
+                         {.kind = ParseNodeKind::CodeBlock(),
+                          .text = "{",
+                          .children = {{ParseNodeKind::CodeBlockEnd(), "}"}}}}},
+           {.kind = ParseNodeKind::FileEnd()}}));
 }
 
 TEST_F(ParseTreeTest, FunctionDefinitionWithNestedBlocks) {
@@ -311,21 +332,22 @@ TEST_F(ParseTreeTest, FunctionDefinitionWithNestedBlocks) {
       tree,
       MatchParseTreeNodes(
           {{.kind = ParseNodeKind::FunctionDeclaration(),
-            .children = {
-                {ParseNodeKind::Identifier(), "F"},
-                {.kind = ParseNodeKind::ParameterList(),
-                 .children = {{ParseNodeKind::ParameterListEnd()}}},
-                {.kind = ParseNodeKind::CodeBlock(),
-                 .children = {
-                     {.kind = ParseNodeKind::CodeBlock(),
-                      .children = {{.kind = ParseNodeKind::CodeBlock(),
-                                    .children =
-                                        {{.kind = ParseNodeKind::CodeBlock(),
-                                          .children = {{ParseNodeKind::
-                                                            CodeBlockEnd()}}},
-                                         {ParseNodeKind::CodeBlockEnd()}}},
-                                   {ParseNodeKind::CodeBlockEnd()}}},
-                     {ParseNodeKind::CodeBlockEnd()}}}}}}));
+            .children =
+                {{ParseNodeKind::Identifier(), "F"},
+                 {.kind = ParseNodeKind::ParameterList(),
+                  .children = {{ParseNodeKind::ParameterListEnd()}}},
+                 {.kind = ParseNodeKind::CodeBlock(),
+                  .children =
+                      {{.kind = ParseNodeKind::CodeBlock(),
+                        .children =
+                            {{.kind = ParseNodeKind::CodeBlock(),
+                              .children = {{.kind = ParseNodeKind::CodeBlock(),
+                                            .children = {{ParseNodeKind::
+                                                              CodeBlockEnd()}}},
+                                           {ParseNodeKind::CodeBlockEnd()}}},
+                             {ParseNodeKind::CodeBlockEnd()}}},
+                       {ParseNodeKind::CodeBlockEnd()}}}}},
+           {.kind = ParseNodeKind::FileEnd()}}));
 }
 
 TEST_F(ParseTreeTest, FunctionDefinitionWithIdenifierInStatements) {
@@ -346,7 +368,8 @@ TEST_F(ParseTreeTest, FunctionDefinitionWithIdenifierInStatements) {
                           .children = {{ParseNodeKind::ParameterListEnd()}}},
                          {.kind = ParseNodeKind::CodeBlock(),
                           .has_error = true,
-                          .children = {{ParseNodeKind::CodeBlockEnd()}}}}}}));
+                          .children = {{ParseNodeKind::CodeBlockEnd()}}}}},
+           {.kind = ParseNodeKind::FileEnd()}}));
 }
 
 TEST_F(ParseTreeTest, FunctionDefinitionWithIdenifierInNestedBlock) {
@@ -362,15 +385,16 @@ TEST_F(ParseTreeTest, FunctionDefinitionWithIdenifierInNestedBlock) {
       tree,
       MatchParseTreeNodes(
           {{.kind = ParseNodeKind::FunctionDeclaration(),
-            .children = {
-                {ParseNodeKind::Identifier(), "F"},
-                {.kind = ParseNodeKind::ParameterList(),
-                 .children = {{ParseNodeKind::ParameterListEnd()}}},
-                {.kind = ParseNodeKind::CodeBlock(),
-                 .children = {{.kind = ParseNodeKind::CodeBlock(),
-                               .has_error = true,
-                               .children = {{ParseNodeKind::CodeBlockEnd()}}},
-                              {ParseNodeKind::CodeBlockEnd()}}}}}}));
+            .children =
+                {{ParseNodeKind::Identifier(), "F"},
+                 {.kind = ParseNodeKind::ParameterList(),
+                  .children = {{ParseNodeKind::ParameterListEnd()}}},
+                 {.kind = ParseNodeKind::CodeBlock(),
+                  .children = {{.kind = ParseNodeKind::CodeBlock(),
+                                .has_error = true,
+                                .children = {{ParseNodeKind::CodeBlockEnd()}}},
+                               {ParseNodeKind::CodeBlockEnd()}}}}},
+           {.kind = ParseNodeKind::FileEnd()}}));
 }
 
 auto GetAndDropLine(llvm::StringRef& s) -> std::string {
@@ -408,6 +432,8 @@ TEST_F(ParseTreeTest, Printing) {
                     "text: ')'}]},"));
   EXPECT_THAT(GetAndDropLine(print),
               StrEq("  {node_index: 3, kind: 'DeclarationEnd', text: ';'}]},"));
+  EXPECT_THAT(GetAndDropLine(print),
+              StrEq("{node_index: 5, kind: 'FileEnd', text: ''},"));
   EXPECT_THAT(GetAndDropLine(print), StrEq("]"));
   EXPECT_TRUE(print.empty()) << print;
 }
@@ -526,6 +552,20 @@ TEST_F(ParseTreeTest, PrintingAsYAML) {
 
   ++nkvi;
   EXPECT_THAT(nkvi, Eq(nkve));
+
+  ++ni;
+  ASSERT_THAT(ni, Ne(ne));
+  node = llvm::dyn_cast<llvm::yaml::MappingNode>(&*ni);
+  ASSERT_THAT(node, NotNull());
+  nkvi = node->begin();
+  EXPECT_THAT(&*nkvi, IsKeyValueScalars("node_index", "5"));
+  ++nkvi;
+  EXPECT_THAT(&*nkvi, IsKeyValueScalars("kind", "FileEnd"));
+  ++nkvi;
+  EXPECT_THAT(&*nkvi, IsKeyValueScalars("text", ""));
+  ++nkvi;
+  EXPECT_THAT(nkvi, Eq(node->end()));
+
   ++ni;
   EXPECT_THAT(ni, Eq(ne));
   ++di;

+ 45 - 53
parser/parser_impl.cpp

@@ -25,18 +25,34 @@ auto ParseTree::Parser::Parse(TokenizedBuffer& tokens,
   tree.node_impls.reserve(tokens.Size());
 
   Parser parser(tree, tokens);
-  while (parser.position != parser.end) {
+  while (!parser.AtEndOfFile()) {
     parser.ParseDeclaration();
   }
 
+  parser.AddLeafNode(ParseNodeKind::FileEnd(), *parser.position);
+
   assert(tree.Verify() && "Parse tree built but does not verify!");
   return tree;
 }
 
+ParseTree::Parser::Parser(ParseTree& tree_arg, TokenizedBuffer& tokens_arg)
+    : tree(tree_arg),
+      tokens(tokens_arg),
+      position(tokens.Tokens().begin()),
+      end(tokens.Tokens().end()) {
+  assert(std::find_if(position, end,
+                      [&](TokenizedBuffer::Token t) {
+                        return tokens.GetKind(t) == TokenKind::EndOfFile();
+                      }) != end &&
+         "No EndOfFileToken in token buffer.");
+}
+
 auto ParseTree::Parser::Consume(TokenKind kind) -> TokenizedBuffer::Token {
   TokenizedBuffer::Token t = *position;
+  assert(kind != TokenKind::EndOfFile() && "Cannot consume the EOF token!");
   assert(tokens.GetKind(t) == kind && "The current token is the wrong kind!");
   ++position;
+  assert(position != end && "Reached end of tokens without finding EOF token.");
   return t;
 }
 
@@ -45,8 +61,7 @@ auto ParseTree::Parser::ConsumeIf(TokenKind kind)
   if (tokens.GetKind(*position) != kind) {
     return {};
   }
-
-  return *position++;
+  return Consume(kind);
 }
 
 auto ParseTree::Parser::AddLeafNode(ParseNodeKind kind,
@@ -108,22 +123,27 @@ auto ParseTree::Parser::AddNode(ParseNodeKind n_kind, TokenizedBuffer::Token t,
 }
 
 auto ParseTree::Parser::SkipMatchingGroup() -> bool {
-  assert(position != end && "Cannot skip at the end!");
   TokenizedBuffer::Token t = *position;
   TokenKind t_kind = tokens.GetKind(t);
   if (!t_kind.IsOpeningSymbol()) {
     return false;
   }
 
-  position = std::next(
-      TokenizedBuffer::TokenIterator(tokens.GetMatchedClosingToken(t)));
+  SkipTo(tokens.GetMatchedClosingToken(t));
+  Consume(t_kind.GetClosingSymbol());
   return true;
 }
 
+auto ParseTree::Parser::SkipTo(TokenizedBuffer::Token t) -> void {
+  assert(t >= *position && "Tried to skip backwards.");
+  position = TokenizedBuffer::TokenIterator(t);
+  assert(position != end && "Skipped past EOF.");
+}
+
 auto ParseTree::Parser::SkipPastLikelyDeclarationEnd(
     TokenizedBuffer::Token skip_root, bool is_inside_declaration)
     -> llvm::Optional<Node> {
-  if (position == end) {
+  if (AtEndOfFile()) {
     return {};
   }
 
@@ -150,17 +170,14 @@ auto ParseTree::Parser::SkipPastLikelyDeclarationEnd(
       return {};
     }
 
-    // If we find a semicolon, we want to parse it to end the declaration.
-    if (current_kind == TokenKind::Semi()) {
-      TokenizedBuffer::Token semi = *position++;
-
-      // Add a node for the semicolon. If we're inside of a declaration, this
-      // is a declaration ending semicolon, otherwise it simply forms an empty
-      // declaration.
-      return AddLeafNode(is_inside_declaration
-                             ? ParseNodeKind::DeclarationEnd()
-                             : ParseNodeKind::EmptyDeclaration(),
-                         semi);
+    // If we find a semicolon, parse it and add a corresponding node. If we're
+    // inside of a declaration, this is a declaration ending semicolon,
+    // otherwise it simply forms an empty declaration.
+    if (auto end_node = ConsumeAndAddLeafNodeIf(
+            TokenKind::Semi(), is_inside_declaration
+                                   ? ParseNodeKind::DeclarationEnd()
+                                   : ParseNodeKind::EmptyDeclaration())) {
+      return end_node;
     }
 
     // Skip over any matching group of tokens.
@@ -169,36 +186,31 @@ auto ParseTree::Parser::SkipPastLikelyDeclarationEnd(
     }
 
     // Otherwise just step forward one token.
-    ++position;
-  } while (position != end &&
+    Consume(current_kind);
+  } while (!AtEndOfFile() &&
            is_same_line_or_indent_greater_than_root(*position));
 
   return {};
 }
 
 auto ParseTree::Parser::ParseFunctionSignature() -> Node {
-  assert(position != end && "Cannot parse past the end!");
-
   TokenizedBuffer::Token open_paren = Consume(TokenKind::OpenParen());
-  assert(position != end &&
-         "The lexer ensures we always have a closing paren!");
   auto start = StartSubtree();
 
   // FIXME: Add support for parsing parameters.
 
   bool has_errors = false;
-  auto close_paren = ConsumeIf(TokenKind::CloseParen());
-  if (!close_paren) {
+  if (tokens.GetKind(*position) != TokenKind::CloseParen()) {
     llvm::errs() << "ERROR: unexpected token before the close of the "
                     "parameters on line "
                  << tokens.GetLineNumber(*position) << "!\n";
     has_errors = true;
 
     // We can trivially skip to the actual close parenthesis from here.
-    close_paren = tokens.GetMatchedClosingToken(open_paren);
-    position = std::next(TokenizedBuffer::TokenIterator(*close_paren));
+    SkipTo(tokens.GetMatchedClosingToken(open_paren));
   }
-  AddLeafNode(ParseNodeKind::ParameterListEnd(), *close_paren);
+  AddLeafNode(ParseNodeKind::ParameterListEnd(),
+              Consume(TokenKind::CloseParen()));
 
   // FIXME: Implement parsing of a return type.
 
@@ -206,11 +218,7 @@ auto ParseTree::Parser::ParseFunctionSignature() -> Node {
 }
 
 auto ParseTree::Parser::ParseCodeBlock() -> Node {
-  assert(position != end && "Cannot parse past the end!");
-
   TokenizedBuffer::Token open_curly = Consume(TokenKind::OpenCurlyBrace());
-  assert(position != end &&
-         "The lexer ensures we always have a closing curly!");
   auto start = StartSubtree();
 
   bool has_errors = false;
@@ -226,8 +234,7 @@ auto ParseTree::Parser::ParseCodeBlock() -> Node {
         has_errors = true;
 
         // We can trivially skip to the actual close curly brace from here.
-        position = TokenizedBuffer::TokenIterator(
-            tokens.GetMatchedClosingToken(open_curly));
+        SkipTo(tokens.GetMatchedClosingToken(open_curly));
         // Now fall through to the close curly brace handling code.
         LLVM_FALLTHROUGH;
 
@@ -254,21 +261,14 @@ auto ParseTree::Parser::ParseCodeBlock() -> Node {
 }
 
 auto ParseTree::Parser::ParseFunctionDeclaration() -> Node {
-  assert(position != end && "Cannot parse past the end!");
-
   TokenizedBuffer::Token function_intro_token = Consume(TokenKind::FnKeyword());
   auto start = StartSubtree();
+
   auto add_error_function_node = [&] {
     return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
                    start, /*has_error=*/true);
   };
 
-  if (position == end) {
-    llvm::errs() << "ERROR: File ended with a function introducer on line "
-                 << tokens.GetLineNumber(function_intro_token) << "!\n";
-    return add_error_function_node();
-  }
-
   auto name_n = ConsumeAndAddLeafNodeIf(TokenKind::Identifier(),
                                         ParseNodeKind::Identifier());
   if (!name_n) {
@@ -280,12 +280,6 @@ auto ParseTree::Parser::ParseFunctionDeclaration() -> Node {
     SkipPastLikelyDeclarationEnd(function_intro_token);
     return add_error_function_node();
   }
-  if (position == end) {
-    llvm::errs() << "ERROR: File ended after a function introducer and "
-                    "identifier on line "
-                 << tokens.GetLineNumber(function_intro_token) << "!\n";
-    return add_error_function_node();
-  }
 
   TokenizedBuffer::Token open_paren = *position;
   if (tokens.GetKind(open_paren) != TokenKind::OpenParen()) {
@@ -296,8 +290,6 @@ auto ParseTree::Parser::ParseFunctionDeclaration() -> Node {
     SkipPastLikelyDeclarationEnd(function_intro_token);
     return add_error_function_node();
   }
-  assert(std::next(position) != end &&
-         "Unbalanced parentheses should be rejected by the lexer.");
   TokenizedBuffer::Token close_paren =
       tokens.GetMatchedClosingToken(open_paren);
 
@@ -333,19 +325,19 @@ auto ParseTree::Parser::ParseFunctionDeclaration() -> Node {
 }
 
 auto ParseTree::Parser::ParseEmptyDeclaration() -> Node {
-  assert(position != end && "Cannot parse past the end!");
   return AddLeafNode(ParseNodeKind::EmptyDeclaration(),
                      Consume(TokenKind::Semi()));
 }
 
 auto ParseTree::Parser::ParseDeclaration() -> llvm::Optional<Node> {
-  assert(position != end && "Cannot parse past the end!");
   TokenizedBuffer::Token t = *position;
   switch (tokens.GetKind(t)) {
     case TokenKind::FnKeyword():
       return ParseFunctionDeclaration();
     case TokenKind::Semi():
       return ParseEmptyDeclaration();
+    case TokenKind::EndOfFile():
+      return llvm::None;
     default:
       // Errors are handled outside the switch.
       break;

+ 11 - 5
parser/parser_impl.h

@@ -25,11 +25,11 @@ class ParseTree::Parser {
  private:
   struct SubtreeStart;
 
-  explicit Parser(ParseTree& tree_arg, TokenizedBuffer& tokens_arg)
-      : tree(tree_arg),
-        tokens(tokens_arg),
-        position(tokens.Tokens().begin()),
-        end(tokens.Tokens().end()) {}
+  explicit Parser(ParseTree& tree_arg, TokenizedBuffer& tokens_arg);
+
+  auto AtEndOfFile() -> bool {
+    return tokens.GetKind(*position) == TokenKind::EndOfFile();
+  }
 
   // Requires (and asserts) that the current position matches the provide
   // `Kind`. Returns the current token and advances to the next position.
@@ -75,6 +75,9 @@ class ParseTree::Parser {
   // returns false.
   auto SkipMatchingGroup() -> bool;
 
+  // Skips forward to the given token, making it the current position.
+  auto SkipTo(TokenizedBuffer::Token t) -> void;
+
   // Skips forward to move past the likely end of a declaration.
   //
   // Looks forward, skipping over any matched symbol groups, to find the next
@@ -125,7 +128,10 @@ class ParseTree::Parser {
   ParseTree& tree;
   TokenizedBuffer& tokens;
 
+  // The current position within the token buffer. Never equal to `end`.
   TokenizedBuffer::TokenIterator position;
+  // The end position of the token buffer. There will always be an `EndOfFile`
+  // token between `position` (inclusive) and `end` (exclusive).
   TokenizedBuffer::TokenIterator end;
 };