5 gadi atpakaļ · bbaa0e788c
--- a/lexer/token_registry.def
+++ b/lexer/token_registry.def
@@ -156,7 +156,6 @@ CARBON_KEYWORD_TOKEN(XorKeyword,        "xor")
 
				 
			
 
				 CARBON_TOKEN(Identifier)
			
 
				 CARBON_TOKEN(IntegerLiteral)
			
 
				-CARBON_TOKEN(DocComment)
			
 
				 CARBON_TOKEN(Error)
			
 
				 
			
 
				 #undef CARBON_TOKEN
			
--- a/lexer/tokenized_buffer.cpp
+++ b/lexer/tokenized_buffer.cpp
@@ -35,6 +35,28 @@ static auto TakeLeadingIntegerLiteral(llvm::StringRef source_text)
 
				       [](char c) { return llvm::isAlnum(c) || c == '_'; });
			
 
				 }
			
 
				 
			
 
				+struct TrailingComment {
			
 
				+  static constexpr llvm::StringLiteral ShortName = "syntax-comments";
			
 
				+  static constexpr llvm::StringLiteral Message =
			
 
				+      "Trailing comments are not permitted.";
			
 
				+
			
 
				+  struct Substitutions {};
			
 
				+  static auto Format(const Substitutions&) -> std::string {
			
 
				+    return Message.str();
			
 
				+  }
			
 
				+};
			
 
				+
			
 
				+struct NoWhitespaceAfterCommentIntroducer {
			
 
				+  static constexpr llvm::StringLiteral ShortName = "syntax-comments";
			
 
				+  static constexpr llvm::StringLiteral Message =
			
 
				+      "Whitespace is required after '//'.";
			
 
				+
			
 
				+  struct Substitutions {};
			
 
				+  static auto Format(const Substitutions&) -> std::string {
			
 
				+    return Message.str();
			
 
				+  }
			
 
				+};
			
 
				+
			
 
				 struct UnmatchedClosing {
			
 
				   static constexpr llvm::StringLiteral ShortName = "syntax-balanced-delimiters";
			
 
				   static constexpr llvm::StringLiteral Message =
			
@@ -138,6 +160,12 @@ struct UnrecognizedCharacters {
 
				   }
			
 
				 };
			
 
				 
			
 
				+// TODO(zygoloid): Update this to match whatever we decide qualifies as
			
 
				+// acceptable whitespace.
			
 
				+static bool isSpace(char c) {
			
 
				+  return c == ' ' || c == '\n' || c == '\t';
			
 
				+}
			
 
				+
			
 
				 // Implementation of the lexer logic itself.
			
 
				 //
			
 
				 // The design is that lexing can loop over the source buffer, consuming it into
			
@@ -166,21 +194,19 @@ class TokenizedBuffer::Lexer {
 
				   auto SkipWhitespace(llvm::StringRef& source_text) -> bool {
			
 
				     while (!source_text.empty()) {
			
 
				       // We only support line-oriented commenting and lex comments as-if they
			
 
				-      // were whitespace. Any comment must be the only non-whitespace on the
			
 
				-      // line.
			
 
				-      if (source_text.startswith("//") && !set_indent) {
			
 
				-        // Check if the comment has a special starting sequence of three slashes
			
 
				-        // followed by a space. This represents a documentation comment that is
			
 
				-        // preserved as a token in the buffer. When parsing, these comments will
			
 
				-        // only be accepted in specific parts of the grammar and will be
			
 
				-        // associated with the parsed constructs as structure documentation. All
			
 
				-        // other comments are simply treated as whitespace.
			
 
				-        if (source_text.startswith("///")) {
			
 
				-          current_line_info->indent = current_column;
			
 
				-          set_indent = true;
			
 
				-          buffer.AddToken({.kind = TokenKind::DocComment(),
			
 
				-                           .token_line = current_line,
			
 
				-                           .column = current_column});
			
 
				+      // were whitespace.
			
 
				+      if (source_text.startswith("//")) {
			
 
				+        // Any comment must be the only non-whitespace on the line.
			
 
				+        if (set_indent) {
			
 
				+          emitter.EmitError<TrailingComment>(
			
 
				+              [](TrailingComment::Substitutions&) {});
			
 
				+          buffer.has_errors = true;
			
 
				+        }
			
 
				+        // The introducer '//' must be followed by whitespace or EOF.
			
 
				+        if (source_text.size() > 2 && !isSpace(source_text[2])) {
			
 
				+          emitter.EmitError<NoWhitespaceAfterCommentIntroducer>(
			
 
				+              [](NoWhitespaceAfterCommentIntroducer::Substitutions&) {});
			
 
				+          buffer.has_errors = true;
			
 
				         }
			
 
				         while (!source_text.empty() && source_text.front() != '\n') {
			
 
				           ++current_column;
			
@@ -195,6 +221,7 @@ class TokenizedBuffer::Lexer {
 
				         default:
			
 
				           // If we find a non-whitespace character without exhausting the
			
 
				           // buffer, return true to continue lexing.
			
 
				+          assert(!isSpace(source_text.front()));
			
 
				           return true;
			
 
				 
			
 
				         case '\n':
			
@@ -631,14 +658,6 @@ auto TokenizedBuffer::GetTokenText(Token token) const -> llvm::StringRef {
 
				     return source->Text().substr(token_start, token_info.error_length);
			
 
				   }
			
 
				 
			
 
				-  // Documentation comment tokens refer back to the source text.
			
 
				-  if (token_info.kind == TokenKind::DocComment()) {
			
 
				-    auto& line_info = GetLineInfo(token_info.token_line);
			
 
				-    int64_t token_start = line_info.start + token_info.column;
			
 
				-    int64_t token_stop = line_info.start + line_info.length;
			
 
				-    return source->Text().slice(token_start, token_stop);
			
 
				-  }
			
 
				-
			
 
				   // Refer back to the source text to preserve oddities like radix or digit
			
 
				   // separators the author included.
			
 
				   if (token_info.kind == TokenKind::IntegerLiteral()) {
			
--- a/lexer/tokenized_buffer_test.cpp
+++ b/lexer/tokenized_buffer_test.cpp
@@ -520,67 +520,27 @@ TEST_F(LexerTest, Comments) {
 
				   EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{}));
			
 
				 
			
 
				   // Make sure weird characters aren't a problem.
			
 
				-  buffer = Lex("  //foo#$!^?@-_💩🍫⃠ [̲̅$̲̅(̲̅ ͡° ͜ʖ ͡°̲̅)̲̅$̲̅]");
			
 
				+  buffer = Lex("  // foo#$!^?@-_💩🍫⃠ [̲̅$̲̅(̲̅ ͡° ͜ʖ ͡°̲̅)̲̅$̲̅]");
			
 
				   EXPECT_FALSE(buffer.HasErrors());
			
 
				   EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{}));
			
 
				-}
			
 
				-
			
 
				-TEST_F(LexerTest, DocComments) {
			
 
				-  auto buffer = Lex("  /// foo");
			
 
				-  EXPECT_FALSE(buffer.HasErrors());
			
 
				-  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
			
 
				-                          {.kind = TokenKind::DocComment(),
			
 
				-                           .line = 1,
			
 
				-                           .column = 3,
			
 
				-                           .indent_column = 3,
			
 
				-                           .text = "/// foo"},
			
 
				-                      }));
			
 
				-
			
 
				-  buffer = Lex("/// foo\n//\n/// bar");
			
 
				-  EXPECT_FALSE(buffer.HasErrors());
			
 
				-  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
			
 
				-                          {.kind = TokenKind::DocComment(),
			
 
				-                           .line = 1,
			
 
				-                           .column = 1,
			
 
				-                           .indent_column = 1,
			
 
				-                           .text = "/// foo"},
			
 
				-                          {.kind = TokenKind::DocComment(),
			
 
				-                           .line = 3,
			
 
				-                           .column = 1,
			
 
				-                           .indent_column = 1,
			
 
				-                           .text = "/// bar"},
			
 
				-                      }));
			
 
				 
			
 
				-  buffer = Lex("/// foo\n///\n/// bar");
			
 
				+  // Make sure we can lex a comment at the end of the input.
			
 
				+  buffer = Lex("//");
			
 
				   EXPECT_FALSE(buffer.HasErrors());
			
 
				-  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
			
 
				-                          {.kind = TokenKind::DocComment(),
			
 
				-                           .line = 1,
			
 
				-                           .column = 1,
			
 
				-                           .indent_column = 1,
			
 
				-                           .text = "/// foo"},
			
 
				-                          {.kind = TokenKind::DocComment(),
			
 
				-                           .line = 2,
			
 
				-                           .column = 1,
			
 
				-                           .indent_column = 1,
			
 
				-                           .text = "///"},
			
 
				-                          {.kind = TokenKind::DocComment(),
			
 
				-                           .line = 3,
			
 
				-                           .column = 1,
			
 
				-                           .indent_column = 1,
			
 
				-                           .text = "/// bar"},
			
 
				-                      }));
			
 
				+  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{}));
			
 
				+}
			
 
				 
			
 
				-  // Make sure weird characters aren't a problem.
			
 
				-  buffer = Lex("  ///foo#$!^?@-_💩🍫⃠ [̲̅$̲̅(̲̅ ͡° ͜ʖ ͡°̲̅)̲̅$̲̅]");
			
 
				-  EXPECT_FALSE(buffer.HasErrors());
			
 
				-  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
			
 
				-                          {.kind = TokenKind::DocComment(),
			
 
				-                           .line = 1,
			
 
				-                           .column = 3,
			
 
				-                           .indent_column = 3,
			
 
				-                           .text = "///foo#$!^?@-_💩🍫⃠ [̲̅$̲̅(̲̅ ͡° ͜ʖ ͡°̲̅)̲̅$̲̅]"},
			
 
				-                      }));
			
 
				+TEST_F(LexerTest, InvalidComments) {
			
 
				+  llvm::StringLiteral testcases[] = {
			
 
				+      "  /// foo\n",
			
 
				+      "foo // bar\n",
			
 
				+      "//! hello",
			
 
				+      " //world",
			
 
				+  };
			
 
				+  for (llvm::StringLiteral testcase : testcases) {
			
 
				+    auto buffer = Lex(testcase);
			
 
				+    EXPECT_TRUE(buffer.HasErrors());
			
 
				+  }
			
 
				 }
			
 
				 
			
 
				 TEST_F(LexerTest, Identifiers) {