tokenized_buffer_test.cpp 44 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/lexer/tokenized_buffer.h"
  5. #include <gmock/gmock.h>
  6. #include <gtest/gtest.h>
  7. #include <iterator>
  8. #include "llvm/ADT/ArrayRef.h"
  9. #include "llvm/ADT/None.h"
  10. #include "llvm/ADT/Sequence.h"
  11. #include "llvm/ADT/SmallString.h"
  12. #include "llvm/ADT/Twine.h"
  13. #include "llvm/Support/SourceMgr.h"
  14. #include "llvm/Support/raw_ostream.h"
  15. #include "toolchain/common/yaml_test_helpers.h"
  16. #include "toolchain/diagnostics/diagnostic_emitter.h"
  17. #include "toolchain/diagnostics/mocks.h"
  18. #include "toolchain/lexer/tokenized_buffer_test_helpers.h"
  19. namespace Carbon::Testing {
  20. namespace {
  21. using ::testing::_;
  22. using ::testing::ElementsAre;
  23. using ::testing::Eq;
  24. using ::testing::HasSubstr;
  25. using ::testing::StrEq;
  26. class LexerTest : public ::testing::Test {
  27. protected:
  28. auto GetSourceBuffer(llvm::Twine text) -> SourceBuffer& {
  29. source_storage.push_back(
  30. std::move(*SourceBuffer::CreateFromText(text.str())));
  31. return source_storage.back();
  32. }
  33. auto Lex(llvm::Twine text,
  34. DiagnosticConsumer& consumer = ConsoleDiagnosticConsumer())
  35. -> TokenizedBuffer {
  36. return TokenizedBuffer::Lex(GetSourceBuffer(text), consumer);
  37. }
  38. llvm::SmallVector<SourceBuffer, 16> source_storage;
  39. };
  40. TEST_F(LexerTest, HandlesEmptyBuffer) {
  41. auto buffer = Lex("");
  42. EXPECT_FALSE(buffer.has_errors());
  43. EXPECT_THAT(
  44. buffer,
  45. HasTokens(llvm::ArrayRef<ExpectedToken>{{TokenKind::EndOfFile()}}));
  46. }
  47. TEST_F(LexerTest, TracksLinesAndColumns) {
  48. auto buffer = Lex("\n ;;\n ;;;\n x\"foo\" \"\"\"baz\n a\n \"\"\" y");
  49. EXPECT_FALSE(buffer.has_errors());
  50. EXPECT_THAT(buffer,
  51. HasTokens(llvm::ArrayRef<ExpectedToken>{
  52. {.kind = TokenKind::Semi(),
  53. .line = 2,
  54. .column = 3,
  55. .indent_column = 3},
  56. {.kind = TokenKind::Semi(),
  57. .line = 2,
  58. .column = 4,
  59. .indent_column = 3},
  60. {.kind = TokenKind::Semi(),
  61. .line = 3,
  62. .column = 4,
  63. .indent_column = 4},
  64. {.kind = TokenKind::Semi(),
  65. .line = 3,
  66. .column = 5,
  67. .indent_column = 4},
  68. {.kind = TokenKind::Semi(),
  69. .line = 3,
  70. .column = 6,
  71. .indent_column = 4},
  72. {.kind = TokenKind::Identifier(),
  73. .line = 4,
  74. .column = 4,
  75. .indent_column = 4,
  76. .text = "x"},
  77. {.kind = TokenKind::StringLiteral(),
  78. .line = 4,
  79. .column = 5,
  80. .indent_column = 4},
  81. {.kind = TokenKind::StringLiteral(),
  82. .line = 4,
  83. .column = 11,
  84. .indent_column = 4},
  85. {.kind = TokenKind::Identifier(),
  86. .line = 6,
  87. .column = 6,
  88. .indent_column = 11,
  89. .text = "y"},
  90. {.kind = TokenKind::EndOfFile(), .line = 6, .column = 7},
  91. }));
  92. }
  93. TEST_F(LexerTest, HandlesNumericLiteral) {
  94. auto buffer = Lex("12-578\n 1 2\n0x12_3ABC\n0b10_10_11\n1_234_567\n1.5e9");
  95. EXPECT_FALSE(buffer.has_errors());
  96. ASSERT_THAT(buffer,
  97. HasTokens(llvm::ArrayRef<ExpectedToken>{
  98. {.kind = TokenKind::IntegerLiteral(),
  99. .line = 1,
  100. .column = 1,
  101. .indent_column = 1,
  102. .text = "12"},
  103. {.kind = TokenKind::Minus(),
  104. .line = 1,
  105. .column = 3,
  106. .indent_column = 1},
  107. {.kind = TokenKind::IntegerLiteral(),
  108. .line = 1,
  109. .column = 4,
  110. .indent_column = 1,
  111. .text = "578"},
  112. {.kind = TokenKind::IntegerLiteral(),
  113. .line = 2,
  114. .column = 3,
  115. .indent_column = 3,
  116. .text = "1"},
  117. {.kind = TokenKind::IntegerLiteral(),
  118. .line = 2,
  119. .column = 6,
  120. .indent_column = 3,
  121. .text = "2"},
  122. {.kind = TokenKind::IntegerLiteral(),
  123. .line = 3,
  124. .column = 1,
  125. .indent_column = 1,
  126. .text = "0x12_3ABC"},
  127. {.kind = TokenKind::IntegerLiteral(),
  128. .line = 4,
  129. .column = 1,
  130. .indent_column = 1,
  131. .text = "0b10_10_11"},
  132. {.kind = TokenKind::IntegerLiteral(),
  133. .line = 5,
  134. .column = 1,
  135. .indent_column = 1,
  136. .text = "1_234_567"},
  137. {.kind = TokenKind::RealLiteral(),
  138. .line = 6,
  139. .column = 1,
  140. .indent_column = 1,
  141. .text = "1.5e9"},
  142. {.kind = TokenKind::EndOfFile(), .line = 6, .column = 6},
  143. }));
  144. auto token_12 = buffer.tokens().begin();
  145. EXPECT_EQ(buffer.GetIntegerLiteral(*token_12), 12);
  146. auto token_578 = buffer.tokens().begin() + 2;
  147. EXPECT_EQ(buffer.GetIntegerLiteral(*token_578), 578);
  148. auto token_1 = buffer.tokens().begin() + 3;
  149. EXPECT_EQ(buffer.GetIntegerLiteral(*token_1), 1);
  150. auto token_2 = buffer.tokens().begin() + 4;
  151. EXPECT_EQ(buffer.GetIntegerLiteral(*token_2), 2);
  152. auto token_0x12_3abc = buffer.tokens().begin() + 5;
  153. EXPECT_EQ(buffer.GetIntegerLiteral(*token_0x12_3abc), 0x12'3abc);
  154. auto token_0b10_10_11 = buffer.tokens().begin() + 6;
  155. EXPECT_EQ(buffer.GetIntegerLiteral(*token_0b10_10_11), 0b10'10'11);
  156. auto token_1_234_567 = buffer.tokens().begin() + 7;
  157. EXPECT_EQ(buffer.GetIntegerLiteral(*token_1_234_567), 1'234'567);
  158. auto token_1_5e9 = buffer.tokens().begin() + 8;
  159. auto value_1_5e9 = buffer.GetRealLiteral(*token_1_5e9);
  160. EXPECT_EQ(value_1_5e9.Mantissa().getZExtValue(), 15);
  161. EXPECT_EQ(value_1_5e9.Exponent().getSExtValue(), 8);
  162. EXPECT_EQ(value_1_5e9.IsDecimal(), true);
  163. }
  164. TEST_F(LexerTest, HandlesInvalidNumericLiterals) {
  165. auto buffer = Lex("14x 15_49 0x3.5q 0x3_4.5_6 0ops");
  166. EXPECT_TRUE(buffer.has_errors());
  167. ASSERT_THAT(buffer,
  168. HasTokens(llvm::ArrayRef<ExpectedToken>{
  169. {.kind = TokenKind::Error(),
  170. .line = 1,
  171. .column = 1,
  172. .indent_column = 1,
  173. .text = "14x"},
  174. {.kind = TokenKind::IntegerLiteral(),
  175. .line = 1,
  176. .column = 5,
  177. .indent_column = 1,
  178. .text = "15_49"},
  179. {.kind = TokenKind::Error(),
  180. .line = 1,
  181. .column = 11,
  182. .indent_column = 1,
  183. .text = "0x3.5q"},
  184. {.kind = TokenKind::RealLiteral(),
  185. .line = 1,
  186. .column = 18,
  187. .indent_column = 1,
  188. .text = "0x3_4.5_6"},
  189. {.kind = TokenKind::Error(),
  190. .line = 1,
  191. .column = 28,
  192. .indent_column = 1,
  193. .text = "0ops"},
  194. {.kind = TokenKind::EndOfFile(), .line = 1, .column = 32},
  195. }));
  196. }
  197. TEST_F(LexerTest, SplitsNumericLiteralsProperly) {
  198. llvm::StringLiteral source_text = R"(
  199. 1.
  200. .2
  201. 3.+foo
  202. 4.0-bar
  203. 5.0e+123+456
  204. 6.0e+1e+2
  205. 1e7
  206. 8..10
  207. 9.0.9.5
  208. 10.foo
  209. 11.0.foo
  210. 12e+1
  211. 13._
  212. )";
  213. auto buffer = Lex(source_text);
  214. EXPECT_TRUE(buffer.has_errors());
  215. EXPECT_THAT(buffer,
  216. HasTokens(llvm::ArrayRef<ExpectedToken>{
  217. {.kind = TokenKind::IntegerLiteral(), .text = "1"},
  218. {.kind = TokenKind::Period()},
  219. // newline
  220. {.kind = TokenKind::Period()},
  221. {.kind = TokenKind::IntegerLiteral(), .text = "2"},
  222. // newline
  223. {.kind = TokenKind::IntegerLiteral(), .text = "3"},
  224. {.kind = TokenKind::Period()},
  225. {.kind = TokenKind::Plus()},
  226. {.kind = TokenKind::Identifier(), .text = "foo"},
  227. // newline
  228. {.kind = TokenKind::RealLiteral(), .text = "4.0"},
  229. {.kind = TokenKind::Minus()},
  230. {.kind = TokenKind::Identifier(), .text = "bar"},
  231. // newline
  232. {.kind = TokenKind::RealLiteral(), .text = "5.0e+123"},
  233. {.kind = TokenKind::Plus()},
  234. {.kind = TokenKind::IntegerLiteral(), .text = "456"},
  235. // newline
  236. {.kind = TokenKind::Error(), .text = "6.0e+1e"},
  237. {.kind = TokenKind::Plus()},
  238. {.kind = TokenKind::IntegerLiteral(), .text = "2"},
  239. // newline
  240. {.kind = TokenKind::Error(), .text = "1e7"},
  241. // newline
  242. {.kind = TokenKind::IntegerLiteral(), .text = "8"},
  243. {.kind = TokenKind::Period()},
  244. {.kind = TokenKind::Period()},
  245. {.kind = TokenKind::IntegerLiteral(), .text = "10"},
  246. // newline
  247. {.kind = TokenKind::RealLiteral(), .text = "9.0"},
  248. {.kind = TokenKind::Period()},
  249. {.kind = TokenKind::RealLiteral(), .text = "9.5"},
  250. // newline
  251. {.kind = TokenKind::Error(), .text = "10.foo"},
  252. // newline
  253. {.kind = TokenKind::RealLiteral(), .text = "11.0"},
  254. {.kind = TokenKind::Period()},
  255. {.kind = TokenKind::Identifier(), .text = "foo"},
  256. // newline
  257. {.kind = TokenKind::Error(), .text = "12e"},
  258. {.kind = TokenKind::Plus()},
  259. {.kind = TokenKind::IntegerLiteral(), .text = "1"},
  260. // newline
  261. {.kind = TokenKind::IntegerLiteral(), .text = "13"},
  262. {.kind = TokenKind::Period()},
  263. {.kind = TokenKind::Underscore()},
  264. // newline
  265. {.kind = TokenKind::EndOfFile()},
  266. }));
  267. }
  268. TEST_F(LexerTest, HandlesGarbageCharacters) {
  269. constexpr char GarbageText[] = "$$💩-$\n$\0$12$\n\\\"\\\n\"x";
  270. auto buffer = Lex(llvm::StringRef(GarbageText, sizeof(GarbageText) - 1));
  271. EXPECT_TRUE(buffer.has_errors());
  272. EXPECT_THAT(
  273. buffer,
  274. HasTokens(llvm::ArrayRef<ExpectedToken>{
  275. {.kind = TokenKind::Error(),
  276. .line = 1,
  277. .column = 1,
  278. // 💩 takes 4 bytes, and we count column as bytes offset.
  279. .text = llvm::StringRef("$$💩", 6)},
  280. {.kind = TokenKind::Minus(), .line = 1, .column = 7},
  281. {.kind = TokenKind::Error(), .line = 1, .column = 8, .text = "$"},
  282. // newline
  283. {.kind = TokenKind::Error(),
  284. .line = 2,
  285. .column = 1,
  286. .text = llvm::StringRef("$\0$", 3)},
  287. {.kind = TokenKind::IntegerLiteral(),
  288. .line = 2,
  289. .column = 4,
  290. .text = "12"},
  291. {.kind = TokenKind::Error(), .line = 2, .column = 6, .text = "$"},
  292. // newline
  293. {.kind = TokenKind::Backslash(),
  294. .line = 3,
  295. .column = 1,
  296. .text = "\\"},
  297. {.kind = TokenKind::Error(), .line = 3, .column = 2, .text = "\"\\"},
  298. // newline
  299. {.kind = TokenKind::Error(), .line = 4, .column = 1, .text = "\"x"},
  300. {.kind = TokenKind::EndOfFile(), .line = 4, .column = 3},
  301. }));
  302. }
  303. TEST_F(LexerTest, Symbols) {
  304. // We don't need to exhaustively test symbols here as they're handled with
  305. // common code, but we want to check specific patterns to verify things like
  306. // max-munch rule and handling of interesting symbols.
  307. auto buffer = Lex("<<<");
  308. EXPECT_FALSE(buffer.has_errors());
  309. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  310. {TokenKind::LessLess()},
  311. {TokenKind::Less()},
  312. {TokenKind::EndOfFile()},
  313. }));
  314. buffer = Lex("<<=>>");
  315. EXPECT_FALSE(buffer.has_errors());
  316. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  317. {TokenKind::LessLessEqual()},
  318. {TokenKind::GreaterGreater()},
  319. {TokenKind::EndOfFile()},
  320. }));
  321. buffer = Lex("< <=> >");
  322. EXPECT_FALSE(buffer.has_errors());
  323. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  324. {TokenKind::Less()},
  325. {TokenKind::LessEqualGreater()},
  326. {TokenKind::Greater()},
  327. {TokenKind::EndOfFile()},
  328. }));
  329. buffer = Lex("\\/?@&^!");
  330. EXPECT_FALSE(buffer.has_errors());
  331. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  332. {TokenKind::Backslash()},
  333. {TokenKind::Slash()},
  334. {TokenKind::Question()},
  335. {TokenKind::At()},
  336. {TokenKind::Amp()},
  337. {TokenKind::Caret()},
  338. {TokenKind::Exclaim()},
  339. {TokenKind::EndOfFile()},
  340. }));
  341. }
  342. TEST_F(LexerTest, Parens) {
  343. auto buffer = Lex("()");
  344. EXPECT_FALSE(buffer.has_errors());
  345. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  346. {TokenKind::OpenParen()},
  347. {TokenKind::CloseParen()},
  348. {TokenKind::EndOfFile()},
  349. }));
  350. buffer = Lex("((()()))");
  351. EXPECT_FALSE(buffer.has_errors());
  352. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  353. {TokenKind::OpenParen()},
  354. {TokenKind::OpenParen()},
  355. {TokenKind::OpenParen()},
  356. {TokenKind::CloseParen()},
  357. {TokenKind::OpenParen()},
  358. {TokenKind::CloseParen()},
  359. {TokenKind::CloseParen()},
  360. {TokenKind::CloseParen()},
  361. {TokenKind::EndOfFile()},
  362. }));
  363. }
  364. TEST_F(LexerTest, CurlyBraces) {
  365. auto buffer = Lex("{}");
  366. EXPECT_FALSE(buffer.has_errors());
  367. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  368. {TokenKind::OpenCurlyBrace()},
  369. {TokenKind::CloseCurlyBrace()},
  370. {TokenKind::EndOfFile()},
  371. }));
  372. buffer = Lex("{{{}{}}}");
  373. EXPECT_FALSE(buffer.has_errors());
  374. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  375. {TokenKind::OpenCurlyBrace()},
  376. {TokenKind::OpenCurlyBrace()},
  377. {TokenKind::OpenCurlyBrace()},
  378. {TokenKind::CloseCurlyBrace()},
  379. {TokenKind::OpenCurlyBrace()},
  380. {TokenKind::CloseCurlyBrace()},
  381. {TokenKind::CloseCurlyBrace()},
  382. {TokenKind::CloseCurlyBrace()},
  383. {TokenKind::EndOfFile()},
  384. }));
  385. }
  386. TEST_F(LexerTest, MatchingGroups) {
  387. {
  388. TokenizedBuffer buffer = Lex("(){}");
  389. ASSERT_FALSE(buffer.has_errors());
  390. auto it = buffer.tokens().begin();
  391. auto open_paren_token = *it++;
  392. auto close_paren_token = *it++;
  393. EXPECT_EQ(close_paren_token,
  394. buffer.GetMatchedClosingToken(open_paren_token));
  395. EXPECT_EQ(open_paren_token,
  396. buffer.GetMatchedOpeningToken(close_paren_token));
  397. auto open_curly_token = *it++;
  398. auto close_curly_token = *it++;
  399. EXPECT_EQ(close_curly_token,
  400. buffer.GetMatchedClosingToken(open_curly_token));
  401. EXPECT_EQ(open_curly_token,
  402. buffer.GetMatchedOpeningToken(close_curly_token));
  403. auto eof_token = *it++;
  404. EXPECT_EQ(buffer.GetKind(eof_token), TokenKind::EndOfFile());
  405. EXPECT_EQ(buffer.tokens().end(), it);
  406. }
  407. {
  408. TokenizedBuffer buffer = Lex("({x}){(y)} {{((z))}}");
  409. ASSERT_FALSE(buffer.has_errors());
  410. auto it = buffer.tokens().begin();
  411. auto open_paren_token = *it++;
  412. auto open_curly_token = *it++;
  413. ASSERT_EQ("x", buffer.GetIdentifierText(buffer.GetIdentifier(*it++)));
  414. auto close_curly_token = *it++;
  415. auto close_paren_token = *it++;
  416. EXPECT_EQ(close_paren_token,
  417. buffer.GetMatchedClosingToken(open_paren_token));
  418. EXPECT_EQ(open_paren_token,
  419. buffer.GetMatchedOpeningToken(close_paren_token));
  420. EXPECT_EQ(close_curly_token,
  421. buffer.GetMatchedClosingToken(open_curly_token));
  422. EXPECT_EQ(open_curly_token,
  423. buffer.GetMatchedOpeningToken(close_curly_token));
  424. open_curly_token = *it++;
  425. open_paren_token = *it++;
  426. ASSERT_EQ("y", buffer.GetIdentifierText(buffer.GetIdentifier(*it++)));
  427. close_paren_token = *it++;
  428. close_curly_token = *it++;
  429. EXPECT_EQ(close_curly_token,
  430. buffer.GetMatchedClosingToken(open_curly_token));
  431. EXPECT_EQ(open_curly_token,
  432. buffer.GetMatchedOpeningToken(close_curly_token));
  433. EXPECT_EQ(close_paren_token,
  434. buffer.GetMatchedClosingToken(open_paren_token));
  435. EXPECT_EQ(open_paren_token,
  436. buffer.GetMatchedOpeningToken(close_paren_token));
  437. open_curly_token = *it++;
  438. auto inner_open_curly_token = *it++;
  439. open_paren_token = *it++;
  440. auto inner_open_paren_token = *it++;
  441. ASSERT_EQ("z", buffer.GetIdentifierText(buffer.GetIdentifier(*it++)));
  442. auto inner_close_paren_token = *it++;
  443. close_paren_token = *it++;
  444. auto inner_close_curly_token = *it++;
  445. close_curly_token = *it++;
  446. EXPECT_EQ(close_curly_token,
  447. buffer.GetMatchedClosingToken(open_curly_token));
  448. EXPECT_EQ(open_curly_token,
  449. buffer.GetMatchedOpeningToken(close_curly_token));
  450. EXPECT_EQ(inner_close_curly_token,
  451. buffer.GetMatchedClosingToken(inner_open_curly_token));
  452. EXPECT_EQ(inner_open_curly_token,
  453. buffer.GetMatchedOpeningToken(inner_close_curly_token));
  454. EXPECT_EQ(close_paren_token,
  455. buffer.GetMatchedClosingToken(open_paren_token));
  456. EXPECT_EQ(open_paren_token,
  457. buffer.GetMatchedOpeningToken(close_paren_token));
  458. EXPECT_EQ(inner_close_paren_token,
  459. buffer.GetMatchedClosingToken(inner_open_paren_token));
  460. EXPECT_EQ(inner_open_paren_token,
  461. buffer.GetMatchedOpeningToken(inner_close_paren_token));
  462. auto eof_token = *it++;
  463. EXPECT_EQ(buffer.GetKind(eof_token), TokenKind::EndOfFile());
  464. EXPECT_EQ(buffer.tokens().end(), it);
  465. }
  466. }
  467. TEST_F(LexerTest, MismatchedGroups) {
  468. auto buffer = Lex("{");
  469. EXPECT_TRUE(buffer.has_errors());
  470. EXPECT_THAT(buffer,
  471. HasTokens(llvm::ArrayRef<ExpectedToken>{
  472. {TokenKind::OpenCurlyBrace()},
  473. {.kind = TokenKind::CloseCurlyBrace(), .recovery = true},
  474. {TokenKind::EndOfFile()},
  475. }));
  476. buffer = Lex("}");
  477. EXPECT_TRUE(buffer.has_errors());
  478. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  479. {.kind = TokenKind::Error(), .text = "}"},
  480. {TokenKind::EndOfFile()},
  481. }));
  482. buffer = Lex("{(}");
  483. EXPECT_TRUE(buffer.has_errors());
  484. EXPECT_THAT(
  485. buffer,
  486. HasTokens(llvm::ArrayRef<ExpectedToken>{
  487. {.kind = TokenKind::OpenCurlyBrace(), .column = 1},
  488. {.kind = TokenKind::OpenParen(), .column = 2},
  489. {.kind = TokenKind::CloseParen(), .column = 3, .recovery = true},
  490. {.kind = TokenKind::CloseCurlyBrace(), .column = 3},
  491. {TokenKind::EndOfFile()},
  492. }));
  493. buffer = Lex(")({)");
  494. EXPECT_TRUE(buffer.has_errors());
  495. EXPECT_THAT(
  496. buffer,
  497. HasTokens(llvm::ArrayRef<ExpectedToken>{
  498. {.kind = TokenKind::Error(), .column = 1, .text = ")"},
  499. {.kind = TokenKind::OpenParen(), .column = 2},
  500. {.kind = TokenKind::OpenCurlyBrace(), .column = 3},
  501. {.kind = TokenKind::CloseCurlyBrace(), .column = 4, .recovery = true},
  502. {.kind = TokenKind::CloseParen(), .column = 4},
  503. {TokenKind::EndOfFile()},
  504. }));
  505. }
  506. TEST_F(LexerTest, Whitespace) {
  507. auto buffer = Lex("{( } {(");
  508. // Whether there should be whitespace before/after each token.
  509. bool space[] = {true,
  510. // {
  511. false,
  512. // (
  513. true,
  514. // inserted )
  515. true,
  516. // }
  517. true,
  518. // {
  519. false,
  520. // (
  521. true,
  522. // inserted )
  523. true,
  524. // inserted }
  525. true,
  526. // EOF
  527. false};
  528. int pos = 0;
  529. for (TokenizedBuffer::Token token : buffer.tokens()) {
  530. ASSERT_LT(pos, std::size(space));
  531. EXPECT_THAT(buffer.HasLeadingWhitespace(token), Eq(space[pos]));
  532. ++pos;
  533. ASSERT_LT(pos, std::size(space));
  534. EXPECT_THAT(buffer.HasTrailingWhitespace(token), Eq(space[pos]));
  535. }
  536. ASSERT_EQ(pos + 1, std::size(space));
  537. }
  538. TEST_F(LexerTest, Keywords) {
  539. auto buffer = Lex(" fn");
  540. EXPECT_FALSE(buffer.has_errors());
  541. EXPECT_THAT(buffer,
  542. HasTokens(llvm::ArrayRef<ExpectedToken>{
  543. {.kind = TokenKind::Fn(), .column = 4, .indent_column = 4},
  544. {TokenKind::EndOfFile()},
  545. }));
  546. buffer = Lex("and or not if else for return var break continue _");
  547. EXPECT_FALSE(buffer.has_errors());
  548. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  549. {TokenKind::And()},
  550. {TokenKind::Or()},
  551. {TokenKind::Not()},
  552. {TokenKind::If()},
  553. {TokenKind::Else()},
  554. {TokenKind::For()},
  555. {TokenKind::Return()},
  556. {TokenKind::Var()},
  557. {TokenKind::Break()},
  558. {TokenKind::Continue()},
  559. {TokenKind::Underscore()},
  560. {TokenKind::EndOfFile()},
  561. }));
  562. }
  563. TEST_F(LexerTest, Comments) {
  564. auto buffer = Lex(" ;\n // foo\n ;\n");
  565. EXPECT_FALSE(buffer.has_errors());
  566. EXPECT_THAT(buffer,
  567. HasTokens(llvm::ArrayRef<ExpectedToken>{
  568. {.kind = TokenKind::Semi(),
  569. .line = 1,
  570. .column = 2,
  571. .indent_column = 2},
  572. {.kind = TokenKind::Semi(),
  573. .line = 3,
  574. .column = 3,
  575. .indent_column = 3},
  576. {.kind = TokenKind::EndOfFile(), .line = 3, .column = 4},
  577. }));
  578. buffer = Lex("// foo\n//\n// bar");
  579. EXPECT_FALSE(buffer.has_errors());
  580. EXPECT_THAT(
  581. buffer,
  582. HasTokens(llvm::ArrayRef<ExpectedToken>{{TokenKind::EndOfFile()}}));
  583. // Make sure weird characters aren't a problem.
  584. buffer = Lex(" // foo#$!^?@-_💩🍫⃠ [̲̅$̲̅(̲̅ ͡° ͜ʖ ͡°̲̅)̲̅$̲̅]");
  585. EXPECT_FALSE(buffer.has_errors());
  586. EXPECT_THAT(
  587. buffer,
  588. HasTokens(llvm::ArrayRef<ExpectedToken>{{TokenKind::EndOfFile()}}));
  589. // Make sure we can lex a comment at the end of the input.
  590. buffer = Lex("//");
  591. EXPECT_FALSE(buffer.has_errors());
  592. EXPECT_THAT(
  593. buffer,
  594. HasTokens(llvm::ArrayRef<ExpectedToken>{{TokenKind::EndOfFile()}}));
  595. }
  596. TEST_F(LexerTest, InvalidComments) {
  597. llvm::StringLiteral testcases[] = {
  598. " /// foo\n",
  599. "foo // bar\n",
  600. "//! hello",
  601. " //world",
  602. };
  603. for (llvm::StringLiteral testcase : testcases) {
  604. auto buffer = Lex(testcase);
  605. EXPECT_TRUE(buffer.has_errors());
  606. }
  607. }
  608. TEST_F(LexerTest, Identifiers) {
  609. auto buffer = Lex(" foobar");
  610. EXPECT_FALSE(buffer.has_errors());
  611. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  612. {.kind = TokenKind::Identifier(),
  613. .column = 4,
  614. .indent_column = 4,
  615. .text = "foobar"},
  616. {TokenKind::EndOfFile()},
  617. }));
  618. // Check different kinds of identifier character sequences.
  619. buffer = Lex("_foo_bar");
  620. EXPECT_FALSE(buffer.has_errors());
  621. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  622. {.kind = TokenKind::Identifier(), .text = "_foo_bar"},
  623. {TokenKind::EndOfFile()},
  624. }));
  625. buffer = Lex("foo2bar00");
  626. EXPECT_FALSE(buffer.has_errors());
  627. EXPECT_THAT(buffer,
  628. HasTokens(llvm::ArrayRef<ExpectedToken>{
  629. {.kind = TokenKind::Identifier(), .text = "foo2bar00"},
  630. {TokenKind::EndOfFile()},
  631. }));
  632. // Check that we can parse identifiers that start with a keyword.
  633. buffer = Lex("fnord");
  634. EXPECT_FALSE(buffer.has_errors());
  635. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  636. {.kind = TokenKind::Identifier(), .text = "fnord"},
  637. {TokenKind::EndOfFile()},
  638. }));
  639. // Check multiple identifiers with indent and interning.
  640. buffer = Lex(" foo;bar\nbar \n foo\tfoo");
  641. EXPECT_FALSE(buffer.has_errors());
  642. EXPECT_THAT(buffer,
  643. HasTokens(llvm::ArrayRef<ExpectedToken>{
  644. {.kind = TokenKind::Identifier(),
  645. .line = 1,
  646. .column = 4,
  647. .indent_column = 4,
  648. .text = "foo"},
  649. {.kind = TokenKind::Semi()},
  650. {.kind = TokenKind::Identifier(),
  651. .line = 1,
  652. .column = 8,
  653. .indent_column = 4,
  654. .text = "bar"},
  655. {.kind = TokenKind::Identifier(),
  656. .line = 2,
  657. .column = 1,
  658. .indent_column = 1,
  659. .text = "bar"},
  660. {.kind = TokenKind::Identifier(),
  661. .line = 3,
  662. .column = 3,
  663. .indent_column = 3,
  664. .text = "foo"},
  665. {.kind = TokenKind::Identifier(),
  666. .line = 3,
  667. .column = 7,
  668. .indent_column = 3,
  669. .text = "foo"},
  670. {.kind = TokenKind::EndOfFile(), .line = 3, .column = 10},
  671. }));
  672. }
  673. TEST_F(LexerTest, StringLiterals) {
  674. llvm::StringLiteral testcase = R"(
  675. "hello world\n"
  676. """foo
  677. test \
  678. \xAB
  679. """ trailing
  680. #"""#
  681. "\0"
  682. #"\0"foo"\1"#
  683. """x"""
  684. )";
  685. auto buffer = Lex(testcase);
  686. EXPECT_FALSE(buffer.has_errors());
  687. EXPECT_THAT(buffer,
  688. HasTokens(llvm::ArrayRef<ExpectedToken>{
  689. {.kind = TokenKind::StringLiteral(),
  690. .line = 2,
  691. .column = 5,
  692. .indent_column = 5,
  693. .string_contents = {"hello world\n"}},
  694. {.kind = TokenKind::StringLiteral(),
  695. .line = 4,
  696. .column = 5,
  697. .indent_column = 5,
  698. .string_contents = {" test \xAB\n"}},
  699. {.kind = TokenKind::Identifier(),
  700. .line = 7,
  701. .column = 10,
  702. .indent_column = 5,
  703. .text = "trailing"},
  704. {.kind = TokenKind::StringLiteral(),
  705. .line = 9,
  706. .column = 7,
  707. .indent_column = 7,
  708. .string_contents = {"\""}},
  709. {.kind = TokenKind::StringLiteral(),
  710. .line = 11,
  711. .column = 5,
  712. .indent_column = 5,
  713. .string_contents = llvm::StringLiteral::withInnerNUL("\0")},
  714. {.kind = TokenKind::StringLiteral(),
  715. .line = 13,
  716. .column = 5,
  717. .indent_column = 5,
  718. .string_contents = {"\\0\"foo\"\\1"}},
  719. // """x""" is three string literals, not one.
  720. {.kind = TokenKind::StringLiteral(),
  721. .line = 15,
  722. .column = 5,
  723. .indent_column = 5,
  724. .string_contents = {""}},
  725. {.kind = TokenKind::StringLiteral(),
  726. .line = 15,
  727. .column = 7,
  728. .indent_column = 5,
  729. .string_contents = {"x"}},
  730. {.kind = TokenKind::StringLiteral(),
  731. .line = 15,
  732. .column = 10,
  733. .indent_column = 5,
  734. .string_contents = {""}},
  735. {.kind = TokenKind::EndOfFile(), .line = 16, .column = 3},
  736. }));
  737. }
  738. TEST_F(LexerTest, InvalidStringLiterals) {
  739. llvm::StringLiteral invalid[] = {
  740. // clang-format off
  741. R"(")",
  742. R"("""
  743. "")",
  744. R"("\)",
  745. R"("\")",
  746. R"("\\)",
  747. R"("\\\")",
  748. R"(""")",
  749. R"("""
  750. )",
  751. R"("""\)",
  752. R"(#"""
  753. """)",
  754. // clang-format on
  755. };
  756. for (llvm::StringLiteral test : invalid) {
  757. SCOPED_TRACE(test);
  758. auto buffer = Lex(test);
  759. EXPECT_TRUE(buffer.has_errors());
  760. // We should have formed at least one error token.
  761. bool found_error = false;
  762. for (TokenizedBuffer::Token token : buffer.tokens()) {
  763. if (buffer.GetKind(token) == TokenKind::Error()) {
  764. found_error = true;
  765. break;
  766. }
  767. }
  768. EXPECT_TRUE(found_error);
  769. }
  770. }
  771. TEST_F(LexerTest, TypeLiterals) {
  772. llvm::StringLiteral testcase = R"(
  773. i0 i1 i20 i999999999999 i0x1
  774. u0 u1 u64 u64b
  775. f32 f80 f1 fi
  776. s1
  777. )";
  778. auto buffer = Lex(testcase);
  779. EXPECT_FALSE(buffer.has_errors());
  780. ASSERT_THAT(buffer,
  781. HasTokens(llvm::ArrayRef<ExpectedToken>{
  782. {.kind = TokenKind::Identifier(),
  783. .line = 2,
  784. .column = 5,
  785. .indent_column = 5,
  786. .text = {"i0"}},
  787. {.kind = TokenKind::IntegerTypeLiteral(),
  788. .line = 2,
  789. .column = 8,
  790. .indent_column = 5,
  791. .text = {"i1"}},
  792. {.kind = TokenKind::IntegerTypeLiteral(),
  793. .line = 2,
  794. .column = 11,
  795. .indent_column = 5,
  796. .text = {"i20"}},
  797. {.kind = TokenKind::IntegerTypeLiteral(),
  798. .line = 2,
  799. .column = 15,
  800. .indent_column = 5,
  801. .text = {"i999999999999"}},
  802. {.kind = TokenKind::Identifier(),
  803. .line = 2,
  804. .column = 29,
  805. .indent_column = 5,
  806. .text = {"i0x1"}},
  807. {.kind = TokenKind::Identifier(),
  808. .line = 3,
  809. .column = 5,
  810. .indent_column = 5,
  811. .text = {"u0"}},
  812. {.kind = TokenKind::UnsignedIntegerTypeLiteral(),
  813. .line = 3,
  814. .column = 8,
  815. .indent_column = 5,
  816. .text = {"u1"}},
  817. {.kind = TokenKind::UnsignedIntegerTypeLiteral(),
  818. .line = 3,
  819. .column = 11,
  820. .indent_column = 5,
  821. .text = {"u64"}},
  822. {.kind = TokenKind::Identifier(),
  823. .line = 3,
  824. .column = 15,
  825. .indent_column = 5,
  826. .text = {"u64b"}},
  827. {.kind = TokenKind::FloatingPointTypeLiteral(),
  828. .line = 4,
  829. .column = 5,
  830. .indent_column = 5,
  831. .text = {"f32"}},
  832. {.kind = TokenKind::FloatingPointTypeLiteral(),
  833. .line = 4,
  834. .column = 9,
  835. .indent_column = 5,
  836. .text = {"f80"}},
  837. {.kind = TokenKind::FloatingPointTypeLiteral(),
  838. .line = 4,
  839. .column = 13,
  840. .indent_column = 5,
  841. .text = {"f1"}},
  842. {.kind = TokenKind::Identifier(),
  843. .line = 4,
  844. .column = 16,
  845. .indent_column = 5,
  846. .text = {"fi"}},
  847. {.kind = TokenKind::Identifier(),
  848. .line = 5,
  849. .column = 5,
  850. .indent_column = 5,
  851. .text = {"s1"}},
  852. {.kind = TokenKind::EndOfFile(), .line = 6, .column = 3},
  853. }));
  854. auto token_i1 = buffer.tokens().begin() + 1;
  855. EXPECT_EQ(buffer.GetTypeLiteralSize(*token_i1), 1);
  856. auto token_i20 = buffer.tokens().begin() + 2;
  857. EXPECT_EQ(buffer.GetTypeLiteralSize(*token_i20), 20);
  858. auto token_i999999999999 = buffer.tokens().begin() + 3;
  859. EXPECT_EQ(buffer.GetTypeLiteralSize(*token_i999999999999), 999999999999ULL);
  860. auto token_u1 = buffer.tokens().begin() + 6;
  861. EXPECT_EQ(buffer.GetTypeLiteralSize(*token_u1), 1);
  862. auto token_u64 = buffer.tokens().begin() + 7;
  863. EXPECT_EQ(buffer.GetTypeLiteralSize(*token_u64), 64);
  864. auto token_f32 = buffer.tokens().begin() + 9;
  865. EXPECT_EQ(buffer.GetTypeLiteralSize(*token_f32), 32);
  866. auto token_f80 = buffer.tokens().begin() + 10;
  867. EXPECT_EQ(buffer.GetTypeLiteralSize(*token_f80), 80);
  868. auto token_f1 = buffer.tokens().begin() + 11;
  869. EXPECT_EQ(buffer.GetTypeLiteralSize(*token_f1), 1);
  870. }
  871. TEST_F(LexerTest, TypeLiteralTooManyDigits) {
  872. std::string code = "i";
  873. constexpr int Count = 10000;
  874. code.append(Count, '9');
  875. Testing::MockDiagnosticConsumer consumer;
  876. EXPECT_CALL(consumer,
  877. HandleDiagnostic(IsDiagnostic(
  878. DiagnosticKind::TooManyDigits, DiagnosticLevel::Error, 1, 2,
  879. HasSubstr(llvm::formatv(" {0} ", Count)))));
  880. auto buffer = Lex(code, consumer);
  881. EXPECT_TRUE(buffer.has_errors());
  882. ASSERT_THAT(
  883. buffer,
  884. HasTokens(llvm::ArrayRef<ExpectedToken>{
  885. {.kind = TokenKind::Error(),
  886. .line = 1,
  887. .column = 1,
  888. .indent_column = 1,
  889. .text = {code}},
  890. {.kind = TokenKind::EndOfFile(), .line = 1, .column = Count + 2},
  891. }));
  892. }
  893. TEST_F(LexerTest, DiagnosticTrailingComment) {
  894. llvm::StringLiteral testcase = R"(
  895. // Hello!
  896. var String x; // trailing comment
  897. )";
  898. Testing::MockDiagnosticConsumer consumer;
  899. EXPECT_CALL(consumer,
  900. HandleDiagnostic(IsDiagnostic(DiagnosticKind::TrailingComment,
  901. DiagnosticLevel::Error, 3, 19, _)));
  902. Lex(testcase, consumer);
  903. }
  904. TEST_F(LexerTest, DiagnosticWhitespace) {
  905. Testing::MockDiagnosticConsumer consumer;
  906. EXPECT_CALL(consumer, HandleDiagnostic(IsDiagnostic(
  907. DiagnosticKind::NoWhitespaceAfterCommentIntroducer,
  908. DiagnosticLevel::Error, 1, 3, _)));
  909. Lex("//no space after comment", consumer);
  910. }
  911. TEST_F(LexerTest, DiagnosticUnrecognizedEscape) {
  912. Testing::MockDiagnosticConsumer consumer;
  913. EXPECT_CALL(consumer, HandleDiagnostic(IsDiagnostic(
  914. DiagnosticKind::UnknownEscapeSequence,
  915. DiagnosticLevel::Error, 1, 8, HasSubstr("`b`"))));
  916. Lex(R"("hello\bworld")", consumer);
  917. }
  918. TEST_F(LexerTest, DiagnosticBadHex) {
  919. Testing::MockDiagnosticConsumer consumer;
  920. EXPECT_CALL(consumer, HandleDiagnostic(IsDiagnostic(
  921. DiagnosticKind::HexadecimalEscapeMissingDigits,
  922. DiagnosticLevel::Error, 1, 9, _)));
  923. Lex(R"("hello\xabworld")", consumer);
  924. }
  925. TEST_F(LexerTest, DiagnosticInvalidDigit) {
  926. Testing::MockDiagnosticConsumer consumer;
  927. EXPECT_CALL(consumer, HandleDiagnostic(IsDiagnostic(
  928. DiagnosticKind::InvalidDigit,
  929. DiagnosticLevel::Error, 1, 6, HasSubstr("'a'"))));
  930. Lex("0x123abc", consumer);
  931. }
  932. TEST_F(LexerTest, DiagnosticMissingTerminator) {
  933. Testing::MockDiagnosticConsumer consumer;
  934. EXPECT_CALL(consumer,
  935. HandleDiagnostic(IsDiagnostic(DiagnosticKind::UnterminatedString,
  936. DiagnosticLevel::Error, 1, 1, _)));
  937. Lex(R"(#" ")", consumer);
  938. }
  939. TEST_F(LexerTest, DiagnosticUnrecognizedChar) {
  940. Testing::MockDiagnosticConsumer consumer;
  941. EXPECT_CALL(consumer, HandleDiagnostic(
  942. IsDiagnostic(DiagnosticKind::UnrecognizedCharacters,
  943. DiagnosticLevel::Error, 1, 1, _)));
  944. Lex("\b", consumer);
  945. }
  946. auto GetAndDropLine(llvm::StringRef& text) -> std::string {
  947. auto newline_offset = text.find_first_of('\n');
  948. llvm::StringRef line = text.slice(0, newline_offset);
  949. if (newline_offset != llvm::StringRef::npos) {
  950. text = text.substr(newline_offset + 1);
  951. } else {
  952. text = "";
  953. }
  954. return line.str();
  955. }
  956. TEST_F(LexerTest, PrintingBasic) {
  957. auto buffer = Lex(";");
  958. ASSERT_FALSE(buffer.has_errors());
  959. std::string print_storage;
  960. llvm::raw_string_ostream print_stream(print_storage);
  961. buffer.Print(print_stream);
  962. llvm::StringRef print = print_stream.str();
  963. EXPECT_THAT(GetAndDropLine(print),
  964. StrEq("token: { index: 0, kind: 'Semi', line: 1, column: 1, "
  965. "indent: 1, spelling: ';', has_trailing_space: true }"));
  966. EXPECT_THAT(GetAndDropLine(print),
  967. StrEq("token: { index: 1, kind: 'EndOfFile', line: 1, column: 2, "
  968. "indent: 1, spelling: '' }"));
  969. EXPECT_TRUE(print.empty()) << print;
  970. }
  971. TEST_F(LexerTest, PrintingInteger) {
  972. auto buffer = Lex("123");
  973. ASSERT_FALSE(buffer.has_errors());
  974. std::string print_storage;
  975. llvm::raw_string_ostream print_stream(print_storage);
  976. buffer.Print(print_stream);
  977. llvm::StringRef print = print_stream.str();
  978. EXPECT_THAT(GetAndDropLine(print),
  979. StrEq("token: { index: 0, kind: 'IntegerLiteral', line: 1, "
  980. "column: 1, indent: 1, spelling: '123', value: `123`, "
  981. "has_trailing_space: true }"));
  982. EXPECT_THAT(GetAndDropLine(print), HasSubstr("'EndOfFile'"));
  983. EXPECT_TRUE(print.empty()) << print;
  984. }
  985. TEST_F(LexerTest, PrintingReal) {
  986. auto buffer = Lex("2.5");
  987. ASSERT_FALSE(buffer.has_errors());
  988. std::string print_storage;
  989. llvm::raw_string_ostream print_stream(print_storage);
  990. buffer.Print(print_stream);
  991. llvm::StringRef print = print_stream.str();
  992. EXPECT_THAT(
  993. GetAndDropLine(print),
  994. StrEq(
  995. "token: { index: 0, kind: 'RealLiteral', line: 1, column: 1, indent: "
  996. "1, spelling: '2.5', value: `25*10^-1`, has_trailing_space: true }"));
  997. EXPECT_THAT(GetAndDropLine(print), HasSubstr("'EndOfFile'"));
  998. EXPECT_TRUE(print.empty()) << print;
  999. }
  1000. TEST_F(LexerTest, PrintingPadding) {
  1001. // Test kind padding.
  1002. auto buffer = Lex("(;foo;)");
  1003. ASSERT_FALSE(buffer.has_errors());
  1004. std::string print_storage;
  1005. llvm::raw_string_ostream print_stream(print_storage);
  1006. buffer.Print(print_stream);
  1007. llvm::StringRef print = print_stream.str();
  1008. EXPECT_THAT(GetAndDropLine(print),
  1009. StrEq("token: { index: 0, kind: 'OpenParen', line: 1, column: "
  1010. "1, indent: 1, spelling: '(', closing_token: 4 }"));
  1011. EXPECT_THAT(GetAndDropLine(print),
  1012. StrEq("token: { index: 1, kind: 'Semi', line: 1, column: "
  1013. "2, indent: 1, spelling: ';' }"));
  1014. EXPECT_THAT(GetAndDropLine(print),
  1015. StrEq("token: { index: 2, kind: 'Identifier', line: 1, column: "
  1016. "3, indent: 1, spelling: 'foo', identifier: 0 }"));
  1017. EXPECT_THAT(GetAndDropLine(print),
  1018. StrEq("token: { index: 3, kind: 'Semi', line: 1, column: "
  1019. "6, indent: 1, spelling: ';' }"));
  1020. EXPECT_THAT(GetAndDropLine(print),
  1021. StrEq("token: { index: 4, kind: 'CloseParen', line: 1, column: "
  1022. "7, indent: 1, spelling: ')', opening_token: 0, "
  1023. "has_trailing_space: true }"));
  1024. EXPECT_THAT(GetAndDropLine(print),
  1025. StrEq("token: { index: 5, kind: 'EndOfFile', line: 1, column: "
  1026. "8, indent: 1, spelling: '' }"));
  1027. EXPECT_TRUE(print.empty()) << print;
  1028. }
  1029. TEST_F(LexerTest, PrintingPaddingDigits) {
  1030. // Test digit padding with max values of 9, 10, and 11.
  1031. auto buffer = Lex(";\n\n\n\n\n\n\n\n\n\n ;;");
  1032. ASSERT_FALSE(buffer.has_errors());
  1033. std::string print_storage;
  1034. llvm::raw_string_ostream print_stream(print_storage);
  1035. buffer.Print(print_stream);
  1036. llvm::StringRef print = print_stream.str();
  1037. EXPECT_THAT(
  1038. GetAndDropLine(print),
  1039. StrEq("token: { index: 0, kind: 'Semi', line: 1, column: 1, "
  1040. "indent: 1, spelling: ';', has_trailing_space: true }"));
  1041. EXPECT_THAT(
  1042. GetAndDropLine(print),
  1043. StrEq("token: { index: 1, kind: 'Semi', line: 11, column: 9, "
  1044. "indent: 9, spelling: ';' }"));
  1045. EXPECT_THAT(
  1046. GetAndDropLine(print),
  1047. StrEq("token: { index: 2, kind: 'Semi', line: 11, column: 10, "
  1048. "indent: 9, spelling: ';', has_trailing_space: true }"));
  1049. EXPECT_THAT(
  1050. GetAndDropLine(print),
  1051. StrEq("token: { index: 3, kind: 'EndOfFile', line: 11, column: 11, "
  1052. "indent: 9, spelling: '' }"));
  1053. EXPECT_TRUE(print.empty()) << print;
  1054. }
  1055. TEST_F(LexerTest, PrintingAsYaml) {
  1056. // Test that we can parse this into YAML and verify line and indent data.
  1057. auto buffer = Lex("\n ;\n\n\n; ;\n\n\n\n\n\n\n\n\n\n\n");
  1058. ASSERT_FALSE(buffer.has_errors());
  1059. std::string print_output;
  1060. llvm::raw_string_ostream print_stream(print_output);
  1061. buffer.Print(print_stream);
  1062. print_stream.flush();
  1063. EXPECT_THAT(Yaml::Value::FromText(print_output),
  1064. ElementsAre(Yaml::MappingValue{
  1065. {"token", Yaml::MappingValue{{"index", "0"},
  1066. {"kind", "Semi"},
  1067. {"line", "2"},
  1068. {"column", "2"},
  1069. {"indent", "2"},
  1070. {"spelling", ";"},
  1071. {"has_trailing_space", "true"}}},
  1072. {"token", Yaml::MappingValue{{"index", "1"},
  1073. {"kind", "Semi"},
  1074. {"line", "5"},
  1075. {"column", "1"},
  1076. {"indent", "1"},
  1077. {"spelling", ";"},
  1078. {"has_trailing_space", "true"}}},
  1079. {"token", Yaml::MappingValue{{"index", "2"},
  1080. {"kind", "Semi"},
  1081. {"line", "5"},
  1082. {"column", "3"},
  1083. {"indent", "1"},
  1084. {"spelling", ";"},
  1085. {"has_trailing_space", "true"}}},
  1086. {"token", Yaml::MappingValue{{"index", "3"},
  1087. {"kind", "EndOfFile"},
  1088. {"line", "15"},
  1089. {"column", "1"},
  1090. {"indent", "1"},
  1091. {"spelling", ""}}}}));
  1092. }
  1093. } // namespace
  1094. } // namespace Carbon::Testing