tokenized_buffer_test.cpp 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/lexer/tokenized_buffer.h"
  5. #include <gmock/gmock.h>
  6. #include <gtest/gtest.h>
  7. #include <iterator>
  8. #include "llvm/ADT/ArrayRef.h"
  9. #include "llvm/ADT/None.h"
  10. #include "llvm/ADT/Sequence.h"
  11. #include "llvm/ADT/SmallString.h"
  12. #include "llvm/ADT/Twine.h"
  13. #include "llvm/Support/SourceMgr.h"
  14. #include "llvm/Support/raw_ostream.h"
  15. #include "toolchain/common/yaml_test_helpers.h"
  16. #include "toolchain/diagnostics/diagnostic_emitter.h"
  17. #include "toolchain/diagnostics/mocks.h"
  18. #include "toolchain/lexer/tokenized_buffer_test_helpers.h"
  19. namespace Carbon::Testing {
  20. namespace {
  21. using ::testing::_;
  22. using ::testing::ElementsAre;
  23. using ::testing::Eq;
  24. using ::testing::HasSubstr;
  25. using ::testing::StrEq;
// Test fixture for lexer tests. Owns the source buffers backing each lexed
// token buffer so that tokens can refer into stable source text for the
// duration of a test.
class LexerTest : public ::testing::Test {
 protected:
  // Creates a SourceBuffer from `text` and stores it in `source_storage` so
  // it outlives the TokenizedBuffer lexed from it.
  auto GetSourceBuffer(llvm::Twine text) -> SourceBuffer& {
    source_storage.push_back(
        std::move(*SourceBuffer::CreateFromText(text.str())));
    return source_storage.back();
  }

  // Lexes `text` into a TokenizedBuffer. Diagnostics go to `consumer`, which
  // defaults to printing to the console.
  auto Lex(llvm::Twine text,
           DiagnosticConsumer& consumer = ConsoleDiagnosticConsumer())
      -> TokenizedBuffer {
    return TokenizedBuffer::Lex(GetSourceBuffer(text), consumer);
  }

  // Storage keeping each test's source buffers alive.
  // NOTE(review): growing past the inline capacity of 16 moves the stored
  // SourceBuffers; presumably no test lexes that many buffers, or moves keep
  // the underlying text stable — verify before relying on it.
  llvm::SmallVector<SourceBuffer, 16> source_storage;
};
  40. TEST_F(LexerTest, HandlesEmptyBuffer) {
  41. auto buffer = Lex("");
  42. EXPECT_FALSE(buffer.has_errors());
  43. EXPECT_THAT(
  44. buffer,
  45. HasTokens(llvm::ArrayRef<ExpectedToken>{{TokenKind::EndOfFile()}}));
  46. }
// Verifies line, column, and indent-column tracking across newlines,
// adjacent tokens, and a multi-line block string literal.
TEST_F(LexerTest, TracksLinesAndColumns) {
  // Layout: line 2 = `;;` indented two spaces, line 3 = `;;;` indented three,
  // line 4 = `x"foo"` plus a `'''baz` block-string opener, lines 5-6 close
  // the block string and lex `y`.
  auto buffer = Lex("\n  ;;\n   ;;;\n   x\"foo\" '''baz\n a\n ''' y");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {.kind = TokenKind::Semi(),
                   .line = 2,
                   .column = 3,
                   .indent_column = 3},
                  {.kind = TokenKind::Semi(),
                   .line = 2,
                   .column = 4,
                   .indent_column = 3},
                  {.kind = TokenKind::Semi(),
                   .line = 3,
                   .column = 4,
                   .indent_column = 4},
                  {.kind = TokenKind::Semi(),
                   .line = 3,
                   .column = 5,
                   .indent_column = 4},
                  {.kind = TokenKind::Semi(),
                   .line = 3,
                   .column = 6,
                   .indent_column = 4},
                  {.kind = TokenKind::Identifier(),
                   .line = 4,
                   .column = 4,
                   .indent_column = 4,
                   .text = "x"},
                  // `"foo"` abuts `x` with no intervening whitespace.
                  {.kind = TokenKind::StringLiteral(),
                   .line = 4,
                   .column = 5,
                   .indent_column = 4},
                  // The block string spans lines 4-6 but is located at its
                  // opening `'''`.
                  {.kind = TokenKind::StringLiteral(),
                   .line = 4,
                   .column = 11,
                   .indent_column = 4},
                  {.kind = TokenKind::Identifier(),
                   .line = 6,
                   .column = 6,
                   .indent_column = 11,
                   .text = "y"},
                  {.kind = TokenKind::EndOfFile(), .line = 6, .column = 7},
              }));
}
// Verifies lexing of valid numeric literals: decimal, hex, binary, digit
// separators, and a real literal, plus the parsed values exposed by
// GetIntegerLiteral / GetRealLiteral.
TEST_F(LexerTest, HandlesNumericLiteral) {
  auto buffer = Lex("12-578\n  1  2\n0x12_3ABC\n0b10_10_11\n1_234_567\n1.5e9");
  EXPECT_FALSE(buffer.has_errors());
  ASSERT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  // `12-578` splits into integer, minus, integer.
                  {.kind = TokenKind::IntegerLiteral(),
                   .line = 1,
                   .column = 1,
                   .indent_column = 1,
                   .text = "12"},
                  {.kind = TokenKind::Minus(),
                   .line = 1,
                   .column = 3,
                   .indent_column = 1},
                  {.kind = TokenKind::IntegerLiteral(),
                   .line = 1,
                   .column = 4,
                   .indent_column = 1,
                   .text = "578"},
                  {.kind = TokenKind::IntegerLiteral(),
                   .line = 2,
                   .column = 3,
                   .indent_column = 3,
                   .text = "1"},
                  {.kind = TokenKind::IntegerLiteral(),
                   .line = 2,
                   .column = 6,
                   .indent_column = 3,
                   .text = "2"},
                  {.kind = TokenKind::IntegerLiteral(),
                   .line = 3,
                   .column = 1,
                   .indent_column = 1,
                   .text = "0x12_3ABC"},
                  {.kind = TokenKind::IntegerLiteral(),
                   .line = 4,
                   .column = 1,
                   .indent_column = 1,
                   .text = "0b10_10_11"},
                  {.kind = TokenKind::IntegerLiteral(),
                   .line = 5,
                   .column = 1,
                   .indent_column = 1,
                   .text = "1_234_567"},
                  {.kind = TokenKind::RealLiteral(),
                   .line = 6,
                   .column = 1,
                   .indent_column = 1,
                   .text = "1.5e9"},
                  {.kind = TokenKind::EndOfFile(), .line = 6, .column = 6},
              }));

  // Check the parsed integer values, including that digit separators are
  // ignored when computing the value.
  auto token_12 = buffer.tokens().begin();
  EXPECT_EQ(buffer.GetIntegerLiteral(*token_12), 12);
  auto token_578 = buffer.tokens().begin() + 2;
  EXPECT_EQ(buffer.GetIntegerLiteral(*token_578), 578);
  auto token_1 = buffer.tokens().begin() + 3;
  EXPECT_EQ(buffer.GetIntegerLiteral(*token_1), 1);
  auto token_2 = buffer.tokens().begin() + 4;
  EXPECT_EQ(buffer.GetIntegerLiteral(*token_2), 2);
  auto token_0x12_3abc = buffer.tokens().begin() + 5;
  EXPECT_EQ(buffer.GetIntegerLiteral(*token_0x12_3abc), 0x12'3abc);
  auto token_0b10_10_11 = buffer.tokens().begin() + 6;
  EXPECT_EQ(buffer.GetIntegerLiteral(*token_0b10_10_11), 0b10'10'11);
  auto token_1_234_567 = buffer.tokens().begin() + 7;
  EXPECT_EQ(buffer.GetIntegerLiteral(*token_1_234_567), 1'234'567);
  // `1.5e9` is represented as mantissa 15, decimal exponent 8 (15 * 10^8).
  auto token_1_5e9 = buffer.tokens().begin() + 8;
  auto value_1_5e9 = buffer.GetRealLiteral(*token_1_5e9);
  EXPECT_EQ(value_1_5e9.Mantissa().getZExtValue(), 15);
  EXPECT_EQ(value_1_5e9.Exponent().getSExtValue(), 8);
  EXPECT_EQ(value_1_5e9.IsDecimal(), true);
}
// Verifies that malformed numeric literals lex as single error tokens
// covering the whole invalid spelling, without breaking adjacent valid
// literals.
TEST_F(LexerTest, HandlesInvalidNumericLiterals) {
  auto buffer = Lex("14x 15_49 0x3.5q 0x3_4.5_6 0ops");
  EXPECT_TRUE(buffer.has_errors());
  ASSERT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  // Trailing alphabetic suffix makes the literal invalid.
                  {.kind = TokenKind::Error(),
                   .line = 1,
                   .column = 1,
                   .indent_column = 1,
                   .text = "14x"},
                  {.kind = TokenKind::IntegerLiteral(),
                   .line = 1,
                   .column = 5,
                   .indent_column = 1,
                   .text = "15_49"},
                  // Invalid suffix on a hex real literal.
                  {.kind = TokenKind::Error(),
                   .line = 1,
                   .column = 11,
                   .indent_column = 1,
                   .text = "0x3.5q"},
                  {.kind = TokenKind::RealLiteral(),
                   .line = 1,
                   .column = 18,
                   .indent_column = 1,
                   .text = "0x3_4.5_6"},
                  // `0` followed by letters is not a valid radix prefix.
                  {.kind = TokenKind::Error(),
                   .line = 1,
                   .column = 28,
                   .indent_column = 1,
                   .text = "0ops"},
                  {.kind = TokenKind::EndOfFile(), .line = 1, .column = 32},
              }));
}
// Verifies where the lexer ends a numeric literal and starts the next token:
// trailing periods, sign characters after exponents, and chained periods all
// have specific split points.
TEST_F(LexerTest, SplitsNumericLiteralsProperly) {
  llvm::StringLiteral source_text = R"(
1.
.2
3.+foo
4.0-bar
5.0e+123+456
6.0e+1e+2
1e7
8..10
9.0.9.5
10.foo
11.0.foo
12e+1
13._
)";
  auto buffer = Lex(source_text);
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  // `1.` — a trailing period is not part of the literal.
                  {.kind = TokenKind::IntegerLiteral(), .text = "1"},
                  {.kind = TokenKind::Period()},
                  // newline
                  // `.2` — a leading period is likewise separate.
                  {.kind = TokenKind::Period()},
                  {.kind = TokenKind::IntegerLiteral(), .text = "2"},
                  // newline
                  {.kind = TokenKind::IntegerLiteral(), .text = "3"},
                  {.kind = TokenKind::Period()},
                  {.kind = TokenKind::Plus()},
                  {.kind = TokenKind::Identifier(), .text = "foo"},
                  // newline
                  {.kind = TokenKind::RealLiteral(), .text = "4.0"},
                  {.kind = TokenKind::Minus()},
                  {.kind = TokenKind::Identifier(), .text = "bar"},
                  // newline
                  // `+` after a complete exponent starts a new token.
                  {.kind = TokenKind::RealLiteral(), .text = "5.0e+123"},
                  {.kind = TokenKind::Plus()},
                  {.kind = TokenKind::IntegerLiteral(), .text = "456"},
                  // newline
                  // A second `e` inside the exponent makes the whole
                  // spelling up to it an error token.
                  {.kind = TokenKind::Error(), .text = "6.0e+1e"},
                  {.kind = TokenKind::Plus()},
                  {.kind = TokenKind::IntegerLiteral(), .text = "2"},
                  // newline
                  {.kind = TokenKind::Error(), .text = "1e7"},
                  // newline
                  // `8..10` — only one period can follow the digits.
                  {.kind = TokenKind::IntegerLiteral(), .text = "8"},
                  {.kind = TokenKind::Period()},
                  {.kind = TokenKind::Period()},
                  {.kind = TokenKind::IntegerLiteral(), .text = "10"},
                  // newline
                  {.kind = TokenKind::RealLiteral(), .text = "9.0"},
                  {.kind = TokenKind::Period()},
                  {.kind = TokenKind::RealLiteral(), .text = "9.5"},
                  // newline
                  {.kind = TokenKind::Error(), .text = "10.foo"},
                  // newline
                  {.kind = TokenKind::RealLiteral(), .text = "11.0"},
                  {.kind = TokenKind::Period()},
                  {.kind = TokenKind::Identifier(), .text = "foo"},
                  // newline
                  {.kind = TokenKind::Error(), .text = "12e"},
                  {.kind = TokenKind::Plus()},
                  {.kind = TokenKind::IntegerLiteral(), .text = "1"},
                  // newline
                  {.kind = TokenKind::IntegerLiteral(), .text = "13"},
                  {.kind = TokenKind::Period()},
                  {.kind = TokenKind::Underscore()},
                  // newline
                  {.kind = TokenKind::EndOfFile()},
              }));
}
// Verifies that unrecognized characters — including multi-byte UTF-8 and an
// embedded NUL — become error tokens without derailing surrounding tokens.
TEST_F(LexerTest, HandlesGarbageCharacters) {
  // Use sizeof-1 so the embedded `\0` survives instead of terminating the
  // string early.
  constexpr char GarbageText[] = "$$💩-$\n$\0$12$\n\\\"\\\n\"x";
  auto buffer = Lex(llvm::StringRef(GarbageText, sizeof(GarbageText) - 1));
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {.kind = TokenKind::Error(),
           .line = 1,
           .column = 1,
           // 💩 takes 4 bytes, and we count column as bytes offset.
           .text = llvm::StringRef("$$💩", 6)},
          {.kind = TokenKind::Minus(), .line = 1, .column = 7},
          {.kind = TokenKind::Error(), .line = 1, .column = 8, .text = "$"},
          // newline
          // The embedded NUL is swallowed into one error token with the
          // surrounding `$`s.
          {.kind = TokenKind::Error(),
           .line = 2,
           .column = 1,
           .text = llvm::StringRef("$\0$", 3)},
          {.kind = TokenKind::IntegerLiteral(),
           .line = 2,
           .column = 4,
           .text = "12"},
          {.kind = TokenKind::Error(), .line = 2, .column = 6, .text = "$"},
          // newline
          // A lone backslash is a valid symbol token; the unterminated quote
          // after it is an error.
          {.kind = TokenKind::Backslash(),
           .line = 3,
           .column = 1,
           .text = "\\"},
          {.kind = TokenKind::Error(), .line = 3, .column = 2, .text = "\"\\"},
          // newline
          {.kind = TokenKind::Error(), .line = 4, .column = 1, .text = "\"x"},
          {.kind = TokenKind::EndOfFile(), .line = 4, .column = 3},
      }));
}
// Verifies symbol-token lexing, focusing on max-munch behavior rather than
// exhaustively covering every symbol.
TEST_F(LexerTest, Symbols) {
  // We don't need to exhaustively test symbols here as they're handled with
  // common code, but we want to check specific patterns to verify things like
  // max-munch rule and handling of interesting symbols.

  // `<<<` munches greedily into `<<` then `<`.
  auto buffer = Lex("<<<");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::LessLess()},
                          {TokenKind::Less()},
                          {TokenKind::EndOfFile()},
                      }));

  // `<<=` wins over `<<` + `=`.
  buffer = Lex("<<=>>");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::LessLessEqual()},
                          {TokenKind::GreaterGreater()},
                          {TokenKind::EndOfFile()},
                      }));

  // Whitespace separates what would otherwise merge; `<=>` is one token.
  buffer = Lex("< <=> >");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::Less()},
                          {TokenKind::LessEqualGreater()},
                          {TokenKind::Greater()},
                          {TokenKind::EndOfFile()},
                      }));

  // A run of distinct single-character symbols.
  buffer = Lex("\\/?@&^!");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::Backslash()},
                          {TokenKind::Slash()},
                          {TokenKind::Question()},
                          {TokenKind::At()},
                          {TokenKind::Amp()},
                          {TokenKind::Caret()},
                          {TokenKind::Exclaim()},
                          {TokenKind::EndOfFile()},
                      }));
}
  342. TEST_F(LexerTest, Parens) {
  343. auto buffer = Lex("()");
  344. EXPECT_FALSE(buffer.has_errors());
  345. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  346. {TokenKind::OpenParen()},
  347. {TokenKind::CloseParen()},
  348. {TokenKind::EndOfFile()},
  349. }));
  350. buffer = Lex("((()()))");
  351. EXPECT_FALSE(buffer.has_errors());
  352. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  353. {TokenKind::OpenParen()},
  354. {TokenKind::OpenParen()},
  355. {TokenKind::OpenParen()},
  356. {TokenKind::CloseParen()},
  357. {TokenKind::OpenParen()},
  358. {TokenKind::CloseParen()},
  359. {TokenKind::CloseParen()},
  360. {TokenKind::CloseParen()},
  361. {TokenKind::EndOfFile()},
  362. }));
  363. }
  364. TEST_F(LexerTest, CurlyBraces) {
  365. auto buffer = Lex("{}");
  366. EXPECT_FALSE(buffer.has_errors());
  367. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  368. {TokenKind::OpenCurlyBrace()},
  369. {TokenKind::CloseCurlyBrace()},
  370. {TokenKind::EndOfFile()},
  371. }));
  372. buffer = Lex("{{{}{}}}");
  373. EXPECT_FALSE(buffer.has_errors());
  374. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  375. {TokenKind::OpenCurlyBrace()},
  376. {TokenKind::OpenCurlyBrace()},
  377. {TokenKind::OpenCurlyBrace()},
  378. {TokenKind::CloseCurlyBrace()},
  379. {TokenKind::OpenCurlyBrace()},
  380. {TokenKind::CloseCurlyBrace()},
  381. {TokenKind::CloseCurlyBrace()},
  382. {TokenKind::CloseCurlyBrace()},
  383. {TokenKind::EndOfFile()},
  384. }));
  385. }
// Verifies that each opening grouping token is cross-linked with its matching
// closing token via GetMatchedClosingToken / GetMatchedOpeningToken.
TEST_F(LexerTest, MatchingGroups) {
  {
    // Two adjacent, non-nested groups.
    TokenizedBuffer buffer = Lex("(){}");
    ASSERT_FALSE(buffer.has_errors());
    auto it = buffer.tokens().begin();
    auto open_paren_token = *it++;
    auto close_paren_token = *it++;
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));
    auto open_curly_token = *it++;
    auto close_curly_token = *it++;
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));
    auto eof_token = *it++;
    EXPECT_EQ(buffer.GetKind(eof_token), TokenKind::EndOfFile());
    EXPECT_EQ(buffer.tokens().end(), it);
  }
  {
    // Mixed nesting of parens and curlies.
    TokenizedBuffer buffer = Lex("({x}){(y)} {{((z))}}");
    ASSERT_FALSE(buffer.has_errors());
    auto it = buffer.tokens().begin();

    // `({x})` — curlies nested inside parens.
    auto open_paren_token = *it++;
    auto open_curly_token = *it++;
    ASSERT_EQ("x", buffer.GetIdentifierText(buffer.GetIdentifier(*it++)));
    auto close_curly_token = *it++;
    auto close_paren_token = *it++;
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));

    // `{(y)}` — parens nested inside curlies.
    open_curly_token = *it++;
    open_paren_token = *it++;
    ASSERT_EQ("y", buffer.GetIdentifierText(buffer.GetIdentifier(*it++)));
    close_paren_token = *it++;
    close_curly_token = *it++;
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));

    // `{{((z))}}` — two levels of nesting of each bracket kind.
    open_curly_token = *it++;
    auto inner_open_curly_token = *it++;
    open_paren_token = *it++;
    auto inner_open_paren_token = *it++;
    ASSERT_EQ("z", buffer.GetIdentifierText(buffer.GetIdentifier(*it++)));
    auto inner_close_paren_token = *it++;
    close_paren_token = *it++;
    auto inner_close_curly_token = *it++;
    close_curly_token = *it++;
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));
    EXPECT_EQ(inner_close_curly_token,
              buffer.GetMatchedClosingToken(inner_open_curly_token));
    EXPECT_EQ(inner_open_curly_token,
              buffer.GetMatchedOpeningToken(inner_close_curly_token));
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));
    EXPECT_EQ(inner_close_paren_token,
              buffer.GetMatchedClosingToken(inner_open_paren_token));
    EXPECT_EQ(inner_open_paren_token,
              buffer.GetMatchedOpeningToken(inner_close_paren_token));
    auto eof_token = *it++;
    EXPECT_EQ(buffer.GetKind(eof_token), TokenKind::EndOfFile());
    EXPECT_EQ(buffer.tokens().end(), it);
  }
}
// Verifies recovery from unbalanced grouping tokens: the lexer synthesizes
// matching closers (marked `recovery`) for unclosed openers, and turns stray
// closers into error tokens.
TEST_F(LexerTest, MismatchedGroups) {
  // An unclosed `{` gets a synthesized `}` before end of file.
  auto buffer = Lex("{");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {TokenKind::OpenCurlyBrace()},
                  {.kind = TokenKind::CloseCurlyBrace(), .recovery = true},
                  {TokenKind::EndOfFile()},
              }));

  // A `}` with no matching opener becomes an error token.
  buffer = Lex("}");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::Error(), .text = "}"},
                          {TokenKind::EndOfFile()},
                      }));

  // `(` closed by `}`: a `)` is synthesized at the `}`'s position.
  buffer = Lex("{(}");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {.kind = TokenKind::OpenCurlyBrace(), .column = 1},
          {.kind = TokenKind::OpenParen(), .column = 2},
          {.kind = TokenKind::CloseParen(), .column = 3, .recovery = true},
          {.kind = TokenKind::CloseCurlyBrace(), .column = 3},
          {TokenKind::EndOfFile()},
      }));

  // Leading stray `)` is an error; the unclosed `{` gets a synthesized `}`
  // at the position of the final `)`.
  buffer = Lex(")({)");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {.kind = TokenKind::Error(), .column = 1, .text = ")"},
          {.kind = TokenKind::OpenParen(), .column = 2},
          {.kind = TokenKind::OpenCurlyBrace(), .column = 3},
          {.kind = TokenKind::CloseCurlyBrace(), .column = 4, .recovery = true},
          {.kind = TokenKind::CloseParen(), .column = 4},
          {TokenKind::EndOfFile()},
      }));
}
// Verifies HasLeadingWhitespace / HasTrailingWhitespace, including for tokens
// the lexer inserts during bracket recovery.
TEST_F(LexerTest, Whitespace) {
  auto buffer = Lex("{( } {(");
  // Whether there should be whitespace before/after each token.
  // Note: one token's trailing flag and the next token's leading flag
  // describe the same gap, so `pos` advances only once per token below and
  // each array entry is a boundary between adjacent tokens.
  bool space[] = {true,
                  // {
                  false,
                  // (
                  true,
                  // inserted )
                  true,
                  // }
                  true,
                  // {
                  false,
                  // (
                  true,
                  // inserted )
                  true,
                  // inserted }
                  true,
                  // EOF
                  false};
  int pos = 0;
  for (TokenizedBuffer::Token token : buffer.tokens()) {
    ASSERT_LT(pos, std::size(space));
    EXPECT_THAT(buffer.HasLeadingWhitespace(token), Eq(space[pos]));
    ++pos;
    ASSERT_LT(pos, std::size(space));
    EXPECT_THAT(buffer.HasTrailingWhitespace(token), Eq(space[pos]));
  }
  // All boundaries must have been consumed exactly once.
  ASSERT_EQ(pos + 1, std::size(space));
}
  538. TEST_F(LexerTest, Keywords) {
  539. auto buffer = Lex(" fn");
  540. EXPECT_FALSE(buffer.has_errors());
  541. EXPECT_THAT(buffer,
  542. HasTokens(llvm::ArrayRef<ExpectedToken>{
  543. {.kind = TokenKind::Fn(), .column = 4, .indent_column = 4},
  544. {TokenKind::EndOfFile()},
  545. }));
  546. buffer = Lex("and or not if else for return var break continue _");
  547. EXPECT_FALSE(buffer.has_errors());
  548. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  549. {TokenKind::And()},
  550. {TokenKind::Or()},
  551. {TokenKind::Not()},
  552. {TokenKind::If()},
  553. {TokenKind::Else()},
  554. {TokenKind::For()},
  555. {TokenKind::Return()},
  556. {TokenKind::Var()},
  557. {TokenKind::Break()},
  558. {TokenKind::Continue()},
  559. {TokenKind::Underscore()},
  560. {TokenKind::EndOfFile()},
  561. }));
  562. }
// Verifies that comments produce no tokens while still advancing line and
// column tracking.
TEST_F(LexerTest, Comments) {
  auto buffer = Lex(" ;\n  // foo\n  ;\n");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {.kind = TokenKind::Semi(),
                   .line = 1,
                   .column = 2,
                   .indent_column = 2},
                  // The comment line contributes nothing; the next token is
                  // the semicolon on line 3.
                  {.kind = TokenKind::Semi(),
                   .line = 3,
                   .column = 3,
                   .indent_column = 3},
                  {.kind = TokenKind::EndOfFile(), .line = 3, .column = 4},
              }));

  // Comment-only input lexes to just the end-of-file token.
  buffer = Lex("// foo\n//\n// bar");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{{TokenKind::EndOfFile()}}));

  // Make sure weird characters aren't a problem.
  buffer = Lex("  // foo#$!^?@-_💩🍫⃠ [̲̅$̲̅(̲̅ ͡° ͜ʖ ͡°̲̅)̲̅$̲̅]");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{{TokenKind::EndOfFile()}}));

  // Make sure we can lex a comment at the end of the input.
  buffer = Lex("//");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{{TokenKind::EndOfFile()}}));
}
  596. TEST_F(LexerTest, InvalidComments) {
  597. llvm::StringLiteral testcases[] = {
  598. " /// foo\n",
  599. "foo // bar\n",
  600. "//! hello",
  601. " //world",
  602. };
  603. for (llvm::StringLiteral testcase : testcases) {
  604. auto buffer = Lex(testcase);
  605. EXPECT_TRUE(buffer.has_errors());
  606. }
  607. }
// Verifies identifier lexing: indent tracking, underscore/digit characters,
// keyword prefixes, and repeated (interned) identifiers.
TEST_F(LexerTest, Identifiers) {
  auto buffer = Lex("   foobar");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::Identifier(),
                           .column = 4,
                           .indent_column = 4,
                           .text = "foobar"},
                          {TokenKind::EndOfFile()},
                      }));

  // Check different kinds of identifier character sequences.
  buffer = Lex("_foo_bar");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::Identifier(), .text = "_foo_bar"},
                          {TokenKind::EndOfFile()},
                      }));

  buffer = Lex("foo2bar00");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {.kind = TokenKind::Identifier(), .text = "foo2bar00"},
                  {TokenKind::EndOfFile()},
              }));

  // Check that we can parse identifiers that start with a keyword.
  buffer = Lex("fnord");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::Identifier(), .text = "fnord"},
                          {TokenKind::EndOfFile()},
                      }));

  // Check multiple identifiers with indent and interning.
  buffer = Lex("   foo;bar\nbar \n  foo\tfoo");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {.kind = TokenKind::Identifier(),
                   .line = 1,
                   .column = 4,
                   .indent_column = 4,
                   .text = "foo"},
                  {.kind = TokenKind::Semi()},
                  {.kind = TokenKind::Identifier(),
                   .line = 1,
                   .column = 8,
                   .indent_column = 4,
                   .text = "bar"},
                  {.kind = TokenKind::Identifier(),
                   .line = 2,
                   .column = 1,
                   .indent_column = 1,
                   .text = "bar"},
                  {.kind = TokenKind::Identifier(),
                   .line = 3,
                   .column = 3,
                   .indent_column = 3,
                   .text = "foo"},
                  // The tab between the `foo`s counts as one column byte.
                  {.kind = TokenKind::Identifier(),
                   .line = 3,
                   .column = 7,
                   .indent_column = 3,
                   .text = "foo"},
                  {.kind = TokenKind::EndOfFile(), .line = 3, .column = 10},
              }));
}
// Verifies string literal lexing: simple strings with escapes, multi-line
// block strings with indent stripping and line continuation, raw (`#`-fenced)
// strings, and adjacent `"` literals that look like a block string opener.
TEST_F(LexerTest, StringLiterals) {
  llvm::StringLiteral testcase = R"(
    "hello world\n"

    '''foo
      test \
     \xAB
     ''' trailing

      #"""#

    "\0"

    #"\0"foo"\1"#

    """x"""
  )";
  auto buffer = Lex(testcase);
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {.kind = TokenKind::StringLiteral(),
                   .line = 2,
                   .column = 5,
                   .indent_column = 5,
                   .string_contents = {"hello world\n"}},
                  // The block string: the closing `'''` indent is stripped
                  // from each content line, and `\` at end of line joins it
                  // with the next.
                  {.kind = TokenKind::StringLiteral(),
                   .line = 4,
                   .column = 5,
                   .indent_column = 5,
                   .string_contents = {" test \xAB\n"}},
                  {.kind = TokenKind::Identifier(),
                   .line = 7,
                   .column = 10,
                   .indent_column = 5,
                   .text = "trailing"},
                  // Raw string `#"""#` contains a single `"`.
                  {.kind = TokenKind::StringLiteral(),
                   .line = 9,
                   .column = 7,
                   .indent_column = 7,
                   .string_contents = {"\""}},
                  // `\0` escape produces an embedded NUL byte.
                  {.kind = TokenKind::StringLiteral(),
                   .line = 11,
                   .column = 5,
                   .indent_column = 5,
                   .string_contents = llvm::StringLiteral::withInnerNUL("\0")},
                  // In a `#`-fenced string, plain `\` escapes are literal.
                  {.kind = TokenKind::StringLiteral(),
                   .line = 13,
                   .column = 5,
                   .indent_column = 5,
                   .string_contents = {"\\0\"foo\"\\1"}},
                  // """x""" is three string literals, not one invalid
                  // attempt at a block string literal.
                  {.kind = TokenKind::StringLiteral(),
                   .line = 15,
                   .column = 5,
                   .indent_column = 5,
                   .string_contents = {""}},
                  {.kind = TokenKind::StringLiteral(),
                   .line = 15,
                   .column = 7,
                   .indent_column = 5,
                   .string_contents = {"x"}},
                  {.kind = TokenKind::StringLiteral(),
                   .line = 15,
                   .column = 10,
                   .indent_column = 5,
                   .string_contents = {""}},
                  {.kind = TokenKind::EndOfFile(), .line = 16, .column = 3},
              }));
}
// Verifies that unterminated or malformed string literals report errors and
// produce at least one error token.
TEST_F(LexerTest, InvalidStringLiterals) {
  llvm::StringLiteral invalid[] = {
      // clang-format off
      R"(")",
      R"('''
'')",
      R"("\)",
      R"("\")",
      R"("\\)",
      R"("\\\")",
      R"(''')",
      R"('''
)",
      R"('''\)",
      R"(#'''
''')",
      // clang-format on
  };
  for (llvm::StringLiteral test : invalid) {
    SCOPED_TRACE(test);
    auto buffer = Lex(test);
    EXPECT_TRUE(buffer.has_errors());

    // We should have formed at least one error token.
    bool found_error = false;
    for (TokenizedBuffer::Token token : buffer.tokens()) {
      if (buffer.GetKind(token) == TokenKind::Error()) {
        found_error = true;
        break;
      }
    }
    EXPECT_TRUE(found_error);
  }
}
// Verifies type literal lexing: `iN`, `uN`, and `fN` with a positive decimal
// N are type literals; spellings like `i0`, hex sizes, trailing letters, or
// other prefixes remain plain identifiers. Also checks GetTypeLiteralSize.
TEST_F(LexerTest, TypeLiterals) {
  llvm::StringLiteral testcase = R"(
    i0 i1 i20 i999999999999 i0x1
    u0 u1 u64 u64b
    f32 f80 f1 fi
    s1
  )";
  auto buffer = Lex(testcase);
  EXPECT_FALSE(buffer.has_errors());
  ASSERT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  // `i0` — zero width is not a type literal.
                  {.kind = TokenKind::Identifier(),
                   .line = 2,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"i0"}},
                  {.kind = TokenKind::IntegerTypeLiteral(),
                   .line = 2,
                   .column = 8,
                   .indent_column = 5,
                   .text = {"i1"}},
                  {.kind = TokenKind::IntegerTypeLiteral(),
                   .line = 2,
                   .column = 11,
                   .indent_column = 5,
                   .text = {"i20"}},
                  {.kind = TokenKind::IntegerTypeLiteral(),
                   .line = 2,
                   .column = 15,
                   .indent_column = 5,
                   .text = {"i999999999999"}},
                  // Hex size — not a type literal.
                  {.kind = TokenKind::Identifier(),
                   .line = 2,
                   .column = 29,
                   .indent_column = 5,
                   .text = {"i0x1"}},
                  {.kind = TokenKind::Identifier(),
                   .line = 3,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"u0"}},
                  {.kind = TokenKind::UnsignedIntegerTypeLiteral(),
                   .line = 3,
                   .column = 8,
                   .indent_column = 5,
                   .text = {"u1"}},
                  {.kind = TokenKind::UnsignedIntegerTypeLiteral(),
                   .line = 3,
                   .column = 11,
                   .indent_column = 5,
                   .text = {"u64"}},
                  // Trailing letter — not a type literal.
                  {.kind = TokenKind::Identifier(),
                   .line = 3,
                   .column = 15,
                   .indent_column = 5,
                   .text = {"u64b"}},
                  {.kind = TokenKind::FloatingPointTypeLiteral(),
                   .line = 4,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"f32"}},
                  {.kind = TokenKind::FloatingPointTypeLiteral(),
                   .line = 4,
                   .column = 9,
                   .indent_column = 5,
                   .text = {"f80"}},
                  {.kind = TokenKind::FloatingPointTypeLiteral(),
                   .line = 4,
                   .column = 13,
                   .indent_column = 5,
                   .text = {"f1"}},
                  {.kind = TokenKind::Identifier(),
                   .line = 4,
                   .column = 16,
                   .indent_column = 5,
                   .text = {"fi"}},
                  // `s` is not a type literal prefix.
                  {.kind = TokenKind::Identifier(),
                   .line = 5,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"s1"}},
                  {.kind = TokenKind::EndOfFile(), .line = 6, .column = 3},
              }));

  // Check the parsed bit widths for each type literal token.
  auto token_i1 = buffer.tokens().begin() + 1;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_i1), 1);
  auto token_i20 = buffer.tokens().begin() + 2;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_i20), 20);
  auto token_i999999999999 = buffer.tokens().begin() + 3;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_i999999999999), 999999999999ULL);
  auto token_u1 = buffer.tokens().begin() + 6;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_u1), 1);
  auto token_u64 = buffer.tokens().begin() + 7;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_u64), 64);
  auto token_f32 = buffer.tokens().begin() + 9;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_f32), 32);
  auto token_f80 = buffer.tokens().begin() + 10;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_f80), 80);
  auto token_f1 = buffer.tokens().begin() + 11;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_f1), 1);
}
  872. TEST_F(LexerTest, TypeLiteralTooManyDigits) {
  873. std::string code = "i";
  874. constexpr int Count = 10000;
  875. code.append(Count, '9');
  876. Testing::MockDiagnosticConsumer consumer;
  877. EXPECT_CALL(consumer,
  878. HandleDiagnostic(IsDiagnostic(
  879. DiagnosticKind::TooManyDigits, DiagnosticLevel::Error, 1, 2,
  880. HasSubstr(llvm::formatv(" {0} ", Count)))));
  881. auto buffer = Lex(code, consumer);
  882. EXPECT_TRUE(buffer.has_errors());
  883. ASSERT_THAT(
  884. buffer,
  885. HasTokens(llvm::ArrayRef<ExpectedToken>{
  886. {.kind = TokenKind::Error(),
  887. .line = 1,
  888. .column = 1,
  889. .indent_column = 1,
  890. .text = {code}},
  891. {.kind = TokenKind::EndOfFile(), .line = 1, .column = Count + 2},
  892. }));
  893. }
  894. TEST_F(LexerTest, DiagnosticTrailingComment) {
  895. llvm::StringLiteral testcase = R"(
  896. // Hello!
  897. var String x; // trailing comment
  898. )";
  899. Testing::MockDiagnosticConsumer consumer;
  900. EXPECT_CALL(consumer,
  901. HandleDiagnostic(IsDiagnostic(DiagnosticKind::TrailingComment,
  902. DiagnosticLevel::Error, 3, 19, _)));
  903. Lex(testcase, consumer);
  904. }
  905. TEST_F(LexerTest, DiagnosticWhitespace) {
  906. Testing::MockDiagnosticConsumer consumer;
  907. EXPECT_CALL(consumer, HandleDiagnostic(IsDiagnostic(
  908. DiagnosticKind::NoWhitespaceAfterCommentIntroducer,
  909. DiagnosticLevel::Error, 1, 3, _)));
  910. Lex("//no space after comment", consumer);
  911. }
  912. TEST_F(LexerTest, DiagnosticUnrecognizedEscape) {
  913. Testing::MockDiagnosticConsumer consumer;
  914. EXPECT_CALL(consumer, HandleDiagnostic(IsDiagnostic(
  915. DiagnosticKind::UnknownEscapeSequence,
  916. DiagnosticLevel::Error, 1, 8, HasSubstr("`b`"))));
  917. Lex(R"("hello\bworld")", consumer);
  918. }
  919. TEST_F(LexerTest, DiagnosticBadHex) {
  920. Testing::MockDiagnosticConsumer consumer;
  921. EXPECT_CALL(consumer, HandleDiagnostic(IsDiagnostic(
  922. DiagnosticKind::HexadecimalEscapeMissingDigits,
  923. DiagnosticLevel::Error, 1, 9, _)));
  924. Lex(R"("hello\xabworld")", consumer);
  925. }
  926. TEST_F(LexerTest, DiagnosticInvalidDigit) {
  927. Testing::MockDiagnosticConsumer consumer;
  928. EXPECT_CALL(consumer, HandleDiagnostic(IsDiagnostic(
  929. DiagnosticKind::InvalidDigit,
  930. DiagnosticLevel::Error, 1, 6, HasSubstr("'a'"))));
  931. Lex("0x123abc", consumer);
  932. }
  933. TEST_F(LexerTest, DiagnosticMissingTerminator) {
  934. Testing::MockDiagnosticConsumer consumer;
  935. EXPECT_CALL(consumer,
  936. HandleDiagnostic(IsDiagnostic(DiagnosticKind::UnterminatedString,
  937. DiagnosticLevel::Error, 1, 1, _)));
  938. Lex(R"(#" ")", consumer);
  939. }
  940. TEST_F(LexerTest, DiagnosticUnrecognizedChar) {
  941. Testing::MockDiagnosticConsumer consumer;
  942. EXPECT_CALL(consumer, HandleDiagnostic(
  943. IsDiagnostic(DiagnosticKind::UnrecognizedCharacters,
  944. DiagnosticLevel::Error, 1, 1, _)));
  945. Lex("\b", consumer);
  946. }
  947. auto GetAndDropLine(llvm::StringRef& text) -> std::string {
  948. auto newline_offset = text.find_first_of('\n');
  949. llvm::StringRef line = text.slice(0, newline_offset);
  950. if (newline_offset != llvm::StringRef::npos) {
  951. text = text.substr(newline_offset + 1);
  952. } else {
  953. text = "";
  954. }
  955. return line.str();
  956. }
  957. TEST_F(LexerTest, PrintingBasic) {
  958. auto buffer = Lex(";");
  959. ASSERT_FALSE(buffer.has_errors());
  960. std::string print_storage;
  961. llvm::raw_string_ostream print_stream(print_storage);
  962. buffer.Print(print_stream);
  963. llvm::StringRef print = print_stream.str();
  964. EXPECT_THAT(GetAndDropLine(print),
  965. StrEq("token: { index: 0, kind: 'Semi', line: 1, column: 1, "
  966. "indent: 1, spelling: ';', has_trailing_space: true }"));
  967. EXPECT_THAT(GetAndDropLine(print),
  968. StrEq("token: { index: 1, kind: 'EndOfFile', line: 1, column: 2, "
  969. "indent: 1, spelling: '' }"));
  970. EXPECT_TRUE(print.empty()) << print;
  971. }
  972. TEST_F(LexerTest, PrintingInteger) {
  973. auto buffer = Lex("123");
  974. ASSERT_FALSE(buffer.has_errors());
  975. std::string print_storage;
  976. llvm::raw_string_ostream print_stream(print_storage);
  977. buffer.Print(print_stream);
  978. llvm::StringRef print = print_stream.str();
  979. EXPECT_THAT(GetAndDropLine(print),
  980. StrEq("token: { index: 0, kind: 'IntegerLiteral', line: 1, "
  981. "column: 1, indent: 1, spelling: '123', value: `123`, "
  982. "has_trailing_space: true }"));
  983. EXPECT_THAT(GetAndDropLine(print), HasSubstr("'EndOfFile'"));
  984. EXPECT_TRUE(print.empty()) << print;
  985. }
  986. TEST_F(LexerTest, PrintingReal) {
  987. auto buffer = Lex("2.5");
  988. ASSERT_FALSE(buffer.has_errors());
  989. std::string print_storage;
  990. llvm::raw_string_ostream print_stream(print_storage);
  991. buffer.Print(print_stream);
  992. llvm::StringRef print = print_stream.str();
  993. EXPECT_THAT(
  994. GetAndDropLine(print),
  995. StrEq(
  996. "token: { index: 0, kind: 'RealLiteral', line: 1, column: 1, indent: "
  997. "1, spelling: '2.5', value: `25*10^-1`, has_trailing_space: true }"));
  998. EXPECT_THAT(GetAndDropLine(print), HasSubstr("'EndOfFile'"));
  999. EXPECT_TRUE(print.empty()) << print;
  1000. }
  1001. TEST_F(LexerTest, PrintingPadding) {
  1002. // Test kind padding.
  1003. auto buffer = Lex("(;foo;)");
  1004. ASSERT_FALSE(buffer.has_errors());
  1005. std::string print_storage;
  1006. llvm::raw_string_ostream print_stream(print_storage);
  1007. buffer.Print(print_stream);
  1008. llvm::StringRef print = print_stream.str();
  1009. EXPECT_THAT(GetAndDropLine(print),
  1010. StrEq("token: { index: 0, kind: 'OpenParen', line: 1, column: "
  1011. "1, indent: 1, spelling: '(', closing_token: 4 }"));
  1012. EXPECT_THAT(GetAndDropLine(print),
  1013. StrEq("token: { index: 1, kind: 'Semi', line: 1, column: "
  1014. "2, indent: 1, spelling: ';' }"));
  1015. EXPECT_THAT(GetAndDropLine(print),
  1016. StrEq("token: { index: 2, kind: 'Identifier', line: 1, column: "
  1017. "3, indent: 1, spelling: 'foo', identifier: 0 }"));
  1018. EXPECT_THAT(GetAndDropLine(print),
  1019. StrEq("token: { index: 3, kind: 'Semi', line: 1, column: "
  1020. "6, indent: 1, spelling: ';' }"));
  1021. EXPECT_THAT(GetAndDropLine(print),
  1022. StrEq("token: { index: 4, kind: 'CloseParen', line: 1, column: "
  1023. "7, indent: 1, spelling: ')', opening_token: 0, "
  1024. "has_trailing_space: true }"));
  1025. EXPECT_THAT(GetAndDropLine(print),
  1026. StrEq("token: { index: 5, kind: 'EndOfFile', line: 1, column: "
  1027. "8, indent: 1, spelling: '' }"));
  1028. EXPECT_TRUE(print.empty()) << print;
  1029. }
  1030. TEST_F(LexerTest, PrintingPaddingDigits) {
  1031. // Test digit padding with max values of 9, 10, and 11.
  1032. auto buffer = Lex(";\n\n\n\n\n\n\n\n\n\n ;;");
  1033. ASSERT_FALSE(buffer.has_errors());
  1034. std::string print_storage;
  1035. llvm::raw_string_ostream print_stream(print_storage);
  1036. buffer.Print(print_stream);
  1037. llvm::StringRef print = print_stream.str();
  1038. EXPECT_THAT(
  1039. GetAndDropLine(print),
  1040. StrEq("token: { index: 0, kind: 'Semi', line: 1, column: 1, "
  1041. "indent: 1, spelling: ';', has_trailing_space: true }"));
  1042. EXPECT_THAT(
  1043. GetAndDropLine(print),
  1044. StrEq("token: { index: 1, kind: 'Semi', line: 11, column: 9, "
  1045. "indent: 9, spelling: ';' }"));
  1046. EXPECT_THAT(
  1047. GetAndDropLine(print),
  1048. StrEq("token: { index: 2, kind: 'Semi', line: 11, column: 10, "
  1049. "indent: 9, spelling: ';', has_trailing_space: true }"));
  1050. EXPECT_THAT(
  1051. GetAndDropLine(print),
  1052. StrEq("token: { index: 3, kind: 'EndOfFile', line: 11, column: 11, "
  1053. "indent: 9, spelling: '' }"));
  1054. EXPECT_TRUE(print.empty()) << print;
  1055. }
TEST_F(LexerTest, PrintingAsYaml) {
  // Test that we can parse this into YAML and verify line and indent data.
  auto buffer = Lex("\n ;\n\n\n; ;\n\n\n\n\n\n\n\n\n\n\n");
  ASSERT_FALSE(buffer.has_errors());
  std::string print_output;
  llvm::raw_string_ostream print_stream(print_output);
  buffer.Print(print_stream);
  print_stream.flush();

  // Each token prints as one YAML mapping with its index, kind, position,
  // indent (the column of the line's first token), spelling, and trailing
  // whitespace flag.
  EXPECT_THAT(Yaml::Value::FromText(print_output),
              ElementsAre(Yaml::MappingValue{
                  {"token", Yaml::MappingValue{{"index", "0"},
                                               {"kind", "Semi"},
                                               {"line", "2"},
                                               {"column", "2"},
                                               {"indent", "2"},
                                               {"spelling", ";"},
                                               {"has_trailing_space", "true"}}},
                  {"token", Yaml::MappingValue{{"index", "1"},
                                               {"kind", "Semi"},
                                               {"line", "5"},
                                               {"column", "1"},
                                               {"indent", "1"},
                                               {"spelling", ";"},
                                               {"has_trailing_space", "true"}}},
                  {"token", Yaml::MappingValue{{"index", "2"},
                                               {"kind", "Semi"},
                                               {"line", "5"},
                                               {"column", "3"},
                                               {"indent", "1"},
                                               {"spelling", ";"},
                                               {"has_trailing_space", "true"}}},
                  {"token", Yaml::MappingValue{{"index", "3"},
                                               {"kind", "EndOfFile"},
                                               {"line", "15"},
                                               {"column", "1"},
                                               {"indent", "1"},
                                               {"spelling", ""}}}}));
}
  1094. TEST_F(LexerTest, PrintToken) {
  1095. auto buffer = Lex("0x9");
  1096. ASSERT_FALSE(buffer.has_errors());
  1097. std::string print_output;
  1098. llvm::raw_string_ostream print_stream(print_output);
  1099. buffer.Print(print_stream);
  1100. llvm::StringRef print = print_stream.str();
  1101. EXPECT_THAT(GetAndDropLine(print),
  1102. StrEq("token: { index: 0, kind: 'IntegerLiteral', line: 1, "
  1103. "column: 1, indent: 1, spelling: '0x9', value: `9`, "
  1104. "has_trailing_space: true }"));
  1105. }
  1106. } // namespace
  1107. } // namespace Carbon::Testing