autoupdate.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "testing/file_test/autoupdate.h"
  5. #include <fstream>
  6. #include "absl/strings/string_view.h"
  7. #include "common/check.h"
  8. #include "common/ostream.h"
  9. #include "llvm/ADT/DenseMap.h"
  10. #include "llvm/ADT/STLFunctionalExtras.h"
  11. #include "llvm/ADT/StringExtras.h"
  12. #include "llvm/Support/FormatVariadic.h"
  13. #include "re2/re2.h"
  14. namespace Carbon::Testing {
  15. // Put helper classes in an anonymous namespace.
  16. namespace {
  17. // Converts a matched line number to an int, trimming whitespace.
  18. static auto ParseLineNumber(absl::string_view matched_line_number) -> int {
  19. llvm::StringRef trimmed = matched_line_number;
  20. trimmed = trimmed.trim();
  21. // NOLINTNEXTLINE(google-runtime-int): API requirement.
  22. long long val;
  23. CARBON_CHECK(!llvm::getAsSignedInteger(trimmed, 10, val));
  24. return val;
  25. }
  26. class CheckLine : public FileTestLineBase {
  27. public:
  28. // RE2 is passed by a pointer because it doesn't support std::optional.
  29. explicit CheckLine(int file_number, int line_number,
  30. bool line_number_re_has_file, const RE2* line_number_re,
  31. std::string line)
  32. : FileTestLineBase(line_number),
  33. file_number_(file_number),
  34. line_number_re_has_file_(line_number_re_has_file),
  35. line_number_re_(line_number_re),
  36. line_(std::move(line)) {}
  37. auto Print(llvm::raw_ostream& out) const -> void override {
  38. out << indent_ << line_;
  39. }
  40. // When the location of the CHECK in output is known, we can set the indent
  41. // and its line.
  42. auto SetOutputLine(llvm::StringRef indent, int output_file_number,
  43. int output_line_number) -> void {
  44. indent_ = indent;
  45. output_file_number_ = output_file_number;
  46. output_line_number_ = output_line_number;
  47. }
  48. // When the location of all lines in a file are known, we can set the line
  49. // offset based on the target line.
  50. auto RemapLineNumbers(const std::string& line_formatv,
  51. llvm::function_ref<int(int)> line_remap) -> void {
  52. // Only need to do remappings when there's a regex.
  53. if (!line_number_re_) {
  54. return;
  55. }
  56. // If the CHECK was written to a different file from the file that it refers
  57. // to, leave behind an absolute line reference rather than a cross-file
  58. // offset.
  59. // TODO: We should also remap cross-file line references so that we don't
  60. // need multiple runs of autoupdate for the output to stabilize.
  61. if (output_file_number_ != file_number_) {
  62. return;
  63. }
  64. bool found_one = false;
  65. while (true) {
  66. // Look for a line number to replace. There may be multiple, so we
  67. // repeatedly check.
  68. absl::string_view matched_line_number;
  69. if (line_number_re_has_file_) {
  70. RE2::PartialMatch(line_, *line_number_re_, nullptr,
  71. &matched_line_number);
  72. } else {
  73. RE2::PartialMatch(line_, *line_number_re_, &matched_line_number);
  74. }
  75. if (matched_line_number.empty()) {
  76. CARBON_CHECK(found_one) << line_;
  77. return;
  78. }
  79. found_one = true;
  80. // Calculate the offset from the CHECK line to the new line number
  81. // (possibly with new CHECK lines added, or some removed).
  82. int new_line_number = line_remap(ParseLineNumber(matched_line_number));
  83. int offset = new_line_number - output_line_number_;
  84. // Update the line offset in the CHECK line.
  85. const char* offset_prefix = offset < 0 ? "" : "+";
  86. std::string replacement = llvm::formatv(
  87. line_formatv.c_str(),
  88. llvm::formatv("[[@LINE{0}{1}]]", offset_prefix, offset));
  89. line_.replace(matched_line_number.data() - line_.data(),
  90. matched_line_number.size(), replacement);
  91. }
  92. }
  93. int file_number() const { return file_number_; }
  94. auto is_blank() const -> bool override { return false; }
  95. private:
  96. int file_number_;
  97. bool line_number_re_has_file_;
  98. const RE2* line_number_re_;
  99. std::string line_;
  100. llvm::StringRef indent_;
  101. int output_file_number_ = -1;
  102. int output_line_number_ = -1;
  103. };
  104. } // namespace
  105. // Builds CheckLine lists for autoupdate.
  106. static auto BuildCheckLines(
  107. llvm::StringRef output, const char* label,
  108. const llvm::SmallVector<llvm::StringRef>& filenames,
  109. bool line_number_re_has_file, const RE2& line_number_re,
  110. std::function<void(std::string&)> do_extra_check_replacements)
  111. -> llvm::SmallVector<CheckLine> {
  112. llvm::SmallVector<CheckLine> check_lines;
  113. if (output.empty()) {
  114. return check_lines;
  115. }
  116. // Prepare to look for filenames in lines.
  117. llvm::DenseMap<llvm::StringRef, int> file_to_number_map;
  118. for (auto [number, name] : llvm::enumerate(filenames)) {
  119. file_to_number_map.insert({name, number});
  120. }
  121. // %t substitution means we may see TEST_TMPDIR in output.
  122. char* tmpdir_env = getenv("TEST_TMPDIR");
  123. CARBON_CHECK(tmpdir_env != nullptr);
  124. llvm::StringRef tmpdir = tmpdir_env;
  125. llvm::SmallVector<llvm::StringRef> lines(llvm::split(output, '\n'));
  126. // It's typical that output ends with a newline, but we don't want to add a
  127. // blank CHECK for it.
  128. if (lines.back().empty()) {
  129. lines.pop_back();
  130. }
  131. // `{{` and `[[` are escaped as a regex matcher.
  132. RE2 double_brace_re(R"(\{\{)");
  133. RE2 double_square_bracket_re(R"(\[\[)");
  134. // End-of-line whitespace is replaced with a regex matcher to make it visible.
  135. RE2 end_of_line_whitespace_re(R"((\s+)$)");
  136. for (const auto& line : lines) {
  137. std::string check_line = llvm::formatv("// CHECK:{0}:{1}{2}", label,
  138. line.empty() ? "" : " ", line);
  139. RE2::Replace(&check_line, double_brace_re, R"({{\\{\\{}})");
  140. RE2::Replace(&check_line, double_square_bracket_re, R"({{\\[\\[}})");
  141. RE2::Replace(&check_line, end_of_line_whitespace_re, R"({{\1}})");
  142. // Ignore TEST_TMPDIR in output.
  143. if (auto pos = check_line.find(tmpdir); pos != std::string::npos) {
  144. check_line.replace(pos, tmpdir.size(), "{{.+}}");
  145. }
  146. do_extra_check_replacements(check_line);
  147. // Look for line information in the output. use_line_number is only set if
  148. // the match is correct.
  149. std::optional<llvm::StringRef> use_line_number;
  150. absl::string_view match_line_number;
  151. int file_number = 0;
  152. if (line_number_re_has_file) {
  153. absl::string_view match_filename;
  154. if (RE2::PartialMatch(check_line, line_number_re, &match_filename,
  155. &match_line_number)) {
  156. if (auto it = file_to_number_map.find(match_filename);
  157. it != file_to_number_map.end()) {
  158. file_number = it->second;
  159. use_line_number = match_line_number;
  160. }
  161. }
  162. } else {
  163. // There's no file association, so we only look at the line, and assume it
  164. // refers to the main file.
  165. if (RE2::PartialMatch(check_line, line_number_re, &match_line_number)) {
  166. use_line_number = match_line_number;
  167. }
  168. }
  169. int line_number = use_line_number ? ParseLineNumber(*use_line_number) : -1;
  170. check_lines.push_back(
  171. CheckLine(file_number, line_number, line_number_re_has_file,
  172. use_line_number ? &line_number_re : nullptr, check_line));
  173. }
  174. return check_lines;
  175. }
  176. auto AutoupdateFileTest(
  177. const std::filesystem::path& file_test_path, llvm::StringRef input_content,
  178. const llvm::SmallVector<llvm::StringRef>& filenames,
  179. int autoupdate_line_number,
  180. llvm::SmallVector<llvm::SmallVector<FileTestLine>>& non_check_lines,
  181. llvm::StringRef stdout, llvm::StringRef stderr,
  182. FileTestLineNumberReplacement line_number_replacement,
  183. std::function<void(std::string&)> do_extra_check_replacements) -> bool {
  184. RE2 line_number_re(line_number_replacement.pattern);
  185. CARBON_CHECK(line_number_re.ok()) << "Invalid line replacement RE2: `"
  186. << line_number_replacement.pattern << "`";
  187. // Prepare CHECK lines.
  188. llvm::SmallVector<CheckLine> stdout_check_lines = BuildCheckLines(
  189. stdout, "STDOUT", filenames, line_number_replacement.has_file,
  190. line_number_re, do_extra_check_replacements);
  191. llvm::SmallVector<CheckLine> stderr_check_lines = BuildCheckLines(
  192. stderr, "STDERR", filenames, line_number_replacement.has_file,
  193. line_number_re, do_extra_check_replacements);
  194. auto* stdout_check_line = stdout_check_lines.begin();
  195. auto* stderr_check_line = stderr_check_lines.begin();
  196. bool any_attached_stdout_lines = std::any_of(
  197. stdout_check_lines.begin(), stdout_check_lines.end(),
  198. [&](const CheckLine& line) { return line.line_number() != -1; });
  199. // All CHECK lines are suppressed until we reach AUTOUPDATE.
  200. bool reached_autoupdate = false;
  201. const FileTestLine blank_line(-1, "");
  202. // Stitch together content.
  203. llvm::SmallVector<const FileTestLineBase*> new_lines;
  204. for (auto [file_number_as_size_t, filename, non_check_file] :
  205. llvm::enumerate(filenames, non_check_lines)) {
  206. auto file_number = static_cast<int>(file_number_as_size_t);
  207. llvm::DenseMap<int, int> output_line_remap;
  208. llvm::SmallVector<CheckLine*> check_lines_this_file;
  209. int output_line_number = 0;
  210. // Add all check lines from the given vector until we reach a check line
  211. // attached to a line later than `to_line_number`.
  212. auto add_check_lines = [&](const llvm::SmallVector<CheckLine>& lines,
  213. CheckLine*& line, int to_line_number,
  214. llvm::StringRef indent) {
  215. for (; line != lines.end() && (line->file_number() < file_number ||
  216. (line->file_number() == file_number &&
  217. line->line_number() <= to_line_number));
  218. ++line) {
  219. new_lines.push_back(line);
  220. line->SetOutputLine(indent, file_number, ++output_line_number);
  221. check_lines_this_file.push_back(line);
  222. }
  223. };
  224. // Looping through the original file, print check lines preceding each
  225. // original line.
  226. for (const auto& non_check_line : non_check_file) {
  227. // If there are any non-check lines with an invalid line_number, it's
  228. // something like a split directive which shouldn't increment
  229. // output_line_number.
  230. if (non_check_line.line_number() < 1) {
  231. new_lines.push_back(&non_check_line);
  232. continue;
  233. }
  234. // STDERR check lines are placed before the line they refer to, or as
  235. // early as possible if they don't refer to a line. Include all STDERR
  236. // lines until we find one that wants to go later in the file.
  237. if (reached_autoupdate) {
  238. add_check_lines(stderr_check_lines, stderr_check_line,
  239. non_check_line.line_number(), non_check_line.indent());
  240. } else if (autoupdate_line_number == non_check_line.line_number()) {
  241. // This is the AUTOUPDATE line, so we'll print it, then start printing
  242. // CHECK lines.
  243. reached_autoupdate = true;
  244. }
  245. new_lines.push_back(&non_check_line);
  246. CARBON_CHECK(
  247. output_line_remap
  248. .insert({non_check_line.line_number(), ++output_line_number})
  249. .second);
  250. // STDOUT check lines are placed after the line they refer to, or at the
  251. // end of the file if none of them refers to a line.
  252. if (reached_autoupdate && any_attached_stdout_lines) {
  253. // Include any early STDERR lines now, so that the initial batch of
  254. // CHECK lines have STDERR before STDOUT.
  255. if (autoupdate_line_number == non_check_line.line_number()) {
  256. add_check_lines(stderr_check_lines, stderr_check_line,
  257. non_check_line.line_number(),
  258. non_check_line.indent());
  259. }
  260. add_check_lines(stdout_check_lines, stdout_check_line,
  261. non_check_line.line_number(), non_check_line.indent());
  262. }
  263. }
  264. // This should always be true after the first file is processed.
  265. CARBON_CHECK(reached_autoupdate);
  266. // At the end of the last file, print remaining check lines which -- for
  267. // whatever reason -- come after all original lines.
  268. if (file_number == static_cast<int>(filenames.size()) - 1 &&
  269. (stderr_check_line != stderr_check_lines.end() ||
  270. stdout_check_line != stdout_check_lines.end())) {
  271. // Ensure there's a blank line before any trailing CHECKs.
  272. if (!new_lines.empty() && !new_lines.back()->is_blank()) {
  273. new_lines.push_back(&blank_line);
  274. ++output_line_number;
  275. }
  276. add_check_lines(stderr_check_lines, stderr_check_line, INT_MAX, "");
  277. add_check_lines(stdout_check_lines, stdout_check_line, INT_MAX, "");
  278. }
  279. // Update all remapped lines in CHECK output.
  280. for (auto* offset_check_line : check_lines_this_file) {
  281. int last_non_check_line = non_check_file.back().line_number();
  282. offset_check_line->RemapLineNumbers(
  283. line_number_replacement.line_formatv, [&](int old_line_number) {
  284. // Map old non-check lines to their new line numbers.
  285. auto remapped = output_line_remap.find(old_line_number);
  286. if (remapped != output_line_remap.end()) {
  287. return remapped->second;
  288. }
  289. // Map any reference to a line past the final non-check line to
  290. // the new end-of-file. We assume that any such reference is
  291. // referring to the end of file, not to some specific CHECK
  292. // comment.
  293. if (old_line_number > last_non_check_line) {
  294. return output_line_number;
  295. }
  296. // Line didn't get remapped; maybe it refers to a CHECK line.
  297. // We can't express that as an offset, just leave it as-is.
  298. return old_line_number;
  299. });
  300. }
  301. }
  302. // Generate the autoupdated file.
  303. std::string new_content;
  304. llvm::raw_string_ostream new_content_stream(new_content);
  305. for (const auto& line : new_lines) {
  306. line->Print(new_content_stream);
  307. new_content_stream << '\n';
  308. }
  309. // Update the file on disk if needed.
  310. if (new_content == input_content) {
  311. return false;
  312. }
  313. std::ofstream out(file_test_path);
  314. out << new_content;
  315. return true;
  316. }
  317. } // namespace Carbon::Testing