Jelajahi Sumber

Add more compile benchmark stats (#4408)

I was discussing some details of cross-compiler lex performance. We were
initially talking about LoC, but lex performance in particular will
differ based on the bytes and tokens being lexed, so this throws in some
stats for how quickly we're processing those. Here's some example output:

```
----------------------------------------------------------------------------------------------------------------------------
Benchmark                                                 Time             CPU   Iterations      Bytes      Lines     Tokens
----------------------------------------------------------------------------------------------------------------------------
BM_CompileAPIFileDenseDecls<Phase::Lex>/256           31828 ns        31798 ns        22528  165.64M/s 6.13247M/s 34.6249M/s
BM_CompileAPIFileDenseDecls<Phase::Lex>/1024         147513 ns       147434 ns         5120 220.363M/s 6.64025M/s  39.014M/s
BM_CompileAPIFileDenseDecls<Phase::Lex>/4096         611530 ns       610985 ns         1280  232.22M/s 6.59264M/s 39.0501M/s
BM_CompileAPIFileDenseDecls<Phase::Lex>/16384       2645671 ns      2643411 ns          320 231.122M/s 6.17119M/s  36.616M/s
BM_CompileAPIFileDenseDecls<Phase::Lex>/65536      11593324 ns     11587201 ns           64 217.864M/s 5.64934M/s 33.5378M/s
BM_CompileAPIFileDenseDecls<Phase::Lex>/262144     60338069 ns     60313976 ns           16 169.444M/s 4.34607M/s 25.8032M/s
BM_CompileAPIFileDenseDecls<Phase::Parse>/256         53355 ns        53308 ns        13312 98.8029M/s 3.65798M/s 20.6535M/s
BM_CompileAPIFileDenseDecls<Phase::Parse>/1024       253979 ns       253818 ns         3072 128.001M/s  3.8571M/s 22.6619M/s
BM_CompileAPIFileDenseDecls<Phase::Parse>/4096      1052984 ns      1052427 ns          768 134.815M/s 3.82734M/s 22.6705M/s
BM_CompileAPIFileDenseDecls<Phase::Parse>/16384     4364730 ns      4362756 ns          192 140.038M/s 3.73915M/s 22.1857M/s
BM_CompileAPIFileDenseDecls<Phase::Parse>/65536    19419562 ns     19413505 ns           48 130.035M/s 3.37188M/s 20.0175M/s
BM_CompileAPIFileDenseDecls<Phase::Parse>/262144   89023213 ns     88979387 ns            8 114.856M/s 2.94595M/s 17.4905M/s
BM_CompileAPIFileDenseDecls<Phase::Check>/256        676254 ns       675605 ns         1024 7.79597M/s  288.63k/s 1.62965M/s
BM_CompileAPIFileDenseDecls<Phase::Check>/1024      1412608 ns      1411876 ns         1024 23.0112M/s 693.404k/s 4.07401M/s
BM_CompileAPIFileDenseDecls<Phase::Check>/4096      4333665 ns      4331240 ns          256 32.7581M/s 929.988k/s 5.50858M/s
BM_CompileAPIFileDenseDecls<Phase::Check>/16384    16566625 ns     16553982 ns           64 36.9065M/s 985.443k/s 5.84699M/s
BM_CompileAPIFileDenseDecls<Phase::Check>/65536    68609701 ns     68542189 ns           16 36.8304M/s 955.032k/s 5.66963M/s
BM_CompileAPIFileDenseDecls<Phase::Check>/262144  302899379 ns    302596672 ns            8 33.7739M/s 866.265k/s 5.14313M/s
```

Also note, this is the discussion that led to [me looking at bytes per
token](https://discord.com/channels/655572317891461132/655578254970716160/1295803122844700786)
Jon Ross-Perkins 1 tahun lalu
induk
melakukan
c25177658a
2 mengubah file dengan 23 tambahan dan 13 penghapusan
  1. 1 0
      toolchain/driver/BUILD
  2. 22 13
      toolchain/driver/compile_benchmark.cpp

+ 1 - 0
toolchain/driver/BUILD

@@ -67,6 +67,7 @@ cc_binary(
         "//testing/base:global_exe_path",
         "//testing/base:source_gen_lib",
         "//toolchain/install:install_paths_test_helpers",
+        "//toolchain/testing:compile_helper",
         "@google_benchmark//:benchmark",
         "@llvm-project//llvm:Support",
     ],

+ 22 - 13
toolchain/driver/compile_benchmark.cpp

@@ -10,6 +10,7 @@
 #include "testing/base/source_gen.h"
 #include "toolchain/driver/driver.h"
 #include "toolchain/install/install_paths_test_helpers.h"
+#include "toolchain/testing/compile_helper.h"
 
 namespace Carbon::Testing {
 namespace {
@@ -96,18 +97,30 @@ static auto BM_CompileAPIFileDenseDecls(benchmark::State& state) -> void {
   int num_files = ComputeFileCount(target_lines);
   llvm::OwningArrayRef<std::string> sources(num_files);
 
-  // Create a collection of random source files. Average the actual number of
-  // lines resulting so we can use that to compute the compilation speed as a
-  // line-rate counter.
-  double avg_lines = 0.0;
+  // Create a collection of random source files. Compute average statistics for
+  // counters for compilation speed.
+  CompileHelper compile_helper;
+  double total_bytes = 0.0;
+  double total_tokens = 0.0;
+  double total_lines = 0.0;
   for (std::string& source : sources) {
     source = bench.gen().GenAPIFileDenseDecls(target_lines,
                                               SourceGen::DenseDeclParams{});
-    avg_lines += llvm::count(source, '\n');
-  }
-  avg_lines /= sources.size();
-
-  // Setup the sources as files for compilation.
+    total_bytes += source.size();
+    total_tokens += compile_helper.GetTokenizedBuffer(source).size();
+    total_lines += llvm::count(source, '\n');
+  };
+  state.counters["Bytes"] =
+      benchmark::Counter(total_bytes / sources.size(),
+                         benchmark::Counter::kIsIterationInvariantRate);
+  state.counters["Tokens"] =
+      benchmark::Counter(total_tokens / sources.size(),
+                         benchmark::Counter::kIsIterationInvariantRate);
+  state.counters["Lines"] =
+      benchmark::Counter(total_lines / sources.size(),
+                         benchmark::Counter::kIsIterationInvariantRate);
+
+  // Set up the sources as files for compilation.
   llvm::OwningArrayRef<std::string> file_names = bench.SetUpFiles(sources);
   CARBON_CHECK(static_cast<int>(file_names.size()) == num_files);
 
@@ -132,10 +145,6 @@ static auto BM_CompileAPIFileDenseDecls(benchmark::State& state) -> void {
       i += static_cast<ssize_t>(success);
     }
   }
-
-  // Compute the line-rate of these compilations.
-  state.counters["Lines"] = benchmark::Counter(
-      avg_lines, benchmark::Counter::kIsIterationInvariantRate);
 }
 
 // Benchmark from 256-line test cases through 256k line test cases, and for each