Bladeren bron

Factor out C-string argv building and simplify vlogs (#6478)

This extracts the C-string `argv`-like building routine to a more
broadly reusable location. It also sinks the verbose logging logic out
of it and into the relevant runners. In turn, it simplifies the verbose
logging logic significantly.

The biggest functional change is removing the implicit synthesis of a
tool's `-v` verbose flag from the presence of a `vlog` stream. I thought
this would be helpful, but in practice of debugging these layers it has
been more of a hindrance than a help -- I pretty often only want verbose
logging on one side or the other, and we have ways of explicitly passing
a `-v` flag to the underlying tools already. I think my instinct to do
this was just wrong, so rip it out and simplify.

This does add an unused feature -- prepending a prefix of arguments
while building the C-string variant. This isn't used in this PR but will
be used in subsequent PRs and it seemed more disruptive to undo that
logic and then re-do it in a later PR. Let me know if it's too confusing
here.

Assisted-by: Gemini Code Assist

---------

Co-authored-by: Geoff Romer <gromer@google.com>
Chandler Carruth 4 maanden geleden
bovenliggende
commit
ff8ce31e1b

+ 55 - 0
common/string_helpers.cpp

@@ -4,11 +4,18 @@
 
 #include "common/string_helpers.h"
 
+#include <string.h>
+#include <sys/types.h>
+
 #include <algorithm>
+#include <array>
 #include <optional>
 #include <string>
 
 #include "common/check.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/ConvertUTF.h"
@@ -202,4 +209,52 @@ auto StringRefContainsPointer(llvm::StringRef ref, const char* ptr) -> bool {
   return le(ref.begin(), ptr) && le(ptr, ref.end());
 }
 
+auto BuildCStrArgs(llvm::StringRef tool_path,
+                   llvm::ArrayRef<llvm::StringRef> args,
+                   llvm::OwningArrayRef<char>& cstr_arg_storage)
+    -> llvm::SmallVector<const char*, 64> {
+  return BuildCStrArgs(tool_path, /*prefix_args=*/{}, args, cstr_arg_storage);
+}
+
+auto BuildCStrArgs(llvm::StringRef tool_path,
+                   llvm::ArrayRef<std::string> prefix_args,
+                   llvm::ArrayRef<llvm::StringRef> args,
+                   llvm::OwningArrayRef<char>& cstr_arg_storage)
+    -> llvm::SmallVector<const char*, 64> {
+  // Render the arguments into null-terminated C-strings. Command lines can get
+  // quite long in build systems so this tries to minimize the memory allocation
+  // overhead.
+
+  // Precompute the total C-string data size needed.
+  int total_size = tool_path.size() + 1;
+  for (llvm::StringRef arg : args) {
+    // Accumulate both the string size and a null terminator byte.
+    total_size += arg.size() + 1;
+  }
+
+  // Allocate one chunk of storage for the actual C-strings, and reserve a
+  // vector of pointers into the storage.
+  cstr_arg_storage = llvm::OwningArrayRef<char>(total_size);
+  ssize_t i = 0;
+  auto make_cstr = [&](llvm::StringRef arg) {
+    char* cstr = &cstr_arg_storage[i];
+    memcpy(cstr, arg.data(), arg.size());
+    cstr[arg.size()] = '\0';
+    i += arg.size() + 1;
+    return cstr;
+  };
+
+  llvm::SmallVector<const char*, 64> cstr_args;
+  cstr_args.reserve(1 + prefix_args.size() + args.size());
+  cstr_args.push_back(make_cstr(tool_path));
+  for (const std::string& prefix_arg : prefix_args) {
+    cstr_args.push_back(prefix_arg.c_str());
+  }
+  for (llvm::StringRef arg : args) {
+    cstr_args.push_back(make_cstr(arg));
+  }
+
+  return cstr_args;
+}
+
 }  // namespace Carbon

+ 32 - 0
common/string_helpers.h

@@ -9,6 +9,8 @@
 #include <string>
 
 #include "common/error.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 
 namespace Carbon {
@@ -32,6 +34,36 @@ auto ParseBlockStringLiteral(llvm::StringRef source, int hashtag_num = 0)
 // correctness.
 auto StringRefContainsPointer(llvm::StringRef ref, const char* ptr) -> bool;
 
+// Converts `tool_path` and each of the `args` into C-strings and returns the
+// results. This is intended for use with APIs that expect `argv`-like command
+// line argument lists.
+//
+// Accepts a `cstr_arg_storage` that will provide the underlying storage for
+// the C-strings, and returns a small vector of the C-string pointers. The
+// returned small vector uses a large small size to allow most common command
+// lines to avoid extra allocations and growth passes.
+auto BuildCStrArgs(llvm::StringRef tool_path,
+                   llvm::ArrayRef<llvm::StringRef> args,
+                   llvm::OwningArrayRef<char>& cstr_arg_storage)
+    -> llvm::SmallVector<const char*, 64>;
+
+// An overload of `BuildCStrArgs` with the same core behavior as the above, but
+// with an extra series of `prefix_args` that are placed between the `tool_path`
+// and the `args` in the resulting list.
+//
+// Unlike the tool path and the main `args`, the `prefix_args` are accepted as
+// an array of `std::string`s and those string object's `c_str()` method is used
+// to get the underlying C-strings to include in the result. This is because
+// callers with prefix arguments regularly need to provide dedicated storage for
+// these arguments anyways and we can efficiently reuse that. In contrast, the
+// `args` are often pulled from an existing `llvm::StringRef` that may never
+// exist as a valid C-string and so we need to rebuild those using the storage.
+auto BuildCStrArgs(llvm::StringRef tool_path,
+                   llvm::ArrayRef<std::string> prefix_args,
+                   llvm::ArrayRef<llvm::StringRef> args,
+                   llvm::OwningArrayRef<char>& cstr_arg_storage)
+    -> llvm::SmallVector<const char*, 64>;
+
 }  // namespace Carbon
 
 #endif  // CARBON_COMMON_STRING_HELPERS_H_

+ 66 - 0
common/string_helpers_test.cpp

@@ -10,8 +10,12 @@
 #include <optional>
 #include <string>
 
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+
 using ::testing::Eq;
 using ::testing::Optional;
+using ::testing::StrEq;
 
 namespace Carbon {
 namespace {
@@ -228,5 +232,67 @@ TEST(ParseBlockStringLiteral, OkMultipleSlashes) {
   EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
 }
 
+TEST(BuildCStrArgs, NoArgs) {
+  llvm::OwningArrayRef<char> storage;
+  auto result = BuildCStrArgs("tool", {}, storage);
+  ASSERT_THAT(result.size(), Eq(1));
+  EXPECT_THAT(result[0], StrEq("tool"));
+}
+
+TEST(BuildCStrArgs, OneArg) {
+  llvm::OwningArrayRef<char> storage;
+  auto result = BuildCStrArgs("tool", {"arg1"}, storage);
+  ASSERT_THAT(result.size(), Eq(2));
+  EXPECT_THAT(result[0], StrEq("tool"));
+  EXPECT_THAT(result[1], StrEq("arg1"));
+}
+
+TEST(BuildCStrArgs, MultipleArgs) {
+  llvm::OwningArrayRef<char> storage;
+  auto result = BuildCStrArgs("tool", {"arg1", "arg2"}, storage);
+  ASSERT_THAT(result.size(), Eq(3));
+  EXPECT_THAT(result[0], StrEq("tool"));
+  EXPECT_THAT(result[1], StrEq("arg1"));
+  EXPECT_THAT(result[2], StrEq("arg2"));
+}
+
+TEST(BuildCStrArgsWithPrefix, NoArgs) {
+  llvm::OwningArrayRef<char> storage;
+  auto result = BuildCStrArgs("tool", {}, {}, storage);
+  ASSERT_THAT(result.size(), Eq(1));
+  EXPECT_THAT(result[0], StrEq("tool"));
+}
+
+TEST(BuildCStrArgsWithPrefix, PrefixOnly) {
+  llvm::OwningArrayRef<char> storage;
+  std::string prefix_args[] = {"p_arg1", "p_arg2"};
+  auto result = BuildCStrArgs("tool", prefix_args, {}, storage);
+  ASSERT_THAT(result.size(), Eq(3));
+  EXPECT_THAT(result[0], StrEq("tool"));
+  EXPECT_THAT(result[1], Eq(prefix_args[0].c_str()));
+  EXPECT_THAT(result[2], Eq(prefix_args[1].c_str()));
+}
+
+TEST(BuildCStrArgsWithPrefix, ArgsOnly) {
+  llvm::OwningArrayRef<char> storage;
+  auto result = BuildCStrArgs("tool", {}, {"arg1", "arg2"}, storage);
+  ASSERT_THAT(result.size(), Eq(3));
+  EXPECT_THAT(result[0], StrEq("tool"));
+  EXPECT_THAT(result[1], StrEq("arg1"));
+  EXPECT_THAT(result[2], StrEq("arg2"));
+}
+
+TEST(BuildCStrArgsWithPrefix, BothPrefixAndArgs) {
+  llvm::OwningArrayRef<char> storage;
+  std::string prefix_args[] = {"p_arg1", "p_arg2"};
+  auto result = BuildCStrArgs("tool", prefix_args, {"arg1", "arg2"}, storage);
+  ASSERT_THAT(result.size(), Eq(5));
+  EXPECT_THAT(result[0], StrEq("tool"));
+  EXPECT_THAT(result[1], Eq(prefix_args[0].c_str()));
+  EXPECT_THAT(result[2], Eq(prefix_args[1].c_str()));
+  EXPECT_THAT(result[3], StrEq("arg1"));
+  EXPECT_THAT(result[4], StrEq("arg2"));
+}
+
 }  // namespace
 }  // namespace Carbon

+ 3 - 0
toolchain/driver/BUILD

@@ -36,6 +36,7 @@ cc_library(
         "//common:filesystem",
         "//common:latch",
         "//common:ostream",
+        "//common:string_helpers",
         "//common:vlog",
         "//third_party/llvm:clang_cc1",
         "//toolchain/base:install_paths",
@@ -250,6 +251,7 @@ cc_library(
     deps = [
         ":tool_runner_base",
         "//common:ostream",
+        "//common:string_helpers",
         "//common:vlog",
         "//toolchain/base:install_paths",
         "@llvm-project//lld:Common",
@@ -288,6 +290,7 @@ cc_library(
     deps = [
         ":tool_runner_base",
         "//common:ostream",
+        "//common:string_helpers",
         "//common:vlog",
         "//toolchain/base:install_paths",
         "//toolchain/base:llvm_tools",

+ 9 - 4
toolchain/driver/clang_runner.cpp

@@ -30,6 +30,7 @@
 #include "clang/Serialization/PCHContainerOperations.h"
 #include "common/check.h"
 #include "common/error.h"
+#include "common/string_helpers.h"
 #include "common/vlog.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
@@ -214,12 +215,13 @@ auto ClangRunner::RunInternal(
   // Rebuild the args as C-string args.
   llvm::OwningArrayRef<char> cstr_arg_storage;
   llvm::SmallVector<const char*, 64> cstr_args =
-      BuildCStrArgs("Clang", clang_path, "-v", args, cstr_arg_storage);
+      BuildCStrArgs(clang_path, args, cstr_arg_storage);
 
-  // Handle special dispatch for CC1 commands as they don't use the driver.
+  // Handle special dispatch for CC1 commands as they don't use the driver and
+  // we don't synthesize any default arguments there.
   if (!args.empty() && args[0].starts_with("-cc1")) {
     if (args[0] == "-cc1") {
-      CARBON_VLOG("Dispatching `-cc1` command line");
+      CARBON_VLOG("Dispatching `-cc1` command line...");
       int exit_code =
           RunClangCC1(*installation_, fs_, cstr_args, enable_leaking);
       // TODO: Should this be forwarding the full exit code?
@@ -244,7 +246,10 @@ auto ClangRunner::RunInternal(
     return exit_code == 0;
   }
 
-  CARBON_VLOG("Preparing Clang driver...\n");
+  CARBON_VLOG("Running Clang driver with the following arguments:\n");
+  for (const char* cstr_arg : llvm::ArrayRef(cstr_args)) {
+    CARBON_VLOG("    '{0}'\n", cstr_arg);
+  }
 
   // Create the diagnostic options and parse arguments controlling them out of
   // our arguments.

+ 7 - 2
toolchain/driver/lld_runner.cpp

@@ -10,6 +10,7 @@
 #include <optional>
 #include <string>
 
+#include "common/string_helpers.h"
 #include "common/vlog.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
@@ -34,9 +35,13 @@ auto LldRunner::LinkHelper(llvm::StringLiteral label,
   // pointers into the storage.
   llvm::OwningArrayRef<char> cstr_arg_storage;
   llvm::SmallVector<const char*, 64> cstr_args =
-      BuildCStrArgs("LLD", path, "-v", args, cstr_arg_storage);
+      BuildCStrArgs(path, args, cstr_arg_storage);
+
+  CARBON_VLOG("Running LLD {0}-platform link with args:\n", label);
+  for (const char* cstr_arg : cstr_args) {
+    CARBON_VLOG("    '{0}'\n", cstr_arg);
+  }
 
-  CARBON_VLOG("Running LLD {0}-platform link...\n", label);
   lld::Result result =
       lld::lldMain(cstr_args, llvm::outs(), llvm::errs(), {driver_def});
 

+ 8 - 3
toolchain/driver/llvm_runner.cpp

@@ -10,6 +10,7 @@
 #include <optional>
 #include <string>
 
+#include "common/string_helpers.h"
 #include "common/vlog.h"
 #include "lld/Common/Driver.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -24,10 +25,14 @@ auto LLVMRunner::Run(LLVMTool tool, llvm::ArrayRef<llvm::StringRef> args)
   // Allocate one chunk of storage for the actual C-strings and a vector of
   // pointers into the storage.
   llvm::OwningArrayRef<char> cstr_arg_storage;
-  llvm::SmallVector<const char*, 64> cstr_args = BuildCStrArgs(
-      tool.name(), path, /*verbose_flag=*/std::nullopt, args, cstr_arg_storage);
+  llvm::SmallVector<const char*, 64> cstr_args =
+      BuildCStrArgs(path, args, cstr_arg_storage);
+
+  CARBON_VLOG("Running LLVM's {0} tool with args:\n", tool.name());
+  for (const char* cstr_arg : cstr_args) {
+    CARBON_VLOG("    '{0}'\n", cstr_arg);
+  }
 
-  CARBON_VLOG("Running LLVM's {0} tool...\n", tool.name());
   int exit_code = tool.main_fn()(
       cstr_args.size(), const_cast<char**>(cstr_args.data()),
       {.Path = path.c_str(), .PrependArg = nullptr, .NeedsPrependArg = false});

+ 0 - 54
toolchain/driver/tool_runner_base.cpp

@@ -18,58 +18,4 @@ ToolRunnerBase::ToolRunnerBase(const InstallPaths* install_paths,
                                llvm::raw_ostream* vlog_stream)
     : installation_(install_paths), vlog_stream_(vlog_stream) {}
 
-auto ToolRunnerBase::BuildCStrArgs(llvm::StringRef tool_name,
-                                   llvm::StringRef tool_path,
-                                   std::optional<llvm::StringRef> verbose_flag,
-                                   llvm::ArrayRef<llvm::StringRef> args,
-                                   llvm::OwningArrayRef<char>& cstr_arg_storage)
-    -> llvm::SmallVector<const char*, 64> {
-  // TODO: Maybe handle response file expansion similar to the Clang CLI?
-
-  // If we have a verbose logging stream, and that stream is the same as
-  // `llvm::errs`, then add the `-v` flag so that the driver also prints verbose
-  // information.
-  bool inject_v_arg = verbose_flag.has_value() && vlog_stream_ == &llvm::errs();
-  std::array<llvm::StringRef, 1> v_arg_storage;
-  llvm::ArrayRef<llvm::StringRef> maybe_v_arg;
-  if (inject_v_arg) {
-    v_arg_storage[0] = *verbose_flag;
-    maybe_v_arg = v_arg_storage;
-  }
-
-  CARBON_VLOG("Running {} driver with arguments:\n", tool_name);
-
-  // Render the arguments into null-terminated C-strings. Command lines can get
-  // quite long in build systems so this tries to minimize the memory allocation
-  // overhead.
-
-  // Provide the wrapped tool path as the synthetic `argv[0]`.
-  std::array<llvm::StringRef, 1> exe_arg = {tool_path};
-  auto args_range =
-      llvm::concat<const llvm::StringRef>(exe_arg, maybe_v_arg, args);
-  int total_size = 0;
-  for (llvm::StringRef arg : args_range) {
-    // Accumulate both the string size and a null terminator byte.
-    total_size += arg.size() + 1;
-  }
-
-  // Allocate one chunk of storage for the actual C-strings and a vector of
-  // pointers into the storage.
-  cstr_arg_storage = llvm::OwningArrayRef<char>(total_size);
-  llvm::SmallVector<const char*, 64> cstr_args;
-  cstr_args.reserve(args.size() + inject_v_arg + 1);
-  for (ssize_t i = 0; llvm::StringRef arg : args_range) {
-    cstr_args.push_back(&cstr_arg_storage[i]);
-    memcpy(&cstr_arg_storage[i], arg.data(), arg.size());
-    i += arg.size();
-    cstr_arg_storage[i] = '\0';
-    ++i;
-  }
-  for (const char* cstr_arg : llvm::ArrayRef(cstr_args)) {
-    CARBON_VLOG("    '{0}'\n", cstr_arg);
-  }
-
-  return cstr_args;
-}
-
 }  // namespace Carbon

+ 0 - 19
toolchain/driver/tool_runner_base.h

@@ -32,25 +32,6 @@ class ToolRunnerBase {
                           llvm::raw_ostream* vlog_stream = nullptr);
 
  protected:
-  // Translates `args` into C-string arguments for tool APIs based on `main`.
-  //
-  // Accepts a `tool_name` for logging, and a `tool_path` that will be used as
-  // the first C-string argument to simulate and `argv[0]` entry.
-  //
-  // Accepts a `cstr_arg_storage` that will provide the underlying storage for
-  // the C-strings, and returns a small vector of the C-string pointers. The
-  // returned small vector uses a large small size to allow most common command
-  // lines to avoid extra allocations and growth passes.
-  //
-  // Lastly accepts an optional `verbose_flag`. If provided, and if
-  // `vlog_stream_` is bound to stderr for this instance, the verbose flag will
-  // be injected at the start of the argument list.
-  auto BuildCStrArgs(llvm::StringRef tool_name, llvm::StringRef tool_path,
-                     std::optional<llvm::StringRef> verbose_flag,
-                     llvm::ArrayRef<llvm::StringRef> args,
-                     llvm::OwningArrayRef<char>& cstr_arg_storage)
-      -> llvm::SmallVector<const char*, 64>;
-
   // We use protected members as this base is just factoring out common
   // implementation details of other runners.
   //