Przeglądaj źródła

Properly set up C++ include paths and similar environment settings when parsing imported C++. (#5767)

Stop using the clang tooling library to build an ASTUnit; that library
is set up to process clang frontend arguments, assuming that something
has already built frontend arguments from the compiler arguments. It is
also too encapsulated and doesn't let us inspect and modify the compiler
invocation before it's executed.

Instead, build the AST unit directly in two phases:

* FIrst, take a list of clang driver arguments and convert them into a
list of compiler arguments, using `clang::createInvocation`. Internally,
this uses the clang driver to build a frontend invocation, including
building system-specific include paths as needed.
* Then, directly build an ASTUnit from this compiler invocation.

I've factored this so that we can split out the `createInvocation` step,
with the intention that we may want to move it out of check and into the
carbon driver with the rest of the driver-level argument handling, and
we may want to customize some of the clang options before we invoke the
clang frontend with that set of options.

In order to make the invocation reusable, it no longer depends on the
name of the carbon file importing the C++ code. In place of synthesizing
a header file name as `<foo.carbon>.generated.cpp_imports.h`, we now
insert line marker directives into the generated header so that errors
in that header cause Clang to point a diagnostic back at the Carbon
source file itself. This results in a minor improvement in the
diagnostic output: we no longer refer to a nonexistent generated file.
But the snippet still contains text that doesn't match the source code,
so it remains imperfect.

---------

Co-authored-by: Chandler Carruth <chandlerc@gmail.com>
Richard Smith 10 miesięcy temu
rodzic
commit
3776e464e0

+ 4 - 1
bazel/manifest/defs.bzl

@@ -14,8 +14,11 @@ def _get_files(ctx):
         ])
 
     if ctx.attr.strip_package_dir:
+        # Files may or may not be prefixed with the bin directory, and then
+        # may or may not be prefixed with the package directory. Strip both.
+        bin_dir = ctx.bin_dir.path + "/"
         package_dir = ctx.label.package + "/"
-        files_stripped = [f.removeprefix(package_dir) for f in files]
+        files_stripped = [f.removeprefix(bin_dir).removeprefix(package_dir) for f in files]
     else:
         files_stripped = files
 

+ 2 - 0
toolchain/check/BUILD

@@ -135,7 +135,9 @@ cc_library(
         "//toolchain/sem_ir:file",
         "//toolchain/sem_ir:formatter",
         "//toolchain/sem_ir:typed_insts",
+        "@llvm-project//clang:basic",
         "@llvm-project//clang:frontend",
+        "@llvm-project//clang:lex",
         "@llvm-project//clang:sema",
         "@llvm-project//clang:tooling",
         "@llvm-project//llvm:Support",

+ 4 - 3
toolchain/check/check.cpp

@@ -389,7 +389,8 @@ static auto MaybeDumpSemIR(
 auto CheckParseTrees(
     llvm::MutableArrayRef<Unit> units,
     llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> tree_and_subtrees_getters,
-    llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs, llvm::StringRef target,
+    llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
+    llvm::StringRef clang_path, llvm::StringRef target,
     const CheckParseTreesOptions& options) -> void {
   // UnitAndImports is big due to its SmallVectors, so we default to 0 on the
   // stack.
@@ -445,7 +446,7 @@ auto CheckParseTrees(
   for (int check_index = 0;
        check_index < static_cast<int>(ready_to_check.size()); ++check_index) {
     auto* unit_info = ready_to_check[check_index];
-    CheckUnit(unit_info, tree_and_subtrees_getters, fs, target,
+    CheckUnit(unit_info, tree_and_subtrees_getters, fs, clang_path, target,
               options.vlog_stream)
         .Run();
     for (auto* incoming_import : unit_info->incoming_imports) {
@@ -494,7 +495,7 @@ auto CheckParseTrees(
     // incomplete imports.
     for (auto& unit_info : unit_infos) {
       if (unit_info.imports_remaining > 0) {
-        CheckUnit(&unit_info, tree_and_subtrees_getters, fs, target,
+        CheckUnit(&unit_info, tree_and_subtrees_getters, fs, clang_path, target,
                   options.vlog_stream)
             .Run();
       }

+ 2 - 1
toolchain/check/check.h

@@ -74,7 +74,8 @@ struct CheckParseTreesOptions {
 auto CheckParseTrees(
     llvm::MutableArrayRef<Unit> units,
     llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> tree_and_subtrees_getters,
-    llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs, llvm::StringRef target,
+    llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
+    llvm::StringRef clang_path, llvm::StringRef target,
     const CheckParseTreesOptions& options) -> void;
 
 }  // namespace Carbon::Check

+ 4 - 3
toolchain/check/check_unit.cpp

@@ -56,7 +56,8 @@ static auto GetImportedIRCount(UnitAndImports* unit_and_imports) -> int {
 CheckUnit::CheckUnit(
     UnitAndImports* unit_and_imports,
     llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> tree_and_subtrees_getters,
-    llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs, llvm::StringRef target,
+    llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
+    llvm::StringRef clang_path, llvm::StringRef target,
     llvm::raw_ostream* vlog_stream)
     : unit_and_imports_(unit_and_imports),
       tree_and_subtrees_getter_(
@@ -64,6 +65,7 @@ CheckUnit::CheckUnit(
               [unit_and_imports->unit->sem_ir->check_ir_id().index]),
       total_ir_count_(tree_and_subtrees_getters.size()),
       fs_(std::move(fs)),
+      clang_path_(clang_path),
       target_(target),
       emitter_(&unit_and_imports_->err_tracker, tree_and_subtrees_getters,
                unit_and_imports_->unit->sem_ir),
@@ -156,8 +158,7 @@ auto CheckUnit::InitPackageScopeAndImports() -> void {
     CARBON_CHECK(cpp_ast);
     CARBON_CHECK(!cpp_ast->get());
     *cpp_ast =
-        ImportCppFiles(context_, unit_and_imports_->unit->sem_ir->filename(),
-                       cpp_import_names, fs_, target_);
+        ImportCppFiles(context_, cpp_import_names, fs_, clang_path_, target_);
   }
 }
 

+ 3 - 1
toolchain/check/check_unit.h

@@ -124,7 +124,8 @@ class CheckUnit {
       UnitAndImports* unit_and_imports,
       llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> tree_and_subtrees_getters,
       llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
-      llvm::StringRef target, llvm::raw_ostream* vlog_stream);
+      llvm::StringRef clang_path, llvm::StringRef target,
+      llvm::raw_ostream* vlog_stream);
 
   // Produces and checks the IR for the provided unit.
   auto Run() -> void;
@@ -186,6 +187,7 @@ class CheckUnit {
   // The number of IRs being checked in total.
   int total_ir_count_;
   llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs_;
+  llvm::StringRef clang_path_;
   llvm::StringRef target_;
 
   DiagnosticEmitter emitter_;

+ 165 - 38
toolchain/check/import_cpp.cpp

@@ -10,9 +10,13 @@
 #include <tuple>
 #include <utility>
 
+#include "clang/Basic/FileManager.h"
+#include "clang/Frontend/ASTUnit.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Frontend/TextDiagnostic.h"
+#include "clang/Lex/PreprocessorOptions.h"
 #include "clang/Sema/Lookup.h"
-#include "clang/Tooling/Tooling.h"
 #include "common/ostream.h"
 #include "common/raw_string_ostream.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
@@ -30,6 +34,7 @@
 #include "toolchain/check/pattern_match.h"
 #include "toolchain/check/type.h"
 #include "toolchain/diagnostics/diagnostic.h"
+#include "toolchain/diagnostics/diagnostic_emitter.h"
 #include "toolchain/diagnostics/format_providers.h"
 #include "toolchain/parse/node_ids.h"
 #include "toolchain/sem_ir/ids.h"
@@ -38,6 +43,9 @@
 
 namespace Carbon::Check {
 
+// The fake file name to use for the synthesized includes file.
+static constexpr const char IncludesFileName[] = "<carbon Cpp imports>";
+
 // Generates C++ file contents to #include all requested imports.
 static auto GenerateCppIncludesHeaderCode(
     Context& context, llvm::ArrayRef<Parse::Tree::PackagingNames> imports)
@@ -91,13 +99,107 @@ static auto AddImportIRInst(Context& context,
 
 namespace {
 
+// Used to convert diagnostics from the Clang driver to Carbon diagnostics.
+class CarbonClangDriverDiagnosticConsumer : public clang::DiagnosticConsumer {
+ public:
+  // Creates an instance with the location that triggers calling Clang.
+  // `context` must not be null.
+  explicit CarbonClangDriverDiagnosticConsumer(
+      Diagnostics::NoLocEmitter* emitter)
+      : emitter_(emitter) {}
+
+  // Generates a Carbon warning for each Clang warning and a Carbon error for
+  // each Clang error or fatal.
+  auto HandleDiagnostic(clang::DiagnosticsEngine::Level diag_level,
+                        const clang::Diagnostic& info) -> void override {
+    DiagnosticConsumer::HandleDiagnostic(diag_level, info);
+
+    llvm::SmallString<256> message;
+    info.FormatDiagnostic(message);
+
+    switch (diag_level) {
+      case clang::DiagnosticsEngine::Ignored:
+      case clang::DiagnosticsEngine::Note:
+      case clang::DiagnosticsEngine::Remark: {
+        // TODO: Emit notes and remarks.
+        break;
+      }
+      case clang::DiagnosticsEngine::Warning:
+      case clang::DiagnosticsEngine::Error:
+      case clang::DiagnosticsEngine::Fatal: {
+        CARBON_DIAGNOSTIC(CppInteropDriverWarning, Warning, "{0}", std::string);
+        CARBON_DIAGNOSTIC(CppInteropDriverError, Error, "{0}", std::string);
+        emitter_->Emit(diag_level == clang::DiagnosticsEngine::Warning
+                           ? CppInteropDriverWarning
+                           : CppInteropDriverError,
+                       message.str().str());
+        break;
+      }
+    }
+  }
+
+ private:
+  // Diagnostic emitter. Note that driver diagnostics don't have meaningful
+  // locations attached.
+  Diagnostics::NoLocEmitter* emitter_;
+};
+
+}  // namespace
+
+// Builds a clang `CompilerInvocation` describing the options to use to build an
+// imported C++ AST.
+// TODO: Cache the compiler invocation created here and reuse it if building
+// multiple AST units. Consider building the `CompilerInvocation` from the
+// driver and passing it into check. This would also allow us to have a shared
+// set of defaults between the Clang invocation we use for imports and the
+// invocation we use for `carbon clang`.
+static auto BuildCompilerInvocation(
+    Context& context, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
+    const std::string& clang_path, const std::string& target)
+    -> std::unique_ptr<clang::CompilerInvocation> {
+  Diagnostics::NoLocEmitter emitter(context.emitter());
+  CarbonClangDriverDiagnosticConsumer diagnostics_consumer(&emitter);
+
+  const char* driver_args[] = {
+      clang_path.c_str(),
+      // Propagate the target to Clang.
+      "-target",
+      target.c_str(),
+      // Require PIE. Note its default is configurable in Clang.
+      "-fPIE",
+      // Parse as a C++ (not C) header.
+      "-x",
+      "c++",
+      IncludesFileName,
+  };
+
+  // Build a diagnostics engine. Note that we don't have any diagnostic options
+  // yet; they're produced by running the driver.
+  clang::DiagnosticOptions driver_diag_opts;
+  llvm::IntrusiveRefCntPtr<clang::DiagnosticsEngine> driver_diags(
+      clang::CompilerInstance::createDiagnostics(*fs, driver_diag_opts,
+                                                 &diagnostics_consumer,
+                                                 /*ShouldOwnClient=*/false));
+
+  // Ask the driver to process the arguments and build a corresponding clang
+  // frontend invocation.
+  auto invocation =
+      clang::createInvocation(driver_args, {.Diags = driver_diags, .VFS = fs});
+
+  // Emit any queued diagnostics from parsing our driver arguments.
+  return invocation;
+}
+
+namespace {
+
 // Used to convert Clang diagnostics to Carbon diagnostics.
 class CarbonClangDiagnosticConsumer : public clang::DiagnosticConsumer {
  public:
   // Creates an instance with the location that triggers calling Clang.
   // `context` must not be null.
-  explicit CarbonClangDiagnosticConsumer(Context* context)
-      : context_(context) {}
+  explicit CarbonClangDiagnosticConsumer(Context* context,
+                                         clang::CompilerInvocation* invocation)
+      : context_(context), invocation_(invocation) {}
 
   // Generates a Carbon warning for each Clang warning and a Carbon error for
   // each Clang error or fatal.
@@ -111,16 +213,21 @@ class CarbonClangDiagnosticConsumer : public clang::DiagnosticConsumer {
     llvm::SmallString<256> message;
     info.FormatDiagnostic(message);
 
+    if (!info.hasSourceManager()) {
+      // If we don't have a source manager, we haven't actually started
+      // compiling yet, and this is an error from the driver or early in the
+      // frontend. Pass it on directly.
+      CARBON_CHECK(info.getLocation().isInvalid());
+      diagnostic_infos_.push_back({.level = diag_level,
+                                   .import_ir_inst_id = clang_import_ir_inst_id,
+                                   .message = message.str().str()});
+      return;
+    }
+
     RawStringOstream diagnostics_stream;
-    // TODO: Consider allowing setting `LangOptions` or use
-    // `ASTContext::getLangOptions()`.
-    clang::LangOptions lang_options;
-    // TODO: Consider allowing setting `DiagnosticOptions` or use
-    // `ASTUnit::getDiagnostics().getLangOptions().getDiagnosticOptions()`.
-    clang::DiagnosticOptions diagnostic_options;
-    diagnostic_options.ShowPresumedLoc = true;
-    clang::TextDiagnostic text_diagnostic(diagnostics_stream, lang_options,
-                                          diagnostic_options);
+    clang::TextDiagnostic text_diagnostic(diagnostics_stream,
+                                          invocation_->getLangOpts(),
+                                          invocation_->getDiagnosticOpts());
     text_diagnostic.emitDiagnostic(
         clang::FullSourceLoc(info.getLocation(), info.getSourceManager()),
         diag_level, message, info.getRanges(), info.getFixItHints());
@@ -169,6 +276,9 @@ class CarbonClangDiagnosticConsumer : public clang::DiagnosticConsumer {
   // The type-checking context in which we're running Clang.
   Context* context_;
 
+  // The compiler invocation that is producing the diagnostics.
+  clang::CompilerInvocation* invocation_;
+
   // Information on a Clang diagnostic that can be converted to a Carbon
   // diagnostic.
   struct ClangDiagnosticInfo {
@@ -195,35 +305,51 @@ class CarbonClangDiagnosticConsumer : public clang::DiagnosticConsumer {
 // compilation errors where encountered or the generated AST is null due to an
 // error. Sets the AST in the context's `sem_ir`.
 // TODO: Consider to always have a (non-null) AST.
-static auto GenerateAst(Context& context, llvm::StringRef importing_file_path,
+static auto GenerateAst(Context& context,
                         llvm::ArrayRef<Parse::Tree::PackagingNames> imports,
                         llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
-                        llvm::StringRef target)
+                        const std::string& clang_path,
+                        const std::string& target)
     -> std::pair<std::unique_ptr<clang::ASTUnit>, bool> {
-  CarbonClangDiagnosticConsumer diagnostics_consumer(&context);
-
-  // TODO: Share compilation flags with ClangRunner.
-  auto ast = clang::tooling::buildASTFromCodeWithArgs(
-      GenerateCppIncludesHeaderCode(context, imports),
-      // Parse C++ (and not C).
-      {
-          "-x",
-          "c++",
-          // Propagate the target to Clang.
-          "-target",
-          target.str(),
-          // Require PIE. Note its default is configurable in Clang.
-          "-fPIE",
-      },
-      (importing_file_path + ".generated.cpp_imports.h").str(), "clang-tool",
-      std::make_shared<clang::PCHContainerOperations>(),
-      clang::tooling::getClangStripDependencyFileAdjuster(),
-      clang::tooling::FileContentMappings(), &diagnostics_consumer, fs);
-  // Remove link to the diagnostics consumer before its deletion.
+  // Build the options to use to invoke the Clang frontend.
+  std::shared_ptr<clang::CompilerInvocation> invocation =
+      BuildCompilerInvocation(context, fs, clang_path, target);
+  if (!invocation) {
+    return {nullptr, true};
+  }
+
+  // Build a diagnostics engine.
+  CarbonClangDiagnosticConsumer diagnostics_consumer(&context,
+                                                     invocation.get());
+  llvm::IntrusiveRefCntPtr<clang::DiagnosticsEngine> diags(
+      clang::CompilerInstance::createDiagnostics(
+          *fs, invocation->getDiagnosticOpts(), &diagnostics_consumer,
+          /*ShouldOwnClient=*/false));
+
+  // Remap the imports file name to the corresponding `#include`s.
+  std::string includes = GenerateCppIncludesHeaderCode(context, imports);
+  auto includes_buffer =
+      llvm::MemoryBuffer::getMemBuffer(includes, IncludesFileName);
+  invocation->getPreprocessorOpts().addRemappedFile(IncludesFileName,
+                                                    includes_buffer.get());
+
+  // Create the AST unit.
+  auto ast = clang::ASTUnit::LoadFromCompilerInvocation(
+      invocation, std::make_shared<clang::PCHContainerOperations>(), nullptr,
+      diags, new clang::FileManager(invocation->getFileSystemOpts(), fs));
+
+  // Remove link to the diagnostics consumer before its destruction.
   ast->getDiagnostics().setClient(nullptr);
 
-  // In order to emit diagnostics, we need the AST.
+  // Remove remapped file before its underlying storage is destroyed.
+  invocation->getPreprocessorOpts().clearRemappedFiles();
+
+  // Attach the AST to SemIR. This needs to be done before we can emit any
+  // diagnostics, so their locations can be properly interpreted by our
+  // diagnostics machinery.
   context.sem_ir().set_cpp_ast(ast.get());
+
+  // Emit any diagnostics we queued up while building the AST.
   diagnostics_consumer.EmitDiagnostics();
 
   return {std::move(ast), !ast || diagnostics_consumer.getNumErrors() > 0};
@@ -257,10 +383,11 @@ static auto AddNamespace(Context& context, PackageNameId cpp_package_id,
       .add_result.name_scope_id;
 }
 
-auto ImportCppFiles(Context& context, llvm::StringRef importing_file_path,
+auto ImportCppFiles(Context& context,
                     llvm::ArrayRef<Parse::Tree::PackagingNames> imports,
                     llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
-                    llvm::StringRef target) -> std::unique_ptr<clang::ASTUnit> {
+                    llvm::StringRef clang_path, llvm::StringRef target)
+    -> std::unique_ptr<clang::ASTUnit> {
   if (imports.empty()) {
     return nullptr;
   }
@@ -275,7 +402,7 @@ auto ImportCppFiles(Context& context, llvm::StringRef importing_file_path,
   auto name_scope_id = AddNamespace(context, package_id, imports);
 
   auto [generated_ast, ast_has_error] =
-      GenerateAst(context, importing_file_path, imports, fs, target);
+      GenerateAst(context, imports, fs, clang_path.str(), target.str());
 
   SemIR::NameScope& name_scope = context.name_scopes().Get(name_scope_id);
   name_scope.set_is_closed_import(true);

+ 3 - 2
toolchain/check/import_cpp.h

@@ -14,10 +14,11 @@ namespace Carbon::Check {
 // Generates a C++ header that includes the imported cpp files, parses it,
 // generates the AST from it and links `SemIR::File` to it. Report C++ errors
 // and warnings. If successful, adds a `Cpp` namespace and returns the AST.
-auto ImportCppFiles(Context& context, llvm::StringRef importing_file_path,
+auto ImportCppFiles(Context& context,
                     llvm::ArrayRef<Parse::Tree::PackagingNames> imports,
                     llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
-                    llvm::StringRef target) -> std::unique_ptr<clang::ASTUnit>;
+                    llvm::StringRef clang_path, llvm::StringRef target)
+    -> std::unique_ptr<clang::ASTUnit>;
 
 // Looks up the given name in the Clang AST generated when importing C++ code.
 // If successful, generates the instruction and returns the new `InstId`.

+ 53 - 0
toolchain/check/testdata/interop/cpp/include_paths.carbon

@@ -0,0 +1,53 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// INCLUDE-FILE: toolchain/testing/testdata/min_prelude/uint.carbon
+//
+// AUTOUPDATE
+// TIP: To test this file alone, run:
+// TIP:   bazel test //toolchain/testing:file_test --test_arg=--file_tests=toolchain/check/testdata/interop/cpp/include_paths.carbon
+// TIP: To dump output, run:
+// TIP:   bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/check/testdata/interop/cpp/include_paths.carbon
+
+// --- include_stddef.h
+
+// Check that include paths are properly set up when running Clang.
+
+// Clang provides <stddef.h> as a builtin header.
+#include <stddef.h>
+
+ptrdiff_t GetSize();
+inline int GetSizeAsInt() { return GetSize(); }
+
+// --- import_stddef_indirectly.carbon
+
+library "[[@TEST_NAME]]";
+
+import Cpp library "include_stddef.h";
+
+// TODO: `fn CallGetSize() -> Cpp.ptrdiff_t {`
+fn CallGetSize() -> i32 {
+  // TODO: Call `Cpp.GetSize` directly once we can import the types `long` and `long long`.
+  return Cpp.GetSizeAsInt() as i32;
+}
+
+// --- fail_todo_import_stddef_directly.carbon
+
+library "[[@TEST_NAME]]";
+
+import Cpp library "stddef.h";
+
+// TODO: Once we can import typedefs, this should work.
+// CHECK:STDERR: fail_todo_import_stddef_directly.carbon:[[@LINE+11]]:8: error: semantics TODO: `Unsupported: Declaration type Typedef` [SemanticsTodo]
+// CHECK:STDERR: var n: Cpp.size_t;
+// CHECK:STDERR:        ^~~~~~~~~~
+// CHECK:STDERR: fail_todo_import_stddef_directly.carbon:[[@LINE+8]]:8: note: in `Cpp` name lookup for `size_t` [InCppNameLookup]
+// CHECK:STDERR: var n: Cpp.size_t;
+// CHECK:STDERR:        ^~~~~~~~~~
+// CHECK:STDERR:
+// CHECK:STDERR: fail_todo_import_stddef_directly.carbon:[[@LINE+4]]:8: error: member name `size_t` not found in `Cpp` [MemberNameNotFoundInInstScope]
+// CHECK:STDERR: var n: Cpp.size_t;
+// CHECK:STDERR:        ^~~~~~~~~~
+// CHECK:STDERR:
+var n: Cpp.size_t;

+ 6 - 0
toolchain/diagnostics/coverage_test.cpp

@@ -58,6 +58,12 @@ constexpr Kind UntestedKinds[] = {
     // conversion cannot fail. This should be covered once we support `ref`
     // binding syntax.
     Kind::ConversionFailureNonRefToRef,
+
+    // TODO: These are temporarily unreachable because we don't pass invalid
+    // driver options to Clang, but will become reachable once we support
+    // passing custom Clang arguments.
+    Kind::CppInteropDriverError,
+    Kind::CppInteropDriverWarning,
 };
 
 // Looks for diagnostic kinds that aren't covered by a file_test.

+ 5 - 0
toolchain/diagnostics/diagnostic_emitter.h

@@ -186,6 +186,7 @@ class Emitter {
 
   template <typename OtherLocT, typename AnnotateFn>
   friend class AnnotationScope;
+  friend class NoLocEmitter;
 
   Consumer* consumer_;
   llvm::SmallVector<llvm::function_ref<auto(Builder& builder)->void>>
@@ -203,6 +204,10 @@ class NoLocEmitter : public Emitter<void*> {
  public:
   using Emitter::Emitter;
 
+  template <typename LocT>
+  explicit NoLocEmitter(const Emitter<LocT>& emitter)
+      : Emitter(emitter.consumer_) {}
+
   // Emits an error. This specialization only applies to
   // `NoLocEmitter`.
   template <typename... Args>

+ 2 - 0
toolchain/diagnostics/diagnostic_kind.def

@@ -166,6 +166,8 @@ CARBON_DIAGNOSTIC_KIND(ResolvingSpecificHere)
 // Package/import checking diagnostics.
 CARBON_DIAGNOSTIC_KIND(CppInteropFuzzing)
 CARBON_DIAGNOSTIC_KIND(CppInteropMissingLibrary)
+CARBON_DIAGNOSTIC_KIND(CppInteropDriverError)
+CARBON_DIAGNOSTIC_KIND(CppInteropDriverWarning)
 CARBON_DIAGNOSTIC_KIND(CppInteropParseError)
 CARBON_DIAGNOSTIC_KIND(CppInteropParseWarning)
 CARBON_DIAGNOSTIC_KIND(IncorrectExtension)

+ 2 - 2
toolchain/driver/compile_subcommand.cpp

@@ -944,8 +944,8 @@ auto CompileSubcommand::Run(DriverEnv& driver_env) -> DriverResult {
     }
   }
   Check::CheckParseTrees(check_units, cache.tree_and_subtrees_getters(),
-                         driver_env.fs, options_.codegen_options.target,
-                         options);
+                         driver_env.fs, driver_env.installation->clang_path(),
+                         options_.codegen_options.target, options);
   CARBON_VLOG_TO(driver_env.vlog_stream,
                  "*** Check::CheckParseTrees done ***\n");
   for (auto& unit : units) {

+ 4 - 3
toolchain/driver/language_server_subcommand.cpp

@@ -27,9 +27,10 @@ auto LanguageServerSubcommand::Run(DriverEnv& driver_env) -> DriverResult {
     return {.success = false};
   }
 
-  bool success = LanguageServer::Run(
-      driver_env.input_stream, *driver_env.output_stream,
-      *driver_env.error_stream, driver_env.vlog_stream, driver_env.consumer);
+  bool success =
+      LanguageServer::Run(*driver_env.installation, driver_env.input_stream,
+                          *driver_env.output_stream, *driver_env.error_stream,
+                          driver_env.vlog_stream, driver_env.consumer);
   return {.success = success};
 }
 

+ 14 - 1
toolchain/install/BUILD

@@ -181,7 +181,12 @@ install_dirs = {
         is_driver = True,
     ) for name in llvm_binaries],
     "lib/carbon/llvm/lib/clang/" + LLVM_VERSION_MAJOR: [
-        install_filegroup("include", ":clang_headers", "staging/include/"),
+        install_filegroup(
+            "include",
+            ":clang_headers",
+            label = "installed_clang_headers",
+            remove_prefix = "staging/include/",
+        ),
     ],
 }
 
@@ -205,6 +210,14 @@ manifest(
     srcs = [":install_data"],
 )
 
+# A list of clang's installed builtin header files.
+# This is consumed by //toolchain/testing:file_test.
+manifest(
+    name = "clang_headers_manifest.txt",
+    srcs = [":installed_clang_headers"],
+    strip_package_dir = True,
+)
+
 pkg_naming_variables(
     name = "packaging_variables",
 )

+ 11 - 6
toolchain/install/install_filegroups.bzl

@@ -7,7 +7,7 @@
 load("@rules_pkg//pkg:mappings.bzl", "pkg_attributes", "pkg_filegroup", "pkg_files", "pkg_mklink", "strip_prefix")
 load("symlink_helpers.bzl", "symlink_file", "symlink_filegroup")
 
-def install_filegroup(name, filegroup_target, remove_prefix = ""):
+def install_filegroup(name, filegroup_target, remove_prefix = "", label = None):
     """Adds a filegroup for install.
 
     Used in the `install_dirs` dict.
@@ -15,10 +15,15 @@ def install_filegroup(name, filegroup_target, remove_prefix = ""):
     Args:
       name: The base directory for the filegroup.
       filegroup_target: The bazel filegroup target to install.
+      remove_prefix: A prefix to remove from the name of each source file when
+        determining the name of the corresponding installed file.
+      label: A custom label to assign to the filegroup containing the
+        installed files.
     """
     return {
         "filegroup": filegroup_target,
         "is_driver": False,
+        "label": label,
         "name": name,
         "remove_prefix": remove_prefix,
     }
@@ -85,8 +90,8 @@ def make_install_filegroups(name, no_driver_name, pkg_name, install_dirs, prefix
             if not entry["is_driver"]:
                 no_driver_srcs.append(prefixed_path)
 
-            pkg_path = path + ".pkg"
-            pkg_srcs.append(pkg_path)
+            pkg_label = entry.get("label") or path + ".pkg"
+            pkg_srcs.append(pkg_label)
 
             if "target" in entry:
                 if entry["executable"]:
@@ -102,7 +107,7 @@ def make_install_filegroups(name, no_driver_name, pkg_name, install_dirs, prefix
                     )
                     mode = "0644"
                 pkg_files(
-                    name = pkg_path,
+                    name = pkg_label,
                     srcs = [entry["target"]],
                     attributes = pkg_attributes(mode = mode),
                     renames = {entry["target"]: path},
@@ -115,7 +120,7 @@ def make_install_filegroups(name, no_driver_name, pkg_name, install_dirs, prefix
                     remove_prefix = entry["remove_prefix"],
                 )
                 pkg_files(
-                    name = pkg_path,
+                    name = pkg_label,
                     srcs = [prefixed_path],
                     strip_prefix = strip_prefix.from_pkg(prefix),
                 )
@@ -139,7 +144,7 @@ def make_install_filegroups(name, no_driver_name, pkg_name, install_dirs, prefix
 
                 # For the distributed package, we retain relative symlinks.
                 pkg_mklink(
-                    name = pkg_path,
+                    name = pkg_label,
                     link_name = path,
                     target = entry["symlink"],
                 )

+ 17 - 4
toolchain/install/install_paths.cpp

@@ -92,19 +92,32 @@ auto InstallPaths::Make(llvm::StringRef install_prefix) -> InstallPaths {
 
 auto InstallPaths::ReadPreludeManifest() const
     -> ErrorOr<llvm::SmallVector<std::string>> {
+  return ReadManifest(core_package(), "prelude_manifest.txt");
+}
+
+auto InstallPaths::ReadClangHeadersManifest() const
+    -> ErrorOr<llvm::SmallVector<std::string>> {
+  llvm::SmallString<256> manifest_path(prefix_);
+  llvm::sys::path::append(manifest_path, llvm::sys::path::Style::posix, "..");
+  return ReadManifest(manifest_path, "clang_headers_manifest.txt");
+}
+
+auto InstallPaths::ReadManifest(llvm::StringRef manifest_path,
+                                llvm::StringRef manifest_file) const
+    -> ErrorOr<llvm::SmallVector<std::string>> {
   // This is structured to avoid a vector copy on success.
   ErrorOr<llvm::SmallVector<std::string>> result =
       llvm::SmallVector<std::string>();
 
   llvm::SmallString<256> manifest;
   llvm::sys::path::append(manifest, llvm::sys::path::Style::posix,
-                          core_package(), "prelude_manifest.txt");
+                          manifest_path, manifest_file);
 
   auto fs = llvm::vfs::getRealFileSystem();
   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> file =
       fs->getBufferForFile(manifest);
   if (!file) {
-    result = ErrorBuilder() << "Loading prelude manifest `" << manifest
+    result = ErrorBuilder() << "Loading manifest `" << manifest
                             << "`: " << file.getError().message();
     return result;
   }
@@ -117,14 +130,14 @@ auto InstallPaths::ReadPreludeManifest() const
       break;
     }
     llvm::SmallString<256> path;
-    llvm::sys::path::append(path, llvm::sys::path::Style::posix, core_package(),
+    llvm::sys::path::append(path, llvm::sys::path::Style::posix, manifest_path,
                             token);
     result->push_back(path.str().str());
     buffer = remainder;
   }
 
   if (result->empty()) {
-    result = ErrorBuilder() << "Prelude manifest `" << manifest << "` is empty";
+    result = ErrorBuilder() << "Manifest `" << manifest << "` is empty";
   }
   return result;
 }

+ 11 - 0
toolchain/install/install_paths.h

@@ -68,6 +68,12 @@ class InstallPaths {
   // files that define the prelude, and will always be non-empty on success.
   auto ReadPreludeManifest() const -> ErrorOr<llvm::SmallVector<std::string>>;
 
+  // Returns the contents of the clang builtin headers manifest file. This is
+  // the list of header files that are installed as part of the clang compiler,
+  // and will always be non-empty on success.
+  auto ReadClangHeadersManifest() const
+      -> ErrorOr<llvm::SmallVector<std::string>>;
+
   // Check for an error detecting the install paths correctly.
   //
   // A nullopt return means no errors encountered and the paths should work
@@ -111,6 +117,11 @@ class InstallPaths {
   // relevant error message.
   auto CheckMarkerFile() -> void;
 
+  // Read a manifest file.
+  auto ReadManifest(llvm::StringRef manifest_path,
+                    llvm::StringRef manifest_file) const
+      -> ErrorOr<llvm::SmallVector<std::string>>;
+
   // The computed installation prefix. This will be an absolute path. We keep an
   // absolute path for when the command line uses a relative path
   // (`./bin/carbon`) and the working directory changes after initialization

+ 2 - 0
toolchain/language_server/BUILD

@@ -22,6 +22,7 @@ cc_library(
         "//common:ostream",
         "//common:raw_string_ostream",
         "//toolchain/diagnostics:diagnostic_emitter",
+        "//toolchain/install:install_paths",
         "@llvm-project//clang-tools-extra/clangd:ClangDaemon",
     ],
 )
@@ -38,6 +39,7 @@ cc_library(
         "//toolchain/check",
         "//toolchain/diagnostics:diagnostic_emitter",
         "//toolchain/diagnostics:file_diagnostics",
+        "//toolchain/install:install_paths",
         "//toolchain/lex",
         "//toolchain/lex:tokenized_buffer",
         "//toolchain/parse",

+ 4 - 3
toolchain/language_server/context.cpp

@@ -163,9 +163,10 @@ auto Context::File::SetText(Context& context, std::optional<int64_t> version,
   // TODO: Include the prelude.
   Check::CheckParseTreesOptions check_options;
   check_options.vlog_stream = context.vlog_stream();
-  Check::CheckParseTrees(
-      units, llvm::ArrayRef<Parse::GetTreeAndSubtreesFn>(getter), fs,
-      llvm::sys::getDefaultTargetTriple(), check_options);
+  Check::CheckParseTrees(units,
+                         llvm::ArrayRef<Parse::GetTreeAndSubtreesFn>(getter),
+                         fs, context.installation().clang_path(),
+                         llvm::sys::getDefaultTargetTriple(), check_options);
 
   // Note we need to publish diagnostics even when empty.
   // TODO: Consider caching previously published diagnostics and only publishing

+ 9 - 2
toolchain/language_server/context.h

@@ -14,6 +14,7 @@
 #include "toolchain/diagnostics/diagnostic_consumer.h"
 #include "toolchain/diagnostics/diagnostic_emitter.h"
 #include "toolchain/diagnostics/file_diagnostics.h"
+#include "toolchain/install/install_paths.h"
 #include "toolchain/lex/tokenized_buffer.h"
 #include "toolchain/parse/tree_and_subtrees.h"
 #include "toolchain/sem_ir/file.h"
@@ -52,10 +53,12 @@ class Context {
   };
 
   // `vlog_stream` is optional; other parameters are required.
-  explicit Context(llvm::raw_ostream* vlog_stream,
+  explicit Context(const InstallPaths* installation,
+                   llvm::raw_ostream* vlog_stream,
                    Diagnostics::Consumer* consumer,
                    clang::clangd::LSPBinder::RawOutgoing* outgoing)
-      : vlog_stream_(vlog_stream),
+      : installation_(installation),
+        vlog_stream_(vlog_stream),
         file_emitter_(consumer),
         no_loc_emitter_(consumer),
         outgoing_(outgoing) {}
@@ -70,6 +73,8 @@ class Context {
     outgoing_->notify("textDocument/publishDiagnostics", params);
   }
 
+  auto installation() -> const InstallPaths& { return *installation_; }
+
   auto vlog_stream() -> llvm::raw_ostream* { return vlog_stream_; }
   auto file_emitter() -> Diagnostics::FileEmitter& { return file_emitter_; }
   auto no_loc_emitter() -> Diagnostics::NoLocEmitter& {
@@ -79,6 +84,8 @@ class Context {
   auto files() -> Map<std::string, File>& { return files_; }
 
  private:
+  const InstallPaths* installation_;
+
   // Diagnostic and output streams.
   llvm::raw_ostream* vlog_stream_;
   Diagnostics::FileEmitter file_emitter_;

+ 5 - 4
toolchain/language_server/language_server.cpp

@@ -44,9 +44,10 @@ class Logger : public clang::clangd::Logger {
   std::unique_ptr<clang::clangd::StreamLogger> vlog_logger_;
 };
 
-auto Run(FILE* input_stream, llvm::raw_ostream& output_stream,
-         llvm::raw_ostream& error_stream, llvm::raw_ostream* vlog_stream,
-         Diagnostics::Consumer& consumer) -> bool {
+auto Run(const InstallPaths& installation, FILE* input_stream,
+         llvm::raw_ostream& output_stream, llvm::raw_ostream& error_stream,
+         llvm::raw_ostream* vlog_stream, Diagnostics::Consumer& consumer)
+    -> bool {
   // The language server internally uses diagnostics for logging issues, but the
   // clangd parts have their own logging system. We intercept that here.
   Logger logger(&error_stream, vlog_stream);
@@ -58,7 +59,7 @@ auto Run(FILE* input_stream, llvm::raw_ostream& output_stream,
                                       /*InMirror=*/nullptr,
                                       /*Pretty=*/true));
   OutgoingMessages outgoing(transport.get());
-  Context context(vlog_stream, &consumer, &outgoing);
+  Context context(&installation, vlog_stream, &consumer, &outgoing);
   IncomingMessages incoming(transport.get(), &context);
 
   // Run the server loop.

+ 5 - 3
toolchain/language_server/language_server.h

@@ -7,6 +7,7 @@
 
 #include "common/ostream.h"
 #include "toolchain/diagnostics/diagnostic_consumer.h"
+#include "toolchain/install/install_paths.h"
 
 namespace Carbon::LanguageServer {
 
@@ -15,9 +16,10 @@ namespace Carbon::LanguageServer {
 // the server cleanly exits.
 //
 // This is thread-hostile because `clangd::LoggingSession` relies on a global.
-auto Run(FILE* input_stream, llvm::raw_ostream& output_stream,
-         llvm::raw_ostream& error_stream, llvm::raw_ostream* vlog_stream,
-         Diagnostics::Consumer& consumer) -> bool;
+auto Run(const InstallPaths& installation, FILE* input_stream,
+         llvm::raw_ostream& output_stream, llvm::raw_ostream& error_stream,
+         llvm::raw_ostream* vlog_stream, Diagnostics::Consumer& consumer)
+    -> bool;
 
 }  // namespace Carbon::LanguageServer
 

+ 3 - 0
toolchain/sem_ir/diagnostic_loc_converter.cpp

@@ -65,6 +65,9 @@ auto DiagnosticLocConverter::ConvertImpl(
   CARBON_CHECK(sem_ir_->cpp_ast());
   clang::PresumedLoc presumed_loc =
       sem_ir_->cpp_ast()->getSourceManager().getPresumedLoc(clang_loc);
+  if (presumed_loc.isInvalid()) {
+    return Diagnostics::ConvertedLoc();
+  }
   unsigned offset =
       sem_ir_->cpp_ast()->getSourceManager().getDecomposedLoc(clang_loc).second;
 

+ 5 - 1
toolchain/testing/BUILD

@@ -60,7 +60,11 @@ file_test(
     size = "small",
     timeout = "moderate",  # Taking >60 seconds in GitHub actions
     srcs = ["file_test.cpp"],
-    data = [":min_prelude"],
+    data = [
+        ":min_prelude",
+        "//toolchain/install:clang_headers",
+        "//toolchain/install:clang_headers_manifest.txt",
+    ],
     tests = [":all_testdata"],
     deps = [
         "//common:all_llvm_targets",

+ 66 - 37
toolchain/testing/file_test.cpp

@@ -24,6 +24,56 @@
 namespace Carbon::Testing {
 namespace {
 
+// Adds a file to the fs.
+static auto AddFile(llvm::vfs::InMemoryFileSystem& fs, llvm::StringRef path)
+    -> ErrorOr<Success> {
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> file =
+      llvm::MemoryBuffer::getFile(path);
+  if (file.getError()) {
+    return ErrorBuilder() << "Getting `" << path
+                          << "`: " << file.getError().message();
+  }
+  if (!fs.addFile(path, /*ModificationTime=*/0, std::move(*file))) {
+    return ErrorBuilder() << "Duplicate file: `" << path << "`";
+  }
+  return Success();
+}
+
+struct SharedTestData {
+  // The toolchain install information.
+  InstallPaths installation;
+
+  // Files in the prelude.
+  llvm::SmallVector<std::string> prelude_files;
+
+  // The installed files that tests can use.
+  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> file_system =
+      new llvm::vfs::InMemoryFileSystem();
+};
+
+static auto GetSharedTestData(llvm::StringRef exe_path)
+    -> const SharedTestData* {
+  static ErrorOr<SharedTestData> data = [&]() -> ErrorOr<SharedTestData> {
+    SharedTestData data = {.installation =
+                               InstallPaths::MakeForBazelRunfiles(exe_path)};
+    CARBON_ASSIGN_OR_RETURN(data.prelude_files,
+                            data.installation.ReadPreludeManifest());
+    for (const auto& file : data.prelude_files) {
+      CARBON_RETURN_IF_ERROR(AddFile(*data.file_system, file));
+    }
+
+    llvm::SmallVector<std::string> clang_header_files;
+    CARBON_ASSIGN_OR_RETURN(clang_header_files,
+                            data.installation.ReadClangHeadersManifest());
+    for (const auto& file : clang_header_files) {
+      CARBON_RETURN_IF_ERROR(AddFile(*data.file_system, file));
+    }
+    return data;
+  }();
+  CARBON_CHECK(data.ok(), "{0}", data.error());
+  return &*data;
+}
+
 // Provides common test support for the driver. This is used by file tests in
 // component subdirectories.
 class ToolchainFileTest : public FileTestBase {
@@ -62,8 +112,8 @@ class ToolchainFileTest : public FileTestBase {
  private:
   // The toolchain component subdirectory, such as `lex` or `language_server`.
   const llvm::StringRef component_;
-  // The toolchain install information.
-  const InstallPaths installation_;
+  // The shared test data.
+  const SharedTestData* data_;
 };
 
 }  // namespace
@@ -85,37 +135,16 @@ ToolchainFileTest::ToolchainFileTest(llvm::StringRef exe_path,
                                      llvm::StringRef test_name)
     : FileTestBase(test_name),
       component_(GetComponent(test_name)),
-      installation_(InstallPaths::MakeForBazelRunfiles(exe_path)) {}
-
-// Adds a file to the fs.
-static auto AddFile(llvm::vfs::InMemoryFileSystem& fs, llvm::StringRef path)
-    -> ErrorOr<Success> {
-  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> file =
-      llvm::MemoryBuffer::getFile(path);
-  if (file.getError()) {
-    return ErrorBuilder() << "Getting `" << path
-                          << "`: " << file.getError().message();
-  }
-  if (!fs.addFile(path, /*ModificationTime=*/0, std::move(*file))) {
-    return ErrorBuilder() << "Duplicate file: `" << path << "`";
-  }
-  return Success();
-}
+      data_(GetSharedTestData(exe_path)) {}
 
 auto ToolchainFileTest::Run(
     const llvm::SmallVector<llvm::StringRef>& test_args,
     llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem>& fs,
     FILE* input_stream, llvm::raw_pwrite_stream& output_stream,
     llvm::raw_pwrite_stream& error_stream) const -> ErrorOr<RunResult> {
-  llvm::SmallVector<std::string> prelude_files;
-  // Lex and parse shouldn't ever access the prelude.
-  if (component_ != "lex" && component_ != "parse") {
-    // TODO: Try providing the prelude as an overlay.
-    CARBON_ASSIGN_OR_RETURN(prelude_files, installation_.ReadPreludeManifest());
-    for (const auto& file : prelude_files) {
-      CARBON_RETURN_IF_ERROR(AddFile(*fs, file));
-    }
-  }
+  llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> overlay_fs =
+      new llvm::vfs::OverlayFileSystem(data_->file_system);
+  overlay_fs->pushOverlay(fs);
 
   llvm::SmallVector<llvm::StringRef> filtered_test_args;
   if (component_ == "check" || component_ == "lower") {
@@ -144,7 +173,7 @@ auto ToolchainFileTest::Run(
     filtered_test_args = test_args;
   }
 
-  Driver driver(fs, &installation_, input_stream, &output_stream,
+  Driver driver(overlay_fs, &data_->installation, input_stream, &output_stream,
                 &error_stream);
   auto driver_result = driver.RunCommand(filtered_test_args);
   // If any diagnostics have been produced, add a trailing newline to make the
@@ -164,7 +193,7 @@ auto ToolchainFileTest::Run(
                  [&](std::pair<llvm::StringRef, bool> entry) {
                    return entry.first == "." || entry.first == "-" ||
                           entry.first.starts_with("not_file") ||
-                          llvm::is_contained(prelude_files, entry.first);
+                          llvm::is_contained(data_->prelude_files, entry.first);
                  });
 
   if (component_ == "language_server") {
@@ -187,13 +216,13 @@ auto ToolchainFileTest::GetDefaultArgs() const
     return args;
   }
 
-  args.insert(args.end(),
-              {
-                  "compile",
-                  "--phase=" + component_.str(),
-                  // Use the install path to exclude prelude files.
-                  "--exclude-dump-file-prefix=" + installation_.core_package(),
-              });
+  args.insert(args.end(), {
+                              "compile",
+                              "--phase=" + component_.str(),
+                              // Use the install path to exclude prelude files.
+                              "--exclude-dump-file-prefix=" +
+                                  data_->installation.core_package(),
+                          });
 
   if (component_ == "lex") {
     args.insert(args.end(), {"--no-prelude-import", "--dump-tokens",
@@ -257,7 +286,7 @@ auto ToolchainFileTest::DoExtraCheckReplacements(std::string& check_line) const
     // TODO: Consider adding a content keyword to name the core package, and
     // replace with that instead. Alternatively, consider adding the core
     // package to the VFS with a fixed name.
-    absl::StrReplaceAll({{installation_.core_package(), "{{.*}}"}},
+    absl::StrReplaceAll({{data_->installation.core_package(), "{{.*}}"}},
                         &check_line);
   } else {
     FileTestBase::DoExtraCheckReplacements(check_line);