Переглянути джерело

Switch from `clang::ASTUnit` to `clang::CompilerInstance`. (#6483)

This gives us a lot more control over how the compiler is built and
invoked. But no functionality changes are intended in this PR.
Richard Smith 4 місяців тому
батько
коміт
c4d162e5f5

+ 2 - 1
toolchain/check/cpp/context.cpp

@@ -8,7 +8,8 @@
 
 namespace Carbon::Check {
 
-CppContext::CppContext(clang::ASTUnit* ast_unit) : ast_unit_(ast_unit) {}
+CppContext::CppContext(std::unique_ptr<clang::FrontendAction> action)
+    : action_(std::move(action)) {}
 
 CppContext::~CppContext() = default;
 

+ 12 - 6
toolchain/check/cpp/context.h

@@ -8,7 +8,8 @@
 #include <memory>
 
 #include "clang/Basic/SourceLocation.h"
-#include "clang/Frontend/ASTUnit.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendAction.h"
 #include "llvm/ADT/SmallVector.h"
 
 namespace Carbon::Check {
@@ -20,13 +21,18 @@ namespace Carbon::Check {
 // declarations, and similar values.
 class CppContext {
  public:
-  explicit CppContext(clang::ASTUnit* ast_unit);
+  explicit CppContext(std::unique_ptr<clang::FrontendAction> action);
   ~CppContext();
 
+  auto action() -> clang::FrontendAction& { return *action_; }
+
   auto ast_context() -> clang::ASTContext& {
-    return ast_unit_->getASTContext();
+    return action_->getCompilerInstance().getASTContext();
+  }
+
+  auto sema() -> clang::Sema& {
+    return action_->getCompilerInstance().getSema();
   }
-  auto sema() -> clang::Sema& { return ast_unit_->getSema(); }
 
   auto clang_mangle_context() -> clang::MangleContext&;
 
@@ -35,8 +41,8 @@ class CppContext {
   }
 
  private:
-  // The ASTUnit is owned by the `CppFile`.
-  clang::ASTUnit* ast_unit_;
+  // The clang action that is generating the C++ AST.
+  std::unique_ptr<clang::FrontendAction> action_;
 
   // Per-Carbon-file start locations for corresponding Clang source buffers.
   // Owned and managed by code in location.cpp.

+ 78 - 13
toolchain/check/cpp/generate_ast.cpp

@@ -11,8 +11,10 @@
 #include "clang/Basic/FileManager.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Frontend/FrontendAction.h"
 #include "clang/Frontend/TextDiagnostic.h"
 #include "clang/Lex/PreprocessorOptions.h"
+#include "clang/Sema/ExternalSemaSource.h"
 #include "common/check.h"
 #include "common/raw_string_ostream.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
@@ -23,6 +25,7 @@
 #include "toolchain/diagnostics/diagnostic_emitter.h"
 #include "toolchain/diagnostics/format_providers.h"
 #include "toolchain/parse/node_ids.h"
+#include "toolchain/sem_ir/cpp_file.h"
 
 namespace Carbon::Check {
 
@@ -319,12 +322,53 @@ class ShallowCopyCompilerInvocation : public clang::CompilerInvocation {
       const clang::CompilerInvocation& invocation) {
     shallow_copy_assign(invocation);
 
-    // The preprocessor options are modified to hold a replacement includes
-    // buffer, so make our own version of those options.
+    // Make a deep copy of options that we modify.
+    FrontendOpts = std::make_shared<clang::FrontendOptions>(*FrontendOpts);
     PPOpts = std::make_shared<clang::PreprocessorOptions>(*PPOpts);
   }
 };
 
+// An AST consumer that appends each top-level declaration group it is given to
+// the `SemIR::CppFile`, so they can be handed off to code generation later.
+class BufferingConsumer : public clang::ASTConsumer {
+ public:
+  explicit BufferingConsumer(SemIR::CppFile& file) : file_(&file) {}
+
+  auto HandleTopLevelDecl(clang::DeclGroupRef decl_group) -> bool override {
+    file_->decl_groups().push_back(decl_group);
+    return true;
+  }
+
+ private:
+  SemIR::CppFile* file_;
+};
+
+// A Clang frontend action used to generate an AST from a set of Cpp imports.
+// Its AST consumer buffers top-level decl groups into the `SemIR::CppFile`.
+class GenerateASTAction : public clang::ASTFrontendAction {
+ public:
+  explicit GenerateASTAction(Context& context) : context_(&context) {}
+
+ protected:
+  auto CreateASTConsumer(clang::CompilerInstance& /*clang_instance*/,
+                         llvm::StringRef /*file*/)
+      -> std::unique_ptr<clang::ASTConsumer> override {
+    return std::make_unique<BufferingConsumer>(*context_->sem_ir().cpp_file());
+  }
+
+  auto BeginSourceFileAction(clang::CompilerInstance& /*clang_instance*/)
+      -> bool override {
+    // TODO: Consider creating an `ExternalSemaSource` here and attaching it to
+    // the compilation.
+    // TODO: Call `enableIncrementalProcessing()` on the instance's preprocessor
+    // to avoid the TU scope getting torn down before we're done parsing macros.
+    return true;
+  }
+
+ private:
+  Context* context_;
+};
+
 }  // namespace
 
 auto GenerateAst(Context& context,
@@ -338,6 +382,9 @@ auto GenerateAst(Context& context,
   auto invocation =
       std::make_shared<ShallowCopyCompilerInvocation>(*base_invocation);
 
+  // Ask Clang to not leak memory.
+  invocation->getFrontendOpts().DisableFree = false;
+
   // Build a diagnostics engine.
   llvm::IntrusiveRefCntPtr<clang::DiagnosticsEngine> diags(
       clang::CompilerInstance::createDiagnostics(
@@ -364,19 +411,37 @@ auto GenerateAst(Context& context,
 
   clang::DiagnosticErrorTrap trap(*diags);
 
-  // Create the AST unit.
-  auto ast = clang::ASTUnit::LoadFromCompilerInvocation(
-      invocation, std::make_shared<clang::PCHContainerOperations>(), nullptr,
-      diags, new clang::FileManager(invocation->getFileSystemOpts(), fs));
-
-  // Attach the AST to SemIR. This needs to be done before we can emit any
-  // diagnostics, so their locations can be properly interpreted by our
-  // diagnostics machinery.
-  context.set_cpp_context(std::make_unique<CppContext>(ast.get()));
+  auto clang_instance_ptr =
+      std::make_unique<clang::CompilerInstance>(invocation);
+  auto& clang_instance = *clang_instance_ptr;
   context.sem_ir().set_cpp_file(
-      std::make_unique<SemIR::CppFile>(std::move(ast)));
+      std::make_unique<SemIR::CppFile>(std::move(clang_instance_ptr)));
+
+  clang_instance.setDiagnostics(diags);
+  clang_instance.setVirtualFileSystem(fs);
+  clang_instance.createFileManager();
+  clang_instance.createSourceManager();
+  if (!clang_instance.createTarget()) {
+    return false;
+  }
+
+  context.set_cpp_context(std::make_unique<CppContext>(
+      std::make_unique<GenerateASTAction>(context)));
+
+  if (!context.cpp_context()->action().BeginSourceFile(clang_instance,
+                                                       inputs[0])) {
+    return false;
+  }
+
+  if (llvm::Error error = context.cpp_context()->action().Execute()) {
+    // `Execute` currently never fails, but its contract allows it to.
+    context.TODO(SemIR::LocId::None, "failed to execute clang action: " +
+                                         llvm::toString(std::move(error)));
+    return false;
+  }
 
-  // Emit any diagnostics we queued up while building the AST.
+  // Flush any diagnostics. We know we're not part-way through emitting a
+  // diagnostic now.
   context.emitter().Flush();
 
   return !trap.hasErrorOccurred();

+ 3 - 7
toolchain/lower/file_context.cpp

@@ -76,13 +76,9 @@ auto FileContext::PrepareToLower() -> void {
         const_cast<clang::ASTContext&>(cpp_file()->ast_context()));
 
     // Emit any top-level declarations now.
-    cpp_file()->VisitLocalTopLevelDecls([&](const clang::Decl* decl) {
-      // CodeGenerator won't modify the declaration it's given, but we can
-      // only call it via the ASTConsumer interface which doesn't know that.
-      auto* non_const_decl = const_cast<clang::Decl*>(decl);
-      cpp_code_generator_->HandleTopLevelDecl(
-          clang::DeclGroupRef(non_const_decl));
-    });
+    for (auto decl_group : cpp_file()->decl_groups()) {
+      cpp_code_generator_->HandleTopLevelDecl(decl_group);
+    }
   }
 
   // Lower all types that were required to be complete.

+ 0 - 1
toolchain/sem_ir/BUILD

@@ -65,7 +65,6 @@ cc_library(
 
 cc_library(
     name = "cpp_file",
-    srcs = ["cpp_file.cpp"],
     hdrs = ["cpp_file.h"],
     deps = [
         "//common:check",

+ 0 - 19
toolchain/sem_ir/cpp_file.cpp

@@ -1,19 +0,0 @@
-// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
-// Exceptions. See /LICENSE for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#include "toolchain/sem_ir/cpp_file.h"
-
-namespace Carbon::SemIR {
-
-auto CppFile::VisitLocalTopLevelDecls(
-    llvm::function_ref<void(const clang::Decl*)> visitor) const -> void {
-  ast_unit_->visitLocalTopLevelDecls(
-      &visitor, [](void* erased_visitor_ptr, const clang::Decl* decl) {
-        auto* visitor_ptr = static_cast<decltype(visitor)*>(erased_visitor_ptr);
-        (*visitor_ptr)(decl);
-        return true;
-      });
-}
-
-}  // namespace Carbon::SemIR

+ 19 - 17
toolchain/sem_ir/cpp_file.h

@@ -6,8 +6,7 @@
 #define CARBON_TOOLCHAIN_SEM_IR_CPP_FILE_H_
 
 #include "clang/Basic/Diagnostic.h"
-#include "clang/Frontend/ASTUnit.h"
-#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Frontend/CompilerInstance.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/Support/FileSystem.h"
 
@@ -17,44 +16,47 @@ namespace Carbon::SemIR {
 // imported C++ headers and any inline C++ fragments.
 class CppFile {
  public:
-  explicit CppFile(std::unique_ptr<clang::ASTUnit> ast_unit)
-      : ast_unit_(std::move(ast_unit)) {}
+  explicit CppFile(std::unique_ptr<clang::CompilerInstance> clang)
+      : clang_(std::move(clang)) {}
 
   // Access to compilation options.
   auto diagnostic_options() const -> const clang::DiagnosticOptions& {
-    return ast_unit_->getDiagnostics().getDiagnosticOptions();
+    return clang_->getDiagnostics().getDiagnosticOptions();
   }
   auto lang_options() const -> const clang::LangOptions& {
-    return ast_unit_->getLangOpts();
+    return clang_->getLangOpts();
   }
 
   // Access to Clang's compilation environment.
   auto source_manager() -> clang::SourceManager& {
-    return ast_unit_->getSourceManager();
+    return clang_->getSourceManager();
   }
   auto source_manager() const -> const clang::SourceManager& {
-    return ast_unit_->getSourceManager();
+    return clang_->getSourceManager();
   }
   // TODO: This doesn't really belong here, but is currently used by lowering
   // because Clang's code generation may produce diagnostics.
   auto diagnostics() const -> clang::DiagnosticsEngine& {
-    return ast_unit_->getDiagnostics();
+    return clang_->getDiagnostics();
   }
 
   // Access to layers of Clang's C++ representation.
-  auto ast_context() -> clang::ASTContext& {
-    return ast_unit_->getASTContext();
-  }
+  auto ast_context() -> clang::ASTContext& { return clang_->getASTContext(); }
   auto ast_context() const -> const clang::ASTContext& {
-    return ast_unit_->getASTContext();
+    return clang_->getASTContext();
   }
 
-  // Visit all top-level declarations in the file.
-  auto VisitLocalTopLevelDecls(
-      llvm::function_ref<auto(const clang::Decl*)->void> visitor) const -> void;
+  // A list of all the top-level decl groups produced in this compilation.
+  auto decl_groups() -> llvm::SmallVector<clang::DeclGroupRef>& {
+    return decl_groups_;
+  }
+  auto decl_groups() const -> const llvm::SmallVector<clang::DeclGroupRef>& {
+    return decl_groups_;
+  }
 
  private:
-  std::unique_ptr<clang::ASTUnit> ast_unit_;
+  std::unique_ptr<clang::CompilerInstance> clang_;
+  llvm::SmallVector<clang::DeclGroupRef> decl_groups_;
 };
 
 }  // namespace Carbon::SemIR