Selaa lähdekoodia

Add the beginnings of a RecursiveASTVisitor-based migration tool. (#2041)

The approach uses a RecursiveASTVisitor rather than matchers. Matchers
and callbacks do not compose neatly and introduce significant runtime overhead
over RecursiveASTVisitor when an action needs to be performed on most nodes.
Andy Soffer 3 vuotta sitten
vanhempi
sitoutus
fd28f5f9a8

+ 39 - 0
migrate_cpp/BUILD

@@ -11,3 +11,42 @@ py_binary(
     ],
     python_version = "PY3",
 )
+
+cc_library(
+    name = "output_segment",
+    hdrs = ["output_segment.h"],
+    deps = [
+        "//common:check",
+        "@llvm-project//clang:ast",
+    ],
+)
+
+cc_library(
+    name = "rewriter",
+    srcs = ["rewriter.cpp"],
+    hdrs = ["rewriter.h"],
+    deps = [
+        ":output_segment",
+        "//common:check",
+        "@llvm-project//clang:ast",
+        "@llvm-project//clang:basic",
+        "@llvm-project//clang:frontend",
+        "@llvm-project//clang:lex",
+        "@llvm-project//clang:tooling",
+        "@llvm-project//clang:tooling_core",
+        "@llvm-project//llvm:Support",
+    ],
+)
+
+cc_test(
+    name = "rewriter_test",
+    srcs = ["rewriter_test.cpp"],
+    deps = [
+        ":rewriter",
+        "//common:gtest_main",
+        "@com_google_googletest//:gtest",
+        "@llvm-project//clang:ast",
+        "@llvm-project//clang:frontend",
+        "@llvm-project//clang:tooling",
+    ],
+)

+ 8 - 0
migrate_cpp/README.md

@@ -11,6 +11,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 ## Table of contents
 
 -   [Overview](#overview)
+-   [Structure](#structure)
 
 <!-- tocstop -->
 
@@ -18,3 +19,10 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 `migrate_cpp` assists in migration of C++ code to Carbon. It's currently being
 assembled; more documentation will be added later.
+
+## Structure
+
+The `migrate_cpp` tool uses a `clang::RecursiveASTVisitor` to traverse Clang's
+AST and associate a replacement with each node. Each node's replacement is a
+sequence of segments, each of which is either literal text or a reference to
+another node whose own replacement should be substituted in its place.

+ 90 - 0
migrate_cpp/output_segment.h

@@ -0,0 +1,90 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_MIGRATE_CPP_OUTPUT_SEGMENT_H_
+#define CARBON_MIGRATE_CPP_OUTPUT_SEGMENT_H_
+
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <variant>
+
+#include "clang/AST/ASTTypeTraits.h"
+#include "common/check.h"
+
+namespace Carbon {
+
+// Represents a segment of the output string. `OutputSegment`s come in two
+// flavors: Text and Node. A text segment holds string text that should be
+// added to the output. A node segment holds a node in Clang's AST and
+// indicates that the output associated to that node should be the output
+// segments that the `RewriteBuilder` (see rewriter.h) has attached to that AST
+// node.
+//
+// For example, the output for a binary operator node corresponding to the C++
+// code snippet `f() + 3 * 5`, would be the sequence of three output segments:
+//
+//                  {Node(lhs), Text(" + "), Node(rhs)}
+//
+// The left-hand side and right-hand side can then be queried recursively to
+// determine what their output should be.
+class OutputSegment {
+ public:
+  // Returns whether or not the type T is an acceptable node type from which an
+  // OutputSegment can be constructed. We intentionally do not want to support
+  // `clang::Type` because we support traversing through `clang::TypeLoc`
+  // instead. However, most other types we intend to support as they become
+  // necessary.
+  template <typename T>
+  static constexpr bool IsSupportedClangASTNodeType() {
+    return std::is_convertible_v<T*, clang::Stmt*> ||
+           std::is_convertible_v<T*, clang::Decl*>;
+  }
+
+  // Creates a text-based `OutputSegment`.
+  explicit OutputSegment(std::string content) : content_(std::move(content)) {}
+  explicit OutputSegment(llvm::StringRef content) : content_(content.str()) {}
+  explicit OutputSegment(const char* content) : content_(content) {}
+
+  // Creates a node-based `OutputSegment` from `node`.
+  explicit OutputSegment(const clang::DynTypedNode& node) : content_(node) {}
+  template <typename T,
+            std::enable_if_t<OutputSegment::IsSupportedClangASTNodeType<T>(),
+                             int> = 0>
+  explicit OutputSegment(const T* node);
+
+  // Creates a TypeLoc-based `OutputSegment` from `type_loc`.
+  explicit OutputSegment(clang::TypeLoc type_loc)
+      : content_(PassThroughQualifiedTypeLoc(type_loc)) {}
+
+ private:
+  friend struct OutputWriter;
+
+  template <typename T>
+  T& AssertNotNull(T* ptr) {
+    CARBON_CHECK(ptr != nullptr);
+    return *ptr;
+  }
+
+  // Traversals for TypeLocs have some sharp corners. In particular,
+  // QualifiedTypeLocs are silently passed through to their unqualified part.
+  // This means that when constructing output segments we also need to match
+  // this behavior.
+  static auto PassThroughQualifiedTypeLoc(clang::TypeLoc type_loc)
+      -> clang::TypeLoc {
+    auto qtl = type_loc.getAs<clang::QualifiedTypeLoc>();
+    return qtl.isNull() ? type_loc : qtl.getUnqualifiedLoc();
+  }
+
+  std::variant<std::string, clang::DynTypedNode, clang::TypeLoc> content_;
+};
+
+template <typename T, std::enable_if_t<
+                          OutputSegment::IsSupportedClangASTNodeType<T>(), int>>
+OutputSegment::OutputSegment(const T* node)
+    : content_(clang::DynTypedNode::create(AssertNotNull(node))) {}
+
+}  // namespace Carbon
+
+#endif  // CARBON_MIGRATE_CPP_OUTPUT_SEGMENT_H_

+ 285 - 0
migrate_cpp/rewriter.cpp

@@ -0,0 +1,285 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "migrate_cpp/rewriter.h"
+
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/FormatVariadic.h"
+
+namespace Carbon {
+
+static constexpr const char CppPlaceholder[] = "__cpp__{ ... }";
+
+auto OutputWriter::Write(clang::SourceLocation loc,
+                         const OutputSegment& segment) const -> bool {
+  return std::visit(
+      [&](auto& content) {
+        using type = std::decay_t<decltype(content)>;
+        auto [begin, end] = bounds;
+
+        if constexpr (std::is_same_v<type, std::string>) {
+          auto begin_offset = source_manager.getDecomposedLoc(loc).second;
+          // Append the string replacement if the node being replaced falls
+          // within `bounds`.
+          if (begin <= begin_offset && begin_offset < end) {
+            output.append(content);
+          }
+        } else if constexpr (std::is_same_v<type, clang::DynTypedNode> ||
+                             std::is_same_v<type, clang::TypeLoc>) {
+          auto content_loc = content.getSourceRange().getBegin();
+          auto begin_offset =
+              source_manager.getDecomposedLoc(content_loc).second;
+          // If the node we're considering a replacement for is already beyond
+          // the region for which we want to make a replacement, exit early
+          // declaring that we have completed replacements (by returning false).
+          // Otherwise proceed. Note that we do not exit early or skip anything
+          // if the node comes before the relevant region. This is because many
+          // nodes in Clang's AST have a starting source location but a
+          // meaningless end location, and while the start of the segment may
+          // not be in the range, as we recurse, sub-segments may indeed end up
+          // being printed.
+          if (begin_offset >= end) {
+            return false;
+          }
+
+          if (auto iter = map.find(content); iter == map.end()) {
+            output.append(CppPlaceholder);
+          } else {
+            for (const auto& output_segment : iter->second) {
+              if (!Write(content.getSourceRange().getBegin(), output_segment)) {
+                return false;
+              }
+            }
+          }
+        } else {
+          static_assert(std::is_void_v<type>,
+                        "Failed to handle a case in the `std::variant`.");
+        }
+        return true;
+      },
+      segment.content_);
+}
+
+auto MigrationConsumer::HandleTranslationUnit(clang::ASTContext& context)
+    -> void {
+  RewriteBuilder rewriter(context, segment_map_);
+  rewriter.TraverseAST(context);
+
+  auto translation_unit_node =
+      clang::DynTypedNode::create(*context.getTranslationUnitDecl());
+  auto iter = segment_map_.find(translation_unit_node);
+
+  if (iter == segment_map_.end()) {
+    result_.append(CppPlaceholder);
+  } else {
+    OutputWriter w{
+        .map = segment_map_,
+        .bounds = output_range_,
+        .source_manager = context.getSourceManager(),
+        .output = result_,
+    };
+
+    for (const auto& output_segment : iter->second) {
+      w.Write(translation_unit_node.getSourceRange().getBegin(),
+              output_segment);
+    }
+  }
+}
+
+auto RewriteBuilder::TextFor(clang::SourceLocation begin,
+                             clang::SourceLocation end) const
+    -> llvm::StringRef {
+  auto range = clang::CharSourceRange::getCharRange(begin, end);
+  return clang::Lexer::getSourceText(range, context_.getSourceManager(),
+                                     context_.getLangOpts());
+}
+
+auto RewriteBuilder::TextForTokenAt(clang::SourceLocation loc) const
+    -> llvm::StringRef {
+  auto& source_manager = context_.getSourceManager();
+  auto [file_id, offset] = source_manager.getDecomposedLoc(loc);
+  llvm::StringRef file = source_manager.getBufferData(file_id);
+  clang::Lexer lexer(source_manager.getLocForStartOfFile(file_id),
+                     context_.getLangOpts(), file.begin(), file.data() + offset,
+                     file.end());
+  clang::Token token;
+  lexer.LexFromRawLexer(token);
+  return TextFor(loc, loc.getLocWithOffset(token.getLength()));
+}
+
+// TODO: The output written in this member function needs to be
+// architecture-dependent. Moreover, even if the output is correct in the sense
+// that the types match and are interoperable between Carbon and C++, they may
+// not be semantically correct: If the C++ code specifies the type `long`, and
+// on the platform for which the migration is occurring `long` has 64-bits, we
+// may not want to use `i64` as the replacement: The C++ code may be intended to
+// operate in environments where `long` is only 32-bits wide. We need to develop
+// a strategy for determining builtin-type replacements that addresses these
+// issues.
+auto RewriteBuilder::VisitBuiltinTypeLoc(clang::BuiltinTypeLoc type_loc)
+    -> bool {
+  llvm::StringRef content;
+  switch (type_loc.getTypePtr()->getKind()) {
+    case clang::BuiltinType::Bool:
+      content = "bool";
+      break;
+    case clang::BuiltinType::Char_U:
+      content = "char";
+      break;
+    case clang::BuiltinType::UChar:
+      content = "u8";
+      break;
+    case clang::BuiltinType::UShort:
+      content = "u16";
+      break;
+    case clang::BuiltinType::UInt:
+      content = "u32";
+      break;
+    case clang::BuiltinType::ULong:
+      content = "u64";
+      break;
+    case clang::BuiltinType::ULongLong:
+      content = "u64";
+      break;
+    case clang::BuiltinType::UInt128:
+      content = "u128";
+      break;
+    case clang::BuiltinType::Char_S:
+      content = "char";
+      break;
+    case clang::BuiltinType::SChar:
+      content = "i8";
+      break;
+    case clang::BuiltinType::Short:
+      content = "i16";
+      break;
+    case clang::BuiltinType::Int:
+      content = "i32";
+      break;
+    case clang::BuiltinType::Long:
+      content = "i64";
+      break;
+    case clang::BuiltinType::LongLong:
+      content = "i64";
+      break;
+    case clang::BuiltinType::Int128:
+      content = "i128";
+      break;
+    case clang::BuiltinType::Float:
+      content = "f32";
+      break;
+    case clang::BuiltinType::Double:
+      content = "f64";
+      break;
+    default:
+      // In this case we do not know what the output should be so we do not
+      // write any.
+      return true;
+  }
+  SetReplacement(type_loc, OutputSegment(content));
+  return true;
+}
+
+auto RewriteBuilder::VisitCXXBoolLiteralExpr(clang::CXXBoolLiteralExpr* expr)
+    -> bool {
+  SetReplacement(expr, OutputSegment(expr->getValue() ? "true" : "false"));
+  return true;
+}
+
+auto RewriteBuilder::VisitDeclRefExpr(clang::DeclRefExpr* expr) -> bool {
+  SetReplacement(expr, OutputSegment(TextForTokenAt(expr->getBeginLoc())));
+  return true;
+}
+
+auto RewriteBuilder::VisitDeclStmt(clang::DeclStmt* stmt) -> bool {
+  std::vector<OutputSegment> segments;
+  for (clang::Decl* decl : stmt->decls()) {
+    segments.push_back(OutputSegment(decl));
+    segments.push_back(OutputSegment(";\n"));
+  }
+  SetReplacement(stmt, std::move(segments));
+  return true;
+}
+
+auto RewriteBuilder::VisitIntegerLiteral(clang::IntegerLiteral* expr) -> bool {
+  // TODO: Replace suffixes.
+  std::string text(TextForTokenAt(expr->getBeginLoc()));
+  for (char& c : text) {
+    // Carbon uses underscores for digit separators whereas C++ uses single
+    // quotation marks. Convert all `'` to `_`.
+    if (c == '\'') {
+      c = '_';
+    }
+  }
+  SetReplacement(expr, {OutputSegment(std::move(text))});
+  return true;
+}
+
+auto RewriteBuilder::VisitPointerTypeLoc(clang::PointerTypeLoc type_loc)
+    -> bool {
+  SetReplacement(type_loc,
+                 {OutputSegment(type_loc.getPointeeLoc()), OutputSegment("*")});
+  return true;
+}
+
+auto RewriteBuilder::VisitTranslationUnitDecl(clang::TranslationUnitDecl* decl)
+    -> bool {
+  std::vector<OutputSegment> segments;
+
+  // Clang starts each translation unit with some initial `TypedefDecl`s that
+  // are not part of the written text. We want to skip past these initial
+  // declarations, which we do by ignoring any node of type `TypedefDecl` which
+  // has an invalid source location.
+  auto iter = decl->decls_begin();
+  while (iter != decl->decls_end() && llvm::isa<clang::TypedefDecl>(*iter) &&
+         (*iter)->getLocation().isInvalid()) {
+    ++iter;
+  }
+
+  for (; iter != decl->decls_end(); ++iter) {
+    clang::Decl* d = *iter;
+    segments.push_back(OutputSegment(d));
+    segments.push_back(OutputSegment(";\n"));
+  }
+
+  SetReplacement(decl, std::move(segments));
+  return true;
+}
+
+auto RewriteBuilder::VisitUnaryOperator(clang::UnaryOperator* expr) -> bool {
+  switch (expr->getOpcode()) {
+    case clang::UO_AddrOf:
+      SetReplacement(expr,
+                     {OutputSegment("&"), OutputSegment(expr->getSubExpr())});
+      break;
+
+    default:
+      // TODO: Finish implementing cases.
+      break;
+  }
+  return true;
+}
+
+auto RewriteBuilder::VisitVarDecl(clang::VarDecl* decl) -> bool {
+  // TODO: Check storage class. Determine what happens for static local
+  // variables.
+
+  bool is_const = decl->getType().isConstQualified();
+  std::vector<OutputSegment> segments = {
+      OutputSegment(llvm::formatv("{0} {1}: ", is_const ? "let" : "var",
+                                  decl->getNameAsString())),
+      OutputSegment(decl->getTypeSourceInfo()->getTypeLoc()),
+  };
+
+  if (clang::Expr* init = decl->getInit()) {
+    segments.push_back(OutputSegment(" = "));
+    segments.push_back(OutputSegment(init));
+  }
+
+  SetReplacement(decl, std::move(segments));
+  return true;
+}
+
+}  // namespace Carbon

+ 213 - 0
migrate_cpp/rewriter.h

@@ -0,0 +1,213 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_MIGRATE_CPP_REWRITER_H_
+#define CARBON_MIGRATE_CPP_REWRITER_H_
+
+#include <string>
+#include <utility>
+#include <variant>
+#include <vector>
+
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/ASTTypeTraits.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendAction.h"
+#include "llvm/ADT/DenseMap.h"
+#include "migrate_cpp/output_segment.h"
+
+namespace Carbon {
+namespace Internal {
+
+struct Empty {
+  friend bool operator==(Empty, Empty) { return true; }
+};
+struct Tombstone {
+  friend bool operator==(Tombstone, Tombstone) { return true; }
+};
+
+// Type alias for the variant representing any of the values that can be
+// written with OutputWriter.
+using KeyType =
+    std::variant<clang::DynTypedNode, clang::TypeLoc, Empty, Tombstone>;
+
+// `KeyInfo` is used as a template argument to `llvm::DenseMap` to specify how
+// to equality-compare and hash `KeyType`.
+struct KeyInfo {
+  static bool isEqual(const KeyType& lhs, const KeyType& rhs) {
+    return lhs == rhs;
+  }
+  static unsigned getHashValue(const KeyType& x) {
+    return std::visit(
+        [](auto x) -> unsigned {
+          using type = std::decay_t<decltype(x)>;
+          if constexpr (std::is_same_v<type, clang::DynTypedNode>) {
+            return clang::DynTypedNode::DenseMapInfo::getHashValue(x);
+          } else if constexpr (std::is_same_v<type, clang::TypeLoc>) {
+            // TODO: Improve this.
+            return reinterpret_cast<uintptr_t>(x.getTypePtr());
+          } else {
+            return 0;
+          }
+        },
+        x);
+  }
+
+  static KeyType getEmptyKey() { return Empty{}; }
+  static KeyType getTombstoneKey() { return Tombstone{}; }
+};
+
+}  // namespace Internal
+
+// `OutputWriter` is responsible for traversing the tree of `OutputSegment`s
+// and writing the correct data to its member `output`.
+struct OutputWriter {
+  using SegmentMapType =
+      llvm::DenseMap<Internal::KeyType, std::vector<OutputSegment>,
+                     Internal::KeyInfo>;
+
+  auto Write(clang::SourceLocation loc, const OutputSegment& segment) const
+      -> bool;
+
+  const SegmentMapType& map;
+
+  // Bounds represent the offsets into the primary file (multi-file refactorings
+  // are not yet supported) that should be output. While primarily this is a
+  // mechanism to make testing more robust, it can also be used to make local
+  // changes to sections of C++ code.
+  std::pair<size_t, size_t> bounds;
+
+  clang::SourceManager& source_manager;
+  std::string& output;
+};
+
+// `RewriteBuilder` is a recursive AST visitor. For each node, it computes and
+// stores a sequence of `OutputSegment`s describing how this node should be
+// replaced.
+class RewriteBuilder : public clang::RecursiveASTVisitor<RewriteBuilder> {
+ public:
+  using SegmentMapType = typename OutputWriter::SegmentMapType;
+
+  // Constructs a `RewriteBuilder` which can read the AST from `context` and
+  // will write results into `segments`.
+  explicit RewriteBuilder(clang::ASTContext& context, SegmentMapType& segments)
+      : context_(context), segments_(segments) {}
+
+  // By default, traverse children nodes before their parent. Called by the CRTP
+  // base class to determine traversal order.
+  auto shouldTraversePostOrder() const -> bool { return true; }
+
+  // Visitor member functions, defining how each node should be processed.
+  auto VisitBuiltinTypeLoc(clang::BuiltinTypeLoc type_loc) -> bool;
+  auto VisitCXXBoolLiteralExpr(clang::CXXBoolLiteralExpr* expr) -> bool;
+  auto VisitDeclRefExpr(clang::DeclRefExpr* expr) -> bool;
+  auto VisitDeclStmt(clang::DeclStmt* stmt) -> bool;
+  auto VisitIntegerLiteral(clang::IntegerLiteral* expr) -> bool;
+  auto VisitPointerTypeLoc(clang::PointerTypeLoc type_loc) -> bool;
+  auto VisitTranslationUnitDecl(clang::TranslationUnitDecl* decl) -> bool;
+  auto VisitUnaryOperator(clang::UnaryOperator* expr) -> bool;
+  auto VisitVarDecl(clang::VarDecl* decl) -> bool;
+
+  auto segments() const -> const SegmentMapType& { return segments_; }
+  auto segments() -> SegmentMapType& { return segments_; }
+
+ private:
+  // Associates `output_segments` in the output map `this->segments()` with the
+  // key `node`, so as to declare that, when output is being written, `node`
+  // should be replaced with the sequence of outputs described by
+  // `output_segments`.
+  auto SetReplacement(clang::DynTypedNode node,
+                      std::vector<OutputSegment> output_segments) -> void {
+    segments_.try_emplace(node, std::move(output_segments));
+  }
+
+  auto SetReplacement(clang::TypeLoc node,
+                      std::vector<OutputSegment> output_segments) -> void {
+    segments_.try_emplace(node, std::move(output_segments));
+  }
+
+  template <typename T>
+  auto SetReplacement(const T* node, std::vector<OutputSegment> output_segments)
+      -> void {
+    segments_.try_emplace(clang::DynTypedNode::create(*node),
+                          std::move(output_segments));
+  }
+
+  // Invokes the overload of `SetReplacement` defined above. Equivalent to
+  // `this->SetReplacement(node, std::vector<OutputSegment>(1, segment))`.
+  template <typename T>
+  auto SetReplacement(const T* node, OutputSegment segment) -> void {
+    std::vector<OutputSegment> node_segments;
+    node_segments.push_back(std::move(segment));
+    SetReplacement(node, std::move(node_segments));
+  }
+
+  auto SetReplacement(clang::TypeLoc type_loc, OutputSegment segment) -> void {
+    std::vector<OutputSegment> node_segments;
+    node_segments.push_back(std::move(segment));
+    SetReplacement(type_loc, std::move(node_segments));
+  }
+
+  // Returns a `llvm::StringRef` into the source text corresponding to the
+  // half-open interval starting at `begin` (inclusive) and ending at `end`
+  // (exclusive).
+  auto TextFor(clang::SourceLocation begin, clang::SourceLocation end) const
+      -> llvm::StringRef;
+
+  // Returns a `llvm::StringRef` into the source text for the single token
+  // located at `loc`.
+  auto TextForTokenAt(clang::SourceLocation loc) const -> llvm::StringRef;
+
+  clang::ASTContext& context_;
+  SegmentMapType& segments_;
+};
+
+// An `ASTConsumer` which, when executed, populates a `std::string` with the
+// text of a Carbon source file which is a best approximation of the
+// semantics of the corresponding C++ translation unit defined by the consumed
+// AST.
+class MigrationConsumer : public clang::ASTConsumer {
+ public:
+  explicit MigrationConsumer(std::string& result,
+                             std::pair<size_t, size_t> output_range)
+      : result_(result), output_range_(output_range) {}
+
+  auto HandleTranslationUnit(clang::ASTContext& context) -> void override;
+
+ private:
+  RewriteBuilder::SegmentMapType segment_map_;
+  std::string& result_;
+  std::pair<size_t, size_t> output_range_;
+};
+
+// An `ASTFrontendAction` which constructs a `MigrationConsumer` and invokes it
+// on an AST, populating a `std::string` with the text of a Carbon source file
+// which is a best approximation of the semantics of the corresponding C++
+// translation unit defined by the consumed AST.
+class MigrationAction : public clang::ASTFrontendAction {
+ public:
+  // Constructs the `MigrationAction`. The parameter `result` is a reference to
+  // the `std::string` where output will be written. Only output corresponding
+  // to text at offsets that fall in between `output_range.first` and
+  // `output_range.second` will be written.
+  explicit MigrationAction(std::string& result,
+                           std::pair<size_t, size_t> output_range)
+      : result_(result), output_range_(output_range) {}
+
+  // Returns a `std::unique_ptr` to a `MigrationConsumer` which populates
+  // the output `result`.
+  auto CreateASTConsumer(clang::CompilerInstance&, llvm::StringRef)
+      -> std::unique_ptr<clang::ASTConsumer> override {
+    return std::make_unique<MigrationConsumer>(result_, output_range_);
+  }
+
+ private:
+  std::string& result_;
+  std::pair<size_t, size_t> output_range_;
+};
+
+}  // namespace Carbon
+
+#endif  // CARBON_MIGRATE_CPP_REWRITER_H_

+ 135 - 0
migrate_cpp/rewriter_test.cpp

@@ -0,0 +1,135 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "migrate_cpp/rewriter.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "clang/AST/ASTConsumer.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendAction.h"
+#include "clang/Tooling/Tooling.h"
+
+namespace Carbon::Testing {
+namespace {
+
+// Represents C++ source code with at most one region enclosed in $[[...]]$ as
+// an annotated range.
+class Annotations {
+ public:
+  Annotations(llvm::StringRef annotated_source) {
+    size_t index = annotated_source.find("$[[");
+    if (index == llvm::StringRef::npos) {
+      source_code_ = std::string(annotated_source);
+      return;
+    }
+    start_ = index;
+    end_ = annotated_source.find("]]$", index);
+    CARBON_CHECK(end_ != llvm::StringRef::npos)
+        << "Found `$[[` but no matching `]]$`";
+    source_code_ = (llvm::Twine(annotated_source.substr(0, start_)) +
+                    annotated_source.substr(start_ + 3, end_ - start_ - 3) +
+                    annotated_source.substr(end_ + 3))
+                       .str();
+    // Update `end_` so that it is relative to the unannotated source (which
+    // means three characters earlier due to the `$[[` being removed).
+    end_ -= 3;
+  }
+
+  // Returns a view into the unannotated source.
+  llvm::StringRef source() const { return source_code_; }
+
+  // Returns the offsets in the file representing the annotated range if they
+  // exist and `{0, std::numeric_limits<size_t>::max()}` otherwise.
+  std::pair<size_t, size_t> range() const { return std::pair(start_, end_); }
+
+ private:
+  std::string source_code_;
+  size_t start_ = 0;
+  size_t end_ = std::numeric_limits<size_t>::max();
+};
+
+// Rewrites `cpp_code`, returning the Carbon equivalent. If the text has no
+// source range annotated with $[[...]]$, the entire translation unit will be
+// migrated and output. Otherwise, only the migrated output corresponding to the
+// annotated range will be output. No more than one range may be annotated at
+// all.
+//
+// This annotation mechanism is useful in that it allows us to specifically test
+// the migration associated with specific nodes even when they require some
+// additional context that we do not wish to be covered by the test.
+auto RewriteText(llvm::StringRef cpp_code) -> std::string {
+  std::string result;
+
+  Annotations annotated_cpp_code(cpp_code);
+
+  bool success = clang::tooling::runToolOnCodeWithArgs(
+      std::make_unique<MigrationAction>(result, annotated_cpp_code.range()),
+      annotated_cpp_code.source(), {}, "test.cc", "clang-tool",
+      std::make_shared<clang::PCHContainerOperations>(),
+      clang::tooling::FileContentMappings());
+
+  return success ? result : "";
+}
+
+TEST(Rewriter, BoolLiteral) {
+  EXPECT_EQ(RewriteText("bool x = $[[true]]$;"), "true");
+  EXPECT_EQ(RewriteText("bool x = $[[false]]$;"), "false");
+}
+
+TEST(Rewriter, IntegerLiteral) {
+  EXPECT_EQ(RewriteText("int x = $[[0]]$;"), "0");
+  EXPECT_EQ(RewriteText("int x = $[[1]]$;"), "1");
+  EXPECT_EQ(RewriteText("int x = $[[1234]]$;"), "1234");
+  EXPECT_EQ(RewriteText("int x = $[[12'34]]$;"), "12_34");
+  EXPECT_EQ(RewriteText("int x = $[[12'3'4]]$;"), "12_3_4");
+}
+
+TEST(Rewriter, SingleDeclaration) {
+  EXPECT_EQ(RewriteText("bool b;"), "var b: bool;\n");
+  EXPECT_EQ(RewriteText("int i;"), "var i: i32;\n");
+
+  EXPECT_EQ(RewriteText("const bool b = false;"), "let b: bool = false;\n");
+  EXPECT_EQ(RewriteText("const int i = 17;"), "let i: i32 = 17;\n");
+
+  EXPECT_EQ(RewriteText("bool const b = false;"), "let b: bool = false;\n");
+  EXPECT_EQ(RewriteText("int const i = 1234;"), "let i: i32 = 1234;\n");
+}
+
+TEST(Rewriter, Pointers) {
+  // TODO: Add tests for pointers-to-const when the syntax is nailed down.
+  EXPECT_EQ(RewriteText("bool b;\n"
+                        "$[[bool *p = &b]]$;"),
+            "var p: bool* = &b");
+  EXPECT_EQ(RewriteText("bool b;\n"
+                        "$[[bool * const p = &b]]$;"),
+            "let p: bool* = &b");
+
+  // Pointers and non-pointers on the same DeclStmt.
+  EXPECT_EQ(RewriteText("bool b, *p;\n"),
+            "var b: bool;\n"
+            "var p: bool*;\n");
+  EXPECT_EQ(RewriteText("bool b, *p = &b;\n"),
+            "var b: bool;\n"
+            "var p: bool* = &b;\n");
+}
+
+TEST(Rewriter, DeclarationComma) {
+  EXPECT_EQ(RewriteText("int x, y;"),
+            "var x: i32;\n"
+            "var y: i32;\n");
+  EXPECT_EQ(RewriteText("int x = 7, y;"),
+            "var x: i32 = 7;\n"
+            "var y: i32;\n");
+  EXPECT_EQ(RewriteText("const int x = 1, y = 2;"),
+            "let x: i32 = 1;\n"
+            "let y: i32 = 2;\n");
+  EXPECT_EQ(RewriteText("int const x = 1234, y = 5678;"),
+            "let x: i32 = 1234;\n"
+            "let y: i32 = 5678;\n");
+}
+
+}  // namespace
+}  // namespace Carbon::Testing