tree_and_subtrees.cpp 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/parse/tree_and_subtrees.h"
  5. namespace Carbon::Parse {
  6. TreeAndSubtrees::TreeAndSubtrees(const Lex::TokenizedBuffer& tokens,
  7. const Tree& tree)
  8. : tokens_(&tokens), tree_(&tree) {
  9. subtree_sizes_.reserve(tree_->size());
  10. // A stack of nodes which haven't yet been used as children.
  11. llvm::SmallVector<NodeId> size_stack;
  12. for (auto n : tree.postorder()) {
  13. // Nodes always include themselves.
  14. int32_t size = 1;
  15. auto kind = tree.node_kind(n);
  16. if (kind.has_child_count()) {
  17. // When the child count is set, remove the specific number from the stack.
  18. CARBON_CHECK(
  19. static_cast<int32_t>(size_stack.size()) >= kind.child_count(),
  20. "Need {0} children for {1}, have {2} available", kind.child_count(),
  21. kind, size_stack.size());
  22. for (auto i : llvm::seq(kind.child_count())) {
  23. auto child = size_stack.pop_back_val();
  24. CARBON_CHECK((size_t)child.index < subtree_sizes_.size());
  25. size += subtree_sizes_[child.index];
  26. if (kind.has_bracket() && i == kind.child_count() - 1) {
  27. CARBON_CHECK(kind.bracket() == tree.node_kind(child),
  28. "Node {0} with child count {1} needs bracket {2}, found "
  29. "wrong bracket {3}",
  30. kind, kind.child_count(), kind.bracket(),
  31. tree.node_kind(child));
  32. }
  33. }
  34. } else {
  35. while (true) {
  36. CARBON_CHECK(!size_stack.empty(), "Node {0} is missing bracket {1}",
  37. kind, kind.bracket());
  38. auto child = size_stack.pop_back_val();
  39. size += subtree_sizes_[child.index];
  40. if (kind.bracket() == tree.node_kind(child)) {
  41. break;
  42. }
  43. }
  44. }
  45. size_stack.push_back(n);
  46. subtree_sizes_.push_back(size);
  47. }
  48. CARBON_CHECK(static_cast<int>(subtree_sizes_.size()) == tree_->size());
  49. // Remaining nodes should all be roots in the tree; make sure they line up.
  50. CARBON_CHECK(
  51. size_stack.back().index == static_cast<int32_t>(tree_->size()) - 1,
  52. "{0} {1}", size_stack.back(), tree_->size() - 1);
  53. int prev_index = -1;
  54. for (const auto& n : size_stack) {
  55. CARBON_CHECK(n.index - subtree_sizes_[n.index] == prev_index,
  56. "NodeId {0} is a root {1} with subtree_size {2}, but previous "
  57. "root was at {3}.",
  58. n, tree_->node_kind(n), subtree_sizes_[n.index], prev_index);
  59. prev_index = n.index;
  60. }
  61. }
  62. auto TreeAndSubtrees::VerifyExtract(NodeId node_id, NodeKind kind,
  63. ErrorBuilder* trace) const -> bool {
  64. switch (kind) {
  65. #define CARBON_PARSE_NODE_KIND(Name) \
  66. case NodeKind::Name: \
  67. return VerifyExtractAs<Name>(node_id, trace).has_value();
  68. #include "toolchain/parse/node_kind.def"
  69. }
  70. }
  71. auto TreeAndSubtrees::Verify() const -> ErrorOr<Success> {
  72. // Validate that each node extracts successfully when not marked as having an
  73. // error.
  74. //
  75. // Without this code, a 10 mloc test case of lex & parse takes 4.129 s ± 0.041
  76. // s. With this additional verification, it takes 5.768 s ± 0.036 s.
  77. for (NodeId n : tree_->postorder()) {
  78. if (tree_->node_has_error(n)) {
  79. continue;
  80. }
  81. auto node_kind = tree_->node_kind(n);
  82. if (!VerifyExtract(n, node_kind, nullptr)) {
  83. ErrorBuilder trace;
  84. trace << llvm::formatv(
  85. "NodeId #{0} couldn't be extracted as a {1}. Trace:\n", n, node_kind);
  86. VerifyExtract(n, node_kind, &trace);
  87. return trace;
  88. }
  89. }
  90. // Validate the roots. Also ensures Tree::ExtractFile() doesn't error.
  91. if (!TryExtractNodeFromChildren<File>(NodeId::Invalid, roots(), nullptr)) {
  92. ErrorBuilder trace;
  93. trace << "Roots of tree couldn't be extracted as a `File`. Trace:\n";
  94. TryExtractNodeFromChildren<File>(NodeId::Invalid, roots(), &trace);
  95. return trace;
  96. }
  97. return Success();
  98. }
  99. auto TreeAndSubtrees::postorder(NodeId n) const
  100. -> llvm::iterator_range<Tree::PostorderIterator> {
  101. // The postorder ends after this node, the root, and begins at the start of
  102. // its subtree.
  103. int start_index = n.index - subtree_sizes_[n.index] + 1;
  104. return Tree::PostorderIterator::MakeRange(NodeId(start_index), n);
  105. }
  106. auto TreeAndSubtrees::children(NodeId n) const
  107. -> llvm::iterator_range<SiblingIterator> {
  108. CARBON_CHECK(n.is_valid());
  109. int end_index = n.index - subtree_sizes_[n.index];
  110. return llvm::iterator_range<SiblingIterator>(
  111. SiblingIterator(*this, NodeId(n.index - 1)),
  112. SiblingIterator(*this, NodeId(end_index)));
  113. }
  114. auto TreeAndSubtrees::roots() const -> llvm::iterator_range<SiblingIterator> {
  115. return llvm::iterator_range<SiblingIterator>(
  116. SiblingIterator(*this,
  117. NodeId(static_cast<int>(subtree_sizes_.size()) - 1)),
  118. SiblingIterator(*this, NodeId(-1)));
  119. }
  120. auto TreeAndSubtrees::PrintNode(llvm::raw_ostream& output, NodeId n, int depth,
  121. bool preorder) const -> bool {
  122. output.indent(2 * (depth + 2));
  123. output << "{";
  124. // If children are being added, include node_index in order to disambiguate
  125. // nodes.
  126. if (preorder) {
  127. output << "node_index: " << n.index << ", ";
  128. }
  129. output << "kind: '" << tree_->node_kind(n) << "', text: '"
  130. << tokens_->GetTokenText(tree_->node_token(n)) << "'";
  131. if (tree_->node_has_error(n)) {
  132. output << ", has_error: yes";
  133. }
  134. if (subtree_sizes_[n.index] > 1) {
  135. output << ", subtree_size: " << subtree_sizes_[n.index];
  136. if (preorder) {
  137. output << ", children: [\n";
  138. return true;
  139. }
  140. }
  141. output << "}";
  142. return false;
  143. }
  144. auto TreeAndSubtrees::Print(llvm::raw_ostream& output) const -> void {
  145. output << "- filename: " << tokens_->source().filename() << "\n"
  146. << " parse_tree: [\n";
  147. // Walk the tree just to calculate depths for each node.
  148. llvm::SmallVector<int> indents;
  149. indents.resize(subtree_sizes_.size(), 0);
  150. llvm::SmallVector<std::pair<NodeId, int>, 16> node_stack;
  151. for (NodeId n : roots()) {
  152. node_stack.push_back({n, 0});
  153. }
  154. while (!node_stack.empty()) {
  155. NodeId n = NodeId::Invalid;
  156. int depth;
  157. std::tie(n, depth) = node_stack.pop_back_val();
  158. for (NodeId sibling_n : children(n)) {
  159. indents[sibling_n.index] = depth + 1;
  160. node_stack.push_back({sibling_n, depth + 1});
  161. }
  162. }
  163. for (NodeId n : tree_->postorder()) {
  164. PrintNode(output, n, indents[n.index], /*preorder=*/false);
  165. output << ",\n";
  166. }
  167. output << " ]\n";
  168. }
  169. auto TreeAndSubtrees::PrintPreorder(llvm::raw_ostream& output) const -> void {
  170. output << "- filename: " << tokens_->source().filename() << "\n"
  171. << " parse_tree: [\n";
  172. // The parse tree is stored in postorder. The preorder can be constructed
  173. // by reversing the order of each level of siblings within an RPO. The
  174. // sibling iterators are directly built around RPO and so can be used with a
  175. // stack to produce preorder.
  176. // The roots, like siblings, are in RPO (so reversed), but we add them in
  177. // order here because we'll pop off the stack effectively reversing then.
  178. llvm::SmallVector<std::pair<NodeId, int>, 16> node_stack;
  179. for (NodeId n : roots()) {
  180. node_stack.push_back({n, 0});
  181. }
  182. while (!node_stack.empty()) {
  183. NodeId n = NodeId::Invalid;
  184. int depth;
  185. std::tie(n, depth) = node_stack.pop_back_val();
  186. if (PrintNode(output, n, depth, /*preorder=*/true)) {
  187. // Has children, so we descend. We append the children in order here as
  188. // well because they will get reversed when popped off the stack.
  189. for (NodeId sibling_n : children(n)) {
  190. node_stack.push_back({sibling_n, depth + 1});
  191. }
  192. continue;
  193. }
  194. int next_depth = node_stack.empty() ? 0 : node_stack.back().second;
  195. CARBON_CHECK(next_depth <= depth, "Cannot have the next depth increase!");
  196. for (int close_children_count : llvm::seq(0, depth - next_depth)) {
  197. (void)close_children_count;
  198. output << "]}";
  199. }
  200. // We always end with a comma and a new line as we'll move to the next
  201. // node at whatever the current level ends up being.
  202. output << " ,\n";
  203. }
  204. output << " ]\n";
  205. }
  206. auto TreeAndSubtrees::CollectMemUsage(MemUsage& mem_usage,
  207. llvm::StringRef label) const -> void {
  208. mem_usage.Collect(MemUsage::ConcatLabel(label, "subtree_sizes_"),
  209. subtree_sizes_);
  210. }
  211. auto TreeAndSubtrees::SiblingIterator::Print(llvm::raw_ostream& output) const
  212. -> void {
  213. output << node_;
  214. }
  215. } // namespace Carbon::Parse