tree_and_subtrees.cpp 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/parse/tree_and_subtrees.h"
  5. namespace Carbon::Parse {
  6. TreeAndSubtrees::TreeAndSubtrees(const Lex::TokenizedBuffer& tokens,
  7. const Tree& tree)
  8. : tokens_(&tokens), tree_(&tree) {
  9. subtree_sizes_.reserve(tree_->size());
  10. // A stack of nodes which haven't yet been used as children.
  11. llvm::SmallVector<NodeId> size_stack;
  12. for (auto n : tree.postorder()) {
  13. // Nodes always include themselves.
  14. int32_t size = 1;
  15. auto kind = tree.node_kind(n);
  16. if (kind.has_child_count()) {
  17. // When the child count is set, remove the specific number from the stack.
  18. CARBON_CHECK(static_cast<int32_t>(size_stack.size()) >=
  19. kind.child_count())
  20. << "Need " << kind.child_count() << " children for " << kind
  21. << ", have " << size_stack.size() << " available";
  22. for (auto i : llvm::seq(kind.child_count())) {
  23. auto child = size_stack.pop_back_val();
  24. CARBON_CHECK((size_t)child.index < subtree_sizes_.size());
  25. size += subtree_sizes_[child.index];
  26. if (kind.has_bracket() && i == kind.child_count() - 1) {
  27. CARBON_CHECK(kind.bracket() == tree.node_kind(child))
  28. << "Node " << kind << " with child count " << kind.child_count()
  29. << " needs bracket " << kind.bracket() << ", found wrong bracket "
  30. << tree.node_kind(child);
  31. }
  32. }
  33. } else {
  34. while (true) {
  35. CARBON_CHECK(!size_stack.empty())
  36. << "Node " << kind << " is missing bracket " << kind.bracket();
  37. auto child = size_stack.pop_back_val();
  38. size += subtree_sizes_[child.index];
  39. if (kind.bracket() == tree.node_kind(child)) {
  40. break;
  41. }
  42. }
  43. }
  44. size_stack.push_back(n);
  45. subtree_sizes_.push_back(size);
  46. }
  47. CARBON_CHECK(static_cast<int>(subtree_sizes_.size()) == tree_->size());
  48. // Remaining nodes should all be roots in the tree; make sure they line up.
  49. CARBON_CHECK(size_stack.back().index ==
  50. static_cast<int32_t>(tree_->size()) - 1)
  51. << size_stack.back() << " " << tree_->size() - 1;
  52. int prev_index = -1;
  53. for (const auto& n : size_stack) {
  54. CARBON_CHECK(n.index - subtree_sizes_[n.index] == prev_index)
  55. << "NodeId " << n << " is a root " << tree_->node_kind(n)
  56. << " with subtree_size " << subtree_sizes_[n.index]
  57. << ", but previous root was at " << prev_index << ".";
  58. prev_index = n.index;
  59. }
  60. }
  61. auto TreeAndSubtrees::VerifyExtract(NodeId node_id, NodeKind kind,
  62. ErrorBuilder* trace) const -> bool {
  63. switch (kind) {
  64. #define CARBON_PARSE_NODE_KIND(Name) \
  65. case NodeKind::Name: \
  66. return VerifyExtractAs<Name>(node_id, trace).has_value();
  67. #include "toolchain/parse/node_kind.def"
  68. }
  69. }
  70. auto TreeAndSubtrees::Verify() const -> ErrorOr<Success> {
  71. // Validate that each node extracts successfully when not marked as having an
  72. // error.
  73. //
  74. // Without this code, a 10 mloc test case of lex & parse takes 4.129 s ± 0.041
  75. // s. With this additional verification, it takes 5.768 s ± 0.036 s.
  76. for (NodeId n : tree_->postorder()) {
  77. if (tree_->node_has_error(n)) {
  78. continue;
  79. }
  80. auto node_kind = tree_->node_kind(n);
  81. if (!VerifyExtract(n, node_kind, nullptr)) {
  82. ErrorBuilder trace;
  83. trace << llvm::formatv(
  84. "NodeId #{0} couldn't be extracted as a {1}. Trace:\n", n, node_kind);
  85. VerifyExtract(n, node_kind, &trace);
  86. return trace;
  87. }
  88. }
  89. // Validate the roots. Also ensures Tree::ExtractFile() doesn't error.
  90. if (!TryExtractNodeFromChildren<File>(NodeId::Invalid, roots(), nullptr)) {
  91. ErrorBuilder trace;
  92. trace << "Roots of tree couldn't be extracted as a `File`. Trace:\n";
  93. TryExtractNodeFromChildren<File>(NodeId::Invalid, roots(), &trace);
  94. return trace;
  95. }
  96. return Success();
  97. }
  98. auto TreeAndSubtrees::postorder(NodeId n) const
  99. -> llvm::iterator_range<Tree::PostorderIterator> {
  100. // The postorder ends after this node, the root, and begins at the start of
  101. // its subtree.
  102. int start_index = n.index - subtree_sizes_[n.index] + 1;
  103. return Tree::PostorderIterator::MakeRange(NodeId(start_index), n);
  104. }
  105. auto TreeAndSubtrees::children(NodeId n) const
  106. -> llvm::iterator_range<SiblingIterator> {
  107. CARBON_CHECK(n.is_valid());
  108. int end_index = n.index - subtree_sizes_[n.index];
  109. return llvm::iterator_range<SiblingIterator>(
  110. SiblingIterator(*this, NodeId(n.index - 1)),
  111. SiblingIterator(*this, NodeId(end_index)));
  112. }
  113. auto TreeAndSubtrees::roots() const -> llvm::iterator_range<SiblingIterator> {
  114. return llvm::iterator_range<SiblingIterator>(
  115. SiblingIterator(*this,
  116. NodeId(static_cast<int>(subtree_sizes_.size()) - 1)),
  117. SiblingIterator(*this, NodeId(-1)));
  118. }
  119. auto TreeAndSubtrees::PrintNode(llvm::raw_ostream& output, NodeId n, int depth,
  120. bool preorder) const -> bool {
  121. output.indent(2 * (depth + 2));
  122. output << "{";
  123. // If children are being added, include node_index in order to disambiguate
  124. // nodes.
  125. if (preorder) {
  126. output << "node_index: " << n << ", ";
  127. }
  128. output << "kind: '" << tree_->node_kind(n) << "', text: '"
  129. << tokens_->GetTokenText(tree_->node_token(n)) << "'";
  130. if (tree_->node_has_error(n)) {
  131. output << ", has_error: yes";
  132. }
  133. if (subtree_sizes_[n.index] > 1) {
  134. output << ", subtree_size: " << subtree_sizes_[n.index];
  135. if (preorder) {
  136. output << ", children: [\n";
  137. return true;
  138. }
  139. }
  140. output << "}";
  141. return false;
  142. }
  143. auto TreeAndSubtrees::Print(llvm::raw_ostream& output) const -> void {
  144. output << "- filename: " << tokens_->source().filename() << "\n"
  145. << " parse_tree: [\n";
  146. // Walk the tree just to calculate depths for each node.
  147. llvm::SmallVector<int> indents;
  148. indents.resize(subtree_sizes_.size(), 0);
  149. llvm::SmallVector<std::pair<NodeId, int>, 16> node_stack;
  150. for (NodeId n : roots()) {
  151. node_stack.push_back({n, 0});
  152. }
  153. while (!node_stack.empty()) {
  154. NodeId n = NodeId::Invalid;
  155. int depth;
  156. std::tie(n, depth) = node_stack.pop_back_val();
  157. for (NodeId sibling_n : children(n)) {
  158. indents[sibling_n.index] = depth + 1;
  159. node_stack.push_back({sibling_n, depth + 1});
  160. }
  161. }
  162. for (NodeId n : tree_->postorder()) {
  163. PrintNode(output, n, indents[n.index], /*preorder=*/false);
  164. output << ",\n";
  165. }
  166. output << " ]\n";
  167. }
  168. auto TreeAndSubtrees::PrintPreorder(llvm::raw_ostream& output) const -> void {
  169. output << "- filename: " << tokens_->source().filename() << "\n"
  170. << " parse_tree: [\n";
  171. // The parse tree is stored in postorder. The preorder can be constructed
  172. // by reversing the order of each level of siblings within an RPO. The
  173. // sibling iterators are directly built around RPO and so can be used with a
  174. // stack to produce preorder.
  175. // The roots, like siblings, are in RPO (so reversed), but we add them in
  176. // order here because we'll pop off the stack effectively reversing then.
  177. llvm::SmallVector<std::pair<NodeId, int>, 16> node_stack;
  178. for (NodeId n : roots()) {
  179. node_stack.push_back({n, 0});
  180. }
  181. while (!node_stack.empty()) {
  182. NodeId n = NodeId::Invalid;
  183. int depth;
  184. std::tie(n, depth) = node_stack.pop_back_val();
  185. if (PrintNode(output, n, depth, /*preorder=*/true)) {
  186. // Has children, so we descend. We append the children in order here as
  187. // well because they will get reversed when popped off the stack.
  188. for (NodeId sibling_n : children(n)) {
  189. node_stack.push_back({sibling_n, depth + 1});
  190. }
  191. continue;
  192. }
  193. int next_depth = node_stack.empty() ? 0 : node_stack.back().second;
  194. CARBON_CHECK(next_depth <= depth) << "Cannot have the next depth increase!";
  195. for (int close_children_count : llvm::seq(0, depth - next_depth)) {
  196. (void)close_children_count;
  197. output << "]}";
  198. }
  199. // We always end with a comma and a new line as we'll move to the next
  200. // node at whatever the current level ends up being.
  201. output << " ,\n";
  202. }
  203. output << " ]\n";
  204. }
  205. auto TreeAndSubtrees::CollectMemUsage(MemUsage& mem_usage,
  206. llvm::StringRef label) const -> void {
  207. mem_usage.Add(MemUsage::ConcatLabel(label, "subtree_sizes_"), subtree_sizes_);
  208. }
  209. auto TreeAndSubtrees::SiblingIterator::Print(llvm::raw_ostream& output) const
  210. -> void {
  211. output << node_;
  212. }
  213. } // namespace Carbon::Parse