| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296 |
- // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
- // Exceptions. See /LICENSE for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- #ifndef CARBON_TOOLCHAIN_SEM_IR_INST_NAMER_H_
- #define CARBON_TOOLCHAIN_SEM_IR_INST_NAMER_H_
- #include "common/type_enum.h"
- #include "llvm/Support/raw_ostream.h"
- #include "toolchain/lex/tokenized_buffer.h"
- #include "toolchain/parse/tree.h"
- #include "toolchain/sem_ir/file.h"
- #include "toolchain/sem_ir/ids.h"
- #include "toolchain/sem_ir/inst_categories.h"
- #include "toolchain/sem_ir/inst_fingerprinter.h"
- #include "toolchain/sem_ir/typed_insts.h"
- namespace Carbon::SemIR {
- // Assigns names to instructions, blocks, and scopes in the Semantics IR.
- //
- // If `<unexpected>` occurs in output of valid SemIR, it often means the
- // instruction needs to be handled by `NamingContext::NameInst` (see the cpp
- // file). Note that `<unexpected>` can occur in invalid SemIR just because we're
- // unable to correctly walk the SemIR.
- class InstNamer {
- public:
- // int32_t matches the input value size.
- enum class ScopeId : int32_t {
- None = -1,
- // The top-level scopes.
- File = 0,
- Generated = 1,
- Imports = 2,
- Constants = 3,
- // The first entity scope; see entities in `ScopeIdTypeEnum`.
- FirstEntityScope = 4,
- };
- static_assert(sizeof(ScopeId) == sizeof(AnyIdBase));
- // A wrapper around InterfaceId for dealing with the interface-with-self scope
- // nested within the interface entity's scope.
- struct InterfaceWithSelfId {
- static constexpr llvm::StringLiteral Label = "interface_with_self";
- InterfaceId id;
- };
- // A wrapper around NamedConstraintId for dealing with the
- // constraint-with-self scope nested within the constraint entity's scope.
- struct NamedConstraintWithSelfId {
- static constexpr llvm::StringLiteral Label = "constraint_with_self";
- NamedConstraintId id;
- };
- // Entities whose scopes get entries from `ScopeId`.
- using ScopeIdTypeEnum =
- TypeEnum<AssociatedConstantId, ClassId, CppOverloadSetId, FunctionId,
- ImplId, InterfaceId, InterfaceWithSelfId, NamedConstraintId,
- NamedConstraintWithSelfId, RequireImplsId, SpecificInterfaceId,
- VtableId>;
- // Construct the instruction namer, and assign names to all instructions in
- // the provided file.
- explicit InstNamer(const File* sem_ir, int total_ir_count);
- // Returns the scope ID corresponding to an ID of a function, class,
- // interface, or named constraint.
- template <typename IdT>
- requires ScopeIdTypeEnum::Contains<IdT>
- auto GetScopeFor(IdT id) const -> ScopeId {
- int32_t index;
- if constexpr (std::is_same_v<IdT, ClassId>) {
- index = sem_ir_->classes().GetRawIndex(id);
- } else if constexpr (std::is_same_v<IdT, CppOverloadSetId>) {
- index = sem_ir_->cpp_overload_sets().GetRawIndex(id);
- } else if constexpr (std::is_same_v<IdT, AssociatedConstantId>) {
- index = sem_ir_->associated_constants().GetRawIndex(id);
- } else if constexpr (std::is_same_v<IdT, FunctionId>) {
- index = sem_ir_->functions().GetRawIndex(id);
- } else if constexpr (std::is_same_v<IdT, ImplId>) {
- index = sem_ir_->impls().GetRawIndex(id);
- } else if constexpr (std::is_same_v<IdT, InterfaceId>) {
- index = sem_ir_->interfaces().GetRawIndex(id);
- } else if constexpr (std::is_same_v<IdT, InterfaceWithSelfId>) {
- index = sem_ir_->interfaces().GetRawIndex(id.id);
- } else if constexpr (std::is_same_v<IdT, NamedConstraintId>) {
- index = sem_ir_->named_constraints().GetRawIndex(id);
- } else if constexpr (std::is_same_v<IdT, NamedConstraintWithSelfId>) {
- index = sem_ir_->named_constraints().GetRawIndex(id.id);
- } else if constexpr (std::is_same_v<IdT, RequireImplsId>) {
- index = sem_ir_->require_impls().GetRawIndex(id);
- } else if constexpr (std::is_same_v<IdT, SpecificInterfaceId>) {
- index = sem_ir_->specific_interfaces().GetRawIndex(id);
- } else if constexpr (std::is_same_v<IdT, VtableId>) {
- index = sem_ir_->vtables().GetRawIndex(id);
- } else {
- index = id.index;
- }
- return static_cast<ScopeId>(GetScopeIdOffset(ScopeIdTypeEnum::For<IdT>) +
- index);
- }
- // Returns the scope ID corresponding to a generic. A generic object shares
- // its scope with its generic entity.
- auto GetScopeFor(GenericId id) const -> ScopeId {
- return generic_scopes_[sem_ir_->generics().GetRawIndex(id)];
- }
- // Returns the IR name for the specified scope.
- auto GetScopeName(ScopeId scope) const -> std::string;
- // Returns the name for a parent NameScope. Does not return a name for
- // namespaces. Used as part of naming functions with their containing scope.
- auto GetNameForParentNameScope(NameScopeId name_scope_id) -> llvm::StringRef;
- // Returns the IR name to use for a function, class, interface, etc.
- template <typename IdT>
- requires(ScopeIdTypeEnum::Contains<IdT> || std::same_as<IdT, GenericId>)
- auto GetNameFor(IdT id) const -> std::string {
- if (!id.has_value()) {
- return "invalid";
- }
- return GetScopeName(GetScopeFor(id));
- }
- // Returns the IR name to use for an instruction within its own scope, without
- // any prefix. Returns an empty string if there isn't a good name.
- auto GetUnscopedNameFor(InstId inst_id) const -> llvm::StringRef;
- // Returns the IR name to use for an instruction, when referenced from a given
- // scope.
- auto GetNameFor(ScopeId scope_id, InstId inst_id) const -> std::string;
- // Returns the IR name to use for a label within its own scope, without any
- // prefix. Returns an empty string if there isn't a good name.
- auto GetUnscopedLabelFor(InstBlockId block_id) const -> llvm::StringRef;
- // Returns the IR name to use for a label, when referenced from a given scope.
- auto GetLabelFor(ScopeId scope_id, InstBlockId block_id) const -> std::string;
- // Returns true if the instruction has a specific name assigned.
- auto has_name(InstId inst_id) const -> bool;
- private:
- // A space in which unique names can be allocated.
- class Namespace {
- private:
- // A result of a name lookup.
- struct NameResult;
- public:
- // A name in a namespace, which might be redirected to refer to another name
- // for disambiguation purposes.
- class Name {
- public:
- explicit Name() : value_(nullptr) {}
- explicit Name(llvm::StringMap<NameResult>::iterator it,
- size_t base_name_size)
- : value_(&*it), base_name_size_(base_name_size) {}
- explicit operator bool() const { return value_; }
- // Returns the disambiguated name.
- auto GetFullName() const -> llvm::StringRef;
- // Returns the base name, without any disambiguators.
- auto GetBaseName() const -> llvm::StringRef;
- auto SetFallback(Name name) -> void { value_->second.fallback = name; }
- auto SetAmbiguous() -> void { value_->second.ambiguous = true; }
- private:
- llvm::StringMapEntry<NameResult>* value_;
- // The base name length within `value_->first()`.
- size_t base_name_size_;
- };
- // Allocates and returns a name, handling ambiguity.
- auto AllocateName(const InstNamer& inst_namer,
- std::variant<LocId, uint64_t> loc_id_or_fingerprint,
- std::string name) -> Name;
- private:
- struct NameResult {
- bool ambiguous = false;
- Name fallback = Name();
- };
- llvm::StringMap<NameResult> allocated_;
- };
- // A named scope that contains named entities.
- struct Scope {
- Namespace::Name name;
- Namespace insts;
- Namespace labels;
- };
- // Helper class for naming a single instruction.
- class NamingContext;
- auto GetScopeInfo(ScopeId scope_id) -> Scope& {
- return scopes_[static_cast<int>(scope_id)];
- }
- auto GetScopeInfo(ScopeId scope_id) const -> const Scope& {
- return scopes_[static_cast<int>(scope_id)];
- }
- // For the given `IdT`, returns its start offset in the `ScopeId` space. Each
- // of `ScopeIdTypeEnum` is stored sequentially. When called with
- // `ScopeIdTypeEnum::None`, returns the full count of scopes.
- auto GetScopeIdOffset(ScopeIdTypeEnum id_enum) const -> int;
- auto AddBlockLabel(
- ScopeId scope_id, InstBlockId block_id, std::string name = "",
- std::variant<LocId, uint64_t> loc_id_or_fingerprint = LocId::None)
- -> void;
- // Finds and adds a suitable block label for the given SemIR instruction that
- // represents some kind of branch.
- auto AddBlockLabel(ScopeId scope_id, LocId loc_id, AnyBranch branch) -> void;
- // Adds a scope and instructions to walk. Avoids recursion while allowing
- // the loop to below add more instructions during iteration. The new
- // instructions are pushed such that they will be the next to be walked.
- // Internally that means they are reversed and added to the end of the vector,
- // since we pop from the back of the vector.
- auto PushBlockInsts(ScopeId scope_id, llvm::ArrayRef<InstId> inst_ids)
- -> void;
- auto PushBlockId(ScopeId scope_id, InstBlockId block_id) -> void;
- // Pushes generic information for an entity.
- auto PushGeneric(ScopeId scope_id, GenericId generic_id) -> void;
- // Names an entity, and pushes processing of its blocks.
- auto PushEntity(AssociatedConstantId associated_constant_id, ScopeId scope_id,
- Scope& scope) -> void;
- auto PushEntity(ClassId class_id, ScopeId scope_id, Scope& scope) -> void;
- auto PushEntity(FunctionId function_id, ScopeId scope_id, Scope& scope)
- -> void;
- auto PushEntity(CppOverloadSetId cpp_overload_set_id, ScopeId scope_id,
- Scope& scope) -> void;
- auto PushEntity(ImplId impl_id, ScopeId scope_id, Scope& scope) -> void;
- auto PushEntity(InterfaceId interface_id, ScopeId scope_id, Scope& scope)
- -> void;
- auto PushEntity(InterfaceWithSelfId interface_id, ScopeId scope_id,
- Scope& scope) -> void;
- auto PushEntity(NamedConstraintId named_constraint_id, ScopeId scope_id,
- Scope& scope) -> void;
- auto PushEntity(NamedConstraintWithSelfId named_constraint_id,
- ScopeId scope_id, Scope& scope) -> void;
- auto PushEntity(RequireImplsId require_impls_id, ScopeId scope_id,
- Scope& scope) -> void;
- auto PushEntity(VtableId vtable_id, ScopeId scope_id, Scope& scope) -> void;
- // Always returns the name of the entity. May push it if it has not yet been
- // pushed.
- template <typename EntityIdT>
- auto MaybePushEntity(EntityIdT entity_id) -> llvm::StringRef {
- auto scope_id = GetScopeFor(entity_id);
- auto& scope = GetScopeInfo(scope_id);
- if (!scope.name) {
- PushEntity(entity_id, scope_id, scope);
- }
- return scope.name.GetBaseName();
- }
- const File* sem_ir_;
- InstFingerprinter fingerprinter_;
- // The namespace for entity names. Names within this namespace are prefixed
- // with `@` in formatted SemIR.
- Namespace globals_;
- // The enclosing scope and name for each instruction, indexed by the InstId's
- // index.
- std::vector<std::pair<ScopeId, Namespace::Name>> insts_;
- // The enclosing scope and name for each block that might be a branch target,
- // indexed by the InstBlockId's index.
- std::vector<std::pair<ScopeId, Namespace::Name>> labels_;
- // The scopes corresponding to ScopeId values.
- std::vector<Scope> scopes_;
- // The scope IDs corresponding to generics. The vector indexes are the
- // GenericId index.
- std::vector<ScopeId> generic_scopes_;
- // The stack of instructions to name.
- llvm::SmallVector<std::pair<ScopeId, InstId>> inst_stack_;
- // The stack of blocks to traverse.
- llvm::SmallVector<std::pair<ScopeId, InstBlockId>> inst_block_stack_;
- };
- } // namespace Carbon::SemIR
- #endif // CARBON_TOOLCHAIN_SEM_IR_INST_NAMER_H_
|