Skip to content

Commit

Permalink
[include-cleaner] Ranking of providers based on hints
Browse files Browse the repository at this point in the history
Introduce signals to rank providers of a symbol.

Differential Revision: https://reviews.llvm.org/D139921
  • Loading branch information
kadircet committed Jan 23, 2023
1 parent 92787e3 commit 749c6a7
Show file tree
Hide file tree
Showing 11 changed files with 624 additions and 105 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include <memory>
#include <utility>
#include <vector>

namespace llvm {
Expand Down Expand Up @@ -71,7 +72,11 @@ struct Symbol {
// Order must match Kind enum!
std::variant<const Decl *, struct Macro> Storage;

Symbol(decltype(Storage) Sentinel) : Storage(std::move(Sentinel)) {}
// Disambiguation tag to make sure we can call the right constructor from
// DenseMapInfo methods.
struct SentinelTag {};
Symbol(SentinelTag, decltype(Storage) Sentinel)
: Storage(std::move(Sentinel)) {}
friend llvm::DenseMapInfo<Symbol>;
};
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Symbol &);
Expand Down Expand Up @@ -117,6 +122,7 @@ struct Header {

Kind kind() const { return static_cast<Kind>(Storage.index()); }
bool operator==(const Header &RHS) const { return Storage == RHS.Storage; }
bool operator<(const Header &RHS) const;

const FileEntry *physical() const { return std::get<Physical>(Storage); }
tooling::stdlib::Header standard() const {
Expand All @@ -127,6 +133,13 @@ struct Header {
private:
// Order must match Kind enum!
std::variant<const FileEntry *, tooling::stdlib::Header, StringRef> Storage;

// Disambiguation tag to make sure we can call the right constructor from
// DenseMapInfo methods.
struct SentinelTag {};
Header(SentinelTag, decltype(Storage) Sentinel)
: Storage(std::move(Sentinel)) {}
friend llvm::DenseMapInfo<Header>;
};
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Header &);

Expand Down Expand Up @@ -178,8 +191,12 @@ template <> struct DenseMapInfo<clang::include_cleaner::Symbol> {
using Outer = clang::include_cleaner::Symbol;
using Base = DenseMapInfo<decltype(Outer::Storage)>;

static inline Outer getEmptyKey() { return {Base::getEmptyKey()}; }
static inline Outer getTombstoneKey() { return {Base::getTombstoneKey()}; }
static inline Outer getEmptyKey() {
return {Outer::SentinelTag{}, Base::getEmptyKey()};
}
static inline Outer getTombstoneKey() {
return {Outer::SentinelTag{}, Base::getTombstoneKey()};
}
static unsigned getHashValue(const Outer &Val) {
return Base::getHashValue(Val.Storage);
}
Expand All @@ -202,6 +219,23 @@ template <> struct DenseMapInfo<clang::include_cleaner::Macro> {
return Base::isEqual(LHS.Definition, RHS.Definition);
}
};
template <> struct DenseMapInfo<clang::include_cleaner::Header> {
using Outer = clang::include_cleaner::Header;
using Base = DenseMapInfo<decltype(Outer::Storage)>;

static inline Outer getEmptyKey() {
return {Outer::SentinelTag{}, Base::getEmptyKey()};
}
static inline Outer getTombstoneKey() {
return {Outer::SentinelTag{}, Base::getTombstoneKey()};
}
static unsigned getHashValue(const Outer &Val) {
return Base::getHashValue(Val.Storage);
}
static bool isEqual(const Outer &LHS, const Outer &RHS) {
return Base::isEqual(LHS.Storage, RHS.Storage);
}
};
} // namespace llvm

#endif
18 changes: 4 additions & 14 deletions clang-tools-extra/include-cleaner/lib/Analysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,19 @@
#include "clang-include-cleaner/Types.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Tooling/Core/Replacement.h"
#include "clang/Tooling/Inclusions/HeaderIncludes.h"
#include "clang/Tooling/Inclusions/StandardLibrary.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"

namespace clang::include_cleaner {

namespace {
// Gets all the providers for a symbol by tarversing each location.
llvm::SmallVector<Header> headersForSymbol(const Symbol &S,
const SourceManager &SM,
const PragmaIncludes *PI) {
llvm::SmallVector<Header> Headers;
for (auto &Loc : locateSymbol(S))
Headers.append(findHeaders(Loc, SM, PI));
return Headers;
}
} // namespace

void walkUsed(llvm::ArrayRef<Decl *> ASTRoots,
llvm::ArrayRef<SymbolReference> MacroRefs,
const PragmaIncludes *PI, const SourceManager &SM,
Expand All @@ -55,7 +45,7 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots,
assert(MacroRef.Target.kind() == Symbol::Macro);
if (!SM.isWrittenInMainFile(SM.getSpellingLoc(MacroRef.RefLocation)))
continue;
CB(MacroRef, findHeaders(MacroRef.Target.macro().Definition, SM, PI));
CB(MacroRef, headersForSymbol(MacroRef.Target, SM, PI));
}
}

Expand Down
52 changes: 14 additions & 38 deletions clang-tools-extra/include-cleaner/lib/AnalysisInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,18 @@
#ifndef CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H
#define CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H

#include "TypesInternal.h"
#include "clang-include-cleaner/Record.h"
#include "clang-include-cleaner/Types.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Tooling/Inclusions/StandardLibrary.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include <variant>
#include <vector>

namespace clang {
class ASTContext;
class Decl;
class HeaderSearch;
class NamedDecl;
class SourceLocation;
namespace include_cleaner {

/// Traverses part of the AST from \p Root, finding uses of symbols.
Expand All @@ -51,40 +50,20 @@ namespace include_cleaner {
void walkAST(Decl &Root,
llvm::function_ref<void(SourceLocation, NamedDecl &, RefType)>);

/// A place where a symbol can be provided.
/// It is either a physical file of the TU (SourceLocation) or a logical
/// location in the standard library (stdlib::Symbol).
struct SymbolLocation {
enum Kind {
/// A position within a source file (or macro expansion) parsed by clang.
Physical,
/// A recognized standard library symbol, like std::string.
Standard,
};

SymbolLocation(SourceLocation S) : Storage(S) {}
SymbolLocation(tooling::stdlib::Symbol S) : Storage(S) {}
/// Finds the headers that provide the symbol location.
llvm::SmallVector<Hinted<Header>> findHeaders(const SymbolLocation &Loc,
const SourceManager &SM,
const PragmaIncludes *PI);

Kind kind() const { return static_cast<Kind>(Storage.index()); }
bool operator==(const SymbolLocation &RHS) const {
return Storage == RHS.Storage;
}
SourceLocation physical() const { return std::get<Physical>(Storage); }
tooling::stdlib::Symbol standard() const {
return std::get<Standard>(Storage);
}
/// A set of locations that provides the declaration.
std::vector<Hinted<SymbolLocation>> locateSymbol(const Symbol &S);

private:
// Order must match Kind enum!
std::variant<SourceLocation, tooling::stdlib::Symbol> Storage;
};
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolLocation &);

/// Finds the headers that provide the symbol location.
// FIXME: expose signals
llvm::SmallVector<Header> findHeaders(const SymbolLocation &Loc,
const SourceManager &SM,
const PragmaIncludes *PI);
/// Gets all the providers for a symbol by traversing each location.
/// Returned headers are sorted by relevance, first element is the most
/// likely provider for the symbol.
llvm::SmallVector<Header> headersForSymbol(const Symbol &S,
const SourceManager &SM,
const PragmaIncludes *PI);

/// Write an HTML summary of the analysis to the given stream.
void writeHTMLReport(FileID File, const Includes &,
Expand All @@ -93,9 +72,6 @@ void writeHTMLReport(FileID File, const Includes &,
HeaderSearch &HS, PragmaIncludes *PI,
llvm::raw_ostream &OS);

/// A set of locations that provides the declaration.
std::vector<SymbolLocation> locateSymbol(const Symbol &S);

} // namespace include_cleaner
} // namespace clang

Expand Down
148 changes: 133 additions & 15 deletions clang-tools-extra/include-cleaner/lib/FindHeaders.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,110 @@
//===----------------------------------------------------------------------===//

#include "AnalysisInternal.h"
#include "TypesInternal.h"
#include "clang-include-cleaner/Record.h"
#include "clang-include-cleaner/Types.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/Basic/FileEntry.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Tooling/Inclusions/StandardLibrary.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <string>
#include <utility>

namespace clang::include_cleaner {
namespace {
llvm::SmallVector<Hinted<Header>>
applyHints(llvm::SmallVector<Hinted<Header>> Headers, Hints H) {
for (auto &Header : Headers)
Header.Hint |= H;
return Headers;
}

llvm::SmallVector<Header> ranked(llvm::SmallVector<Hinted<Header>> Headers) {
llvm::stable_sort(llvm::reverse(Headers),
[](const Hinted<Header> &LHS, const Hinted<Header> &RHS) {
return LHS < RHS;
});
return llvm::SmallVector<Header>(Headers.begin(), Headers.end());
}

// Return the basename from a verbatim header spelling, leaves only the file
// name.
llvm::StringRef basename(llvm::StringRef Header) {
Header = Header.trim("<>\"");
if (auto LastSlash = Header.rfind('/'); LastSlash != Header.npos)
Header = Header.drop_front(LastSlash + 1);
// Drop everything after first `.` (dot).
// foo.h -> foo
// foo.cu.h -> foo
Header = Header.substr(0, Header.find('.'));
return Header;
}

llvm::SmallVector<Header> findHeaders(const SymbolLocation &Loc,
const SourceManager &SM,
const PragmaIncludes *PI) {
llvm::SmallVector<Header> Results;
// Check if spelling of \p H matches \p DeclName.
bool nameMatch(llvm::StringRef DeclName, Header H) {
switch (H.kind()) {
case Header::Physical:
return basename(H.physical()->getName()).equals_insensitive(DeclName);
case Header::Standard:
return basename(H.standard().name()).equals_insensitive(DeclName);
case Header::Verbatim:
return basename(H.verbatim()).equals_insensitive(DeclName);
}
llvm_unreachable("unhandled Header kind!");
}

llvm::StringRef symbolName(const Symbol &S) {
switch (S.kind()) {
case Symbol::Declaration:
// Unnamed decls like operators and anonymous structs won't get any name
// match.
if (const auto *ND = llvm::dyn_cast<NamedDecl>(&S.declaration()))
if (auto *II = ND->getIdentifier())
return II->getName();
return "";
case Symbol::Macro:
return S.macro().Name->getName();
}
llvm_unreachable("unhandled Symbol kind!");
}

} // namespace

llvm::SmallVector<Hinted<Header>> findHeaders(const SymbolLocation &Loc,
const SourceManager &SM,
const PragmaIncludes *PI) {
auto IsPublicHeader = [&PI](const FileEntry *FE) {
return (PI->isPrivate(FE) || !PI->isSelfContained(FE))
? Hints::None
: Hints::PublicHeader;
};
llvm::SmallVector<Hinted<Header>> Results;
switch (Loc.kind()) {
case SymbolLocation::Physical: {
FileID FID = SM.getFileID(SM.getExpansionLoc(Loc.physical()));
const FileEntry *FE = SM.getFileEntryForID(FID);
if (!PI) {
return FE ? llvm::SmallVector<Header>{Header(FE)}
: llvm::SmallVector<Header>();
}
if (!FE)
return {};
if (!PI)
return {{FE, Hints::PublicHeader}};
while (FE) {
Results.push_back(Header(FE));
Hints CurrentHints = IsPublicHeader(FE);
Results.emplace_back(FE, CurrentHints);
// FIXME: compute transitive exporter headers.
for (const auto *Export : PI->getExporters(FE, SM.getFileManager()))
Results.push_back(Header(Export));
Results.emplace_back(Export, IsPublicHeader(Export));

llvm::StringRef VerbatimSpelling = PI->getPublic(FE);
if (!VerbatimSpelling.empty()) {
Results.push_back(VerbatimSpelling);
if (auto Verbatim = PI->getPublic(FE); !Verbatim.empty()) {
Results.emplace_back(Verbatim,
Hints::PublicHeader | Hints::PreferredHeader);
break;
}
if (PI->isSelfContained(FE) || FID == SM.getMainFileID())
Expand All @@ -46,14 +124,54 @@ llvm::SmallVector<Header> findHeaders(const SymbolLocation &Loc,
}
case SymbolLocation::Standard: {
for (const auto &H : Loc.standard().headers()) {
Results.push_back(H);
Results.emplace_back(H, Hints::PublicHeader);
for (const auto *Export : PI->getExporters(H, SM.getFileManager()))
Results.push_back(Header(Export));
Results.emplace_back(Header(Export), IsPublicHeader(Export));
}
// StandardLibrary returns headers in preference order, so only mark the
// first.
if (!Results.empty())
Results.front().Hint |= Hints::PreferredHeader;
return Results;
}
}
llvm_unreachable("unhandled SymbolLocation kind!");
}

llvm::SmallVector<Header> headersForSymbol(const Symbol &S,
const SourceManager &SM,
const PragmaIncludes *PI) {
// Get headers for all the locations providing Symbol. Same header can be
// reached through different traversals, deduplicate those into a single
// Header by merging their hints.
llvm::SmallVector<Hinted<Header>> Headers;
for (auto &Loc : locateSymbol(S))
Headers.append(applyHints(findHeaders(Loc, SM, PI), Loc.Hint));
// If two Headers probably refer to the same file (e.g. Verbatim(foo.h) and
// Physical(/path/to/foo.h), we won't deduplicate them or merge their hints
llvm::stable_sort(
Headers, [](const Hinted<Header> &LHS, const Hinted<Header> &RHS) {
return static_cast<Header>(LHS) < static_cast<Header>(RHS);
});
auto *Write = Headers.begin();
for (auto *Read = Headers.begin(); Read != Headers.end(); ++Write) {
*Write = *Read++;
while (Read != Headers.end() &&
static_cast<Header>(*Write) == static_cast<Header>(*Read)) {
Write->Hint |= Read->Hint;
++Read;
}
}
Headers.erase(Write, Headers.end());

// Add name match hints to deduplicated providers.
llvm::StringRef SymbolName = symbolName(S);
for (auto &H : Headers) {
if (nameMatch(SymbolName, H))
H.Hint |= Hints::PreferredHeader;
}

// FIXME: Introduce a MainFile header kind or signal and boost it.
return ranked(std::move(Headers));
}
} // namespace clang::include_cleaner
Loading

0 comments on commit 749c6a7

Please sign in to comment.