[amdgpu][nfc] Extract more functions in LowerModuleLDS, mark more methods static
parent b9c4b95b11
commit d70e7ea0d1
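The refactor follows one pattern throughout: hoist a self-contained region of runOnModule into a static helper whose inputs and outputs are explicit parameters, then call the helper from a much shorter runOnModule. A minimal sketch of that pattern with toy types (hypothetical names such as LowerPass and partitionVariables, not the pass's real interfaces):

#include <set>
#include <string>
#include <vector>

struct Module {
  std::vector<std::string> Globals; // stand-in for LDS global variables
};

struct LowerPass {
  // Extracted helper, marked static: it has no 'this', so the compiler
  // enforces that it only touches its parameters, never pass state.
  static void partitionVariables(const Module &M,
                                 std::set<std::string> &ModuleScope,
                                 std::set<std::string> &TableLookup) {
    for (const std::string &G : M.Globals)
      (G.size() % 2 ? ModuleScope : TableLookup).insert(G);
  }

  bool runOnModule(Module &M) {
    std::set<std::string> ModuleScope, TableLookup;
    // Previously inline here; now a named, independently testable step.
    partitionVariables(M, ModuleScope, TableLookup);
    return !ModuleScope.empty() || !TableLookup.empty();
  }
};

int main() {
  Module M{{"lds.a", "lds.bb"}};
  LowerPass P;
  return P.runOnModule(M) ? 0 : 1;
}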
@@ -631,7 +631,7 @@ public:
     return tableKernelIndexCache[F];
   }
 
-  std::vector<Function *> assignLDSKernelIDToEachKernel(
+  static std::vector<Function *> assignLDSKernelIDToEachKernel(
       Module *M, DenseSet<Function *> const &KernelsThatAllocateTableLDS) {
     // Associate kernels in the set with an arbitrary but reproducible order and
     // annotate them with that order in metadata. This metadata is recognised by
@@ -680,168 +680,157 @@ public:
     return OrderedKernels;
   }
 
-  bool runOnModule(Module &M) override {
-    LLVMContext &Ctx = M.getContext();
-    CallGraph CG = CallGraph(M);
-    bool Changed = superAlignLDSGlobals(M);
-
-    Changed |= eliminateConstantExprUsesOfLDSFromAllInstructions(M);
-
-    Changed = true; // todo: narrow this down
-
-    // For each kernel, what variables does it access directly or through
-    // callees
-    LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);
-
-    // For each variable accessed through callees, which kernels access it
-    VariableFunctionMap LDSToKernelsThatNeedToAccessItIndirectly;
-    for (auto &K : LDSUsesInfo.indirect_access) {
-      Function *F = K.first;
-      assert(isKernelLDS(F));
-      for (GlobalVariable *GV : K.second) {
-        LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(F);
-      }
-    }
-
-    // Partition variables accessed indirectly into the different strategies
-    DenseSet<GlobalVariable *> ModuleScopeVariables;
-    DenseSet<GlobalVariable *> TableLookupVariables;
-    DenseSet<GlobalVariable *> KernelAccessVariables;
-
-    {
-      GlobalVariable *HybridModuleRoot =
-          LoweringKindLoc != LoweringKind::hybrid
-              ? nullptr
-              : chooseBestVariableForModuleStrategy(
-                    M.getDataLayout(),
-                    LDSToKernelsThatNeedToAccessItIndirectly);
-
-      DenseSet<Function *> const EmptySet;
-      DenseSet<Function *> const &HybridModuleRootKernels =
-          HybridModuleRoot
-              ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]
-              : EmptySet;
-
-      for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
-        // Each iteration of this loop assigns exactly one global variable to
-        // exactly one of the implementation strategies.
-
-        GlobalVariable *GV = K.first;
-        assert(AMDGPU::isLDSVariableToLower(*GV));
-        assert(K.second.size() != 0);
-
-        switch (LoweringKindLoc) {
-        case LoweringKind::module:
-          ModuleScopeVariables.insert(GV);
-          break;
-
-        case LoweringKind::table:
-          TableLookupVariables.insert(GV);
-          break;
-
-        case LoweringKind::kernel:
-          if (K.second.size() == 1) {
-            KernelAccessVariables.insert(GV);
-          } else {
-            report_fatal_error(
-                "cannot lower LDS '" + GV->getName() +
-                "' to kernel access as it is reachable from multiple kernels");
-          }
-          break;
-
-        case LoweringKind::hybrid: {
-          if (GV == HybridModuleRoot) {
-            assert(K.second.size() != 1);
-            ModuleScopeVariables.insert(GV);
-          } else if (K.second.size() == 1) {
-            KernelAccessVariables.insert(GV);
-          } else if (set_is_subset(K.second, HybridModuleRootKernels)) {
-            ModuleScopeVariables.insert(GV);
-          } else {
-            TableLookupVariables.insert(GV);
-          }
-          break;
-        }
-        }
-      }
-
-      // All LDS variables accessed indirectly have now been partitioned into
-      // the distinct lowering strategies.
-      assert(ModuleScopeVariables.size() + TableLookupVariables.size() +
-                 KernelAccessVariables.size() ==
-             LDSToKernelsThatNeedToAccessItIndirectly.size());
-    }
-
-    // If the kernel accesses a variable that is going to be stored in the
-    // module instance through a call then that kernel needs to allocate the
-    // module instance
-    DenseSet<Function *> KernelsThatAllocateModuleLDS =
-        kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
-                                                        ModuleScopeVariables);
-    DenseSet<Function *> KernelsThatAllocateTableLDS =
-        kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
-                                                        TableLookupVariables);
-
-    GlobalVariable *MaybeModuleScopeStruct = nullptr;
-    if (!ModuleScopeVariables.empty()) {
-      LDSVariableReplacement ModuleScopeReplacement =
-          createLDSVariableReplacement(M, "llvm.amdgcn.module.lds",
-                                       ModuleScopeVariables);
-      MaybeModuleScopeStruct = ModuleScopeReplacement.SGV;
-      appendToCompilerUsed(M,
-                           {static_cast<GlobalValue *>(
-                               ConstantExpr::getPointerBitCastOrAddrSpaceCast(
-                                   cast<Constant>(ModuleScopeReplacement.SGV),
-                                   Type::getInt8PtrTy(Ctx)))});
-
-      // module.lds will be allocated at zero in any kernel that allocates it
-      recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);
-
-      // historic
-      removeLocalVarsFromUsedLists(M, ModuleScopeVariables);
-
-      // Replace all uses of module scope variable from non-kernel functions
-      replaceLDSVariablesWithStruct(
-          M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
-            Instruction *I = dyn_cast<Instruction>(U.getUser());
-            if (!I) {
-              return false;
-            }
-            Function *F = I->getFunction();
-            return !isKernelLDS(F);
-          });
-
-      // Replace uses of module scope variable from kernel functions that
-      // allocate the module scope variable, otherwise leave them unchanged
-      // Record on each kernel whether the module scope global is used by it
-
-      LLVMContext &Ctx = M.getContext();
-      IRBuilder<> Builder(Ctx);
-
-      for (Function &Func : M.functions()) {
-        if (Func.isDeclaration() || !isKernelLDS(&Func))
-          continue;
-
-        if (KernelsThatAllocateModuleLDS.contains(&Func)) {
-          replaceLDSVariablesWithStruct(
-              M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
-                Instruction *I = dyn_cast<Instruction>(U.getUser());
-                if (!I) {
-                  return false;
-                }
-                Function *F = I->getFunction();
-                return F == &Func;
-              });
-
-          markUsedByKernel(Builder, &Func, ModuleScopeReplacement.SGV);
-
-        } else {
-          Func.addFnAttr("amdgpu-elide-module-lds");
-        }
-      }
-    }
-
-    // Create a struct for each kernel for the non-module-scope variables
+  static void partitionVariablesIntoIndirectStrategies(
+      Module &M, LDSUsesInfoTy const &LDSUsesInfo,
+      VariableFunctionMap &LDSToKernelsThatNeedToAccessItIndirectly,
+      DenseSet<GlobalVariable *> &ModuleScopeVariables,
+      DenseSet<GlobalVariable *> &TableLookupVariables,
+      DenseSet<GlobalVariable *> &KernelAccessVariables) {
+
+    GlobalVariable *HybridModuleRoot =
+        LoweringKindLoc != LoweringKind::hybrid
+            ? nullptr
+            : chooseBestVariableForModuleStrategy(
+                  M.getDataLayout(), LDSToKernelsThatNeedToAccessItIndirectly);
+
+    DenseSet<Function *> const EmptySet;
+    DenseSet<Function *> const &HybridModuleRootKernels =
+        HybridModuleRoot
+            ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]
+            : EmptySet;
+
+    for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
+      // Each iteration of this loop assigns exactly one global variable to
+      // exactly one of the implementation strategies.
+
+      GlobalVariable *GV = K.first;
+      assert(AMDGPU::isLDSVariableToLower(*GV));
+      assert(K.second.size() != 0);
+
+      switch (LoweringKindLoc) {
+      case LoweringKind::module:
+        ModuleScopeVariables.insert(GV);
+        break;
+
+      case LoweringKind::table:
+        TableLookupVariables.insert(GV);
+        break;
+
+      case LoweringKind::kernel:
+        if (K.second.size() == 1) {
+          KernelAccessVariables.insert(GV);
+        } else {
+          report_fatal_error(
+              "cannot lower LDS '" + GV->getName() +
+              "' to kernel access as it is reachable from multiple kernels");
+        }
+        break;
+
+      case LoweringKind::hybrid: {
+        if (GV == HybridModuleRoot) {
+          assert(K.second.size() != 1);
+          ModuleScopeVariables.insert(GV);
+        } else if (K.second.size() == 1) {
+          KernelAccessVariables.insert(GV);
+        } else if (set_is_subset(K.second, HybridModuleRootKernels)) {
+          ModuleScopeVariables.insert(GV);
+        } else {
+          TableLookupVariables.insert(GV);
+        }
+        break;
+      }
+      }
+    }
+
+    // All LDS variables accessed indirectly have now been partitioned into
+    // the distinct lowering strategies.
+    assert(ModuleScopeVariables.size() + TableLookupVariables.size() +
+               KernelAccessVariables.size() ==
+           LDSToKernelsThatNeedToAccessItIndirectly.size());
+  }
+
+  static GlobalVariable *lowerModuleScopeStructVariables(
+      Module &M, DenseSet<GlobalVariable *> const &ModuleScopeVariables,
+      DenseSet<Function *> const &KernelsThatAllocateModuleLDS) {
+    // Create a struct to hold the ModuleScopeVariables
+    // Replace all uses of those variables from non-kernel functions with the
+    // new struct instance. Replace only the uses from kernel functions that will
+    // allocate this instance. That is a space optimisation - kernels that use a
+    // subset of the module scope struct and do not need to allocate it for
+    // indirect calls will only allocate the subset they use (they do so as part
+    // of the per-kernel lowering).
+    if (ModuleScopeVariables.empty()) {
+      return nullptr;
+    }
+
+    LLVMContext &Ctx = M.getContext();
+
+    LDSVariableReplacement ModuleScopeReplacement =
+        createLDSVariableReplacement(M, "llvm.amdgcn.module.lds",
+                                     ModuleScopeVariables);
+
+    appendToCompilerUsed(M, {static_cast<GlobalValue *>(
+                                ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+                                    cast<Constant>(ModuleScopeReplacement.SGV),
+                                    Type::getInt8PtrTy(Ctx)))});
+
+    // module.lds will be allocated at zero in any kernel that allocates it
+    recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);
+
+    // historic
+    removeLocalVarsFromUsedLists(M, ModuleScopeVariables);
+
+    // Replace all uses of module scope variable from non-kernel functions
+    replaceLDSVariablesWithStruct(
+        M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
+          Instruction *I = dyn_cast<Instruction>(U.getUser());
+          if (!I) {
+            return false;
+          }
+          Function *F = I->getFunction();
+          return !isKernelLDS(F);
+        });
+
+    // Replace uses of module scope variable from kernel functions that
+    // allocate the module scope variable, otherwise leave them unchanged
+    // Record on each kernel whether the module scope global is used by it
+
+    IRBuilder<> Builder(Ctx);
+
+    for (Function &Func : M.functions()) {
+      if (Func.isDeclaration() || !isKernelLDS(&Func))
+        continue;
+
+      if (KernelsThatAllocateModuleLDS.contains(&Func)) {
+        replaceLDSVariablesWithStruct(
+            M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
+              Instruction *I = dyn_cast<Instruction>(U.getUser());
+              if (!I) {
+                return false;
+              }
+              Function *F = I->getFunction();
+              return F == &Func;
+            });
+
+        markUsedByKernel(Builder, &Func, ModuleScopeReplacement.SGV);
+
+      } else {
+        Func.addFnAttr("amdgpu-elide-module-lds");
+      }
+    }
+
+    return ModuleScopeReplacement.SGV;
+  }
+
+  static DenseMap<Function *, LDSVariableReplacement>
+  lowerKernelScopeStructVariables(
+      Module &M, LDSUsesInfoTy &LDSUsesInfo,
+      DenseSet<GlobalVariable *> const &ModuleScopeVariables,
+      DenseSet<Function *> const &KernelsThatAllocateModuleLDS,
+      GlobalVariable *MaybeModuleScopeStruct) {
+
+    // Create a struct for each kernel for the non-module-scope variables.
+
     DenseMap<Function *, LDSVariableReplacement> KernelToReplacement;
     for (Function &Func : M.functions()) {
       if (Func.isDeclaration() || !isKernelLDS(&Func))
@@ -927,6 +916,55 @@ public:
             return I && I->getFunction() == &Func;
           });
     }
+    return KernelToReplacement;
+  }
+
+  bool runOnModule(Module &M) override {
+    CallGraph CG = CallGraph(M);
+    bool Changed = superAlignLDSGlobals(M);
+
+    Changed |= eliminateConstantExprUsesOfLDSFromAllInstructions(M);
+
+    Changed = true; // todo: narrow this down
+
+    // For each kernel, what variables does it access directly or through
+    // callees
+    LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);
+
+    // For each variable accessed through callees, which kernels access it
+    VariableFunctionMap LDSToKernelsThatNeedToAccessItIndirectly;
+    for (auto &K : LDSUsesInfo.indirect_access) {
+      Function *F = K.first;
+      assert(isKernelLDS(F));
+      for (GlobalVariable *GV : K.second) {
+        LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(F);
+      }
+    }
+
+    DenseSet<GlobalVariable *> ModuleScopeVariables;
+    DenseSet<GlobalVariable *> TableLookupVariables;
+    DenseSet<GlobalVariable *> KernelAccessVariables;
+    partitionVariablesIntoIndirectStrategies(
+        M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly,
+        ModuleScopeVariables, TableLookupVariables, KernelAccessVariables);
+
+    // If the kernel accesses a variable that is going to be stored in the
+    // module instance through a call then that kernel needs to allocate the
+    // module instance
+    DenseSet<Function *> KernelsThatAllocateModuleLDS =
+        kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
+                                                        ModuleScopeVariables);
+    DenseSet<Function *> KernelsThatAllocateTableLDS =
+        kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
+                                                        TableLookupVariables);
+
+    GlobalVariable *MaybeModuleScopeStruct = lowerModuleScopeStructVariables(
+        M, ModuleScopeVariables, KernelsThatAllocateModuleLDS);
+
+    DenseMap<Function *, LDSVariableReplacement> KernelToReplacement =
+        lowerKernelScopeStructVariables(M, LDSUsesInfo, ModuleScopeVariables,
+                                        KernelsThatAllocateModuleLDS,
+                                        MaybeModuleScopeStruct);
 
     // Lower zero cost accesses to the kernel instances just created
     for (auto &GV : KernelAccessVariables) {
@@ -1132,7 +1170,7 @@ private:
   }
 
   template <typename PredicateTy>
-  void replaceLDSVariablesWithStruct(
+  static void replaceLDSVariablesWithStruct(
      Module &M, DenseSet<GlobalVariable *> const &LDSVarsToTransformArg,
      LDSVariableReplacement Replacement, PredicateTy Predicate) {
     LLVMContext &Ctx = M.getContext();
@@ -1190,9 +1228,9 @@ private:
     }
   }
 
-  void refineUsesAlignmentAndAA(Value *Ptr, Align A, const DataLayout &DL,
-                                MDNode *AliasScope, MDNode *NoAlias,
-                                unsigned MaxDepth = 5) {
+  static void refineUsesAlignmentAndAA(Value *Ptr, Align A,
+                                       const DataLayout &DL, MDNode *AliasScope,
+                                       MDNode *NoAlias, unsigned MaxDepth = 5) {
     if (!MaxDepth || (A == 1 && !AliasScope))
       return;
 
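For readers skimming the hybrid strategy in the second hunk: the switch's hybrid case reduces to a small, pure decision function. A standalone sketch with toy types (KernelSet, classifyHybrid and Strategy are hypothetical names, not the pass's real API):

#include <algorithm>
#include <cassert>
#include <set>
#include <string>

using KernelSet = std::set<std::string>;

enum class Strategy { Module, KernelAccess, TableLookup };

// Mirrors LoweringKind::hybrid above: the chosen root variable lives in the
// module struct; a variable reachable from exactly one kernel uses direct
// kernel access; one whose accessors are a subset of the root's accessors
// joins the module struct; everything else falls back to the lookup table.
static Strategy classifyHybrid(bool IsRoot, const KernelSet &Accessors,
                               const KernelSet &RootAccessors) {
  if (IsRoot) {
    assert(Accessors.size() != 1);
    return Strategy::Module;
  }
  if (Accessors.size() == 1)
    return Strategy::KernelAccess;
  if (std::includes(RootAccessors.begin(), RootAccessors.end(),
                    Accessors.begin(), Accessors.end()))
    return Strategy::Module;
  return Strategy::TableLookup;
}

int main() {
  const KernelSet Root{"k0", "k1"};
  assert(classifyHybrid(true, Root, Root) == Strategy::Module);
  assert(classifyHybrid(false, {"k0"}, Root) == Strategy::KernelAccess);
  assert(classifyHybrid(false, {"k0", "k1"}, Root) == Strategy::Module);
  assert(classifyHybrid(false, {"k0", "k2"}, Root) == Strategy::TableLookup);
  return 0;
}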