Skip to content

Commit 8ee5460

Browse files
dlei6g authored and sys_zuul committed
Directly use registers in ArgBlkVar for stackcall:
Instead of doing an extra move from ArgBlkVar to/from local regs, directly use ArgBlkVar as an alias. Also includes other refactors to clean up the stack call code. Change-Id: Iee1b7d686e34ba281add456f38db74e03ccdcd78
1 parent b813433 commit 8ee5460

File tree

4 files changed

+102
-146
lines changed

4 files changed

+102
-146
lines changed

IGC/Compiler/CISACodeGen/CShader.cpp

Lines changed: 73 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1816,6 +1816,17 @@ VISA_Type CShader::GetType(llvm::Type* type)
18161816
return IGC::GetType(type, GetContext());
18171817
}
18181818

1819+
uint32_t CShader::GetNumElts(llvm::Type* type, bool isUniform)
1820+
{
1821+
uint32_t numElts = isUniform ? 1 : numLanes(m_SIMDSize);
1822+
if (type->isVectorTy())
1823+
{
1824+
IGC_ASSERT(type->getVectorElementType()->isIntegerTy() || type->getVectorElementType()->isFloatingPointTy());
1825+
numElts *= (uint16_t)type->getVectorNumElements();
1826+
}
1827+
return numElts;
1828+
}
1829+
18191830
uint64_t IGC::GetImmediateVal(llvm::Value* Const)
18201831
{
18211832
// Constant integer
@@ -2129,15 +2140,10 @@ CVariable* CShader::getOrCreateReturnSymbol(llvm::Function* F)
21292140

21302141
IGC_ASSERT(retType->isSingleValueType());
21312142
VISA_Type type = GetType(retType);
2132-
uint16_t nElts = numLanes(m_SIMDSize);
2133-
if (retType->isVectorTy())
2134-
{
2135-
nElts *= (uint16_t)cast<VectorType>(retType)->getNumElements();
2136-
}
2143+
uint16_t nElts = (uint16_t)GetNumElts(retType, false);
21372144
e_alignment align = getGRFAlignment();
2138-
static const bool nonUniform = false;
21392145
CVariable* var = GetNewVariable(
2140-
nElts, type, align, nonUniform, m_numberInstance,
2146+
nElts, type, align, false, m_numberInstance,
21412147
CName(F->getName(), "_RETVAL"));
21422148
globalSymbolMapping.insert(std::make_pair(F, var));
21432149
return var;
@@ -2157,31 +2163,34 @@ CVariable* CShader::getOrCreateArgumentSymbol(
21572163
return it->second;
21582164
}
21592165

2160-
// An explicit argument is not uniform, and for an implicit argument, it
2161-
// is predefined. Note that it is not necessarily uniform.
2162-
Function* F = Arg->getParent();
2163-
ImplicitArgs implicitArgs(*F, m_pMdUtils);
2164-
unsigned numImplicitArgs = implicitArgs.size();
2165-
unsigned numPushArgsEntry = m_ModuleMetadata->pushInfo.pushAnalysisWIInfos.size();
2166-
unsigned numPushArgs = (isEntryFunc(m_pMdUtils, F) && !isNonEntryMultirateShader(F) ? numPushArgsEntry : 0);
2167-
unsigned numFuncArgs = IGCLLVM::GetFuncArgSize(F) - numImplicitArgs - numPushArgs;
2168-
21692166
CVariable* var = nullptr;
2170-
llvm::Function::arg_iterator arg = F->arg_begin();
2171-
std::advance(arg, numFuncArgs);
2172-
for (unsigned i = 0; i < numImplicitArgs; ++i, ++arg)
2173-
{
2174-
Argument* argVal = &(*arg);
2175-
if (argVal == Arg)
2167+
2168+
// Stack call does not use implicit args
2169+
if (!useStackCall)
2170+
{
2171+
// An explicit argument is not uniform, and for an implicit argument, it
2172+
// is predefined. Note that it is not necessarily uniform.
2173+
Function* F = Arg->getParent();
2174+
ImplicitArgs implicitArgs(*F, m_pMdUtils);
2175+
unsigned numImplicitArgs = implicitArgs.size();
2176+
unsigned numPushArgsEntry = m_ModuleMetadata->pushInfo.pushAnalysisWIInfos.size();
2177+
unsigned numPushArgs = (isEntryFunc(m_pMdUtils, F) && !isNonEntryMultirateShader(F) ? numPushArgsEntry : 0);
2178+
unsigned numFuncArgs = IGCLLVM::GetFuncArgSize(F) - numImplicitArgs - numPushArgs;
2179+
2180+
llvm::Function::arg_iterator arg = F->arg_begin();
2181+
std::advance(arg, numFuncArgs);
2182+
for (unsigned i = 0; i < numImplicitArgs; ++i, ++arg)
21762183
{
2177-
ImplicitArg implictArg = implicitArgs[i];
2178-
auto ArgType = implictArg.getArgType();
2179-
2180-
// Just reuse the kernel arguments for the following.
2181-
// Note that for read only general arguments, we may do similar
2182-
// optimization, with some advanced analysis.
2183-
if (!useStackCall &&
2184-
(ArgType == ImplicitArg::ArgType::R0 ||
2184+
Argument* argVal = &(*arg);
2185+
if (argVal == Arg)
2186+
{
2187+
ImplicitArg implictArg = implicitArgs[i];
2188+
auto ArgType = implictArg.getArgType();
2189+
2190+
// Just reuse the kernel arguments for the following.
2191+
// Note that for read only general arguments, we may do similar
2192+
// optimization, with some advanced analysis.
2193+
if (ArgType == ImplicitArg::ArgType::R0 ||
21852194
ArgType == ImplicitArg::ArgType::PAYLOAD_HEADER ||
21862195
ArgType == ImplicitArg::ArgType::WORK_DIM ||
21872196
ArgType == ImplicitArg::ArgType::NUM_GROUPS ||
@@ -2191,37 +2200,38 @@ CVariable* CShader::getOrCreateArgumentSymbol(
21912200
ArgType == ImplicitArg::ArgType::CONSTANT_BASE ||
21922201
ArgType == ImplicitArg::ArgType::GLOBAL_BASE ||
21932202
ArgType == ImplicitArg::ArgType::PRIVATE_BASE ||
2194-
ArgType == ImplicitArg::ArgType::PRINTF_BUFFER))
2195-
{
2196-
Function& K = *m_FGA->getSubGroupMap(F);
2197-
ImplicitArgs IAs(K, m_pMdUtils);
2198-
uint32_t nIAs = (uint32_t)IAs.size();
2199-
uint32_t iArgIx = IAs.getArgIndex(ArgType);
2200-
uint32_t argIx = (uint32_t)IGCLLVM::GetFuncArgSize(K) - nIAs + iArgIx;
2201-
if (isEntryFunc(m_pMdUtils, &K) && !isNonEntryMultirateShader(&K)) {
2202-
argIx = argIx - numPushArgsEntry;
2203-
}
2204-
Function::arg_iterator arg = K.arg_begin();
2205-
for (uint32_t j = 0; j < argIx; ++j, ++arg);
2206-
Argument* kerArg = &(*arg);
2203+
ArgType == ImplicitArg::ArgType::PRINTF_BUFFER)
2204+
{
2205+
Function& K = *m_FGA->getSubGroupMap(F);
2206+
ImplicitArgs IAs(K, m_pMdUtils);
2207+
uint32_t nIAs = (uint32_t)IAs.size();
2208+
uint32_t iArgIx = IAs.getArgIndex(ArgType);
2209+
uint32_t argIx = (uint32_t)IGCLLVM::GetFuncArgSize(K) - nIAs + iArgIx;
2210+
if (isEntryFunc(m_pMdUtils, &K) && !isNonEntryMultirateShader(&K)) {
2211+
argIx = argIx - numPushArgsEntry;
2212+
}
2213+
Function::arg_iterator arg = K.arg_begin();
2214+
for (uint32_t j = 0; j < argIx; ++j, ++arg);
2215+
Argument* kerArg = &(*arg);
22072216

2208-
// Pre-condition: all kernel arguments have been created already.
2209-
IGC_ASSERT(pSymMap->count(kerArg));
2210-
return (*pSymMap)[kerArg];
2211-
}
2212-
else
2213-
{
2214-
bool isUniform = implictArg.getDependency() == WIAnalysis::UNIFORM;
2215-
uint16_t nbElements = (uint16_t)implictArg.getNumberElements();
2217+
// Pre-condition: all kernel arguments have been created already.
2218+
IGC_ASSERT(pSymMap->count(kerArg));
2219+
return (*pSymMap)[kerArg];
2220+
}
2221+
else
2222+
{
2223+
bool isUniform = implictArg.getDependency() == WIAnalysis::UNIFORM;
2224+
uint16_t nbElements = (uint16_t)implictArg.getNumberElements();
22162225

22172226

2218-
var = GetNewVariable(nbElements,
2219-
implictArg.getVISAType(*m_DL),
2220-
implictArg.getAlignType(*m_DL), isUniform,
2221-
isUniform ? 1 : m_numberInstance,
2222-
argVal->getName());
2227+
var = GetNewVariable(nbElements,
2228+
implictArg.getVISAType(*m_DL),
2229+
implictArg.getAlignType(*m_DL), isUniform,
2230+
isUniform ? 1 : m_numberInstance,
2231+
argVal->getName());
2232+
}
2233+
break;
22232234
}
2224-
break;
22252235
}
22262236
}
22272237

@@ -2240,18 +2250,18 @@ CVariable* CShader::getOrCreateArgumentSymbol(
22402250
}
22412251

22422252
VISA_Type type = GetType(Arg->getType());
2243-
uint16_t nElts = numLanes(m_SIMDSize);
2244-
if (Arg->getType()->isVectorTy())
2245-
{
2246-
IGC_ASSERT((Arg->getType()->getVectorElementType()->isIntegerTy()) || (Arg->getType()->getVectorElementType()->isFloatingPointTy()));
2247-
nElts *= (uint16_t)cast<VectorType>(Arg->getType())->getNumElements();
2248-
}
2253+
uint16_t nElts = (uint16_t)GetNumElts(Arg->getType(), isUniform);
22492254
var = GetNewVariable(nElts, type, align, isUniform, m_numberInstance, Arg->getName());
22502255
}
22512256
pSymMap->insert(std::make_pair(Arg, var));
22522257
return var;
22532258
}
22542259

2260+
void CShader::UpdateSymbolMap(llvm::Value* v, CVariable* CVar)
2261+
{
2262+
symbolMapping[v] = CVar;
2263+
}
2264+
22552265
// Reuse a variable in the following case
22562266
// %x = op1...
22572267
// %y = op2 (%x, ...)

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 27 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -9489,32 +9489,6 @@ void EmitPass::InitializeKernelStack(Function* pKernel)
94899489
emitPushToStack(m_currShader->ImmToVariable(totalAllocaSize, ISA_TYPE_UD));
94909490
}
94919491

9492-
/// This function is NOT about the alignment-rule for storing argv into GRF!
9493-
/// It is about the alignment-rule when we pack the arguments into a block for stack-call!
9494-
uint EmitPass::stackCallArgumentAlignment(CVariable* argv)
9495-
{
9496-
if (argv->IsUniform())
9497-
{
9498-
IGC_ASSERT(argv->GetType() != ISA_TYPE_BOOL);
9499-
if (argv->GetSize() > SIZE_OWORD)
9500-
{
9501-
return getGRFSize();
9502-
}
9503-
else if (argv->GetSize() > SIZE_DWORD)
9504-
{
9505-
return SIZE_OWORD;
9506-
}
9507-
else
9508-
{
9509-
return SIZE_DWORD;
9510-
}
9511-
}
9512-
else
9513-
{
9514-
return getGRFSize();
9515-
}
9516-
}
9517-
95189492
// Either do a block load or store to the stack-pointer given a vector of function arguments
95199493
uint EmitPass::emitStackArgumentLoadOrStore(std::vector<CVariable*>& Args, bool isWrite)
95209494
{
@@ -9652,44 +9626,13 @@ void EmitPass::emitStackCall(llvm::CallInst* inst)
96529626

96539627
for (uint32_t i = 0; i < inst->getNumArgOperands(); i++)
96549628
{
9655-
CVariable* ArgCV = nullptr;
9656-
CVariable* Src = nullptr;
9657-
Type* argType = nullptr;
9658-
9659-
if (!isIndirectFCall)
9660-
{
9661-
IGC_ASSERT(inst->getNumArgOperands() == F->arg_size());
9662-
auto Arg = F->arg_begin();
9663-
std::advance(Arg, i);
9664-
9665-
// Skip unused arguments if any.
9666-
if (Arg->use_empty())
9667-
{
9668-
continue;
9669-
}
9670-
ArgCV = m_currShader->getOrCreateArgumentSymbol(&*Arg, true, true);
9671-
Src = GetSymbol(inst->getArgOperand(i));
9672-
argType = Arg->getType();
9673-
}
9674-
else
9675-
{
9676-
// Indirect function call
9677-
Value* operand = inst->getArgOperand(i);
9678-
argType = operand->getType();
9679-
Src = GetSymbol(operand);
9629+
Value* operand = inst->getArgOperand(i);
9630+
CVariable* Src = GetSymbol(operand);
9631+
Type* argType = operand->getType();
96809632

9681-
uint16_t nElts = numLanes(m_currShader->m_SIMDSize);
9682-
if (argType->isVectorTy())
9683-
{
9684-
IGC_ASSERT(cast<VectorType>(argType)->getElementType()->isIntegerTy() || cast<VectorType>(argType)->getElementType()->isFloatingPointTy());
9685-
nElts *= (uint16_t)cast<VectorType>(argType)->getNumElements();
9686-
}
9687-
ArgCV = m_currShader->GetNewVariable(nElts, m_currShader->GetType(argType), m_currShader->getGRFAlignment(), false, 1, CName::NONE);
9688-
}
96899633
if (Src->GetType() == ISA_TYPE_BOOL)
96909634
{
96919635
// bool args are treated as a vector of WORDs
9692-
IGC_ASSERT(ArgCV->GetType() == ISA_TYPE_BOOL);
96939636
uint nElts = numLanes(m_currShader->m_dispatchSize);
96949637
CVariable* ReplaceArg = m_currShader->GetNewVariable(
96959638
nElts,
@@ -9699,27 +9642,36 @@ void EmitPass::emitStackCall(llvm::CallInst* inst)
96999642
CVariable* one = m_currShader->ImmToVariable(1, ISA_TYPE_W);
97009643
CVariable* zero = m_currShader->ImmToVariable(0, ISA_TYPE_W);
97019644
m_encoder->Select(Src, ReplaceArg, one, zero);
9645+
9646+
argType = IntegerType::getInt16Ty(inst->getContext());
97029647
Src = ReplaceArg;
9703-
argType = VectorType::get(IntegerType::getInt16Ty(inst->getContext()), nElts);
9704-
}
9705-
else
9706-
{
9707-
emitCopyAll(ArgCV, Src, argType);
9708-
Src = ArgCV;
97099648
}
97109649

97119650
// adjust offset for alignment
9712-
uint align = stackCallArgumentAlignment(Src);
9651+
uint align = getGRFSize();
97139652
offsetA = int_cast<unsigned>(llvm::alignTo(offsetA, align));
97149653
// check if an argument can be written to ARGV based upon offset + arg-size
9715-
bool overflow = ((offsetA + Src->GetSize()) > ArgBlkVar->GetSize());
9654+
unsigned argSize = Src->GetSize();
9655+
if (Src->IsUniform())
9656+
{
9657+
argSize = Src->GetSize() * numLanes(m_currShader->m_dispatchSize);
9658+
}
9659+
bool overflow = ((offsetA + argSize) > ArgBlkVar->GetSize());
97169660
if (!overflow)
97179661
{
97189662
argsOnRegister.push_back(std::make_tuple(Src, argType, offsetA));
9719-
offsetA += Src->GetSize();
9663+
offsetA += argSize;
97209664
}
97219665
else
97229666
{
9667+
// Vectorize, then push to stack
9668+
if (Src->IsUniform())
9669+
{
9670+
uint16_t nElts = (uint16_t)m_currShader->GetNumElts(argType, false);
9671+
CVariable* SrcVec = m_currShader->GetNewVariable(nElts, Src->GetType(), m_currShader->getGRFAlignment(), false, Src->getName());
9672+
emitCopyAll(SrcVec, Src, argType);
9673+
Src = SrcVec;
9674+
}
97239675
argsOnStack.push_back(Src);
97249676
}
97259677
}
@@ -9753,12 +9705,9 @@ void EmitPass::emitStackCall(llvm::CallInst* inst)
97539705
CVariable * Src = std::get<0>(I);
97549706
Type* argType = std::get<1>(I);
97559707
uint32_t offset = std::get<2>(I);
9756-
CVariable* Dst = ArgBlkVar;
97579708

9758-
if (Dst->GetType() != Src->GetType() || offset != 0 || Src->IsUniform() != Dst->IsUniform())
9759-
{
9760-
Dst = m_currShader->GetNewAlias(ArgBlkVar, Src->GetType(), (uint16_t)offset, Src->GetNumberElement(), Src->IsUniform());
9761-
}
9709+
uint16_t nElts = (uint16_t)m_currShader->GetNumElts(argType, false);
9710+
CVariable* Dst = m_currShader->GetNewAlias(ArgBlkVar, m_currShader->GetType(argType), offset, nElts, false);
97629711
emitCopyAll(Dst, Src, argType);
97639712
}
97649713
};
@@ -9901,7 +9850,7 @@ void EmitPass::emitStackFuncEntry(Function* F)
99019850

99029851
CVariable* Dst = m_currShader->getOrCreateArgumentSymbol(&Arg, false, true);
99039852
// adjust offset for alignment
9904-
uint align = stackCallArgumentAlignment(Dst);
9853+
uint align = getGRFSize();
99059854
offsetA = int_cast<unsigned>(llvm::alignTo(offsetA, align));
99069855
uint argSize = Dst->GetSize();
99079856
if (Dst->GetType() == ISA_TYPE_BOOL)
@@ -9922,13 +9871,9 @@ void EmitPass::emitStackFuncEntry(Function* F)
99229871
}
99239872
else
99249873
{
9925-
if (Src->GetType() != Dst->GetType() ||
9926-
offsetA != 0 ||
9927-
Src->IsUniform() != Dst->IsUniform())
9928-
{
9929-
Src = m_currShader->GetNewAlias(ArgBlkVar, Dst->GetType(), (uint16_t)offsetA, Dst->GetNumberElement(), Dst->IsUniform());
9930-
}
9931-
emitCopyAll(Dst, Src, Arg.getType());
9874+
// Directly map the dst register to an alias of ArgBlkVar, and update symbol mapping for future uses
9875+
Dst = m_currShader->GetNewAlias(ArgBlkVar, Dst->GetType(), (uint16_t)offsetA, Dst->GetNumberElement(), Dst->IsUniform());
9876+
m_currShader->UpdateSymbolMap(&Arg, Dst);
99329877
}
99339878
}
99349879
offsetA += argSize;

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,6 @@ class EmitPass : public llvm::FunctionPass
141141
void emitStackCall(llvm::CallInst* inst);
142142
void emitStackFuncEntry(llvm::Function* F);
143143
void emitStackFuncExit(llvm::ReturnInst* inst);
144-
uint stackCallArgumentAlignment(CVariable* argv);
145144
uint emitStackArgumentLoadOrStore(std::vector<CVariable*>& Args, bool isWrite);
146145
void InitializeKernelStack(llvm::Function* pKernel);
147146

IGC/Compiler/CISACodeGen/ShaderCodeGen.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,9 @@ class CShader
343343
llvm::Argument* Arg,
344344
bool ArgInCallee, // true if Arg isn't in current func
345345
bool useStackCall = false);
346+
void UpdateSymbolMap(llvm::Value* v, CVariable* CVar);
346347
VISA_Type GetType(llvm::Type* type);
348+
uint32_t GetNumElts(llvm::Type* type, bool isUniform = false);
347349

348350
/// Evaluate constant expression and return the result immediate value.
349351
uint64_t GetConstantExpr(llvm::ConstantExpr* C);

0 commit comments

Comments
 (0)