Skip to content

Commit 2680f8d

Browse files
ekochetkigcbot
authored and committed
Proper SIMD width for enabling forceBCR
The recorded maximal register pressure is estimated assuming the minimal SIMD width, so it needs to be rescaled to the SIMD width that we actually compile for.
1 parent 681e0a5 commit 2680f8d

File tree

5 files changed

+31
-21
lines changed

5 files changed

+31
-21
lines changed

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ SPDX-License-Identifier: MIT
88

99
#include "Compiler/CISACodeGen/CISABuilder.hpp"
1010
#include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
11+
#include "Compiler/CISACodeGen/IGCLivenessAnalysis.h"
1112
#include "Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp"
1213
#include "Compiler/Optimizer/OpenCLPasses/NamedBarriers/NamedBarriersResolution.hpp"
1314
#include "common/allocator.h"
@@ -3545,8 +3546,8 @@ void CEncoder::SetAbortOnSpillThreshold(bool canAbortOnSpill, bool AllowSpill) {
35453546
}
35463547
}
35473548

3548-
void CEncoder::InitVISABuilderOptions(TARGET_PLATFORM VISAPlatform, bool canAbortOnSpill, bool hasStackCall,
3549-
bool enableVISA_IR) {
3549+
void CEncoder::InitVISABuilderOptions(TARGET_PLATFORM VISAPlatform, SIMDMode simdMode, bool canAbortOnSpill,
3550+
bool hasStackCall, bool enableVISA_IR) {
35503551
CodeGenContext *context = m_program->GetContext();
35513552
bool KernelDebugEnable = false;
35523553
bool ForceNonCoherentStatelessBti = false;
@@ -4239,13 +4240,16 @@ void CEncoder::InitVISABuilderOptions(TARGET_PLATFORM VISAPlatform, bool canAbor
42394240

42404241
auto funcInfoMD = context->getMetaDataUtils()->getFunctionsInfoItem(m_program->entry);
42414242
uint32_t MaxRegPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();
4243+
// need to adjust since MaxRegPressure assumes minimal SIMD
4244+
auto guessedSimd = IGCLivenessAnalysisBase::bestGuessSIMDSize(context, context->getMetaDataUtils(), m_program->entry);
4245+
MaxRegPressure = MaxRegPressure * numLanes(simdMode) / numLanes(guessedSimd);
42424246
uint32_t RegPressureThreshold = (uint32_t)(context->getNumGRFPerThread(true) * 0.6);
4243-
4247+
bool maxPressureHeur = MaxRegPressure > 0 && MaxRegPressure < RegPressureThreshold &&
4248+
m_program->GetParent()->getLLVMFunction()->size() == 1;
42444249
if (context->type == ShaderType::OPENCL_SHADER &&
4245-
(m_program->m_Platform->limitedBCR() || (MaxRegPressure > 0 && MaxRegPressure < RegPressureThreshold))) {
4250+
(m_program->m_Platform->limitedBCR() || (maxPressureHeur))) {
42464251
SaveOption(vISA_enableBCR, true);
4247-
if (m_program->GetParent()->getLLVMFunction()->size() == 1 &&
4248-
m_program->m_Platform->getMinDispatchMode() != SIMDMode::SIMD8)
4252+
if (m_program->m_Platform->getMinDispatchMode() != SIMDMode::SIMD8)
42494253
SaveOption(vISA_forceBCR, true);
42504254
}
42514255
if (context->type == ShaderType::OPENCL_SHADER && m_program->m_Platform->supportDpasInstruction()) {
@@ -4722,7 +4726,7 @@ void CEncoder::InitLabelMap(const llvm::Function *F) {
47224726
}
47234727
}
47244728

4725-
void CEncoder::InitEncoder(bool canAbortOnSpill, bool hasStackCall, bool hasInlineAsmCall,
4729+
void CEncoder::InitEncoder(SIMDMode simdMode, bool canAbortOnSpill, bool hasStackCall, bool hasInlineAsmCall,
47264730
bool hasAdditionalVisaAsmToLink, int numThreadsPerEU, uint lowerBoundGRF, uint upperBoundGRF,
47274731
VISAKernel *prevKernel) {
47284732
m_aliasesMap.clear();
@@ -4770,7 +4774,7 @@ void CEncoder::InitEncoder(bool canAbortOnSpill, bool hasStackCall, bool hasInli
47704774
if (IsCodePatchCandidate()) {
47714775
SetHasPrevKernel(prevKernel != nullptr);
47724776
}
4773-
InitVISABuilderOptions(VISAPlatform, canAbortOnSpill, hasStackCall, builderOpt == VISA_BUILDER_BOTH);
4777+
InitVISABuilderOptions(VISAPlatform, simdMode, canAbortOnSpill, hasStackCall, builderOpt == VISA_BUILDER_BOTH);
47744778

47754779
if (numThreadsPerEU > 0) {
47764780
// Number of threads per EU is set per kernel (by function MD)

IGC/Compiler/CISACodeGen/CISABuilder.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,10 +118,11 @@ struct SEncoderState {
118118

119119
class CEncoder {
120120
public:
121-
void InitEncoder(bool canAbortOnSpill, bool hasStackCall, bool hasInlineAsmCall, bool hasAdditionalVisaAsmToLink,
122-
int numThreadsPerEU, uint lowerBoundGRF, uint upperBoundGRF, VISAKernel *prevKernel);
121+
void InitEncoder(SIMDMode simdMode, bool canAbortOnSpill, bool hasStackCall, bool hasInlineAsmCall,
122+
bool hasAdditionalVisaAsmToLink, int numThreadsPerEU, uint lowerBoundGRF, uint upperBoundGRF,
123+
VISAKernel *prevKernel);
123124
void InitBuildParams(llvm::SmallVector<std::unique_ptr<const char, std::function<void(const char *)>>, 10> &params);
124-
void InitVISABuilderOptions(TARGET_PLATFORM VISAPlatform, bool canAbortOnSpill, bool hasStackCall,
125+
void InitVISABuilderOptions(TARGET_PLATFORM VISAPlatform, SIMDMode simdMode, bool canAbortOnSpill, bool hasStackCall,
125126
bool enableVISA_IR);
126127
SEncoderState CopyEncoderState();
127128
void SetEncoderState(SEncoderState &newState);

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -773,7 +773,7 @@ bool EmitPass::runOnFunction(llvm::Function &F) {
773773
}
774774
// call builder after pre-analysis pass where scratchspace offset to VISA is
775775
// calculated
776-
m_encoder->InitEncoder(m_canAbortOnSpill, m_currShader->HasStackCalls(), hasInlineAsmCall,
776+
m_encoder->InitEncoder(m_SimdMode, m_canAbortOnSpill, m_currShader->HasStackCalls(), hasInlineAsmCall,
777777
hasAdditionalVisaAsmToLink, numThreadsPerEU, lowerBoundGRF, upperBoundGRF, prevKernel);
778778

779779
if (!m_encoder->IsCodePatchCandidate())

IGC/Compiler/CISACodeGen/IGCLivenessAnalysis.cpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ unsigned int IGCLivenessAnalysisBase::registerSizeInBytes() {
4545
return 32;
4646
}
4747

48-
SIMDMode IGCLivenessAnalysisBase::bestGuessSIMDSize(Function *F) {
48+
SIMDMode IGCLivenessAnalysisBase::bestGuessSIMDSize(IGC::CodeGenContext *CGCtx, IGCMD::MetaDataUtils *MDUtils,
49+
Function *F, GenXFunctionGroupAnalysis *FGA) {
4950
switch (IGC_GET_FLAG_VALUE(ForceOCLSIMDWidth)) {
5051
case 0:
5152
break;
@@ -69,19 +70,23 @@ SIMDMode IGCLivenessAnalysisBase::bestGuessSIMDSize(Function *F) {
6970
if (CGCtx->platform.isProductChildOf(IGFX_PVC)) {
7071
bool abortOnSpills =
7172
IGC_GET_FLAG_VALUE(AllowSIMD16DropForXE2Plus) && (CGCtx->platform.isCoreXE2() || CGCtx->platform.isCoreXE3());
72-
auto FG = FGA ? FGA->getGroup(F) : nullptr;
73-
bool hasStackCall = (FG && FG->hasStackCall()) || (F && F->hasFnAttribute("visaStackCall"));
74-
bool isIndirectGroup = FG && FGA->isIndirectCallGroup(FG);
75-
bool hasSubroutine = FG && !FG->isSingle() && !hasStackCall && !isIndirectGroup;
76-
if (abortOnSpills || hasSubroutine) {
73+
if (abortOnSpills)
7774
return SIMDMode::SIMD16;
75+
if (FGA) {
76+
auto FG = FGA ? FGA->getGroup(F) : nullptr;
77+
bool hasStackCall = (FG && FG->hasStackCall()) || (F && F->hasFnAttribute("visaStackCall"));
78+
bool isIndirectGroup = FG && FGA->isIndirectCallGroup(FG);
79+
bool hasSubroutine = FG && !FG->isSingle() && !hasStackCall && !isIndirectGroup;
80+
if (hasSubroutine)
81+
return SIMDMode::SIMD16;
7882
}
7983
return SIMDMode::SIMD32;
8084
}
81-
8285
return SIMDMode::SIMD8;
8386
}
8487

88+
// Convenience overload taking only the function: forwards to the
// four-argument bestGuessSIMDSize, supplying the CGCtx, MDUtils and FGA
// that are in scope here (presumably members of this analysis object --
// confirm against the class definition).
SIMDMode IGCLivenessAnalysisBase::bestGuessSIMDSize(Function *F) {
  return bestGuessSIMDSize(CGCtx, MDUtils, F, FGA);
}
89+
8590
ValueSet IGCLivenessAnalysisBase::getDefs(llvm::BasicBlock &BB) {
8691

8792
ValueSet &BBIn = In[&BB];

IGC/Compiler/CISACodeGen/IGCLivenessAnalysis.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,9 @@ class IGCLivenessAnalysisBase {
6363
unsigned int estimateSizeInBytes(ValueSet &Set, llvm::Function &F, unsigned int SIMD, WIAnalysisRunner *WI = nullptr);
6464
void collectPressureForBB(llvm::BasicBlock &BB, InsideBlockPressureMap &BBListing, unsigned int SIMD,
6565
WIAnalysisRunner *WI = nullptr);
66-
6766
SIMDMode bestGuessSIMDSize(Function *F = nullptr);
68-
67+
static SIMDMode bestGuessSIMDSize(IGC::CodeGenContext *CGCtx, IGCMD::MetaDataUtils *MDUtils, Function *F = nullptr,
68+
GenXFunctionGroupAnalysis *FGA = nullptr);
6969
unsigned int bytesToRegisters(unsigned int Bytes) {
7070
unsigned int RegisterSizeInBytes = registerSizeInBytes();
7171
unsigned int AmountOfRegistersRoundUp = (Bytes + RegisterSizeInBytes - 1) / RegisterSizeInBytes;

0 commit comments

Comments
 (0)