Skip to content

Commit 556bd2c

Browse files
pratikasharsys_zuul
authored and committed
Fix detection of whether a kernel is compiled as a single SIMD32 kernel or broken
up into two SIMD16 kernels. Change-Id: Ic750bb82a108d84163f49f2639d36802720ae9c3
1 parent 275c2c8 commit 556bd2c

File tree

4 files changed

+39
-52
lines changed

4 files changed

+39
-52
lines changed

visa/FlowGraph.cpp

Lines changed: 30 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -5967,7 +5967,6 @@ void GlobalOpndHashTable::dump()
59675967

59685968
void G4_Kernel::computeChannelSlicing()
59695969
{
5970-
std::unordered_set<G4_Declare*> skipSendDcls;
59715970
unsigned int simdSize = getSimdSize();
59725971
channelSliced = true;
59735972

@@ -5978,30 +5977,6 @@ void G4_Kernel::computeChannelSlicing()
59785977
return;
59795978
}
59805979

5981-
for (auto bb : fg)
5982-
{
5983-
for (auto inst : bb->getInstList())
5984-
{
5985-
if (inst->isPseudoKill() || inst->isWriteEnableInst())
5986-
continue;
5987-
5988-
if (inst->isSend())
5989-
{
5990-
auto dst = inst->getDst();
5991-
if (dst && dst->isDstRegRegion())
5992-
skipSendDcls.insert(dst->getTopDcl());
5993-
5994-
auto src = inst->getSrc(0);
5995-
if (src && src->isSrcRegRegion())
5996-
skipSendDcls.insert(src->getTopDcl());
5997-
5998-
src = inst->getSrc(1);
5999-
if (src && src->isSrcRegRegion())
6000-
skipSendDcls.insert(src->getTopDcl());
6001-
}
6002-
}
6003-
}
6004-
60055980
// .dcl V1 size = 128 bytes
60065981
// op (16|M0) V1(0,0) ..
60075982
// op (16|M16) V1(2,0) ..
@@ -6010,16 +5985,21 @@ void G4_Kernel::computeChannelSlicing()
60105985
// Allocation of dcl is still as if it were a
60115986
// SIMD32 kernel.
60125987

6013-
// dcl -> lb, rb, emask offset
6014-
std::unordered_map<G4_Declare*, std::vector<std::tuple<unsigned int, unsigned int, unsigned int>>> defaultDefs;
5988+
// Store emask bits that are ever used to define a variable
5989+
std::unordered_map<G4_Declare*, std::bitset<32>> emaskRef;
60155990
for (auto bb : fg)
60165991
{
60175992
for (auto inst : bb->getInstList())
60185993
{
5994+
if (inst->isSend())
5995+
continue;
5996+
60195997
auto dst = inst->getDst();
6020-
if (!dst || !dst->isDstRegRegion() || !dst->getTopDcl() ||
6021-
skipSendDcls.find(dst->getTopDcl()) != skipSendDcls.end() ||
6022-
dst->asDstRegRegion()->getHorzStride() != 1)
5998+
if (!dst || !dst->getTopDcl() ||
5999+
dst->getHorzStride() != 1)
6000+
continue;
6001+
6002+
if (inst->isWriteEnableInst())
60236003
continue;
60246004

60256005
auto regFileKind = dst->getTopDcl()->getRegFile();
@@ -6031,31 +6011,32 @@ void G4_Kernel::computeChannelSlicing()
60316011
if (dst->getTopDcl()->getByteSize() <= dstElemSize * simdSize)
60326012
continue;
60336013

6034-
std::vector<std::tuple<unsigned int, unsigned int, unsigned int>> v =
6035-
{ std::make_tuple(dst->getLeftBound(), dst->getRightBound(), inst->getMaskOffset()) };
6036-
defaultDefs.insert(std::make_pair(dst->getTopDcl(), v));
6014+
auto emaskOffStart = inst->getMaskOffset();
6015+
6016+
// Reset all bits on first encounter of dcl
6017+
if (emaskRef.find(dst->getTopDcl()) == emaskRef.end())
6018+
emaskRef[dst->getTopDcl()].reset();
6019+
6020+
// Set bits based on which EM bits are used in the def
6021+
for (unsigned int i = emaskOffStart; i != (emaskOffStart + inst->getExecSize()); i++)
6022+
{
6023+
emaskRef[dst->getTopDcl()].set(i);
6024+
}
60376025
}
60386026
}
60396027

6040-
for (auto dd : defaultDefs)
6028+
// Check whether any variable's emask usage straddles across lower and upper 16 bits
6029+
for (auto& emRefs : emaskRef)
60416030
{
6042-
auto elemSize = dd.first->getElemSize();
6043-
for (auto defs : dd.second)
6044-
{
6045-
auto lb = std::get<0>(defs);
6046-
auto rb = std::get<1>(defs);
6047-
auto emaskOffset = std::get<2>(defs);
6031+
auto& bits = emRefs.second;
6032+
auto num = bits.to_ulong();
60486033

6049-
// Look for single instruction
6050-
if (emaskOffset == 0 && lb == 0 && rb == elemSize * 32)
6051-
channelSliced = false;
6052-
// Or broken instruction
6053-
if (emaskOffset == 16 && lb == elemSize * 16 && rb == elemSize * 32)
6054-
channelSliced = false;
6034+
// Check whether any lower 16 and upper 16 bits are set
6035+
if (((num & 0xffff) != 0) && ((num & 0xffff0000) != 0))
6036+
{
6037+
channelSliced = false;
6038+
return;
60556039
}
6056-
6057-
if (!channelSliced)
6058-
break;
60596040
}
60606041

60616042
return;

visa/GraphColor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -829,6 +829,7 @@ namespace vISA
829829

830830
static unsigned int owordToGRFSize(unsigned int numOwords);
831831
static unsigned int hwordToGRFSize(unsigned int numHwords);
832+
static unsigned int GRFToHwordSize(unsigned int numGRFs);
832833
static unsigned int GRFSizeToOwords(unsigned int numGRFs);
833834

834835
// RA specific fields

visa/SpillCleanup.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ G4_SrcRegRegion* CoalesceSpillFills::generateCoalescedSpill(unsigned int scratch
5050
fp = kernel.fg.getFramePtrDcl();
5151
unsigned int option = useNoMask ? InstOpt_WriteEnable : 0;
5252
auto spillInst = kernel.fg.builder->createSpill(kernel.fg.builder->createNullDst(Type_UW), header, spillSrcPayload, 16, payloadSize,
53-
scratchOffset, fp, static_cast<G4_InstOption>(option), 0, srcCISAOff);
53+
GlobalRA::GRFToHwordSize(scratchOffset), fp, static_cast<G4_InstOption>(option), 0, srcCISAOff);
5454

5555
if (!useNoMask)
5656
{
@@ -94,8 +94,8 @@ G4_DstRegRegion* CoalesceSpillFills::generateCoalescedFill(unsigned int scratchO
9494
if (kernel.fg.getHasStackCalls() || kernel.fg.getIsStackCallFunc())
9595
fp = kernel.fg.getFramePtrDcl();
9696

97-
kernel.fg.builder->createFill(header, fillDst, 16, payloadSize, scratchOffset, fp,
98-
InstOpt_WriteEnable, 0, srcCISAOff);
97+
kernel.fg.builder->createFill(header, fillDst, 16, payloadSize,
98+
GlobalRA::GRFToHwordSize(scratchOffset), fp, InstOpt_WriteEnable, 0, srcCISAOff);
9999

100100
#if 0
101101
fillInst->dump();

visa/SpillManagerGMRF.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4254,6 +4254,11 @@ unsigned int GlobalRA::hwordToGRFSize(unsigned int numHwords)
42544254
return owordToGRFSize(numHwords * 2);
42554255
}
42564256

4257+
unsigned int GlobalRA::GRFToHwordSize(unsigned int numGRFs)
4258+
{
4259+
return GRFSizeToOwords(numGRFs) / 2;
4260+
}
4261+
42574262
unsigned int GlobalRA::GRFSizeToOwords(unsigned int numGRFs)
42584263
{
42594264
return numGRFs * (G4_GRF_REG_NBYTES / OWORD_BYTE_SIZE);

0 commit comments

Comments
 (0)