Skip to content

Commit 83ce894

Browse files
weiyu-chen authored and sys_zuul committed
Normalize r[a0.0]<N;0> region when N is equal to execution size.
Change-Id: Icaf1ad22aa12e18e7f1c2a7934a8a95307bb3a77
1 parent f634848 commit 83ce894

File tree

1 file changed

+43
-29
lines changed

1 file changed

+43
-29
lines changed

visa/HWConformity.cpp

Lines changed: 43 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6733,41 +6733,55 @@ void HWConformity::fixSrcRegion( G4_INST *inst )
67336733
{
67346734
if (inst->getSrc(i) && inst->getSrc(i)->isSrcRegRegion() && !inst->getSrc(i)->isNullReg())
67356735
{
6736-
G4_SrcRegRegion *src = inst->getSrc(i)->asSrcRegRegion();
6736+
G4_SrcRegRegion* src = inst->getSrc(i)->asSrcRegRegion();
67376737
const RegionDesc* srcRegion = src->getRegion();
6738-
if (srcRegion->isRegionWH() || srcRegion->isRegionV() || srcRegion->isRegionSW() )
6738+
if (srcRegion->isRegionWH() || srcRegion->isRegionV() || srcRegion->isRegionSW())
6739+
{
6740+
// normalize VxH regions if possible
6741+
if (srcRegion->isRegionWH() && srcRegion->width == inst->getExecSize())
6742+
{
6743+
// r[a0.0]<E, S> -> r[a0.0]<S;1,0>
6744+
src->setRegion(builder.createRegionDesc(srcRegion->horzStride, 1, 0));
6745+
}
6746+
// ToDo: add other legalization
67396747
continue;
6748+
}
6749+
6750+
//ToDo: most of these checks should be obsolete at this point
67406751
uint16_t vs = srcRegion->vertStride, wd = srcRegion->width, hs = srcRegion->horzStride;
67416752
uint8_t exSize = inst->getExecSize();
6742-
MUST_BE_TRUE( inst->isSend() || exSize >= wd, " Bad source region: Width is greater than execution size." );
6743-
if ( comprInst )
6753+
MUST_BE_TRUE(inst->isSend() || exSize >= wd, " Bad source region: Width is greater than execution size.");
6754+
if (comprInst)
67446755
{
6745-
if (G4_Type_Table[inst->getSrc(i)->getType()].byteSize > G4_WSIZE &&
6756+
if (G4_Type_Table[inst->getSrc(i)->getType()].byteSize > G4_WSIZE&&
67466757
wd == exSize &&
67476758
vs == wd && hs == 1)
67486759
{
67496760
vs = wd = exSize / 2;
67506761
}
67516762
}
6752-
if( wd == exSize && hs != 0 && vs != wd * hs )
6763+
if (wd == exSize && hs != 0 && vs != wd * hs)
67536764
{
6765+
// <V;E,H> --> <V*H;E,H>
67546766
vs = wd * hs;
67556767
}
6756-
if( wd == 1 )
6768+
if (wd == 1)
67576769
{
6770+
// <V;1,H> -> <V;1,0> or <0;1,0>
67586771
hs = 0;
6759-
if( 1 == exSize )
6772+
if (1 == exSize)
67606773
vs = 0;
67616774
}
6762-
if( vs == 0 && hs == 0 )
6775+
if (vs == 0 && hs == 0)
67636776
{
6777+
// <0;N,0> -> <0;1,0>
67646778
wd = 1;
67656779
}
6766-
if( hs == 0 &&
6780+
if (hs == 0 &&
67676781
((G4_Type_Table[inst->getSrc(i)->getType()].byteSize == G4_WSIZE &&
6768-
exSize == 32 && vs == 32 && wd == 32) ||
6769-
(G4_Type_Table[inst->getSrc(i)->getType()].byteSize == G4_DSIZE &&
6770-
exSize == 16 && vs == 16 && wd == 16)) )
6782+
exSize == 32 && vs == 32 && wd == 32) ||
6783+
(G4_Type_Table[inst->getSrc(i)->getType()].byteSize == G4_DSIZE &&
6784+
exSize == 16 && vs == 16 && wd == 16)))
67716785
{
67726786
vs = 0;
67736787
wd = 1;
@@ -6777,36 +6791,36 @@ void HWConformity::fixSrcRegion( G4_INST *inst )
67776791
// TODO! for the following two cases, split the instruction:
67786792
// source region is like<8;4,1>
67796793
// source region is like<2;4,1>
6780-
if( src->getRegAccess() == Direct && src->crossGRF() && hs != 0)
6794+
if (src->getRegAccess() == Direct && src->crossGRF() && hs != 0)
67816795
{
67826796
// TODO: this is a temp fix
6783-
if( (getGenxPlatform() == GENX_BDW || getGenxPlatform() == GENX_CHV) && vs < wd * hs )
6797+
if ((getGenxPlatform() == GENX_BDW || getGenxPlatform() == GENX_CHV) && vs < wd * hs)
67846798
continue;
67856799
// check number of elements in first GRF.
67866800
uint16_t execTypeSize = hs * src->getElemSize();
67876801
uint16_t sizeInFirstGRF = GENX_GRF_REG_SIZ - src->getLeftBound() % GENX_GRF_REG_SIZ;
67886802
uint16_t vertSize = vs * G4_Type_Table[src->getType()].byteSize;
6789-
uint16_t numEle = ( sizeInFirstGRF + execTypeSize - 1 ) / execTypeSize;
6803+
uint16_t numEle = (sizeInFirstGRF + execTypeSize - 1) / execTypeSize;
67906804
uint16_t rowSize = wd * execTypeSize;
67916805

6792-
if( sizeInFirstGRF <= vertSize )
6806+
if (sizeInFirstGRF <= vertSize)
67936807
{
6794-
if( numEle >= wd )
6808+
if (numEle >= wd)
67956809
{
67966810
numEle = wd;
67976811
}
67986812
}
6799-
else if( vs > wd )
6813+
else if (vs > wd)
68006814
{
6801-
numEle = sizeInFirstGRF/vertSize * wd +
6802-
(( sizeInFirstGRF%vertSize > rowSize ) ? wd : ( sizeInFirstGRF%vertSize + execTypeSize - 1 ) / execTypeSize );
6815+
numEle = sizeInFirstGRF / vertSize * wd +
6816+
((sizeInFirstGRF % vertSize > rowSize) ? wd : (sizeInFirstGRF % vertSize + execTypeSize - 1) / execTypeSize);
68036817
}
68046818
// wd is used to cross GRF, change to <vs;1,0>
6805-
if( numEle < wd || ( wd >= vs && numEle % wd != 0 ) )
6819+
if (numEle < wd || (wd >= vs && numEle % wd != 0))
68066820
{
68076821

68086822
wd = 1;
6809-
if( hs == 0 )
6823+
if (hs == 0)
68106824
{
68116825
vs = 1;
68126826
}
@@ -6818,17 +6832,17 @@ void HWConformity::fixSrcRegion( G4_INST *inst )
68186832
}
68196833
}
68206834

6821-
if( vs != srcRegion->vertStride || wd != srcRegion->width || hs != srcRegion->horzStride )
6835+
if (vs != srcRegion->vertStride || wd != srcRegion->width || hs != srcRegion->horzStride)
68226836
{
6823-
G4_SrcRegRegion *origSrc = inst->getSrc(i)->asSrcRegRegion();
6824-
origSrc->setRegion( builder.createRegionDesc( vs, wd, hs ) );
6837+
G4_SrcRegRegion* origSrc = inst->getSrc(i)->asSrcRegRegion();
6838+
origSrc->setRegion(builder.createRegionDesc(vs, wd, hs));
68256839
}
68266840
}
68276841
}
6828-
if( inst->getDst() && !inst->hasNULLDst() )
6842+
if (inst->getDst() && !inst->hasNULLDst())
68296843
{
6830-
MUST_BE_TRUE( inst->getDst()->getHorzStride() != 0,
6831-
"Bad source region: Width is greater than execution size." );
6844+
MUST_BE_TRUE(inst->getDst()->getHorzStride() != 0,
6845+
"Bad source region: Width is greater than execution size.");
68326846
}
68336847
}
68346848

0 commit comments

Comments
 (0)