@@ -6733,41 +6733,55 @@ void HWConformity::fixSrcRegion( G4_INST *inst )
67336733 {
67346734 if (inst->getSrc (i) && inst->getSrc (i)->isSrcRegRegion () && !inst->getSrc (i)->isNullReg ())
67356735 {
6736- G4_SrcRegRegion * src = inst->getSrc (i)->asSrcRegRegion ();
6736+ G4_SrcRegRegion* src = inst->getSrc (i)->asSrcRegRegion ();
67376737 const RegionDesc* srcRegion = src->getRegion ();
6738- if (srcRegion->isRegionWH () || srcRegion->isRegionV () || srcRegion->isRegionSW () )
6738+ if (srcRegion->isRegionWH () || srcRegion->isRegionV () || srcRegion->isRegionSW ())
6739+ {
6740+ // normalize VxH regions if possible
6741+ if (srcRegion->isRegionWH () && srcRegion->width == inst->getExecSize ())
6742+ {
6743+ // r[a0.0]<E, S> -> r[a0.0]<S;1,0>
6744+ src->setRegion (builder.createRegionDesc (srcRegion->horzStride , 1 , 0 ));
6745+ }
6746+ // ToDo: add other legalization
67396747 continue ;
6748+ }
6749+
6750+ // ToDo: most of these checks should be obsolete at this point
67406751 uint16_t vs = srcRegion->vertStride , wd = srcRegion->width , hs = srcRegion->horzStride ;
67416752 uint8_t exSize = inst->getExecSize ();
6742- MUST_BE_TRUE ( inst->isSend () || exSize >= wd, " Bad source region: Width is greater than execution size." );
6743- if ( comprInst )
6753+ MUST_BE_TRUE (inst->isSend () || exSize >= wd, " Bad source region: Width is greater than execution size." );
6754+ if (comprInst)
67446755 {
6745- if (G4_Type_Table[inst->getSrc (i)->getType ()].byteSize > G4_WSIZE &&
6756+ if (G4_Type_Table[inst->getSrc (i)->getType ()].byteSize > G4_WSIZE&&
67466757 wd == exSize &&
67476758 vs == wd && hs == 1 )
67486759 {
67496760 vs = wd = exSize / 2 ;
67506761 }
67516762 }
6752- if ( wd == exSize && hs != 0 && vs != wd * hs )
6763+ if ( wd == exSize && hs != 0 && vs != wd * hs)
67536764 {
6765+ // <V;E,H> --> <V*H;E,H>
67546766 vs = wd * hs;
67556767 }
6756- if ( wd == 1 )
6768+ if ( wd == 1 )
67576769 {
6770+ // <V;1,H> -> <V;1,0> or <0;1,0>
67586771 hs = 0 ;
6759- if ( 1 == exSize )
6772+ if ( 1 == exSize)
67606773 vs = 0 ;
67616774 }
6762- if ( vs == 0 && hs == 0 )
6775+ if ( vs == 0 && hs == 0 )
67636776 {
6777+ // <0;N,0> -> <0;1,0>
67646778 wd = 1 ;
67656779 }
6766- if ( hs == 0 &&
6780+ if ( hs == 0 &&
67676781 ((G4_Type_Table[inst->getSrc (i)->getType ()].byteSize == G4_WSIZE &&
6768- exSize == 32 && vs == 32 && wd == 32 ) ||
6769- (G4_Type_Table[inst->getSrc (i)->getType ()].byteSize == G4_DSIZE &&
6770- exSize == 16 && vs == 16 && wd == 16 )) )
6782+ exSize == 32 && vs == 32 && wd == 32 ) ||
6783+ (G4_Type_Table[inst->getSrc (i)->getType ()].byteSize == G4_DSIZE &&
6784+ exSize == 16 && vs == 16 && wd == 16 )))
67716785 {
67726786 vs = 0 ;
67736787 wd = 1 ;
@@ -6777,36 +6791,36 @@ void HWConformity::fixSrcRegion( G4_INST *inst )
67776791 // TODO! for the following two cases, split the instruction:
67786792 // source region is like<8;4,1>
67796793 // source region is like<2;4,1>
6780- if ( src->getRegAccess () == Direct && src->crossGRF () && hs != 0 )
6794+ if ( src->getRegAccess () == Direct && src->crossGRF () && hs != 0 )
67816795 {
67826796 // TODO: this is a temp fix
6783- if ( ( getGenxPlatform () == GENX_BDW || getGenxPlatform () == GENX_CHV) && vs < wd * hs )
6797+ if (( getGenxPlatform () == GENX_BDW || getGenxPlatform () == GENX_CHV) && vs < wd * hs)
67846798 continue ;
67856799 // check number of elements in first GRF.
67866800 uint16_t execTypeSize = hs * src->getElemSize ();
67876801 uint16_t sizeInFirstGRF = GENX_GRF_REG_SIZ - src->getLeftBound () % GENX_GRF_REG_SIZ;
67886802 uint16_t vertSize = vs * G4_Type_Table[src->getType ()].byteSize ;
6789- uint16_t numEle = ( sizeInFirstGRF + execTypeSize - 1 ) / execTypeSize;
6803+ uint16_t numEle = (sizeInFirstGRF + execTypeSize - 1 ) / execTypeSize;
67906804 uint16_t rowSize = wd * execTypeSize;
67916805
6792- if ( sizeInFirstGRF <= vertSize )
6806+ if ( sizeInFirstGRF <= vertSize)
67936807 {
6794- if ( numEle >= wd )
6808+ if ( numEle >= wd)
67956809 {
67966810 numEle = wd;
67976811 }
67986812 }
6799- else if ( vs > wd )
6813+ else if ( vs > wd)
68006814 {
6801- numEle = sizeInFirstGRF/ vertSize * wd +
6802- (( sizeInFirstGRF% vertSize > rowSize ) ? wd : ( sizeInFirstGRF% vertSize + execTypeSize - 1 ) / execTypeSize );
6815+ numEle = sizeInFirstGRF / vertSize * wd +
6816+ ((sizeInFirstGRF % vertSize > rowSize) ? wd : (sizeInFirstGRF % vertSize + execTypeSize - 1 ) / execTypeSize);
68036817 }
68046818 // wd is used to cross GRF, change to <vs;1,0>
6805- if ( numEle < wd || ( wd >= vs && numEle % wd != 0 ) )
6819+ if ( numEle < wd || (wd >= vs && numEle % wd != 0 ) )
68066820 {
68076821
68086822 wd = 1 ;
6809- if ( hs == 0 )
6823+ if ( hs == 0 )
68106824 {
68116825 vs = 1 ;
68126826 }
@@ -6818,17 +6832,17 @@ void HWConformity::fixSrcRegion( G4_INST *inst )
68186832 }
68196833 }
68206834
6821- if ( vs != srcRegion->vertStride || wd != srcRegion->width || hs != srcRegion->horzStride )
6835+ if ( vs != srcRegion->vertStride || wd != srcRegion->width || hs != srcRegion->horzStride )
68226836 {
6823- G4_SrcRegRegion * origSrc = inst->getSrc (i)->asSrcRegRegion ();
6824- origSrc->setRegion ( builder.createRegionDesc ( vs, wd, hs ) );
6837+ G4_SrcRegRegion* origSrc = inst->getSrc (i)->asSrcRegRegion ();
6838+ origSrc->setRegion (builder.createRegionDesc (vs, wd, hs) );
68256839 }
68266840 }
68276841 }
6828- if ( inst->getDst () && !inst->hasNULLDst () )
6842+ if ( inst->getDst () && !inst->hasNULLDst ())
68296843 {
6830- MUST_BE_TRUE ( inst->getDst ()->getHorzStride () != 0 ,
6831- " Bad source region: Width is greater than execution size." );
6844+ MUST_BE_TRUE (inst->getDst ()->getHorzStride () != 0 ,
6845+ " Bad source region: Width is greater than execution size." );
68326846 }
68336847}
68346848
0 commit comments