@@ -5967,7 +5967,6 @@ void GlobalOpndHashTable::dump()
59675967
59685968void G4_Kernel::computeChannelSlicing ()
59695969{
5970- std::unordered_set<G4_Declare*> skipSendDcls;
59715970 unsigned int simdSize = getSimdSize ();
59725971 channelSliced = true ;
59735972
@@ -5978,30 +5977,6 @@ void G4_Kernel::computeChannelSlicing()
59785977 return ;
59795978 }
59805979
5981- for (auto bb : fg)
5982- {
5983- for (auto inst : bb->getInstList ())
5984- {
5985- if (inst->isPseudoKill () || inst->isWriteEnableInst ())
5986- continue ;
5987-
5988- if (inst->isSend ())
5989- {
5990- auto dst = inst->getDst ();
5991- if (dst && dst->isDstRegRegion ())
5992- skipSendDcls.insert (dst->getTopDcl ());
5993-
5994- auto src = inst->getSrc (0 );
5995- if (src && src->isSrcRegRegion ())
5996- skipSendDcls.insert (src->getTopDcl ());
5997-
5998- src = inst->getSrc (1 );
5999- if (src && src->isSrcRegRegion ())
6000- skipSendDcls.insert (src->getTopDcl ());
6001- }
6002- }
6003- }
6004-
60055980 // .dcl V1 size = 128 bytes
60065981 // op (16|M0) V1(0,0) ..
60075982 // op (16|M16) V1(2,0) ..
@@ -6010,16 +5985,21 @@ void G4_Kernel::computeChannelSlicing()
60105985 // Allocation of dcl is still as if it were a
60115986 // SIMD32 kernel.
60125987
6013- // dcl -> lb, rb, emask offset
6014- std::unordered_map<G4_Declare*, std::vector<std::tuple< unsigned int , unsigned int , unsigned int >>> defaultDefs ;
5988+ // Store emask bits that are ever used to define a variable
5989+ std::unordered_map<G4_Declare*, std::bitset< 32 >> emaskRef ;
60155990 for (auto bb : fg)
60165991 {
60175992 for (auto inst : bb->getInstList ())
60185993 {
5994+ if (inst->isSend ())
5995+ continue ;
5996+
60195997 auto dst = inst->getDst ();
6020- if (!dst || !dst->isDstRegRegion () || !dst->getTopDcl () ||
6021- skipSendDcls.find (dst->getTopDcl ()) != skipSendDcls.end () ||
6022- dst->asDstRegRegion ()->getHorzStride () != 1 )
5998+ if (!dst || !dst->getTopDcl () ||
5999+ dst->getHorzStride () != 1 )
6000+ continue ;
6001+
6002+ if (inst->isWriteEnableInst ())
60236003 continue ;
60246004
60256005 auto regFileKind = dst->getTopDcl ()->getRegFile ();
@@ -6031,31 +6011,32 @@ void G4_Kernel::computeChannelSlicing()
60316011 if (dst->getTopDcl ()->getByteSize () <= dstElemSize * simdSize)
60326012 continue ;
60336013
6034- std::vector<std::tuple<unsigned int , unsigned int , unsigned int >> v =
6035- { std::make_tuple (dst->getLeftBound (), dst->getRightBound (), inst->getMaskOffset ()) };
6036- defaultDefs.insert (std::make_pair (dst->getTopDcl (), v));
6014+ auto emaskOffStart = inst->getMaskOffset ();
6015+
6016+ // Reset all bits on first encounter of dcl
6017+ if (emaskRef.find (dst->getTopDcl ()) == emaskRef.end ())
6018+ emaskRef[dst->getTopDcl ()].reset ();
6019+
6020+ // Set bits based on which EM bits are used in the def
6021+ for (unsigned int i = emaskOffStart; i != (emaskOffStart + inst->getExecSize ()); i++)
6022+ {
6023+ emaskRef[dst->getTopDcl ()].set (i);
6024+ }
60376025 }
60386026 }
60396027
6040- for (auto dd : defaultDefs)
6028+ // Check whether any variable's emask usage straddles the lower and upper 16 bits
6029+ for (auto & emRefs : emaskRef)
60416030 {
6042- auto elemSize = dd.first ->getElemSize ();
6043- for (auto defs : dd.second )
6044- {
6045- auto lb = std::get<0 >(defs);
6046- auto rb = std::get<1 >(defs);
6047- auto emaskOffset = std::get<2 >(defs);
6031+ auto & bits = emRefs.second ;
6032+ auto num = bits.to_ulong ();
60486033
6049- // Look for single instruction
6050- if (emaskOffset == 0 && lb == 0 && rb == elemSize * 32 )
6051- channelSliced = false ;
6052- // Or broken instruction
6053- if (emaskOffset == 16 && lb == elemSize * 16 && rb == elemSize * 32 )
6054- channelSliced = false ;
6034+ // Check whether bits are set in both the lower 16 and the upper 16 positions
6035+ if (((num & 0xffff ) != 0 ) && ((num & 0xffff0000 ) != 0 ))
6036+ {
6037+ channelSliced = false ;
6038+ return ;
60556039 }
6056-
6057- if (!channelSliced)
6058- break ;
60596040 }
60606041
60616042 return ;
0 commit comments