@@ -868,28 +868,31 @@ namespace IGC
868868 return size;
869869 }
870870
871- void PushAnalysis::AllocatePushedConstant (
871+ unsigned int PushAnalysis::AllocatePushedConstant (
872872 Instruction* load,
873873 const SimplePushInfo& newChunk,
874874 const unsigned int maxSizeAllowed)
875875 {
876876 if (!newChunk.isBindless &&
877877 newChunk.cbIdx > m_context->m_DriverInfo .MaximumSimplePushBufferID ())
878878 {
879- return ;
879+ return 0 ;
880880 }
881881 unsigned int size = GetSizeInBits (load->getType ()) / 8 ;
882882 IGC_ASSERT_MESSAGE (isa<LoadInst>(load) || isa<LdRawIntrinsic>(load),
883883 " Expected a load instruction" );
884+ PushInfo& pushInfo = m_context->getModuleMetaData ()->pushInfo ;
884885
886+ bool canPromote = false ;
887+ unsigned int sizeGrown = 0 ;
885888 // greedy allocation for now
886889 // first check if we are already pushing from the buffer
887890 unsigned int piIndex;
888891 bool regionFound = false ;
889892
890- for (piIndex = 0 ; piIndex < numSimplePush ; piIndex++)
893+ for (piIndex = 0 ; piIndex < pushInfo. simplePushBufferUsed ; piIndex++)
891894 {
892- const SimplePushData & info = CollectAllSimplePushInfoArr [piIndex];
895+ const SimplePushInfo & info = pushInfo. simplePushInfoArr [piIndex];
893896 // Stateless load - GRF offsets need to match.
894897 if (info.isStateless &&
895898 newChunk.isStateless &&
@@ -922,7 +925,7 @@ namespace IGC
922925 }
923926 if (regionFound)
924927 {
925- SimplePushData & info = CollectAllSimplePushInfoArr [piIndex];
928+ SimplePushInfo & info = pushInfo. simplePushInfoArr [piIndex];
926929 unsigned int newStartOffset = iSTD::RoundDown (
927930 std::min (newChunk.offset , info.offset ),
928931 getMinPushConstantBufferAlignmentInBytes ());
@@ -931,36 +934,54 @@ namespace IGC
931934 getMinPushConstantBufferAlignmentInBytes ());
932935 unsigned int newSize = newEndOffset - newStartOffset;
933936
934- if (newSize <= maxSizeAllowed)
937+ if (newSize - info. size <= maxSizeAllowed)
935938 {
939+ sizeGrown = newSize - info.size ;
940+ canPromote = true ;
936941 info.offset = newStartOffset;
937942 info.size = newSize;
938- info.Load [load] = newChunk.offset ;
939943 }
940944 }
941945
946+ const unsigned int maxNumberOfPushedBuffers = pushInfo.MaxNumberOfPushedBuffers ;
947+
942948 // we couldn't add it to an existing buffer try to add a new one if there is a slot available
943- else
949+ if (canPromote == false &&
950+ maxSizeAllowed > 0 &&
951+ pushInfo.simplePushBufferUsed < maxNumberOfPushedBuffers)
944952 {
945953 unsigned int newStartOffset = iSTD::RoundDown (newChunk.offset , getMinPushConstantBufferAlignmentInBytes ());
946954 unsigned int newEndOffset = iSTD::Round (newChunk.offset + size, getMinPushConstantBufferAlignmentInBytes ());
947955 unsigned int newSize = newEndOffset - newStartOffset;
948956
949957 if (newSize <= maxSizeAllowed)
950958 {
951- SimplePushData& info = CollectAllSimplePushInfoArr[numSimplePush];
959+ canPromote = true ;
960+ sizeGrown = newSize;
961+
962+ piIndex = pushInfo.simplePushBufferUsed ;
963+ SimplePushInfo& info = pushInfo.simplePushInfoArr [piIndex];
952964 info.pushableAddressGrfOffset = newChunk.pushableAddressGrfOffset ;
953965 info.pushableOffsetGrfOffset = newChunk.pushableOffsetGrfOffset ;
954966 info.cbIdx = newChunk.cbIdx ;
955967 info.isStateless = newChunk.isStateless ;
956968 info.isBindless = newChunk.isBindless ;
957969 info.offset = newStartOffset;
958970 info.size = newSize;
959- info. Load [load] = newChunk. offset ;
960- numSimplePush ++;
971+
972+ pushInfo. simplePushBufferUsed ++;
961973 }
962974 }
963- return ;
975+
976+ if (canPromote)
977+ {
978+ // promote the load to be pushed
979+ PromoteLoadToSimplePush (
980+ load,
981+ pushInfo.simplePushInfoArr [piIndex],
982+ newChunk.offset );
983+ }
984+ return sizeGrown;
964985 }
965986
966987 void PushAnalysis::PromoteLoadToSimplePush (Instruction* load, SimplePushInfo& info, unsigned int offset)
@@ -1082,45 +1103,13 @@ namespace IGC
10821103 bool isPushable = IsPushableShaderConstant (instr, info);
10831104 if (isPushable)
10841105 {
1085- AllocatePushedConstant (
1106+ sizePushed += AllocatePushedConstant (
10861107 instr,
10871108 info,
1088- cthreshold); // maxSizeAllowed
1109+ cthreshold - sizePushed ); // maxSizeAllowed
10891110 }
10901111 }
10911112 }
1092-
1093-
1094- PushInfo& pushInfo = m_context->getModuleMetaData ()->pushInfo ;
1095- while ((pushInfo.simplePushBufferUsed < pushInfo.MaxNumberOfPushedBuffers ) && CollectAllSimplePushInfoArr.size ())
1096- {
1097- unsigned int iter = CollectAllSimplePushInfoArr.begin ()->first ;
1098- SimplePushData info;
1099- for (auto I = CollectAllSimplePushInfoArr.begin (), E = CollectAllSimplePushInfoArr.end (); I != E; I++)
1100- {
1101- if (I->second .size > info.size )
1102- {
1103- info = I->second ;
1104- iter = I->first ;
1105- }
1106- }
1107-
1108- SimplePushInfo& newChunk = pushInfo.simplePushInfoArr [pushInfo.simplePushBufferUsed ];
1109- if (sizePushed + info.size <= cthreshold)
1110- {
1111- newChunk.cbIdx = info.cbIdx ;
1112- newChunk.isBindless = info.isBindless ;
1113- newChunk.isStateless = info.isStateless ;
1114- newChunk.offset = info.offset ;
1115- newChunk.size = info.size ;
1116- newChunk.pushableAddressGrfOffset = info.pushableAddressGrfOffset ;
1117- newChunk.pushableOffsetGrfOffset = info.pushableOffsetGrfOffset ;
1118- for (auto I = info.Load .rbegin (), E = info.Load .rend (); I != E; I++)
1119- PromoteLoadToSimplePush (I->first , newChunk, I->second );
1120- pushInfo.simplePushBufferUsed ++;
1121- }
1122- CollectAllSimplePushInfoArr.erase (iter);
1123- }
11241113 }
11251114
11261115 PushConstantMode PushAnalysis::GetPushConstantMode ()
0 commit comments