@@ -73,7 +73,6 @@ namespace
7373 FunctionPass (ID)
7474 { }
7575
76- virtual bool doInitialization (Function& F);
7776 virtual bool runOnFunction (Function& F);
7877
7978 virtual void getAnalysisUsage (AnalysisUsage& AU) const
@@ -94,8 +93,20 @@ namespace
9493 // in order of offsets and merging adjacent writes.
9594 void MergeInstructions ();
9695
96+ // Returns the dynamic URB base offset and an immediate const offset
97+ // from the dynamic base. The function calculates the result by walking
98+ // the use-def chain of pUrbOffset.
99+ // If pUrbOffset is an immediate constant (==offset) then
100+ // <nullptr, offset> is returned.
101+ // In all other cases <pUrbOffset, 0> is returned.
102+ std::pair<Value*, unsigned int > GetBaseAndOffset (Value* pUrbOffset);
103+
97104 // represents the map (urb index) --> (instruction, instruction index in BB)
98- std::vector<InstWithIndex> m_writeList;
 105+ // The key consists of a dynamic URB base offset (key.first) and
 106+ // an immediate offset from this dynamic base (key.second).
 107+ // The dynamic URB base offset is null if the URB offset is constant.
108+ std::map<std::pair<Value*, unsigned int >, InstWithIndex> m_writeList;
109+
99110 bool m_bbModified;
100111 static char ID;
101112 };
@@ -109,13 +120,6 @@ namespace
109120
110121} // end of unnamed namespace to contain class definition and auxiliary functions
111122
112- // / Do initialization of the data structure.
113- // / We want to allocate space for the vector only once.
114- bool MergeURBWrites::doInitialization (Function& F)
115- {
116- m_writeList.reserve (128 ); // most of the time we won't exceed offset = 127
117- return false ;
118- }
119123
120124// / This optimization merges shorter writes to URB to get a smaller number of longer writes
121125// / which is more efficient.
@@ -131,7 +135,7 @@ bool MergeURBWrites::doInitialization(Function& F)
131135// / locations with one.
132136// /
133137// / for now, we don't handle the following cases:
134- // / 1) offset is a runtime value
138+ // / 1) channel mask is a runtime value
135139// / 2) handling of writes of size >4
136140// / so e.g. we don't handle |aaaa|bbbbbbbb|cccc| -> |aaaabbbb|bbbbcccc|
137141// / this will be addressed in the future.
@@ -172,31 +176,28 @@ void MergeURBWrites::FillWriteList(BasicBlock& BB)
172176 }
173177
174178 // intrinsic has the format: URB_write (%offset, %mask, %data0, ... , %data7)
175- ConstantInt* pOffset = dyn_cast<ConstantInt>(iit->getOperand (0 ));
176179 ConstantInt* pImmediateMask = dyn_cast<ConstantInt>(iit->getOperand (1 ));
177- if (pOffset == nullptr || pImmediateMask == nullptr || (GetChannelMask (intrinsic) > 0x0F ))
180+ if (pImmediateMask == nullptr || (GetChannelMask (intrinsic) > 0x0F ))
178181 {
179182 // for now, we don't handle the following cases:
180- // 1) offset is a runtime value
181- // 2) mask is a runtime value
182- // 3) handling of writes of size >4
183+ // 1) mask is a runtime value
184+ // 2) handling of writes of size >4
183185 // so e.g. we don't handle |aaaa|bbbbbbbb|cccc| -> |aaaabbbb|bbbbcccc|
184186 // this will be addressed in the future
185187 continue ;
186188 }
187- const unsigned int offset = int_cast<unsigned int >(pOffset->getZExtValue ());
188- // if we reach outside of the vector, grow it (filling with nullptr)
189- if (offset >= m_writeList.size ())
190- {
191- m_writeList.resize (offset + 1 );
192- }
193- auto elem = m_writeList[offset];
189+
190+ std::pair<Value*, unsigned int > baseAndOffset =
191+ GetBaseAndOffset (iit->getOperand (0 ));
192+
193+ auto it = m_writeList.find (baseAndOffset);
194194 // we encountered an instruction writing at the same offset,
195195 // most likely we write RTAI, VAI or PSIZE to vertex header
196196 // or we overwrite the old value
197- if (elem. GetInst () != nullptr )
197+ if (it != m_writeList. end () )
198198 {
199- auto oldMask = GetChannelMask (m_writeList[offset].GetInst ());
199+ const InstWithIndex& instWithIndex = it->second ;
200+ auto oldMask = GetChannelMask (instWithIndex.GetInst ());
200201 auto newMask = GetChannelMask (intrinsic);
201202 // assume the write lengths are <=4
202203 // if we have writes to the same channel, we retain the later one,
@@ -219,21 +220,21 @@ void MergeURBWrites::FillWriteList(BasicBlock& BB)
219220 {
220221 intrinsic->setOperand (
221222 opIndex + 2 ,
222- m_writeList[offset] .GetInst ()->getOperand (opIndex + 2 ));
223+ instWithIndex .GetInst ()->getOperand (opIndex + 2 ));
223224 }
224225 ++opIndex;
225226 takeFromOlderMask = takeFromOlderMask >> 1 ;
226227 }
227228 // after transferring the operands, remove the old instruction and store the new one
228- m_writeList[offset] .GetInst ()->eraseFromParent ();
229+ instWithIndex .GetInst ()->eraseFromParent ();
229230 m_bbModified = true ;
230- m_writeList[offset ] = InstWithIndex (intrinsic, instCounter);
231+ m_writeList[baseAndOffset ] = InstWithIndex (intrinsic, instCounter);
231232 }
232233 }
233234 else
234235 {
235236 // adding new write at this offset
236- m_writeList[offset ] = InstWithIndex (intrinsic, instCounter);
237+ m_writeList[baseAndOffset ] = InstWithIndex (intrinsic, instCounter);
237238 }
238239 }
239240} // FillWriteList()
@@ -253,11 +254,24 @@ void MergeURBWrites::MergeInstructions()
253254 for (auto ii = m_writeList.begin (); ii != m_writeList.end () && ii != last; ++ii)
254255 {
255256 auto next = std::next (ii);
256- if (ii->GetInst () == nullptr || next->GetInst () == nullptr )
257+ if (ii->second . GetInst () == nullptr || next->second . GetInst () == nullptr )
257258 {
258259 // nothing to do, no write at current or next offset
259260 continue ;
260261 }
262+
263+ // ii->first.first is the dynamic URB base offset, may be nullptr
264+ // ii->first.second is the immediate constant offset from ii->first.first
265+ if (ii->first .first != next->first .first )
266+ {
267+ // nothing to do, different base URB offset
268+ continue ;
269+ }
270+ if (ii->first .second + 1 != next->first .second )
271+ {
272+ // nothing to do, not a consecutive URB access
273+ continue ;
274+ }
261275 // We have two instructions, merge them by moving operands from the one appearing
262276 // earlier in the BB to the one appearing later and increasing write length.
263277 //
@@ -281,13 +295,13 @@ void MergeURBWrites::MergeInstructions()
281295 // and 'next' corresponds to 'offset+1'.
282296 //
283297 // determine which instruction is appearing earlier in the BB
284- const bool inOrder = ii->GetPlace () < next->GetPlace ();
285- CallInst* earlierInst = inOrder ? ii->GetInst () : next->GetInst ();
286- CallInst* laterInst = !inOrder ? ii->GetInst () : next->GetInst ();
298+ const bool inOrder = ii->second . GetPlace () < next->second . GetPlace ();
299+ CallInst* earlierInst = inOrder ? ii->second . GetInst () : next->second . GetInst ();
300+ CallInst* laterInst = !inOrder ? ii->second . GetInst () : next->second . GetInst ();
287301
288302 // merge per-channel write masks
289- auto lowWriteMask = GetChannelMask (ii->GetInst ());
290- auto highWriteMask = GetChannelMask (next->GetInst ());
303+ auto lowWriteMask = GetChannelMask (ii->second . GetInst ());
304+ auto highWriteMask = GetChannelMask (next->second . GetInst ());
291305 assert (lowWriteMask <= 0x0F && highWriteMask <= 0x0F );
292306 auto mergedMask = lowWriteMask | (highWriteMask << 4 );
293307
@@ -306,7 +320,7 @@ void MergeURBWrites::MergeInstructions()
306320 }
307321
308322 // now take the smaller of the two offsets from the instruction in the current slot
309- laterInst->setOperand (0 , ii->GetInst ()->getOperand (0 ));
323+ laterInst->setOperand (0 , ii->second . GetInst ()->getOperand (0 ));
310324 // and update the mask operand
311325 auto mergedMaskVal = llvm::ConstantInt::get (
312326 llvm::Type::getInt32Ty (laterInst->getParent ()->getContext ()),
@@ -317,12 +331,59 @@ void MergeURBWrites::MergeInstructions()
317331 earlierInst->eraseFromParent ();
318332 m_bbModified = true ;
319333 ++ii; // skip the next slot since we just considered it as 'next'
320- URBWrite8.push_back (laterInst == ii->GetInst () ? *ii : *next);
334+ if (nullptr == ii->first .first ) // if URB offset is immediate const
335+ {
336+ URBWrite8.push_back (laterInst == ii->second .GetInst () ? ii->second : next->second );
337+ }
321338 } // for
322339
323340} // MergeInstructions
324341
325342
343+ std::pair<Value*, unsigned int > MergeURBWrites::GetBaseAndOffset (Value* pUrbOffset)
344+ {
345+ Value* pBase = pUrbOffset;
346+ unsigned int offset = 0 ;
347+
348+ auto GetConstant = [](Value* pVal)->unsigned int
349+ {
350+ assert (isa<ConstantInt>(pVal));
351+ ConstantInt* pConst = cast<ConstantInt>(pVal);
352+ return int_cast<unsigned int >(pConst->getZExtValue ());
353+ };
354+
355+ if (isa<ConstantInt>(pUrbOffset))
356+ {
357+ Value* pNullBase = nullptr ;
358+ return std::make_pair (
359+ pNullBase,
360+ GetConstant (pUrbOffset));
361+ }
362+ else if (isa<Instruction>(pUrbOffset))
363+ {
364+ Instruction* pInstr = cast<Instruction>(pUrbOffset);
365+ if (pInstr->getOpcode () == Instruction::Add)
366+ {
367+ Value* src0 = pInstr->getOperand (0 );
368+ Value* src1 = pInstr->getOperand (1 );
369+ if (isa<ConstantInt>(src1))
370+ {
371+ auto baseAndOffset = GetBaseAndOffset (src0);
372+ pBase = baseAndOffset.first ;
373+ offset = GetConstant (src1) + baseAndOffset.second ;
374+ }
375+ else if (isa<ConstantInt>(src0))
376+ {
377+ auto baseAndOffset = GetBaseAndOffset (src1);
378+ pBase = baseAndOffset.first ;
379+ offset = GetConstant (src0) + baseAndOffset.second ;
380+ }
381+ }
382+ }
383+
384+ return std::make_pair (pBase, offset);
385+ }
386+
326387llvm::FunctionPass* IGC::createMergeURBWritesPass ()
327388{
328389 return new MergeURBWrites ();
0 commit comments