@@ -1816,6 +1816,8 @@ void ConstantCoalescing::ScatterToSampler(
18161816
18171817 assert (!load->getType ()->isVectorTy () || load->getType ()->getVectorNumElements () <= 4 );
18181818
1819+ const bool useByteAddress = m_ctx->m_DriverInfo .UsesTypedConstantBuffersWithByteAddress ();
1820+
18191821 // The code below doesn't support crossing a 4-DWORD boundary, i.e. mapping a
18201822 // single input load to multiple sampler loads.
18211823 const bool canBeLoadedUsingSampler =
@@ -1830,26 +1832,34 @@ void ConstantCoalescing::ScatterToSampler(
18301832
18311833 WIAnalysis::WIDependancy baseInBytesDep = wiAns->whichDepend (baseInBytes);
18321834
1833- Value* baseAddressInOwords = GetSamplerAlignedAddress (baseInBytes);
1834- assert (baseAddressInOwords);
1835-
1836- // it is possible that baseInBytes is uniform, yet load is non-uniform due to the use location of load
1837- if (baseAddressInOwords != baseInBytes->getOperand (0 ))
1838- {
1839- Value* newVal = irBuilder->CreateShl (baseAddressInOwords, ConstantInt::get (baseAddressInOwords->getType (), 4 ));
1840- wiAns->incUpdateDepend (newVal, baseInBytesDep);
1841- baseInBytes->replaceAllUsesWith (newVal);
1842- }
1843- else if (wiAns->whichDepend (baseAddressInOwords) != baseInBytesDep)
1835+ // Data address for sampler load, either in OWORDs or in bytes
1836+ Value* chunkBaseAddress = baseInBytes;
1837+ if (!useByteAddress)
18441838 {
1845- // quick fix for a special case: baseAddressInOwords is uniform and baseInBytes is not uniform.
1846- // If we use baseInBytes-src0 (elementIndx) directly at cf-join point by this transform,
1847- // we can change the uniformness of baseAddressInOwords
1848- baseAddressInOwords = irBuilder->CreateShl (baseAddressInOwords, ConstantInt::get (baseAddressInOwords->getType (), 0 ));
1849- wiAns->incUpdateDepend (baseAddressInOwords, baseInBytesDep);
1850- Value* newVal = irBuilder->CreateShl (baseAddressInOwords, ConstantInt::get (baseAddressInOwords->getType (), 4 ));
1851- wiAns->incUpdateDepend (newVal, baseInBytesDep);
1852- baseInBytes->replaceAllUsesWith (newVal);
1839+ // base address is in OWORDs
1840+ Value* baseAddressInOwords = GetSamplerAlignedAddress (baseInBytes);
1841+ assert (baseAddressInOwords);
1842+
1843+ // it is possible that baseInBytes is uniform, yet load is non-uniform due to the use location of load
1844+ if (baseAddressInOwords != baseInBytes->getOperand (0 ))
1845+ {
1846+ Value* newVal = irBuilder->CreateShl (baseAddressInOwords, ConstantInt::get (baseAddressInOwords->getType (), 4 ));
1847+ wiAns->incUpdateDepend (newVal, baseInBytesDep);
1848+ baseInBytes->replaceAllUsesWith (newVal);
1849+ }
1850+ else if (wiAns->whichDepend (baseAddressInOwords) != baseInBytesDep)
1851+ {
1852+ // quick fix for a special case: baseAddressInOwords is uniform and baseInBytes is not uniform.
1853+ // If we use baseInBytes-src0 (elementIndx) directly at cf-join point by this transform,
1854+ // we can change the uniformness of baseAddressInOwords
1855+ baseAddressInOwords = irBuilder->CreateShl (baseAddressInOwords, ConstantInt::get (baseAddressInOwords->getType (), 0 ));
1856+ wiAns->incUpdateDepend (baseAddressInOwords, baseInBytesDep);
1857+ Value* newVal = irBuilder->CreateShl (baseAddressInOwords, ConstantInt::get (baseAddressInOwords->getType (), 4 ));
1858+ wiAns->incUpdateDepend (newVal, baseInBytesDep);
1859+ baseInBytes->replaceAllUsesWith (newVal);
1860+ }
1861+
1862+ chunkBaseAddress = baseAddressInOwords;
18531863 }
18541864 BufChunk* cov_chunk = nullptr ;
18551865 for (std::vector<BufChunk*>::reverse_iterator rit = chunk_vec.rbegin (),
@@ -1858,7 +1868,7 @@ void ConstantCoalescing::ScatterToSampler(
18581868 BufChunk* cur_chunk = *rit;
18591869 // Look for an existing sampler load covering data range of the input load.
18601870 if (CompareBufferBase (cur_chunk->bufIdxV , cur_chunk->addrSpace , bufIdxV, addrSpace) &&
1861- cur_chunk->baseIdxV == baseAddressInOwords )
1871+ cur_chunk->baseIdxV == chunkBaseAddress )
18621872 {
18631873 const uint chunkStartInBytes = cur_chunk->chunkStart * cur_chunk->elementSize ;
18641874 const uint chunkEndInBytes = (cur_chunk->chunkStart + cur_chunk->chunkSize ) * cur_chunk->elementSize ;
@@ -1877,23 +1887,32 @@ void ConstantCoalescing::ScatterToSampler(
18771887 cov_chunk = new BufChunk ();
18781888 cov_chunk->bufIdxV = bufIdxV;
18791889 cov_chunk->addrSpace = addrSpace;
1880- cov_chunk->baseIdxV = baseAddressInOwords;
1881- cov_chunk->elementSize = samplerElementSizeInBytes;
1882- cov_chunk->chunkStart = iSTD::RoundDown ((offsetInBytes / samplerElementSizeInBytes), samplerLoadSizeInDwords);
1883- cov_chunk->chunkSize = samplerLoadSizeInDwords;
1890+ cov_chunk->baseIdxV = chunkBaseAddress;
1891+ cov_chunk->elementSize = samplerElementSizeInBytes; // 4 bytes
1892+ cov_chunk->chunkStart = iSTD::RoundDown ((offsetInBytes / samplerElementSizeInBytes), samplerLoadSizeInDwords); // in DWORDS aligned to OWORDs
1893+ cov_chunk->chunkSize = samplerLoadSizeInDwords; // in DWORDs
1894+
1895+ Value* dataAddress = chunkBaseAddress;
18841896 if (offsetInBytes >= samplerLoadSizeInBytes)
18851897 {
1886- baseAddressInOwords = irBuilder->CreateAdd (baseAddressInOwords, ConstantInt::get (baseAddressInOwords->getType (), (offsetInBytes / samplerLoadSizeInBytes)));
1887- wiAns->incUpdateDepend (baseAddressInOwords, WIAnalysis::RANDOM);
1898+ const uint32_t chunkOffset = (useByteAddress) ?
1899+ (cov_chunk->chunkStart * cov_chunk->elementSize ) : // in bytes
1900+ (offsetInBytes / samplerLoadSizeInBytes); // in OWORDs
1901+ dataAddress = irBuilder->CreateAdd (dataAddress, ConstantInt::get (dataAddress->getType (), chunkOffset));
1902+ wiAns->incUpdateDepend (dataAddress, WIAnalysis::RANDOM);
18881903 }
1889- if (baseAddressInOwords ->getType ()->getIntegerBitWidth () >= 32 )
1904+ if (dataAddress ->getType ()->getIntegerBitWidth () >= 32 && !useByteAddress )
18901905 {
1891- baseAddressInOwords = irBuilder->CreateAnd (baseAddressInOwords , ConstantInt::get (baseAddressInOwords ->getType (), 0x0FFFFFFF ));
1892- wiAns->incUpdateDepend (baseAddressInOwords , WIAnalysis::RANDOM);
1906+ dataAddress = irBuilder->CreateAnd (dataAddress , ConstantInt::get (dataAddress ->getType (), 0x0FFFFFFF ));
1907+ wiAns->incUpdateDepend (dataAddress , WIAnalysis::RANDOM);
18931908 }
1894- baseAddressInOwords = irBuilder->CreateZExtOrTrunc (baseAddressInOwords, irBuilder->getInt32Ty ());
1895- wiAns->incUpdateDepend (baseAddressInOwords, WIAnalysis::RANDOM);
1896- ld = CreateSamplerLoad (baseAddressInOwords, addrSpace);
1909+ if (dataAddress->getType () != irBuilder->getInt32Ty ())
1910+ {
1911+ dataAddress = irBuilder->CreateZExtOrTrunc (dataAddress, irBuilder->getInt32Ty ());
1912+ wiAns->incUpdateDepend (dataAddress, WIAnalysis::RANDOM);
1913+ }
1914+
1915+ ld = CreateSamplerLoad (dataAddress, addrSpace);
18971916 cov_chunk->chunkIO = ld;
18981917 chunk_vec.push_back (cov_chunk);
18991918 }
0 commit comments