@@ -423,13 +423,14 @@ void ConstantCoalescing::ProcessBlock(
423423
424424 uint offsetInBytes = 0 ;
425425 Value* baseOffsetInBytes = nullptr ;
426+ ExtensionKind Extension = EK_NotExtended;
426427 if (ConstantInt * offsetConstVal = dyn_cast<ConstantInt>(ldRaw->getOffsetValue ()))
427428 {
428429 offsetInBytes = int_cast<uint>(offsetConstVal->getZExtValue ());
429430 }
430431 else
431432 {
432- baseOffsetInBytes = SimpleBaseOffset (ldRaw->getOffsetValue (), offsetInBytes);
433+ baseOffsetInBytes = SimpleBaseOffset (ldRaw->getOffsetValue (), offsetInBytes, Extension );
433434 }
434435 if ((int32_t )offsetInBytes >= 0 )
435436 {
@@ -448,6 +449,7 @@ void ConstantCoalescing::ProcessBlock(
448449 baseOffsetInBytes,
449450 offsetInBytes,
450451 maxEltPlus,
452+ Extension,
451453 baseOffsetInBytes ? indcb_owloads : dircb_owloads);
452454 }
453455 else if (bufType == BINDLESS_CONSTANT_BUFFER
@@ -473,6 +475,7 @@ void ConstantCoalescing::ProcessBlock(
473475 baseOffsetInBytes,
474476 offsetInBytes,
475477 maxEltPlus,
478+ Extension,
476479 indcb_gathers);
477480 }
478481 }
@@ -485,6 +488,7 @@ void ConstantCoalescing::ProcessBlock(
485488 baseOffsetInBytes,
486489 offsetInBytes,
487490 maxEltPlus,
491+ Extension,
488492 indcb_gathers);
489493 }
490494 }
@@ -518,7 +522,8 @@ void ConstantCoalescing::ProcessBlock(
518522 Value* buf_idxv = nullptr ;
519523 Value* elt_idxv = nullptr ;
520524 uint offsetInBytes = 0 ;
521- if (DecomposePtrExp (LI->getPointerOperand (), buf_idxv, elt_idxv, offsetInBytes))
525+ ExtensionKind Extension = EK_NotExtended;
526+ if (DecomposePtrExp (LI->getPointerOperand (), buf_idxv, elt_idxv, offsetInBytes, Extension))
522527 {
523528 // TODO: Disabling constant coalescing when we see that the offset to the constant buffer is negative
524529 // As we handle all negative offsets as uint and some arithmetic operations do not work well. Needs more detailed fix
@@ -527,13 +532,13 @@ void ConstantCoalescing::ProcessBlock(
527532 if (wiAns->isUniform (LI))
528533 { // uniform
529534 if (elt_idxv)
530- MergeUniformLoad (LI, buf_idxv, 0 , elt_idxv, offsetInBytes, maxEltPlus, indcb_owloads);
535+ MergeUniformLoad (LI, buf_idxv, 0 , elt_idxv, offsetInBytes, maxEltPlus, Extension, indcb_owloads);
531536 else
532- MergeUniformLoad (LI, buf_idxv, 0 , nullptr , offsetInBytes, maxEltPlus, dircb_owloads);
537+ MergeUniformLoad (LI, buf_idxv, 0 , nullptr , offsetInBytes, maxEltPlus, Extension, dircb_owloads);
533538 }
534539 else
535540 { // not uniform
536- MergeScatterLoad (LI, buf_idxv, 0 , elt_idxv, offsetInBytes, maxEltPlus, indcb_gathers);
541+ MergeScatterLoad (LI, buf_idxv, 0 , elt_idxv, offsetInBytes, maxEltPlus, Extension, indcb_gathers);
537542 }
538543 }
539544 }
@@ -552,7 +557,7 @@ void ConstantCoalescing::ProcessBlock(
552557 continue ;
553558 if (isa<ConstantPointerNull>(elt_ptrv))
554559 {
555- MergeUniformLoad (LI, nullptr , addrSpace, nullptr , 0 , maxEltPlus, dircb_owloads);
560+ MergeUniformLoad (LI, nullptr , addrSpace, nullptr , 0 , maxEltPlus, EK_NotExtended, dircb_owloads);
556561 }
557562 else if (isa<IntToPtrInst>(elt_ptrv))
558563 {
@@ -565,20 +570,21 @@ void ConstantCoalescing::ProcessBlock(
565570 // As we handle all negative offsets as uint and some arithmetic operations do not work well. Needs more detailed fix
566571 if ((int32_t )offsetInBytes >= 0 )
567572 {
568- MergeUniformLoad (LI, nullptr , addrSpace, nullptr , offsetInBytes, maxEltPlus, dircb_owloads);
573+ MergeUniformLoad (LI, nullptr , addrSpace, nullptr , offsetInBytes, maxEltPlus, EK_NotExtended, dircb_owloads);
569574 }
570575 }
571576 else
572577 { // indirect access
573578 uint offsetInBytes = 0 ;
574- elt_idxv = SimpleBaseOffset (elt_idxv, offsetInBytes);
579+ ExtensionKind Extension = EK_NotExtended;
580+ elt_idxv = SimpleBaseOffset (elt_idxv, offsetInBytes, Extension);
575581 // TODO: Disabling constant coalescing when we see that the offset to the constant buffer is negative
576582 // As we handle all negative offsets as uint and some arithmetic operations do not work well. Needs more detailed fix
577583 if ((int32_t )offsetInBytes >= 0 )
578584 {
579585 if (wiAns->isUniform (LI))
580586 { // uniform
581- MergeUniformLoad (LI, nullptr , addrSpace, elt_idxv, offsetInBytes, maxEltPlus, indcb_owloads);
587+ MergeUniformLoad (LI, nullptr , addrSpace, elt_idxv, offsetInBytes, maxEltPlus, Extension, indcb_owloads);
582588 }
583589 else if (bufType == CONSTANT_BUFFER)
584590 { // not uniform
@@ -595,6 +601,7 @@ void ConstantCoalescing::ProcessBlock(
595601 elt_idxv,
596602 offsetInBytes,
597603 maxEltPlus,
604+ Extension,
598605 indcb_gathers);
599606 }
600607 }
@@ -788,6 +795,7 @@ void ConstantCoalescing::MergeScatterLoad(Instruction* load,
788795 Value* bufIdxV, uint addrSpace,
789796 Value* eltIdxV, uint offsetInBytes,
790797 uint maxEltPlus,
798+ const ExtensionKind& Extension,
791799 std::vector<BufChunk*>& chunk_vec)
792800{
793801 const uint scalarSizeInBytes = load->getType ()->getScalarSizeInBits () / 8 ;
@@ -852,7 +860,7 @@ void ConstantCoalescing::MergeScatterLoad(Instruction* load,
852860 cov_chunk->chunkStart = eltid;
853861 cov_chunk->chunkSize = maxEltPlus;
854862 const uint chunkAlignment = std::max<uint>(alignment, 4 );
855- cov_chunk->chunkIO = CreateChunkLoad (load, cov_chunk, eltid, chunkAlignment);
863+ cov_chunk->chunkIO = CreateChunkLoad (load, cov_chunk, eltid, chunkAlignment, Extension );
856864
857865 // Update load alignment if needed, set it to DWORD aligned
858866 if (alignment < 4 )
@@ -867,7 +875,7 @@ void ConstantCoalescing::MergeScatterLoad(Instruction* load,
867875 {
868876 // combine the initial scalar loads with this incoming load (which can be a vector-load),
869877 // then add extracts
870- CombineTwoLoads (cov_chunk, load, eltid, maxEltPlus);
878+ CombineTwoLoads (cov_chunk, load, eltid, maxEltPlus, Extension );
871879 }
872880 else if (load->getType ()->isVectorTy ())
873881 {
@@ -885,7 +893,7 @@ void ConstantCoalescing::MergeScatterLoad(Instruction* load,
885893 }
886894 else
887895 {
888- AdjustChunk (cov_chunk, start_adj, size_adj);
896+ AdjustChunk (cov_chunk, start_adj, size_adj, Extension );
889897 }
890898 MoveExtracts (cov_chunk, load, (eltid - cov_chunk->chunkStart ));
891899 }
@@ -910,7 +918,7 @@ void ConstantCoalescing::MergeScatterLoad(Instruction* load,
910918 }
911919 else if (start_adj > 0 )
912920 {
913- splitter = AdjustChunkAddExtract (cov_chunk, start_adj, size_adj, eltid);
921+ splitter = AdjustChunkAddExtract (cov_chunk, start_adj, size_adj, eltid, Extension );
914922 }
915923 else if (size_adj > 0 )
916924 {
@@ -922,7 +930,8 @@ void ConstantCoalescing::MergeScatterLoad(Instruction* load,
922930
923931}
924932
925- Value* ConstantCoalescing::FormChunkAddress (BufChunk* chunk)
933+ Value* ConstantCoalescing::FormChunkAddress (
934+ BufChunk* chunk, const ExtensionKind &Extension)
926935{
927936 IGC_ASSERT (nullptr != chunk);
928937 IGC_ASSERT_MESSAGE ((chunk->bufIdxV || chunk->baseIdxV ), " at least one!" );
@@ -959,7 +968,10 @@ Value* ConstantCoalescing::FormChunkAddress(BufChunk* chunk)
959968 if (eac->getType ()->getPrimitiveSizeInBits () <
960969 bufsrc->getType ()->getPrimitiveSizeInBits ())
961970 {
962- eac = irBuilder->CreateZExt (eac, bufsrc->getType ());
971+ if (Extension == EK_SignExt)
972+ eac = irBuilder->CreateSExt (eac, bufsrc->getType ());
973+ else
974+ eac = irBuilder->CreateZExt (eac, bufsrc->getType ());
963975 wiAns->incUpdateDepend (eac, uniformness);
964976 }
965977 IGC_ASSERT (eac->getType () == bufsrc->getType ());
@@ -979,7 +991,8 @@ Value* ConstantCoalescing::FormChunkAddress(BufChunk* chunk)
979991 return eac;
980992}
981993
982- void ConstantCoalescing::CombineTwoLoads (BufChunk* cov_chunk, Instruction* load, uint eltid, uint numelt)
994+ void ConstantCoalescing::CombineTwoLoads (
995+ BufChunk* cov_chunk, Instruction* load, uint eltid, uint numelt, const ExtensionKind &Extension)
983996{
984997 uint eltid0 = cov_chunk->chunkStart ;
985998 uint lb = std::min (eltid0, eltid);
@@ -1013,7 +1026,7 @@ void ConstantCoalescing::CombineTwoLoads(BufChunk* cov_chunk, Instruction* load,
10131026 // modify the address calculation if the chunk-start is changed
10141027 if (eltid0 != cov_chunk->chunkStart )
10151028 {
1016- eac = FormChunkAddress (cov_chunk);
1029+ eac = FormChunkAddress (cov_chunk, Extension );
10171030 }
10181031 // new IntToPtr and new load
10191032 // cannot use irbuilder to create IntToPtr. It may create ConstantExpr instead of instruction
@@ -1160,6 +1173,7 @@ void ConstantCoalescing::MergeUniformLoad(Instruction* load,
11601173 Value* bufIdxV, uint addrSpace,
11611174 Value* eltIdxV, uint offsetInBytes,
11621175 uint maxEltPlus,
1176+ const ExtensionKind &Extension,
11631177 std::vector<BufChunk*>& chunk_vec)
11641178{
11651179 const uint alignment = GetAlignment (load);
@@ -1234,7 +1248,7 @@ void ConstantCoalescing::MergeUniformLoad(Instruction* load,
12341248 cov_chunk->chunkStart = eltid;
12351249 cov_chunk->chunkSize = iSTD::RoundPower2 ((DWORD)maxEltPlus);
12361250 const uint chunkAlignment = std::max<uint>(alignment, 4 );
1237- cov_chunk->chunkIO = CreateChunkLoad (load, cov_chunk, eltid, chunkAlignment);
1251+ cov_chunk->chunkIO = CreateChunkLoad (load, cov_chunk, eltid, chunkAlignment, Extension );
12381252 chunk_vec.push_back (cov_chunk);
12391253 }
12401254 }
@@ -1282,7 +1296,7 @@ void ConstantCoalescing::MergeUniformLoad(Instruction* load,
12821296 }
12831297 else
12841298 {
1285- AdjustChunk (cov_chunk, start_adj, size_adj);
1299+ AdjustChunk (cov_chunk, start_adj, size_adj, Extension );
12861300 }
12871301 MoveExtracts (cov_chunk, load, eltid - cov_chunk->chunkStart );
12881302 }
@@ -1309,7 +1323,7 @@ void ConstantCoalescing::MergeUniformLoad(Instruction* load,
13091323 }
13101324 else if (start_adj > 0 )
13111325 {
1312- splitter = AdjustChunkAddExtract (cov_chunk, start_adj, size_adj, eltid);
1326+ splitter = AdjustChunkAddExtract (cov_chunk, start_adj, size_adj, eltid, Extension );
13131327 }
13141328 else if (size_adj > 0 )
13151329 {
@@ -1398,15 +1412,30 @@ uint ConstantCoalescing::GetOffsetAlignment(Value* val) const
13981412 return 1 ;
13991413}
14001414
1401- Value* ConstantCoalescing::SimpleBaseOffset (Value* elt_idxv, uint& offset)
1415+ Value* ConstantCoalescing::SimpleBaseOffset (
1416+ Value* elt_idxv, uint& offset, ExtensionKind &Extension)
14021417{
14031418 // in case expression comes from a smaller type arithmetic
14041419 if (ZExtInst * reducedOffset = dyn_cast<ZExtInst>(elt_idxv))
14051420 {
1421+ if (Extension == EK_SignExt)
1422+ {
1423+ offset = 0 ;
1424+ return elt_idxv;
1425+ }
1426+
1427+ Extension = EK_ZeroExt;
14061428 elt_idxv = reducedOffset->getOperand (0 );
14071429 }
14081430 if (SExtInst * reducedOffset = dyn_cast<SExtInst>(elt_idxv))
14091431 {
1432+ if (Extension == EK_ZeroExt)
1433+ {
1434+ offset = 0 ;
1435+ return elt_idxv;
1436+ }
1437+
1438+ Extension = EK_SignExt;
14101439 elt_idxv = reducedOffset->getOperand (0 );
14111440 }
14121441
@@ -1432,7 +1461,7 @@ Value* ConstantCoalescing::SimpleBaseOffset(Value* elt_idxv, uint& offset)
14321461 // %535 = or i32 %519, 12
14331462 // %537 = add i32 %535, 16
14341463 uint offset1 = 0 ;
1435- Value* base = SimpleBaseOffset (src0, offset1);
1464+ Value* base = SimpleBaseOffset (src0, offset1, Extension );
14361465 offset = offset1 + static_cast <uint>(csrc1->getZExtValue ());
14371466 return base;
14381467 }
@@ -1528,7 +1557,8 @@ static Value *getPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
15281557}
15291558
15301559bool ConstantCoalescing::DecomposePtrExp (
1531- Value* ptr_val, Value*& buf_idxv, Value*& elt_idxv, uint& offset)
1560+ Value* ptr_val, Value*& buf_idxv, Value*& elt_idxv, uint& offset,
1561+ ExtensionKind &Extension)
15321562{
15331563 buf_idxv = ptr_val;
15341564 elt_idxv = nullptr ;
@@ -1569,7 +1599,7 @@ bool ConstantCoalescing::DecomposePtrExp(
15691599 }
15701600 else
15711601 {
1572- elt_idxv = SimpleBaseOffset (src1, offset);
1602+ elt_idxv = SimpleBaseOffset (src1, offset, Extension );
15731603 }
15741604 return true ;
15751605 }
@@ -1583,7 +1613,7 @@ bool ConstantCoalescing::DecomposePtrExp(
15831613 }
15841614 else
15851615 {
1586- elt_idxv = SimpleBaseOffset (src0, offset);
1616+ elt_idxv = SimpleBaseOffset (src0, offset, Extension );
15871617 }
15881618 return true ;
15891619 }
@@ -1647,7 +1677,8 @@ uint ConstantCoalescing::CheckVectorElementUses(const Instruction* load)
16471677 return maxEltPlus;
16481678}
16491679
1650- Instruction* ConstantCoalescing::CreateChunkLoad (Instruction* seedi, BufChunk* chunk, uint eltid, uint alignment)
1680+ Instruction* ConstantCoalescing::CreateChunkLoad (
1681+ Instruction* seedi, BufChunk* chunk, uint eltid, uint alignment, const ExtensionKind &Extension)
16511682{
16521683 irBuilder->SetInsertPoint (seedi);
16531684 if (LoadInst * load = dyn_cast<LoadInst>(seedi))
@@ -1666,7 +1697,7 @@ Instruction* ConstantCoalescing::CreateChunkLoad(Instruction* seedi, BufChunk* c
16661697 if (eltid == chunk->chunkStart && isa<IntToPtrInst>(eac))
16671698 eac = dyn_cast<IntToPtrInst>(eac)->getOperand (0 );
16681699 else
1669- eac = FormChunkAddress (chunk);
1700+ eac = FormChunkAddress (chunk, Extension );
16701701 }
16711702 else
16721703 {
@@ -1794,7 +1825,8 @@ Instruction* ConstantCoalescing::FindOrAddChunkExtract(BufChunk* cov_chunk, uint
17941825 return splitter;
17951826}
17961827
1797- void ConstantCoalescing::AdjustChunk (BufChunk* cov_chunk, uint start_adj, uint size_adj)
1828+ void ConstantCoalescing::AdjustChunk (
1829+ BufChunk* cov_chunk, uint start_adj, uint size_adj, const ExtensionKind &Extension)
17981830{
17991831 cov_chunk->chunkSize += size_adj;
18001832 cov_chunk->chunkStart -= start_adj;
@@ -1836,7 +1868,7 @@ void ConstantCoalescing::AdjustChunk(BufChunk* cov_chunk, uint start_adj, uint s
18361868 Instruction* expr2 = dyn_cast<Instruction>(expr->getOperand (srcIdx));
18371869 if (expr2 && expr2->hasOneUse ())
18381870 {
1839- if (isa<ZExtInst>(expr2) && isa<BinaryOperator>(expr2->getOperand (0 )))
1871+ if (( isa<ZExtInst>(expr2) || isa<SExtInst>(expr2) ) && isa<BinaryOperator>(expr2->getOperand (0 )))
18401872 expr2 = cast<Instruction>(expr2->getOperand (0 ));
18411873 IGC_ASSERT (isa<BinaryOperator>(expr2));
18421874
@@ -1861,7 +1893,7 @@ void ConstantCoalescing::AdjustChunk(BufChunk* cov_chunk, uint start_adj, uint s
18611893 if (!foundOffset)
18621894 {
18631895 // if we cannot modify the offset, create a new chain of address calculation
1864- eac = FormChunkAddress (cov_chunk);
1896+ eac = FormChunkAddress (cov_chunk, Extension );
18651897 cast<Instruction>(addr_ptr)->setOperand (0 , eac);
18661898 }
18671899 }
@@ -1973,9 +2005,10 @@ void ConstantCoalescing::AdjustChunk(BufChunk* cov_chunk, uint start_adj, uint s
19732005 }
19742006}
19752007
1976- Instruction* ConstantCoalescing::AdjustChunkAddExtract (BufChunk* cov_chunk, uint start_adj, uint size_adj, uint eltid)
2008+ Instruction* ConstantCoalescing::AdjustChunkAddExtract (
2009+ BufChunk* cov_chunk, uint start_adj, uint size_adj, uint eltid, const ExtensionKind &Extension)
19772010{
1978- AdjustChunk (cov_chunk, start_adj, size_adj);
2011+ AdjustChunk (cov_chunk, start_adj, size_adj, Extension );
19792012 return AddChunkExtract (cov_chunk->chunkIO , eltid - cov_chunk->chunkStart );
19802013}
19812014
0 commit comments