@@ -141,8 +141,9 @@ static Value *ZExtOrTruncIfNeeded(Value *From, Type *To,
141141 Value *Res = From;
142142 if (From->getType ()->isVectorTy () &&
143143 From->getType ()->getVectorNumElements () == 1 ) {
144- Res = CastInst::CreateBitOrPointerCast (
144+ auto *TmpRes = CastInst::CreateBitOrPointerCast (
145145 Res, From->getType ()->getVectorElementType (), " " , InsertBefore);
146+ Res = TmpRes;
146147 }
147148 if (FromTySz < ToTySz)
148149 Res = CastInst::CreateZExtOrBitCast (Res, To, " " , InsertBefore);
@@ -204,18 +205,18 @@ GenXThreadPrivateMemory::RestoreVectorAfterNormalization(Instruction *From,
204205 Restored = CastInst::Create (Instruction::Trunc, From, To, " " );
205206 } else if (EltSz == genx::QWordBits &&
206207 !(m_useGlobalMem && To->getScalarType ()->isIntegerTy (64 ))) {
207- auto *NewFrom = From;
208208 if (!From->getType ()->getScalarType ()->isPointerTy () &&
209209 To->getScalarType ()->isPointerTy ()) {
210210 assert (From->getType ()->getScalarType ()->isIntegerTy (genx::DWordBits));
211211 Type *NewTy =
212212 VectorType::get (Type::getInt64Ty (*m_ctx),
213213 From->getType ()->getVectorNumElements () / 2 );
214- NewFrom = CastInst::CreateBitOrPointerCast (From, NewTy);
214+ auto * NewFrom = CastInst::CreateBitOrPointerCast (From, NewTy);
215215 NewFrom->insertAfter (From);
216- Restored = CastInst::Create (CastInst::IntToPtr, NewFrom, To);
216+ From = NewFrom;
217+ Restored = CastInst::Create (CastInst::IntToPtr, From, To);
217218 } else
218- Restored = CastInst::CreateBitOrPointerCast (NewFrom , To);
219+ Restored = CastInst::CreateBitOrPointerCast (From , To);
219220 }
220221 if (Restored != From)
221222 Restored->insertAfter (From);
@@ -303,11 +304,12 @@ Value *GenXThreadPrivateMemory::lookForPtrReplacement(Value *Ptr) const {
303304 } else if (isa<ExtractElementInst>(Ptr) &&
304305 lookForPtrReplacement (
305306 cast<ExtractElementInst>(Ptr)->getVectorOperand ())) {
306- if (Ptr->getType ()->isPointerTy ())
307- return CastInst::Create (Instruction::PtrToInt, Ptr,
308- Type::getInt32Ty (*m_ctx), " " ,
309- cast<Instruction>(Ptr));
310- else
307+ if (Ptr->getType ()->isPointerTy ()) {
308+ auto *PTI = CastInst::Create (Instruction::PtrToInt, Ptr,
309+ Type::getInt32Ty (*m_ctx));
310+ PTI->insertAfter (cast<Instruction>(Ptr));
311+ return PTI;
312+ } else
311313 return Ptr;
312314 } else if (auto *CI = dyn_cast<IGCLLVM::CallInst>(Ptr)) {
313315 if (!CI->isIndirectCall () &&
@@ -707,8 +709,9 @@ bool GenXThreadPrivateMemory::replacePhi(PHINode *Phi) {
707709 V->getType ()->isVectorTy () != NonVecTy->isVectorTy ()) {
708710 if (V->getType ()->isVectorTy ()) {
709711 assert (V->getType ()->getVectorNumElements () == 1 );
710- V = CastInst::Create (CastInst::BitCast, V, NonVecTy->getScalarType (),
711- " " , cast<Instruction>(V));
712+ auto *VCast = CastInst::Create (CastInst::BitCast, V, NonVecTy->getScalarType ());
713+ VCast->insertAfter (cast<Instruction>(V));
714+ V = VCast;
712715 }
713716 } else {
714717 assert (0 && " New phi types mismatch" );
@@ -980,7 +983,6 @@ void GenXThreadPrivateMemory::addUsers(Value *V) {
980983
981984void GenXThreadPrivateMemory::collectEachPossibleTPMUsers () {
982985 assert (m_AIUsers.empty ());
983- m_AlreadyAdded.clear ();
984986 // At first collect every alloca user
985987 for (auto B = m_allocaToIntrinsic.begin (), E = m_allocaToIntrinsic.end ();
986988 B != E; ++B) {
@@ -990,15 +992,9 @@ void GenXThreadPrivateMemory::collectEachPossibleTPMUsers() {
990992 }
991993 // Then collect all pointer args - they may be used
992994 // in loads/stores we need to lower to svm intrinsics
993- // Process args if only we are sure
994- // it's necessary
995- if (m_useGlobalMem) {
996- for (auto &Arg : m_args) {
997- // SVM-pointer func arg users should be handled too
998- if (checkSVMNecessary (Arg))
999- addUsers (Arg);
1000- }
1001- }
995+ // m_args already contains only args that require processing
996+ for (auto &Arg : m_args)
997+ addUsers (Arg);
1002998}
1003999
10041000void GenXThreadPrivateMemory::addUsersIfNeeded (Value *V) {
@@ -1030,10 +1026,9 @@ bool GenXThreadPrivateMemory::runOnModule(Module &M) {
10301026 m_ST = STP->getSubtarget ();
10311027 for (auto &F : M)
10321028 visit (F);
1033- if (std::find_if (m_alloca.begin (), m_alloca.end (), checkSVMNecessaryPred) !=
1034- m_alloca.end () ||
1035- std::find_if (m_args.begin (), m_args.end (), checkSVMNecessaryPred) !=
1036- m_args.end ()) {
1029+ if (!m_useGlobalMem &&
1030+ std::find_if (m_alloca.begin (), m_alloca.end (), checkSVMNecessaryPred) !=
1031+ m_alloca.end ()) {
10371032 LLVM_DEBUG (dbgs () << " Switching TPM to SVM\n " );
10381033 // TODO: move the name string to vc-intrinsics *MD::useGlobalMem
10391034 M.addModuleFlag (Module::ModFlagBehavior::Error, " genx.useGlobalMem" , 1 );
@@ -1095,6 +1090,7 @@ bool GenXThreadPrivateMemory::runOnFunction(Function &F) {
10951090 }
10961091
10971092 // Main loop where instructions are replaced one by one.
1093+ m_AlreadyAdded.clear ();
10981094 collectEachPossibleTPMUsers ();
10991095 while (!m_AIUsers.empty ()) {
11001096 Instruction *I = m_AIUsers.front ();
@@ -1145,11 +1141,10 @@ bool GenXThreadPrivateMemory::runOnFunction(Function &F) {
11451141 }
11461142
11471143 for (auto AllocaPair : m_allocaToIntrinsic) {
1148- if (!AllocaPair.first ->use_empty ()) {
1149- for (const auto &U : AllocaPair.first ->users ()) {
1150- assert (U->getNumUses () == 0 );
1151- cast<Instruction>(U)->eraseFromParent ();
1152- }
1144+ while (!AllocaPair.first ->user_empty ()) {
1145+ const auto &U = AllocaPair.first ->user_back ();
1146+ assert (U->getNumUses () == 0 );
1147+ cast<Instruction>(U)->eraseFromParent ();
11531148 }
11541149 assert (AllocaPair.first ->use_empty () &&
11551150 " uses of replaced alloca aren't empty" );
@@ -1188,6 +1183,13 @@ void GenXThreadPrivateMemory::visitAllocaInst(AllocaInst &I) {
11881183
11891184void GenXThreadPrivateMemory::visitFunction (Function &F) {
11901185 for (auto &Arg : F.args ())
1191- if (Arg.getType ()->isPointerTy ())
1186+ if (Arg.getType ()->isPointerTy () && checkSVMNecessaryPred (&Arg)) {
1187+ LLVM_DEBUG (dbgs () << " Switching TPM to SVM: svm arg\n " );
1188+ // TODO: move the name string to vc-intrinsics *MD::useGlobalMem
1189+ if (!m_useGlobalMem)
1190+ F.getParent ()->addModuleFlag (Module::ModFlagBehavior::Error,
1191+ " genx.useGlobalMem" , 1 );
1192+ m_useGlobalMem = true ;
11921193 m_args.push_back (&Arg);
1194+ }
11931195}
0 commit comments