Skip to content

Commit 744b0f5

Browse files
AnBodrova authored and igcbot committed
Changes in code.
1 parent b084aea commit 744b0f5

File tree

8 files changed

+3
-285
lines changed

8 files changed

+3
-285
lines changed

IGC/Compiler/CISACodeGen/RematAddressArithmetic.cpp

Lines changed: 0 additions & 193 deletions
Original file line numberDiff line numberDiff line change
@@ -46,201 +46,8 @@ class RematAddressArithmetic : public FunctionPass {
4646
bool rematerialize(Instruction* I, SmallVectorImpl<Value*>& Chain);
4747
};
4848

49-
// FunctionPass whose runOnFunction (defined out of line) delegates to the
// private greedyRemat helper.
class CloneAddressArithmetic : public FunctionPass {
50-
51-
public:
52-
// Pass identifier handed to the FunctionPass base; defined out of line.
static char ID;
53-
54-
// Runs the pass-initialization hook against the global PassRegistry.
CloneAddressArithmetic() : FunctionPass(ID)
55-
{
56-
initializeCloneAddressArithmeticPass(*PassRegistry::getPassRegistry());
57-
}
58-
59-
// Declares that this pass preserves the CFG; no analyses are requested.
virtual void getAnalysisUsage(llvm::AnalysisUsage& AU) const override
60-
{
61-
AU.setPreservesCFG();
62-
}
63-
64-
// Pass entry point; returns true if the function was modified.
bool runOnFunction(Function&) override;
65-
66-
private:
67-
// Clones inttoptr instructions (and their address arithmetic) next to
// their load/store users; returns true if anything was cloned.
bool greedyRemat(Function &F);
68-
};
69-
70-
71-
72-
7349
} // end namespace
7450

75-
76-
// Factory: allocates a new CloneAddressArithmetic pass with `new` and returns
// the raw pointer to the caller.
FunctionPass* IGC::createCloneAddressArithmeticPass() {
77-
return new CloneAddressArithmetic();
78-
}
79-
80-
char CloneAddressArithmetic::ID = 0;
81-
82-
#define PASS_FLAG_2 "igc-clone-address-arithmetic"
83-
#define PASS_DESC_2 "Clone Address Arithmetic"
84-
#define PASS_CFG_ONLY_2 false
85-
#define PASS_ANALYSIS_2 false
86-
namespace IGC {
87-
IGC_INITIALIZE_PASS_BEGIN(CloneAddressArithmetic, PASS_FLAG_2, PASS_DESC_2, PASS_CFG_ONLY_2, PASS_ANALYSIS_2)
88-
IGC_INITIALIZE_PASS_END(CloneAddressArithmetic, PASS_FLAG_2, PASS_DESC_2, PASS_CFG_ONLY_2, PASS_ANALYSIS_2)
89-
}
90-
91-
92-
// Returns true when I is one of the instruction kinds this file treats as
// address arithmetic: GEP, extractelement, insertelement, insertvalue, any
// unary instruction other than a load, or a binary operator.
static bool isAddressArithmetic(Instruction* I)
93-
{
94-
if (isa<GetElementPtrInst>(I) ||
95-
isa<ExtractElementInst>(I) ||
96-
isa<InsertElementInst>(I) ||
97-
isa<InsertValueInst>(I) ||
98-
// loads are explicitly excluded from the unary-instruction case
(isa<UnaryInstruction>(I) && !isa<LoadInst>(I)) ||
99-
isa<BinaryOperator>(I))
100-
return true;
101-
102-
return false;
103-
}
104-
105-
// Clones the chain of address-arithmetic instructions that feeds the inttoptr
// instruction I and inserts the clones immediately before I, then redirects
// operand 0 of I to the clone of its original operand (if one was made).
//
// Only operands that live in the same basic block as I, are not PHI nodes,
// satisfy isAddressArithmetic() and have fewer uses than the
// RematUsesThreshold flag value are followed and cloned.
void rematWholeChain(llvm::IntToPtrInst *I) {
106-
107-
// Instructions selected for cloning, in discovery order.
llvm::SmallVector<llvm::Instruction *, 4> RematVector;
108-
// Work list for the breadth-first walk over operands, seeded with I itself.
std::queue<llvm::Instruction *> BFSQ;
109-
BFSQ.push((Instruction *)I);
110-
111-
const unsigned NumOfUsesLimit = IGC_GET_FLAG_VALUE(RematUsesThreshold);
112-
113-
// we are traversing ssa-chain for address arithmetic
// NOTE(review): there is no visited set, so an operand reachable through
// several users is pushed (and later cloned) once per discovery.
114-
while (!BFSQ.empty()) {
115-
116-
llvm::Instruction *CurrI = BFSQ.front();
117-
BFSQ.pop();
118-
119-
for (unsigned int i = 0; i < CurrI->getNumOperands(); ++i) {
120-
121-
// Non-instruction operands (dyn_cast yields null) are skipped.
Instruction *Op = llvm::dyn_cast<Instruction>(CurrI->getOperand(i));
122-
if( Op != NULL) {
123-
124-
bool NotPHI = !llvm::isa<llvm::PHINode>(Op);
125-
// NOTE(review): Op is already known to be an Instruction here, so this
// check is presumably always true -- confirm.
bool NotConstant = !llvm::isa<llvm::Constant>(Op);
126-
// Compared against the block of the root inttoptr I, not of CurrI.
bool SameBB = Op->getParent() == I->getParent();
127-
bool AddressArithmetic = isAddressArithmetic(Op);
128-
129-
// if operand has at least as many uses as the threshold, we do not
// rematerialize it.
130-
// helps with situation like this:
131-
//
132-
// (we don't want to add this to every rematerialized chain of instructions)
133-
// someCommonValue = add base, 10000
134-
//
135-
// mul r0, someCommonValue
136-
// load r0
137-
// ...
138-
// mul r2 someCommonValue
139-
// load r2
140-
bool NotTooManyUses = Op->getNumUses() < NumOfUsesLimit;
141-
142-
if (SameBB && NotConstant && NotPHI && NotTooManyUses && AddressArithmetic) {
143-
144-
BFSQ.push(Op);
145-
RematVector.push_back(Op);
146-
}
147-
}
148-
}
149-
}
150-
151-
// Maps each original instruction to its most recently created clone.
std::unordered_map<Instruction *, Instruction *> OldToNew;
// Walk the discovered instructions in reverse discovery order, so the ones
// found last (deepest in the operand chain) are cloned first.
152-
std::reverse(RematVector.begin(), RematVector.end());
153-
154-
for (auto el : RematVector) {
155-
156-
auto Clone = el->clone();
157-
OldToNew[el] = Clone;
158-
// Rewire operands of the clone to clones created earlier in this loop.
for (unsigned int i = 0; i < Clone->getNumOperands(); ++i) {
159-
160-
// Null for non-instruction operands; null is never a key in OldToNew.
auto OldOp = llvm::dyn_cast<Instruction>(Clone->getOperand(i));
161-
162-
if (OldToNew.count(OldOp)) {
163-
Clone->setOperand(i, OldToNew[OldOp]);
164-
}
165-
}
166-
167-
Clone->setName("remat");
168-
Clone->insertBefore(I);
169-
}
170-
171-
// Finally point I at the clone of its own operand, if that operand was cloned.
auto OldOp = dyn_cast<Instruction>(I->getOperand(0));
172-
if (OldToNew.count(OldOp)) I->setOperand(0, OldToNew[OldOp]);
173-
174-
OldToNew.clear();
175-
RematVector.clear();
176-
}
177-
178-
// For every inttoptr instruction in a sufficiently large basic block whose
// users are all loads or stores, clones the inttoptr right before each user,
// retargets that use to the clone and rematerializes the clone's address
// arithmetic via rematWholeChain(). Returns true if any clone was created.
// The original inttoptr is left in place; this function does not erase it.
bool CloneAddressArithmetic::greedyRemat(Function &F) {
179-
180-
bool Result = false;
181-
llvm::SmallVector<llvm::IntToPtrInst *, 4> ToProcess;
182-
183-
// go through block, collect all inttoptr instructions to do
184-
// remat on them
// Candidates are collected first and processed afterwards, so the blocks are
// not modified while they are being iterated.
185-
for (BasicBlock &BB : F) {
186-
// if block has less than required amount of LLVM IR instructions, skip it
187-
const unsigned Limit = IGC_GET_FLAG_VALUE(RematBlockSize);
188-
if (BB.getInstList().size() < Limit) continue;
189-
190-
for (auto &I : BB) {
191-
192-
auto *CastedIntToPtrInst = llvm::dyn_cast<IntToPtrInst>(&I);
193-
if (CastedIntToPtrInst) ToProcess.push_back(CastedIntToPtrInst);
194-
}
195-
}
196-
197-
for (auto el : ToProcess) {
198-
199-
Value *V = el;
200-
llvm::SmallVector<llvm::Use*, 4> VectorOfUses;
201-
// collect all uses of particular inttoptr inst
// (snapshotted up front because the uses are rewritten in the loop below)
202-
bool usedOnlyInLoadOrStore = true;
203-
for (auto &use : V->uses()) {
204-
205-
// check that this inttoptr instruction only used in load or stores
// NOTE(review): only the kind of the user is checked, not which operand
// of the load/store the inttoptr is.
206-
auto LI = llvm::dyn_cast<LoadInst>(use.getUser());
207-
auto SI = llvm::dyn_cast<StoreInst>(use.getUser());
208-
usedOnlyInLoadOrStore &= (LI != NULL) || (SI != NULL);
209-
210-
VectorOfUses.push_back(&use);
211-
}
212-
213-
// Any user that is neither a load nor a store disqualifies this inttoptr.
if(!usedOnlyInLoadOrStore) continue;
214-
215-
for (auto use : VectorOfUses) {
216-
217-
// take use of inttoptr instruction, clone instruction,
218-
// insert clone right before the use, swap use to clone, remat
219-
auto User = use->getUser();
220-
auto UserInst = llvm::dyn_cast<Instruction>(User);
221-
222-
if(UserInst) {
223-
auto Clone = el->clone();
224-
Clone->setName("cloned_" + el->getName());
225-
Clone->insertBefore(UserInst);
226-
// Retarget this single use to the freshly inserted clone.
*use = Clone;
227-
// Clone is a copy of an IntToPtrInst, hence the cast.
rematWholeChain((llvm::IntToPtrInst *)Clone);
228-
Result = true;
229-
}
230-
}
231-
}
232-
233-
return Result;
234-
}
235-
236-
// Pass entry point: runs greedyRemat on F and reports whether it changed
// the function.
bool CloneAddressArithmetic::runOnFunction(Function& F)
237-
{
238-
bool Modified = false;
239-
Modified |= greedyRemat(F);
240-
return Modified;
241-
}
242-
243-
24451
// Factory: allocates a new RematAddressArithmetic pass with `new` and returns
// the raw pointer to the caller.
FunctionPass* IGC::createRematAddressArithmeticPass() {
24552
return new RematAddressArithmetic();
24653
}

IGC/Compiler/CISACodeGen/RematAddressArithmetic.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@ SPDX-License-Identifier: MIT
1818
namespace IGC {
1919
llvm::FunctionPass* createRematAddressArithmeticPass();
2020
void initializeRematAddressArithmeticPass(llvm::PassRegistry&);
21-
llvm::FunctionPass* createCloneAddressArithmeticPass();
22-
void initializeCloneAddressArithmeticPass(llvm::PassRegistry&);
2321
} // End namespace IGC
2422

2523
#endif // _CISA_REMATADDRESSARITHMETIC_H_

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -855,43 +855,6 @@ void AddLegalizationPasses(CodeGenContext& ctx, IGCPassManager& mpm, PSSignature
855855

856856
// Run address remat after GVN as it may hoist address calculations and
857857
// create PHI nodes with addresses.
858-
859-
if (IGC_IS_FLAG_ENABLED(EnableRemat) || (ctx.m_retryManager.AllowCloneAddressArithmetic() && ctx.type == ShaderType::OPENCL_SHADER)) {
860-
861-
// TODO: This is a workaround that helps to reduce amount of instructions for clone address arithmetic
862-
// it helps with chain of instructions like this
863-
// %remat12 = add i64 %baseArith, 100780848
864-
// %remat13 = add i64 %remat12, %basePtr
865-
// %remat14 = add i64 %remat13, %offsetI
866-
// %remat15 = add i64 %remat14, %offsetJ
867-
// load ...
868-
// ....
869-
// %remat21 = add i64 %baseArith, 201561696
870-
// %remat22 = add i64 %remat21, %basePtr
871-
// %remat23 = add i64 %remat22, %offsetI
872-
// %remat24 = add i64 %remat23, %offsetJ
873-
// load ...
874-
// we can compress this chain of instruction into one "add" for each "load"
875-
// this is achieved by combining reassoc + cse 3 times (each pair hoists one add)
876-
// it should be substituted for general pass when it's implemented
877-
//
878-
// Now it's accessible through flag, for testing purposes
879-
880-
if (IGC_GET_FLAG_VALUE(RematReassocBefore)) {
881-
mpm.add(llvm::createReassociatePass());
882-
mpm.add(llvm::createEarlyCSEPass());
883-
mpm.add(llvm::createReassociatePass());
884-
mpm.add(llvm::createEarlyCSEPass());
885-
mpm.add(llvm::createReassociatePass());
886-
mpm.add(llvm::createEarlyCSEPass());
887-
}
888-
889-
mpm.add(createCloneAddressArithmeticPass());
890-
// cloneAddressArithmetic leaves old instructions unnecessary
891-
// dce pass helps to clean that up
892-
mpm.add(createDeadCodeEliminationPass());
893-
}
894-
895858
mpm.add(createRematAddressArithmeticPass());
896859

897860

IGC/Compiler/CISACodeGen/helper.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2224,8 +2224,6 @@ namespace IGC
22242224
// Disable opt passes using the retry manager states
22252225
if (!pCtx->m_retryManager.AllowPromotePrivateMemory(F))
22262226
OptDisableSet->insert(IGCOpts::LowerGEPForPrivMemPass);
2227-
if (!pCtx->m_retryManager.AllowCloneAddressArithmetic(F))
2228-
OptDisableSet->insert(IGCOpts::CloneAddressArithmeticPass);
22292227
if (!pCtx->m_retryManager.AllowAddressArithmeticSinking(F))
22302228
OptDisableSet->insert(IGCOpts::AddressArithmeticSinkingPass);
22312229
if (!pCtx->m_retryManager.AllowPreRAScheduler(F))

IGC/Compiler/CodeGenContext.cpp

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ namespace IGC
2525
bool allowLICM;
2626
bool allowCodeSinking;
2727
bool allowAddressArithmeticSinking;
28-
bool allowCloneAddressArithmetic;
2928
bool allowSimd32Slicing;
3029
bool allowPromotePrivateMemory;
3130
bool allowPreRAScheduler;
@@ -38,9 +37,9 @@ namespace IGC
3837
};
3938

4039
static const RetryState RetryTable[] = {
41-
// licm codSk AdrSk clAdr Slice PrivM PreRA VISAP URBWr Coals GRF loadSk
42-
{ true, true, false, false, false, true, true, true, true, true, false, false, 1 },
43-
{ false, true, true, true, true, false, false, false, false, false, true, true, 500 }
40+
// licm codSk AdrSk Slice PrivM PreRA VISAP URBWr Coals GRF loadSk
41+
{ true, true, false, false, true, true, true, true, true, false, false, 1 },
42+
{ false, true, true, true, false, false, false, false, false, true, true, 500 }
4443
};
4544

4645
static constexpr size_t RetryTableSize = sizeof(RetryTable) / sizeof(RetryState);
@@ -88,13 +87,6 @@ namespace IGC
8887
return RetryTable[id].allowLICM || !PerFuncRetrySet.empty();
8988
}
9089

91-
// Returns the allowCloneAddressArithmetic flag of the RetryTable row selected
// by the per-function retry state id of F.
bool RetryManager::AllowCloneAddressArithmetic(Function* F) const
92-
{
93-
unsigned id = GetPerFuncRetryStateId(F);
94-
// Guard the table lookup below against an out-of-range state id.
IGC_ASSERT(id < RetryTableSize);
95-
return RetryTable[id].allowCloneAddressArithmetic;
96-
}
97-
9890
bool RetryManager::AllowAddressArithmeticSinking(Function* F) const
9991
{
10092
unsigned id = GetPerFuncRetryStateId(F);

IGC/Compiler/CodeGenPublic.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ namespace IGCOpts
7474
// Pass level optimizations
7575
static const std::string LowerGEPForPrivMemPass = "IGC-LowerGEPForPrivMem";
7676
static const std::string AddressArithmeticSinkingPass = "IGC-AddressArithmeticSinking";
77-
static const std::string CloneAddressArithmeticPass = "IGC-CloneAddressArithmetic";
7877
static const std::string PreRASchedulerPass = "IGC-PreRAScheduler";
7978
static const std::string MergeURBWritePass = "IGC-MergeURBWrites";
8079
static const std::string ConstantCoalescingPass = "IGC-ConstantCoalescing";
@@ -732,7 +731,6 @@ namespace IGC
732731
bool AllowVISAPreRAScheduler(llvm::Function* F = nullptr) const;
733732
bool AllowCodeSinking(llvm::Function* F = nullptr) const;
734733
bool AllowAddressArithmeticSinking(llvm::Function* F = nullptr) const;
735-
bool AllowCloneAddressArithmetic(llvm::Function* F = nullptr) const;
736734
bool AllowSimd32Slicing(llvm::Function* F = nullptr) const;
737735
bool AllowLargeURBWrite(llvm::Function* F = nullptr) const;
738736
bool AllowConstantCoalescing(llvm::Function* F = nullptr) const;

IGC/Compiler/tests/RematAddressArithmetic/cloneAddressArithmetic.ll

Lines changed: 0 additions & 34 deletions
This file was deleted.

IGC/common/igc_flags.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -672,10 +672,6 @@ DECLARE_IGC_REGKEY(bool, ForceSubroutineForEmulation, false, "Force subroutin
672672
DECLARE_IGC_REGKEY(DWORD, InlinedEmulationThreshold, 125000, "Inlined instruction threshold for enabling subroutines", false)
673673
DECLARE_IGC_REGKEY(int, ByPassAllocaSizeHeuristic, 0, "Force some Alloca to pass the pressure heuristic until the given size", false)
674674
DECLARE_IGC_REGKEY(DWORD, MemOptWindowSize, 150, "Size of the window in unit of instructions in which load/stores are allowed to be coalesced. Keep it limited in order to avoid creating long liveranges. Default value is 150", false)
675-
DECLARE_IGC_REGKEY(DWORD, RematBlockSize, 10, "Represents a threshold for a basic block size which determines whether this block will be processed for rematerialization or not", false)
676-
DECLARE_IGC_REGKEY(DWORD, RematUsesThreshold, 5, "Amount of uses after which operand is not rematerialized", false)
677-
DECLARE_IGC_REGKEY(bool, EnableRemat, false, "Enable clone adress arithmetic pass not only on retry", false)
678-
DECLARE_IGC_REGKEY(bool, RematReassocBefore, false, "Enable short sequence of passes before clone address arithmetic pass to potentially decrese amount of operations that will be rematerialized", false)
679675
DECLARE_IGC_REGKEY(bool, ForceNoFP64bRegioning, false, "force regioning rules for FP and 64b FPU instructions", false)
680676
DECLARE_IGC_REGKEY(bool, EmitDebugLoc, true, "Enable generation of .debug_loc section", false)
681677
DECLARE_IGC_REGKEY(bool, EmitOffsetInDbgLoc, false, "Emit offset of private memory in DW_AT_location when available", false)

0 commit comments

Comments
 (0)