Skip to content

Commit 80bc750

Browse files
hliao2igcbot
authored and committed
Turn on '-swapAccSub' by default.
Turn on '-swapAccSub' by default.
1 parent 63f1cc8 commit 80bc750

File tree

3 files changed

+190
-9
lines changed

3 files changed

+190
-9
lines changed

visa/Passes/AccSubstitution.cpp

Lines changed: 188 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ struct AccInterval
1919
bool mustBeAcc0 = false;
2020
bool isAllFloat = false;
2121
bool isPreAssigned = false;
22+
bool isRemoved = false;
2223
int assignedAcc = -1;
2324
int spilledAcc = -1;
2425
int bundleConflictTimes = 0;
@@ -101,6 +102,17 @@ static void setSuppression(int i, unsigned short& BC)
101102
BC |= bc;
102103
}
103104

105+
// Returns true when the opcode of `inst` is G4_mad or G4_add3, the two
// opcodes this pass treats as commutative on src1/src2 (so the two source
// operands may be exchanged); returns false for every other opcode.
static bool isCommutativeOnSrc12(G4_INST *inst) {
106+
switch (inst->opcode()) {
107+
case G4_mad:
108+
case G4_add3:
109+
return true;
110+
default:
111+
break;
112+
}
113+
return false;
114+
}
115+
104116
/*
105117
* Bank conflict types:
106118
* 1. any two from same bundle and same bank
@@ -422,8 +434,11 @@ static unsigned getBankConflicts(int srcOpndIdx, unsigned int BC)
422434

423435
// returns true if the inst is a candidate for acc substitution
424436
// lastUse is also updated to point to the last use id of the inst
425-
bool AccSubPass::isAccCandidate(G4_INST* inst, int& lastUse, bool& mustBeAcc0, bool& isAllFloat, int& readSuppressionSrcs, int& bundleBC, int& bankBC, std::map<G4_INST*, unsigned int>* BCInfo)
426-
{
437+
bool AccSubPass::isAccCandidate(G4_INST *inst, int &lastUse, bool &mustBeAcc0,
438+
bool &isAllFloat, int &readSuppressionSrcs,
439+
int &bundleBC, int &bankBC,
440+
std::map<G4_INST *, unsigned int> *BCInfo,
441+
std::vector<USE_DEF_NODE> *SwappableUses) {
427442
mustBeAcc0 = false;
428443
isAllFloat = true;
429444
G4_DstRegRegion* dst = inst->getDst();
@@ -479,15 +494,38 @@ bool AccSubPass::isAccCandidate(G4_INST* inst, int& lastUse, bool& mustBeAcc0, b
479494
case Opnd_src2:
480495
if (!kernel.fg.builder->relaxedACCRestrictions3())
481496
{
482-
return false;
497+
// If swapAccSub is disabled, skip further checking on src2.
498+
if (!SwappableUses)
499+
return false;
500+
if (!isCommutativeOnSrc12(useInst))
501+
return false;
502+
// As src2 cannot use acc, acc substitution is only
503+
// feasible if src1 and src2 are different.
504+
auto *def1 = useInst->getSingleDef(Opnd_src1);
505+
// If the single-def on src1 is the same as this use-inst,
506+
// the acc substitution following a swap is infeasible.
507+
if (def1 && def1 == inst)
508+
return false;
509+
// FIXME: If there's any further hardware restrictions on
510+
// src2, please check here.
483511
}
512+
// Q: What's the purpose of this check?
484513
if (!IS_TYPE_FLOAT_FOR_ACC(useInst->getSrc(2)->getType()) ||
485514
(useInst->getDst() && !IS_TYPE_FLOAT_FOR_ACC(useInst->getDst()->getType())))
486515
{
487516
return false;
488517
}
489518
break;
490519
case Opnd_src1:
520+
if (!kernel.fg.builder->relaxedACCRestrictions3()) {
521+
// As src2 cannot use acc, acc substitution is only
522+
// feasible if src1 and src2 are different.
523+
auto *def2 = useInst->getSingleDef(Opnd_src2);
524+
// If the single-def on src2 is the same as this use-inst,
525+
// the acc substitution is infeasible.
526+
if (def2 && def2 == inst)
527+
return false;
528+
}
491529
if (BC)
492530
{
493531
bundleBC += getBundleConflicts(1, BC);
@@ -560,6 +598,7 @@ bool AccSubPass::isAccCandidate(G4_INST* inst, int& lastUse, bool& mustBeAcc0, b
560598
// def must be the only define for this use
561599
return false;
562600
}
601+
MUST_BE_TRUE(useInst->getSingleDef(opndNum) == inst, "this user's single def should be this inst.");
563602

564603
int srcId = useInst->getSrcNum(opndNum);
565604
G4_Operand* src = useInst->getSrc(srcId);
@@ -571,12 +610,30 @@ bool AccSubPass::isAccCandidate(G4_INST* inst, int& lastUse, bool& mustBeAcc0, b
571610
}
572611
if (!useInst->canSrcBeAcc(opndNum))
573612
{
574-
return false;
613+
// Need further check when swapAccSub is enabled and the operand
614+
// number is src2.
615+
if (!SwappableUses || opndNum != Opnd_src2)
616+
return false;
617+
// When src2 is substitutable and swapAccSub is enabled, need to
618+
// check whether src1 could use acc.
619+
if (!useInst->canSrcBeAcc(Opnd_src1))
620+
return false;
575621
}
576622
if (!IS_TYPE_FLOAT_FOR_ACC(src->getType()))
577623
{
578624
isAllFloat = false;
579625
}
626+
// Record this swappable use if the swapping on it could help acc
627+
// substitution. Both src1 and src2 need recording as, from them, we
628+
// need to build the conflict graph and determine which ones should be
629+
// removed from acc candidates if two acc candidates sit in the same
630+
// ternary instruction, say 'mad'.
631+
if (SwappableUses) {
632+
if (isCommutativeOnSrc12(useInst) && useInst->getNumSrc() == 3 &&
633+
(opndNum == Opnd_src1 || opndNum == Opnd_src2)) {
634+
SwappableUses->push_back(use);
635+
}
636+
}
580637
}
581638

582639
// we have to avoid the case where the dst is used as both src0 and src1 of a mad
@@ -965,6 +1022,13 @@ void AccSubPass::multiAccSub(G4_BB* bb)
9651022
}
9661023
}
9671024

1025+
bool EnableSwapAccSub =
1026+
kernel.getOptions()->getOption(vISA_EnableSwapAccSub) &&
1027+
!kernel.fg.builder->relaxedACCRestrictions3();
1028+
// Each candidate is an acc interval and its list of associated swappable
1029+
// uses, where a swappable use is a use that is one of the
1030+
// commutative operands from that user instruction.
1031+
std::map<AccInterval *, std::vector<USE_DEF_NODE>> SwapCandidates;
9681032
//build intervals for potential acc candidates as well as pre-existing acc uses from mac/mach/addc/etc
9691033
for (auto instIter = bb->begin(), instEnd = bb->end(); instIter != instEnd; ++instIter)
9701034
{
@@ -986,7 +1050,11 @@ void AccSubPass::multiAccSub(G4_BB* bb)
9861050
int bundleBCTimes = 0;
9871051
int bankBCTimes = 0;
9881052
int readSuppressionSrcs = 0;
989-
if (isAccCandidate(inst, lastUseId, mustBeAcc0, isAllFloat, readSuppressionSrcs, bundleBCTimes, bankBCTimes, &BCInfo))
1053+
std::vector<USE_DEF_NODE> SwappableUseList;
1054+
if (isAccCandidate(inst, lastUseId, mustBeAcc0, isAllFloat,
1055+
readSuppressionSrcs, bundleBCTimes, bankBCTimes,
1056+
&BCInfo,
1057+
EnableSwapAccSub ? &SwappableUseList : nullptr))
9901058
{
9911059
// this is a potential candidate for acc substitution
9921060
AccInterval* newInterval = new AccInterval(inst, lastUseId);
@@ -997,15 +1065,124 @@ void AccSubPass::multiAccSub(G4_BB* bb)
9971065
newInterval->suppressionTimes = readSuppressionSrcs;
9981066

9991067
intervals.push_back(newInterval);
1068+
1069+
if (EnableSwapAccSub && !SwappableUseList.empty())
1070+
std::swap(SwapCandidates[newInterval], SwappableUseList);
1071+
}
1072+
}
1073+
}
1074+
1075+
// Resolve conflicts in the swap candidates and swap operands if necessary.
1076+
if (EnableSwapAccSub) {
1077+
// For each use inst, at most two operands could be swappable. If both
1078+
// of them are populated, those two candidates conflict.
1079+
// TODO: So far, we only consider swap on src1 and src2. But, for
1080+
// instructions like add3, src0, src1, and src2 are all commutative.
1081+
std::map<G4_INST *, std::pair<G4_INST *, G4_INST *>> ConflictUseMap;
1082+
for (auto &I : SwapCandidates) {
1083+
for (auto &U : I.second) {
1084+
MUST_BE_TRUE((U.second == Opnd_src1 || U.second == Opnd_src2),
1085+
"Only src1 and src2 are swappable.");
1086+
auto MI = ConflictUseMap.insert(
1087+
std::make_pair(U.first, std::make_pair(nullptr, nullptr))).first;
1088+
if (U.second == Opnd_src1) {
1089+
MUST_BE_TRUE(MI->second.first == nullptr, "src1 is already populated");
1090+
MI->second.first = I.first->inst;
1091+
} else {
1092+
MUST_BE_TRUE(MI->second.second == nullptr, "src2 is already populated");
1093+
MI->second.second = I.first->inst;
1094+
}
1095+
}
1096+
}
1097+
1098+
// Now, with the conflict use map, build the conflict graph on the
1099+
// corresponding definitions. Here, the comparator on instruction local
1100+
// ids is used to ensure that iteration order of the conflict graph (a
1101+
// std::map) follows the program order. By following the program order
1102+
// only, the elimination order is more predictable and consistent from
1103+
// run to run.
1104+
auto comp = [](G4_INST *LHS, G4_INST *RHS) {return LHS->getLocalId() < RHS->getLocalId();};
1105+
std::map<G4_INST *, std::set<G4_INST *>, decltype(comp)> ConflictGraph(comp);
1106+
1107+
for (auto &I : ConflictUseMap) {
1108+
auto *def1 = I.second.first;
1109+
auto *def2 = I.second.second;
1110+
// When both swappable operands are acc candidates, their
1111+
// definitions conflict.
1112+
if (def1 && def2) {
1113+
ConflictGraph[def1].insert(def2);
1114+
ConflictGraph[def2].insert(def1);
1115+
}
1116+
}
1117+
// Now plan the node elimination order to make the conflict graph fully
1118+
// disconnected. A greedy algorithm is designed to eliminate minimal
1119+
// nodes in order to fully disconnect the graph. In each step, we
1120+
// remove a node with the maximal degree but minimal summed degree of its
1121+
// neighbor nodes.
1122+
std::set<G4_INST *> Eliminated;
1123+
while (!ConflictGraph.empty()) {
1124+
unsigned MaxDeg = ~0U;
1125+
unsigned MinNeighDeg = ~0U;
1126+
G4_INST *Node = nullptr;
1127+
for (auto &I : ConflictGraph) {
1128+
unsigned Deg = I.second.size();
1129+
unsigned NeighDeg = 0;
1130+
// NeighDeg is counted to tell apart nodes with the same degree.
1131+
for (auto &I : I.second) {
1132+
NeighDeg += ConflictGraph[I].size();
1133+
}
1134+
if (!Node || Deg > MaxDeg ||
1135+
(Deg == MaxDeg && NeighDeg < MinNeighDeg)) {
1136+
Node = I.first;
1137+
MaxDeg = Deg;
1138+
MinNeighDeg = NeighDeg;
1139+
// TODO: A more comprehensive elimination order would
1140+
// consider the impact on acc intervals, especially when
1141+
// two nodes have the same degree(s). The one reducing the
1142+
// chromatic number should be eliminated so that the result
1143+
// acc interval graph has a smaller max clique size.
1144+
}
1145+
}
1146+
// If all remaining nodes have 0 degree, CG is fully disconnected.
1147+
if (MaxDeg == 0)
1148+
break;
1149+
// Eliminate this node.
1150+
auto &Set = ConflictGraph[Node];
1151+
for (auto *N : Set) {
1152+
ConflictGraph[N].erase(Node);
1153+
}
1154+
ConflictGraph.erase(Node);
1155+
Eliminated.insert(Node);
1156+
}
1157+
// Check the remaining nodes and swap their uses into src1.
1158+
for (auto &I : SwapCandidates) {
1159+
// Skip candidates eliminated but mark them as removed.
1160+
if (Eliminated.count(I.first->inst)) {
1161+
I.first->isRemoved = true;
1162+
continue;
1163+
}
1164+
// For remaining swap candidates, need to swap operands if
1165+
// necessary.
1166+
for (auto &U : SwapCandidates[I.first]) {
1167+
// Skip as src1 could use acc.
1168+
if (U.second == Opnd_src1)
1169+
continue;
1170+
// Sanity check: the operand must be src2 here. Compare with '==' so the
// assertion can actually fail and does not overwrite U.second.
MUST_BE_TRUE(U.second == Opnd_src2,
1171+
"Only src1 or src2 is expected.");
1172+
U.first->swapSrc(1, 2);
1173+
U.first->swapDefUse(Opnd_src1, Opnd_src2);
10001174
}
10011175
}
10021176
}
10031177

10041178
//modified linear scan to assign free accs to intervals
10051179
AccAssignment accAssign(numGeneralAcc, builder, true);
10061180

1007-
for (auto interval : intervals)
1181+
for (auto *interval : intervals)
10081182
{
1183+
if (interval->isRemoved)
1184+
continue;
1185+
10091186
// expire intervals
10101187
accAssign.expireIntervals(interval);
10111188

@@ -1118,8 +1295,11 @@ void AccSubPass::multiAccSub(G4_BB* bb)
11181295
}
11191296
}
11201297

1121-
for (auto interval : intervals)
1298+
for (auto *interval : intervals)
11221299
{
1300+
if (interval->isRemoved)
1301+
continue;
1302+
11231303
if (!interval->isPreAssigned && interval->assignedAcc != -1)
11241304
{
11251305
G4_INST* inst = interval->inst;
@@ -1189,7 +1369,7 @@ void AccSubPass::accSub(G4_BB* bb)
11891369
int bundleC = 0;
11901370
int bankC = 0;
11911371
int suppression = 0;
1192-
if (!isAccCandidate(inst, lastUseId, mustBeAcc0, isAllFloat, suppression, bundleC, bankC, nullptr))
1372+
if (!isAccCandidate(inst, lastUseId, mustBeAcc0, isAllFloat, suppression, bundleC, bankC, nullptr, nullptr))
11931373
{
11941374
continue;
11951375
}

visa/Passes/AccSubstitution.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ class AccSubPass
5555
void multiAccSub(G4_BB* bb);
5656

5757
bool isAccCandidate(G4_INST* inst, int& lastUse, bool& mustBeAcc0, bool& isAllFloat, int& readSuppressionSrcs, int& bundleBC,
58-
int& bankBC, std::map<G4_INST*, unsigned int>* BCInfo);
58+
int& bankBC, std::map<G4_INST*, unsigned int>* BCInfo, std::vector<USE_DEF_NODE> *SwappableUses);
5959

6060
int getNumAccSubDef() const { return numAccSubDef; }
6161
int getNumAccSubUse() const { return numAccSubUse; }

visa/include/VISAOptionsDefs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ DEF_VISA_OPTION(vISA_SplitMov64, ET_INT32,"-SplitMov64", "USAGE: -Sp
249249
DEF_VISA_OPTION(vISA_UseOldSubRoutineAugIntf, ET_BOOL, "-useOldSubRoutineAugIntf", UNUSED, false)
250250
DEF_VISA_OPTION(vISA_FastCompileRA, ET_BOOL, "-fastCompileRA", UNUSED, false)
251251
DEF_VISA_OPTION(vISA_HybridRAWithSpill, ET_BOOL, "-hybridRAWithSpill", UNUSED, false)
252+
DEF_VISA_OPTION(vISA_EnableSwapAccSub, ET_BOOL, "-swapAccSub", UNUSED, true)
252253

253254
//=== binary emission options ===
254255
DEF_VISA_OPTION(vISA_Compaction, ET_BOOL, "-nocompaction", UNUSED, true)

0 commit comments

Comments
 (0)