@@ -19,6 +19,7 @@ struct AccInterval
1919 bool mustBeAcc0 = false ;
2020 bool isAllFloat = false ;
2121 bool isPreAssigned = false ;
22+ bool isRemoved = false ;
2223 int assignedAcc = -1 ;
2324 int spilledAcc = -1 ;
2425 int bundleConflictTimes = 0 ;
@@ -101,6 +102,17 @@ static void setSuppression(int i, unsigned short& BC)
101102 BC |= bc;
102103}
103104
105+ static bool isCommutativeOnSrc12 (G4_INST *inst) {
106+ switch (inst->opcode ()) {
107+ case G4_mad:
108+ case G4_add3:
109+ return true ;
110+ default :
111+ break ;
112+ }
113+ return false ;
114+ }
115+
104116/*
105117 * Bank conflict types:
106118 * 1. any two from same bundle and same bank
@@ -422,8 +434,11 @@ static unsigned getBankConflicts(int srcOpndIdx, unsigned int BC)
422434
423435// returns true if the inst is a candidate for acc substitution
424436// lastUse is also update to point to the last use id of the inst
425- bool AccSubPass::isAccCandidate (G4_INST* inst, int & lastUse, bool & mustBeAcc0, bool & isAllFloat, int & readSuppressionSrcs, int & bundleBC, int & bankBC, std::map<G4_INST*, unsigned int >* BCInfo)
426- {
437+ bool AccSubPass::isAccCandidate (G4_INST *inst, int &lastUse, bool &mustBeAcc0,
438+ bool &isAllFloat, int &readSuppressionSrcs,
439+ int &bundleBC, int &bankBC,
440+ std::map<G4_INST *, unsigned int > *BCInfo,
441+ std::vector<USE_DEF_NODE> *SwappableUses) {
427442 mustBeAcc0 = false ;
428443 isAllFloat = true ;
429444 G4_DstRegRegion* dst = inst->getDst ();
@@ -479,15 +494,38 @@ bool AccSubPass::isAccCandidate(G4_INST* inst, int& lastUse, bool& mustBeAcc0, b
479494 case Opnd_src2:
480495 if (!kernel.fg .builder ->relaxedACCRestrictions3 ())
481496 {
482- return false ;
497+ // If swapAccSub is disabled, skip further checking on src2.
498+ if (!SwappableUses)
499+ return false ;
500+ if (!isCommutativeOnSrc12 (useInst))
501+ return false ;
502+ // As src2 cannot use acc, acc substitution is only
503+ // feasible if src1 and src2 are different.
504+ auto *def1 = useInst->getSingleDef (Opnd_src1);
505+ // If the single-def on src1 is the same as this use-inst,
506+ // the acc substitution following a swap is infeasible.
507+ if (def1 && def1 == inst)
508+ return false ;
509+ // FIXME: If there's any further hardware restrictions on
510+ // src2, please check here.
483511 }
512+ // Q: What's the purpose of this check?
484513 if (!IS_TYPE_FLOAT_FOR_ACC (useInst->getSrc (2 )->getType ()) ||
485514 (useInst->getDst () && !IS_TYPE_FLOAT_FOR_ACC (useInst->getDst ()->getType ())))
486515 {
487516 return false ;
488517 }
489518 break ;
490519 case Opnd_src1:
520+ if (!kernel.fg .builder ->relaxedACCRestrictions3 ()) {
521+ // As src2 cannot use acc, acc substitution is only
522+ // feasible if src1 and src2 are different.
523+ auto *def2 = useInst->getSingleDef (Opnd_src2);
524+ // If the single-def on src2 is the same as this use-inst,
525+ // the acc substitution is infeasible.
526+ if (def2 && def2 == inst)
527+ return false ;
528+ }
491529 if (BC)
492530 {
493531 bundleBC += getBundleConflicts (1 , BC);
@@ -560,6 +598,7 @@ bool AccSubPass::isAccCandidate(G4_INST* inst, int& lastUse, bool& mustBeAcc0, b
560598 // def must be the only define for this use
561599 return false ;
562600 }
601+ MUST_BE_TRUE (useInst->getSingleDef (opndNum) == inst, " this user's single def should be this inst." );
563602
564603 int srcId = useInst->getSrcNum (opndNum);
565604 G4_Operand* src = useInst->getSrc (srcId);
@@ -571,12 +610,30 @@ bool AccSubPass::isAccCandidate(G4_INST* inst, int& lastUse, bool& mustBeAcc0, b
571610 }
572611 if (!useInst->canSrcBeAcc (opndNum))
573612 {
574- return false ;
613+ // Need further check when swapAccSub is enabled and the operand
614+ // number is src2.
615+ if (!SwappableUses || opndNum != Opnd_src2)
616+ return false ;
617+ // When src2 is substitutable and swapAccSub is enabled, need to
618+ // check whether src1 could use acc.
619+ if (!useInst->canSrcBeAcc (Opnd_src1))
620+ return false ;
575621 }
576622 if (!IS_TYPE_FLOAT_FOR_ACC (src->getType ()))
577623 {
578624 isAllFloat = false ;
579625 }
626+ // Record this swappable use if the swapping on it could help acc
627+ // substitution. Both src1 and src2 need recording as, from them, we
628+ // need to build the conflict graph and determine which ones should be
629+ // removed from acc candidates if two acc candidates sit in the same
630+ // ternary instruction, says 'mad'.
631+ if (SwappableUses) {
632+ if (isCommutativeOnSrc12 (useInst) && useInst->getNumSrc () == 3 &&
633+ (opndNum == Opnd_src1 || opndNum == Opnd_src2)) {
634+ SwappableUses->push_back (use);
635+ }
636+ }
580637 }
581638
582639 // we have to avoid the case where the dst is used as both src0 and src1 of a mad
@@ -965,6 +1022,13 @@ void AccSubPass::multiAccSub(G4_BB* bb)
9651022 }
9661023 }
9671024
1025+ bool EnableSwapAccSub =
1026+ kernel.getOptions ()->getOption (vISA_EnableSwapAccSub) &&
1027+ !kernel.fg .builder ->relaxedACCRestrictions3 ();
1028+ // Each candidate is an acc interval and its list of associated swappable
1029+ // uses, where a swappable use is such a use, which is one of the
1030+ // commutative operands from that user instruction.
1031+ std::map<AccInterval *, std::vector<USE_DEF_NODE>> SwapCandidates;
9681032 // build intervals for potential acc candidates as well as pre-existing acc uses from mac/mach/addc/etc
9691033 for (auto instIter = bb->begin (), instEnd = bb->end (); instIter != instEnd; ++instIter)
9701034 {
@@ -986,7 +1050,11 @@ void AccSubPass::multiAccSub(G4_BB* bb)
9861050 int bundleBCTimes = 0 ;
9871051 int bankBCTimes = 0 ;
9881052 int readSuppressionSrcs = 0 ;
989- if (isAccCandidate (inst, lastUseId, mustBeAcc0, isAllFloat, readSuppressionSrcs, bundleBCTimes, bankBCTimes, &BCInfo))
1053+ std::vector<USE_DEF_NODE> SwappableUseList;
1054+ if (isAccCandidate (inst, lastUseId, mustBeAcc0, isAllFloat,
1055+ readSuppressionSrcs, bundleBCTimes, bankBCTimes,
1056+ &BCInfo,
1057+ EnableSwapAccSub ? &SwappableUseList : nullptr ))
9901058 {
9911059 // this is a potential candidate for acc substitution
9921060 AccInterval* newInterval = new AccInterval (inst, lastUseId);
@@ -997,15 +1065,124 @@ void AccSubPass::multiAccSub(G4_BB* bb)
9971065 newInterval->suppressionTimes = readSuppressionSrcs;
9981066
9991067 intervals.push_back (newInterval);
1068+
1069+ if (EnableSwapAccSub && !SwappableUseList.empty ())
1070+ std::swap (SwapCandidates[newInterval], SwappableUseList);
1071+ }
1072+ }
1073+ }
1074+
1075+ // Resolve conflicts in the swap candidates and swap operands if necessary.
1076+ if (EnableSwapAccSub) {
1077+ // For each use inst, at most two operands could be swappable. If both
1078+ // of them are populated, that 2 candidates are conflict.
1079+ // TODO: So far, we only consider swap on src1 and src2. But, for
1080+ // instructions like add3, src0, src1, and src2 are all commutative.
1081+ std::map<G4_INST *, std::pair<G4_INST *, G4_INST *>> ConflictUseMap;
1082+ for (auto &I : SwapCandidates) {
1083+ for (auto &U : I.second ) {
1084+ MUST_BE_TRUE ((U.second == Opnd_src1 || U.second == Opnd_src2),
1085+ " Only src1 and src2 are swappable." );
1086+ auto MI = ConflictUseMap.insert (
1087+ std::make_pair (U.first , std::make_pair (nullptr , nullptr ))).first ;
1088+ if (U.second == Opnd_src1) {
1089+ MUST_BE_TRUE (MI->second .first == nullptr , " src1 is already populated" );
1090+ MI->second .first = I.first ->inst ;
1091+ } else {
1092+ MUST_BE_TRUE (MI->second .second == nullptr , " src2 is already populated" );
1093+ MI->second .second = I.first ->inst ;
1094+ }
1095+ }
1096+ }
1097+
1098+ // Now, with the conflict use map, build the confict graph on the
1099+ // corresponding definitions. Here, the comparator on instruction local
1100+ // ids is used to ensure that iteration order of the conflict graph (a
1101+ // std::map) follows the program order. By following the program order
1102+ // only, the elimination order is more predictable and consistent from
1103+ // run to run.
1104+ auto comp = [](G4_INST *LHS, G4_INST *RHS) {return LHS->getLocalId () < RHS->getLocalId ();};
1105+ std::map<G4_INST *, std::set<G4_INST *>, decltype (comp)> ConflictGraph (comp);
1106+
1107+ for (auto &I : ConflictUseMap) {
1108+ auto *def1 = I.second .first ;
1109+ auto *def2 = I.second .second ;
1110+ // When both swappable operands are acc candidates, their
1111+ // definitions are conflict.
1112+ if (def1 && def2) {
1113+ ConflictGraph[def1].insert (def2);
1114+ ConflictGraph[def2].insert (def1);
1115+ }
1116+ }
1117+ // Now plan the node elimination order to make the conflict graph fully
1118+ // disconnected. A greedy algorithm is designed to eliminate minimal
1119+ // nodes in order to fully disconnect the graph. In each steps, we
1120+ // remove a node with the maximal degrees but minimal degrees from
1121+ // neighbor nodes.
1122+ std::set<G4_INST *> Eliminated;
1123+ while (!ConflictGraph.empty ()) {
1124+ unsigned MaxDeg = ~0U ;
1125+ unsigned MinNeighDeg = ~0U ;
1126+ G4_INST *Node = nullptr ;
1127+ for (auto &I : ConflictGraph) {
1128+ unsigned Deg = I.second .size ();
1129+ unsigned NeighDeg = 0 ;
1130+ // NeighDeg is counted to tell nodes with the same degree.
1131+ for (auto &I : I.second ) {
1132+ NeighDeg += ConflictGraph[I].size ();
1133+ }
1134+ if (!Node || Deg > MaxDeg ||
1135+ (Deg == MaxDeg && NeighDeg < MinNeighDeg)) {
1136+ Node = I.first ;
1137+ MaxDeg = Deg;
1138+ MinNeighDeg = NeighDeg;
1139+ // TODO: A more comprehensive elimination order would
1140+ // consider the impact on acc intervals, especially when
1141+ // two nodes have the same degree(s). The one reducing the
1142+ // chromatic number should be eliminated so that the result
1143+ // acc interal graph has a smaller max clique size.
1144+ }
1145+ }
1146+ // If all remaining nodes have 0 degree, CG is fully disconnected.
1147+ if (MaxDeg == 0 )
1148+ break ;
1149+ // Eliminate this node.
1150+ auto &Set = ConflictGraph[Node];
1151+ for (auto *N : Set) {
1152+ ConflictGraph[N].erase (Node);
1153+ }
1154+ ConflictGraph.erase (Node);
1155+ Eliminated.insert (Node);
1156+ }
1157+ // Check the remaining node and swap their uses into src1.
1158+ for (auto &I : SwapCandidates) {
1159+ // Skip candidates eliminated but mark them as removed.
1160+ if (Eliminated.count (I.first ->inst )) {
1161+ I.first ->isRemoved = true ;
1162+ continue ;
1163+ }
1164+ // For remaining swap candidates, need to swap operands if
1165+ // necessary.
1166+ for (auto &U : SwapCandidates[I.first ]) {
1167+ // Skip as src1 could use acc.
1168+ if (U.second == Opnd_src1)
1169+ continue ;
1170+ MUST_BE_TRUE (U.second = Opnd_src2,
1171+ " Only src1 or src2 is expected." );
1172+ U.first ->swapSrc (1 , 2 );
1173+ U.first ->swapDefUse (Opnd_src1, Opnd_src2);
10001174 }
10011175 }
10021176 }
10031177
10041178 // modified linear scan to assign free accs to intervals
10051179 AccAssignment accAssign (numGeneralAcc, builder, true );
10061180
1007- for (auto interval : intervals)
1181+ for (auto * interval : intervals)
10081182 {
1183+ if (interval->isRemoved )
1184+ continue ;
1185+
10091186 // expire intervals
10101187 accAssign.expireIntervals (interval);
10111188
@@ -1118,8 +1295,11 @@ void AccSubPass::multiAccSub(G4_BB* bb)
11181295 }
11191296 }
11201297
1121- for (auto interval : intervals)
1298+ for (auto * interval : intervals)
11221299 {
1300+ if (interval->isRemoved )
1301+ continue ;
1302+
11231303 if (!interval->isPreAssigned && interval->assignedAcc != -1 )
11241304 {
11251305 G4_INST* inst = interval->inst ;
@@ -1189,7 +1369,7 @@ void AccSubPass::accSub(G4_BB* bb)
11891369 int bundleC = 0 ;
11901370 int bankC = 0 ;
11911371 int suppression = 0 ;
1192- if (!isAccCandidate (inst, lastUseId, mustBeAcc0, isAllFloat, suppression, bundleC, bankC, nullptr ))
1372+ if (!isAccCandidate (inst, lastUseId, mustBeAcc0, isAllFloat, suppression, bundleC, bankC, nullptr , nullptr ))
11931373 {
11941374 continue ;
11951375 }
0 commit comments