@@ -5637,29 +5637,30 @@ void HWConformity::conformBB(G4_BB* bb)
56375637}
56385638
56395639//
5640- // SIMD16 addc/subb are illegal on GEN, since they write to acc and there are only 8 acc
5641- // channels for D/UD type. In vISA IR we should get something like
5642- // addc (16) V0 V2 V3
5643- // mov (16) V1 acc0<8;8,1> :ud
5640+ // SIMD16 addc/subb are illegal on GEN, since they write to acc and there are
5641+ // only 8 acc channels for D/UD type. In vISA IR we should get something like
5642+ // addc (16|M0 ) V0 V2 V3
5643+ // use (16|M0 ) V1 ... acc0:ud // or :d
56445644// which needs to be translated to
5645- // addc (8) V0(0) V2(0) V3(0) {Q1}
5646- // mov (8) V1(0) acc0<8;8,1>:ud {Q1}
5647- // addc (8) V0(1) V2(1) V3(1) {Q2}
5648- // mov (8) V1(1) acc0<8;8,1>:ud {Q2}
5645+ // addc (8|M0) V0(0) V2(0) V3(0)
5646+ // use (8|M0) V1(0) ... acc0:ud
5647+ // addc (8|M8) V0(1) V2(1) V3(1)
5648+ // use (8|M8) V1(1) ... acc0:ud
5649+ // NOTE: we also support other consumers such as add.
5650+ //
56495651//
56505652// We do this first thing in HW conformity to avoid REXES from splitting addc/subb incorrectly
56515653// We also count on previous opt to preserve the inst pair by not inserting any acc using inst in between;
56525654// it should hopefully be the case since we generally don't optimize instructions with acc src/dst
56535655//
56545656// If exec size of addc is < 8, we also have to make sure both the addc's dst and the carry move's dst are
56555657// GRF-aligned, since acc's channel is dependent on the dst's subreg offset. In other words, we fix
5656- // addc (1) r1.0 ...
5657- // mov (1) r1.1 acc0.0<0;1,0>
5658+ // addc (1) r1.0 ...
5659+ // mov (1) r1.1 acc0.0<0;1,0>
56585660// into
5659- // addc (1) r1.0 ...
5660- // mov (1) r2.0 acc0.0<0;1,0>
5661- // mov (1) r1.1 r2.0
5662- //
5661+ // addc (1) r1.0 ...
5662+ // mov (1) r2.0 acc0.0<0;1,0>
5663+ // mov (1) r1.1 r2.0
56635664//
56645665bool HWConformity::fixAddcSubb (G4_BB* bb)
56655666{
@@ -5672,40 +5673,59 @@ bool HWConformity::fixAddcSubb(G4_BB* bb)
56725673 inst->getExecSize () != builder.getNativeExecSize ())
56735674 {
56745675 // find the matching carry move
5675- G4_INST* carryMov = nullptr ;
5676- auto movIter = iter;
5677- for (++movIter; movIter != iterEnd; ++movIter )
5676+ G4_INST* carryUse = nullptr ;
5677+ auto srchIter = iter;
5678+ for (++srchIter; srchIter != iterEnd; ++srchIter )
56785679 {
5679- G4_INST* inst2 = *movIter;
5680- if (inst2->opcode () == G4_mov && inst2->getExecSize () == inst->getExecSize () &&
5681- inst2->getSrc (0 )->isAccReg () && inst2->getSrc (0 )->getType () == Type_UD)
5680+ G4_INST* inst2 = *srchIter;
5681+ auto op = inst2->opcode ();
5682+
5683+ bool opPossibleConsumer =
5684+ op == G4_mov || op == G4_add || op == G4_addc ||
5685+ op == G4_mad || op == G4_pseudo_mad;
5686+
5687+ auto srcUsesAcc = [&] (int srcIx) {
5688+ if (srcIx >= inst2->getNumSrc ())
5689+ return false ;
5690+ auto type = inst2->getSrc (srcIx)->getType ();
5691+ return inst2->getSrc (srcIx)->isAccReg () &&
5692+ (type == Type_UD || type == Type_D);
5693+ };
5694+
5695+ // only check for a handful of user instructions
5696+ // this list could be extended
5697+ if (opPossibleConsumer &&
5698+ inst2->getExecSize () == inst->getExecSize () &&
5699+ (srcUsesAcc (0 ) || srcUsesAcc (1 ) || srcUsesAcc (1 )))
56825700 {
5683- carryMov = inst2;
5701+ carryUse = inst2;
56845702 break ;
56855703 }
56865704 else if (inst2->useAcc ())
56875705 {
5706+ // someone redefines acc0; we can stop looking
56885707 break ;
56895708 }
56905709 }
56915710
5692- if (carryMov == NULL )
5711+ if (carryUse == NULL )
56935712 {
56945713 // can't find the move using acc, skip this addc/subb
5695- assert (false && " expect a carry move instruction " );
5714+ assert (false && " unable to find addc/subc consumer " );
56965715 continue ;
56975716 }
56985717
56995718 if (inst->getExecSize () > builder.getNativeExecSize ())
57005719 {
5720+ // we're breaking a bigger instruction into a smaller one
57015721 evenlySplitInst (iter, bb);
5702- evenlySplitInst (movIter , bb);
5722+ evenlySplitInst (srchIter , bb);
57035723
5704- // movIter now points to the second half of move, and we want to move the first move to be
5724+ // srchIter now points to the second half of move, and we want to move the first move to be
57055725 // before the second half of the addc/subb, which is pointed by iter
5706- --movIter ;
5707- G4_INST* mov1 = *movIter ;
5708- bb->erase (movIter );
5726+ --srchIter ;
5727+ G4_INST* mov1 = *srchIter ;
5728+ bb->erase (srchIter );
57095729 bb->insertBefore (iter, mov1);
57105730
57115731 changed = true ;
@@ -5721,10 +5741,10 @@ bool HWConformity::fixAddcSubb(G4_BB* bb)
57215741 insertMovAfter (iter, inst->getDst (), inst->getDst ()->getType (), bb));
57225742 changed = true ;
57235743 }
5724- if (!builder.isOpndAligned (carryMov ->getDst (), 32 ))
5744+ if (!builder.isOpndAligned (carryUse ->getDst (), 32 ))
57255745 {
5726- carryMov ->setDest (
5727- insertMovAfter (movIter, carryMov ->getDst (), carryMov ->getDst ()->getType (), bb));
5746+ carryUse ->setDest (
5747+ insertMovAfter (srchIter, carryUse ->getDst (), carryUse ->getDst ()->getType (), bb));
57285748 changed = true ;
57295749 }
57305750 }
0 commit comments