@@ -4793,7 +4793,10 @@ void Augmentation::augmentIntfGraph()
47934793 !liveAnalysis.livenessClass (G4_ADDRESS) &&
47944794 kernel.fg .size () > 2 ))
47954795 {
4796- return ;
4796+ if (!kernel.getOption (vISA_DumpRegChart))
4797+ {
4798+ return ;
4799+ }
47974800 }
47984801 }
47994802
@@ -4813,6 +4816,12 @@ void Augmentation::augmentIntfGraph()
48134816 // Sort live-intervals based on their start
48144817 sortLiveIntervals ();
48154818
4819+ if (kernel.getOption (vISA_DumpRegChart))
4820+ {
4821+ gra.regChart = new RegChartDump (gra);
4822+ gra.regChart ->recordLiveIntervals (sortedIntervals);
4823+ }
4824+
48164825 if (gra.verifyAugmentation )
48174826 {
48184827 gra.verifyAugmentation ->loadAugData (sortedIntervals, lrs, intf.liveAnalysis ->getNumSelectedVar (), &intf, gra);
@@ -5690,7 +5699,7 @@ void PhyRegUsage::updateRegUsage(LiveRange* lr)
56905699 }
56915700}
56925701
5693- bool GraphColor::assignColors (ColorHeuristic colorHeuristicGRF, bool doBankConflict, bool highInternalConflict)
5702+ bool GraphColor::assignColors (ColorHeuristic colorHeuristicGRF, bool doBankConflict, bool highInternalConflict, bool honorHints )
56945703{
56955704 if (builder.getOption (vISA_RATrace))
56965705 {
@@ -5758,7 +5767,7 @@ bool GraphColor::assignColors(ColorHeuristic colorHeuristicGRF, bool doBankConfl
57585767 bool skipParentIntf = false ;
57595768 if (lr->hasAllocHint ())
57605769 {
5761- parms.startGRFReg = lr->getAllocHint ();
5770+ parms.startGRFReg = ( lr->getAllocHint () >= maxGRFCanBeUsed ? 0 : lr-> getAllocHint () );
57625771 if (varSplitPass.isPartialDcl (lr->getDcl ()))
57635772 {
57645773 parentDcl = varSplitPass.getParentDcl (lr->getDcl ());
@@ -5966,7 +5975,19 @@ bool GraphColor::assignColors(ColorHeuristic colorHeuristicGRF, bool doBankConfl
59665975 if (!ret)
59675976 return false ;
59685977
5969- if (gra.getIterNo () < 3 )
5978+ if (lr->getSpillCost () == MAXSPILLCOST &&
5979+ !lr->getPhyReg () &&
5980+ honorHints)
5981+ {
5982+ // infinite spill cost range spilled
5983+ // undo all allocations done to split vars
5984+ // and skip adhering to hints for preserving
5985+ // correctness.
5986+ resetTemporaryRegisterAssignments ();
5987+ return assignColors (colorHeuristicGRF, doBankConflict, highInternalConflict, false );
5988+ }
5989+
5990+ if (honorHints && gra.getIterNo () < 3 )
59705991 {
59715992 if (varSplitPass.isSplitDcl (lr->getDcl ()))
59725993 {
@@ -5982,6 +6003,25 @@ bool GraphColor::assignColors(ColorHeuristic colorHeuristicGRF, bool doBankConfl
59826003 {
59836004 auto isChildSpilled = childLR->isSpilled ();
59846005 assignColor (childLR, false , !isChildSpilled);
6006+ // if allocated GRF is different than hint, then
6007+ // undo allocation and let coloring take its course.
6008+ // this can be done only if the childLR wasnt
6009+ // already processed in colorOrder.
6010+ if (!isChildSpilled && childLR->getPhyReg ())
6011+ {
6012+ auto hint = childLR->getAllocHint ();
6013+ if (childLR->getPhyReg ()->asGreg ()->getRegNum () != hint)
6014+ {
6015+ // this is executed only if childLR is guaranteed to be
6016+ // processed later on in colorOrder.
6017+ childLR->resetPhyReg ();
6018+ }
6019+ }
6020+ else if (isChildSpilled && childLR->getPhyReg ())
6021+ {
6022+ // was spilled earlier, got allocation now
6023+ spilledLRs.remove (childLR);
6024+ }
59856025 }
59866026 else
59876027 {
@@ -6070,6 +6110,12 @@ bool GraphColor::assignColors(ColorHeuristic colorHeuristicGRF, bool doBankConfl
60706110 {
60716111 MUST_BE_TRUE (lr->isSpilled (), " LR not marked as spilled, but inserted in spilledLRs list" );
60726112 }
6113+
6114+ // Verify if all LRs have either an allocation or are spilled
6115+ for (auto lr : colorOrder)
6116+ {
6117+ MUST_BE_TRUE (lr->isSpilled () || lr->getPhyReg () || lr->getDcl ()->isSpilled (), " Range without allocation and not spilled" );
6118+ }
60736119#endif
60746120
60756121 return true ;
@@ -6522,8 +6568,10 @@ void GraphColor::resetTemporaryRegisterAssignments()
65226568 if (lrs[i]->getVar ()->getPhyReg () == NULL ) {
65236569 lrs[i]->resetPhyReg ();
65246570 lrs[i]->resetAllocHint ();
6571+ lrs[i]->setSpilled (false );
65256572 }
65266573 }
6574+ spilledLRs.clear ();
65276575}
65286576
65296577void GraphColor::cleanupRedundantARFFillCode ()
@@ -9524,6 +9572,15 @@ int GlobalRA::coloringRegAlloc()
95249572 coloring.addSaveRestoreCode (localSpillAreaOwordSize);
95259573 }
95269574
9575+ if (kernel.getOption (vISA_DumpRegChart))
9576+ {
9577+ assignRegForAliasDcl ();
9578+ computePhyReg ();
9579+ // invoke before expanding spill/fill since
9580+ // it modifies IR
9581+ regChart->dumpRegChart (std::cerr);
9582+ }
9583+
95279584 expandSpillFillIntrinsics ();
95289585
95299586 if (builder.getOption (vISA_OptReport))
@@ -12311,3 +12368,158 @@ void LiveRange::setAllocHint(unsigned int h)
1231112368 if ((h + dcl->getNumRows ()) <= gra.kernel .getNumRegTotal ())
1231212369 allocHint = h;
1231312370}
12371+
12372+ // sortedIntervals comes from augmentation.
12373+ // This can be invoked either post RA where phy regs are assigned to dcls,
12374+ // or after assignColors with lrs and numLRs passed which makes this function
12375+ // use temp allocations from lrs. Doesnt handle sub-routines yet.
12376+ void RegChartDump::dumpRegChart (std::ostream& os, LiveRange** lrs, unsigned int numLRs)
12377+ {
12378+ constexpr unsigned int N = 128 ;
12379+ std::unordered_map<G4_INST*, std::bitset<N>> busyGRFPerInst;
12380+ bool dumpHex = false ;
12381+
12382+ auto getPhyReg = [&](G4_Declare* dcl)
12383+ {
12384+ auto preg = dcl->getRegVar ()->getPhyReg ();
12385+ if (preg)
12386+ return preg;
12387+
12388+ for (unsigned int i = 0 ; i != numLRs; i++)
12389+ {
12390+ LiveRange* lr = lrs[i];
12391+ if (lr->getDcl () == dcl)
12392+ {
12393+ preg = lr->getPhyReg ();
12394+ break ;
12395+ }
12396+ }
12397+
12398+ return preg;
12399+ };
12400+
12401+ for (auto dcl : sortedLiveIntervals)
12402+ {
12403+ if (dcl->getRegFile () != G4_RegFileKind::G4_GRF &&
12404+ dcl->getRegFile () != G4_RegFileKind::G4_INPUT)
12405+ continue ;
12406+
12407+ auto phyReg = getPhyReg (dcl);
12408+ if (!phyReg)
12409+ continue ;
12410+
12411+ if (!phyReg->isGreg ())
12412+ continue ;
12413+
12414+ auto GRFStart = phyReg->asGreg ()->getRegNum ();
12415+ auto numRows = dcl->getNumRows ();
12416+
12417+ auto startInst = startEnd[dcl].first ;
12418+ auto endInst = startEnd[dcl].second ;
12419+
12420+ bool start = (dcl->getRegFile () == G4_RegFileKind::G4_INPUT);
12421+ bool done = false ;
12422+ for (auto bb : gra.kernel .fg .getBBList ())
12423+ {
12424+ for (auto inst : bb->getInstList ())
12425+ {
12426+ if (inst == startInst)
12427+ {
12428+ start = true ;
12429+ continue ;
12430+ }
12431+
12432+ if (!start)
12433+ continue ;
12434+
12435+ for (unsigned int i = GRFStart; i != (GRFStart + numRows); i++)
12436+ {
12437+ busyGRFPerInst[inst].set (i, true );
12438+ }
12439+
12440+ if (inst == endInst)
12441+ {
12442+ done = true ;
12443+ break ;
12444+ }
12445+ }
12446+
12447+ if (done)
12448+ break ;
12449+ }
12450+ }
12451+
12452+ // Now emit instructions with GRFs
12453+ for (auto bb : gra.kernel .fg .getBBList ())
12454+ {
12455+ for (auto inst : bb->getInstList ())
12456+ {
12457+ constexpr unsigned int maxInstLen = 80 ;
12458+ auto item = busyGRFPerInst[inst];
12459+ std::stringstream ss;
12460+ inst->emit (ss);
12461+ auto len = ss.str ().length ();
12462+
12463+ if (len <= maxInstLen)
12464+ {
12465+ os << ss.str ();
12466+ for (unsigned int i = 0 ; i != maxInstLen - ss.str ().length (); i++)
12467+ os << " " ;
12468+ }
12469+ else
12470+ {
12471+ auto tmpStr = ss.str ();
12472+ auto limitedStr = tmpStr.substr (0 , maxInstLen);
12473+ os << std::string (limitedStr);
12474+ }
12475+
12476+ os << " " ;
12477+
12478+ if (!dumpHex)
12479+ {
12480+ // dump GRFs | - busy, * - free
12481+ for (unsigned int i = 0 ; i != N; i++)
12482+ {
12483+ // emit in groups of 10 GRFs
12484+ if (i > 0 && (i % 10 ) == 0 )
12485+ os << " " ;
12486+
12487+ if (item[i] == true )
12488+ os << " |" ; // busy
12489+ else
12490+ os << " *" ; // free
12491+ }
12492+ }
12493+ else
12494+ {
12495+ for (unsigned int i = 0 ; i != N; i+=sizeof (unsigned short )*8 )
12496+ {
12497+ unsigned short busyGRFs = 0 ;
12498+ for (unsigned int j = 0 ; j != sizeof (unsigned short )*8 ; j++)
12499+ {
12500+ auto offset = i + j;
12501+ if (offset < N)
12502+ {
12503+ if (item[offset])
12504+ busyGRFs |= (1 << j);
12505+ }
12506+ }
12507+ printf (" r%d:%4x " , i, busyGRFs);
12508+ }
12509+ }
12510+ os << std::endl;
12511+ }
12512+ os << std::endl;
12513+ }
12514+ }
12515+
12516+ void RegChartDump::recordLiveIntervals (std::vector<G4_Declare*>& dcls)
12517+ {
12518+ sortedLiveIntervals = dcls;
12519+ for (auto dcl : dcls)
12520+ {
12521+ auto start = gra.getStartInterval (dcl);
12522+ auto end = gra.getEndInterval (dcl);
12523+ startEnd.insert (std::make_pair (dcl, std::make_pair (start, end)));
12524+ }
12525+ }
0 commit comments