@@ -7071,8 +7071,8 @@ void GraphColor::OptimizeActiveRegsFootprint(std::vector<bool>& saveRegs, std::v
70717071//
70727072void GraphColor::addCallerSaveRestoreCode ()
70737073{
7074- // maxCallerSaveSize in Oword
7075- unsigned int maxCallerSaveSize = builder. kernel . fg . callerSaveAreaOffset ;
7074+
7075+ uint32_t maxCallerSaveSize = 0 ;
70767076 unsigned int callerSaveNumGRF = builder.kernel .getCallerSaveLastGRF () + 1 ;
70777077
70787078 for (BB_LIST_ITER it = builder.kernel .fg .begin (); it != builder.kernel .fg .end (); ++it)
@@ -7208,10 +7208,7 @@ void GraphColor::addCallerSaveRestoreCode()
72087208 }
72097209 afterFCallBB->erase (insertRestIt);
72107210
7211- // FIXME: maxCallerSaveSize in unit of OWord, here assume a register is 2 Oword
7212- // builder.kernel.fg.paramOverflowAreaOffset = builder.kernel.fg.callerSaveAreaOffset + callerSaveRegsWritten * 2;
7213- if (maxCallerSaveSize < (builder.kernel .fg .callerSaveAreaOffset + callerSaveRegsWritten * 2 ))
7214- maxCallerSaveSize = (builder.kernel .fg .callerSaveAreaOffset + callerSaveRegsWritten * 2 );
7211+ maxCallerSaveSize = std::max (maxCallerSaveSize, callerSaveRegsWritten * getGRFSize ());
72157212
72167213 if (m_options->getOption (vISA_OptReport))
72177214 {
@@ -7225,7 +7222,8 @@ void GraphColor::addCallerSaveRestoreCode()
72257222 }
72267223 }
72277224
7228- builder.kernel .fg .paramOverflowAreaOffset = maxCallerSaveSize;
7225+ auto byteOffset = builder.kernel .fg .callerSaveAreaOffset * 16 + maxCallerSaveSize;
7226+ builder.kernel .fg .frameSizeInOWord = ROUND (byteOffset, 64 ) / 16 ;
72297227
72307228 builder.instList .clear ();
72317229}
@@ -7235,7 +7233,7 @@ void GraphColor::addCallerSaveRestoreCode()
72357233//
72367234void GraphColor::addCalleeSaveRestoreCode ()
72377235{
7238- builder. kernel . fg . callerSaveAreaOffset = builder. kernel . fg . calleeSaveAreaOffset ;
7236+
72397237 unsigned int callerSaveNumGRF = builder.kernel .getCallerSaveLastGRF () + 1 ;
72407238 unsigned int numCalleeSaveRegs = builder.kernel .getNumCalleeSaveRegs ();
72417239
@@ -7335,15 +7333,11 @@ void GraphColor::addCalleeSaveRestoreCode()
73357333 }
73367334 builder.kernel .fg .getUniqueReturnBlock ()->erase (eraseIt);
73377335
7338- // FIXME: builder.kernel.fg.calleeSaveAreaOffset looks like in OWord, here assume
7339- // register size is two Oword, so
7340- // builder.kernel.fg.callerSaveAreaOffset = calleeSaveAreaOffset + calleeSaveRegsWritten * 2
7341- builder.kernel .fg .callerSaveAreaOffset =
7342- MAX (
7343- builder.kernel .fg .calleeSaveAreaOffset + calleeSaveRegsWritten * 2 ,
7344- builder.kernel .fg .callerSaveAreaOffset );
73457336 builder.instList .clear ();
73467337
7338+ // caller-save starts after callee-save and is 64-byte aligned
7339+ auto byteOffset = builder.kernel .fg .calleeSaveAreaOffset * 16 + calleeSaveRegsWritten * getGRFSize ();
7340+ builder.kernel .fg .callerSaveAreaOffset = ROUND (byteOffset, 64 ) / 16 ;
73477341 if (m_options->getOption (vISA_OptReport))
73487342 {
73497343 std::ofstream optreport;
@@ -7360,11 +7354,11 @@ void GraphColor::addCalleeSaveRestoreCode()
73607354void GraphColor::addGenxMainStackSetupCode ()
73617355{
73627356 uint32_t fpInitVal = (uint32_t )kernel.getIntKernelAttribute (Attributes::ATTR_SpillMemOffset);
7363- // FIXME: a potential failure here is that paramOverflowAreaOffset is already the offset based on
7357+ // FIXME: a potential failure here is that frameSizeInOWord is already the offset based on
73647358 // GlobalSratchOffset, which is the value of fpInitVal. So below we generate code to do
73657359 // SP = fpInitVal + frameSize, which does not make sense. It is correct now since when there's stack call,
73667360 // IGC will not use scratch, so fpInitVal will be 0.
7367- unsigned frameSize = builder.kernel .fg .paramOverflowAreaOffset + builder. kernel . fg . paramOverflowAreaSize ;
7361+ unsigned frameSize = builder.kernel .fg .frameSizeInOWord ;
73687362 G4_Declare* framePtr = builder.kernel .fg .framePtrDcl ;
73697363 G4_Declare* stackPtr = builder.kernel .fg .stackPtrDcl ;
73707364
@@ -7409,7 +7403,7 @@ void GraphColor::addGenxMainStackSetupCode()
74097403//
74107404void GraphColor::addCalleeStackSetupCode ()
74117405{
7412- int frameSize = (int )builder.kernel .fg .paramOverflowAreaOffset /* - builder.kernel.fg.calleeSaveAreaOffset */ ;
7406+ int frameSize = (int )builder.kernel .fg .frameSizeInOWord ;
74137407 G4_Declare* framePtr = builder.kernel .fg .framePtrDcl ;
74147408 G4_Declare* stackPtr = builder.kernel .fg .stackPtrDcl ;
74157409
@@ -7430,7 +7424,7 @@ void GraphColor::addCalleeStackSetupCode()
74307424 }
74317425 //
74327426 // BE_FP = BE_SP
7433- // BE_SP += FrameSize (overflow-area offset + overflow-area size)
7427+ // BE_SP += FrameSize
74347428 //
74357429 {
74367430 G4_DstRegRegion* dst = builder.createDst (stackPtr->getRegVar (), 0 , 0 , 1 , Type_UD);
@@ -7629,6 +7623,8 @@ void GraphColor::addFlagSaveRestoreCode()
76297623
76307624//
76317625// Add GRF caller/callee save/restore code for stack calls.
7626+ // localSpillAreaOwordSize specifies the starting offset of the caller/callee-save area in this frame.
7627+ // It is 64-byte aligned.
76327628//
76337629void GraphColor::addSaveRestoreCode (unsigned localSpillAreaOwordSize)
76347630{
@@ -7645,9 +7641,6 @@ void GraphColor::addSaveRestoreCode(unsigned localSpillAreaOwordSize)
76457641 }
76467642 else
76477643 {
7648- // FIXME: looks like inside addCalleeSaveRestoreCode() and addCallerSaveRestoreCode(),
7649- // the expected offset (of calleeSaveAreaOffset and callerSaveAreaOffset) is 0-based.
7650- // But localSpillAreaOwordSize is based on globalScratchOffset.
76517644 builder.kernel .fg .calleeSaveAreaOffset = localSpillAreaOwordSize;
76527645 addCalleeSaveRestoreCode ();
76537646 }
@@ -9549,7 +9542,9 @@ int GlobalRA::coloringRegAlloc()
95499542
95509543 if (hasStackCall)
95519544 {
9552- unsigned localSpillAreaOwordSize = ROUND (scratchOffset, 16 ) / 16 ;
9545+ // spill/fill intrinsics expect offset in HWord, so round up to 64 byte but maintain it in OWord unit
9546+ // ToDo: we really need to change everything to byte for everyone's sanity..
9547+ unsigned localSpillAreaOwordSize = ROUND (scratchOffset, 64 ) / 16 ;
95539548 // the given localSpillAreaOwordSize is the offset based on globalScratchOffset
95549549 coloring.addSaveRestoreCode (localSpillAreaOwordSize);
95559550 }
@@ -9662,7 +9657,7 @@ int GlobalRA::coloringRegAlloc()
96629657 jitInfo->isSpill = spillMemUsed > 0 ;
96639658 jitInfo->hasStackcalls = kernel.fg .getHasStackCalls ();
96649659
9665- if (builder.kernel .fg .paramOverflowAreaOffset != 0 ) {
9660+ if (builder.kernel .fg .frameSizeInOWord != 0 ) {
96669661 // jitInfo->spillMemUsed is the entire visa stack size. Consider the caller/callee
96679662 // save size if having caller/callee save
96689663 // globalScratchOffset in unit of byte, others in Oword
@@ -9676,13 +9671,8 @@ int GlobalRA::coloringRegAlloc()
96769671 // callerSaveAreaOffset -> ---------------------
96779672 // | caller save |
96789673 // paramOverflowAreaOffset -> ---------------------
9679- // | paramOverflowArea |
9680- // ---------------------
9681- // FIXME: paramOverflowAreaOffset and paramOverflowAreaSize don't seem like be used
9682- // anywhere, do we need them?
96839674 jitInfo->spillMemUsed =
9684- (builder.kernel .fg .paramOverflowAreaOffset +
9685- builder.kernel .fg .paramOverflowAreaSize ) * 16 - globalScratchOffset;
9675+ builder.kernel .fg .frameSizeInOWord * 16 - globalScratchOffset;
96869676
96879677 // reserve spillMemUsed #bytes before 8kb boundary
96889678 kernel.getGTPinData ()->setScratchNextFree (8 *1024 - kernel.getGTPinData ()->getNumBytesScratchUse ());
0 commit comments