@@ -6,6 +6,7 @@ SPDX-License-Identifier: MIT
66
77============================= end_copyright_notice ===========================*/
88
9+ #include < utility>
910#include " Compiler/GenTTI.h"
1011#include " GenISAIntrinsics/GenIntrinsics.h"
1112#include " GenISAIntrinsics/GenIntrinsicInst.h"
@@ -18,6 +19,7 @@ SPDX-License-Identifier: MIT
1819#include " llvm/Analysis/CodeMetrics.h"
1920#include " llvm/Analysis/LoopInfo.h"
2021#include " llvm/Analysis/ScalarEvolution.h"
22+ #include " llvm/Support/InstructionCost.h"
2123#include " llvmWrapper/Transforms/Utils/LoopUtils.h"
2224#include " common/LLVMWarningsPop.hpp"
2325
@@ -173,6 +175,8 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
173175 }
174176
175177 unsigned LoopUnrollThreshold = ctx->m_DriverInfo .GetLoopUnrollThreshold ();
178+ bool UnrollLoopForCodeSizeOnly =
179+ IGC_IS_FLAG_ENABLED (UnrollLoopForCodeSizeOnly) || (!ctx->m_retryManager .IsFirstTry ());
176180
177181 // override the LoopUnrollThreshold if the registry key is set
178182 if (IGC_GET_FLAG_VALUE (SetLoopUnrollThreshold) != 0 ) {
@@ -274,6 +278,12 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
274278 UP.Force = true ;
275279 }
276280
281+ if (UnrollLoopForCodeSizeOnly) {
282+ UP.Threshold = getLoopSize (L, *this ) + 1 ;
283+ UP.MaxPercentThresholdBoost = 100 ;
284+ UP.Partial = false ;
285+ }
286+
277287 // For all the load/store who (having a GEP to),
278288 // 1. Accessing a fixed size Alloca
279289 // 2. Having an loop-iteration-inducted-only index
@@ -306,8 +316,6 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
306316 //
307317 // TODO: Having an analysis pass to link alloca with loops globally so that they are either unrolled together or not.
308318 // It can potentially do some global cost estimations.
309- // TODO: Having compilation retry enables loop unrolling for this case and determines if unrolling actually helps
310- // reduce register pressure.
311319 const unsigned UnrollMaxCountForAlloca = IGC_GET_FLAG_VALUE (PromoteLoopUnrollwithAllocaCountThreshold);
312320 bool AllocaFound = false ;
313321 if (MaxTripCount && MaxTripCount <= UnrollMaxCountForAlloca &&
@@ -353,7 +361,7 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
353361 UP.MaxIterationsCountToAnalyze = UnrollMaxCountForAlloca;
354362 UP.Threshold += ThresholdBoost;
355363 UP.UpperBound = true ;
356- UP.Force = true ;
364+ UP.Force = UnrollLoopForCodeSizeOnly ? false : true ;
357365
358366 LLVM_DEBUG (dbgs () << " Increasing L:" << L->getName () << " threshold to " << UP.Threshold
359367 << " due to Alloca accessed by:" );
@@ -363,6 +371,9 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
363371 }
364372 }
365373
374+ if (IGC_IS_FLAG_ENABLED (UnrollLoopForCodeSizeOnly))
375+ return ;
376+
366377 unsigned sendMessage = 0 ;
367378 unsigned TripCount = 0 ;
368379 BasicBlock *ExitingBlock = L->getLoopLatch ();
@@ -679,4 +690,20 @@ llvm::InstructionCost GenIntrinsicsTTIImpl::internalCalculateCost(const User *U,
679690
680691 return BaseT::getInstructionCost (U, Operands, CostKind);
681692}
693+
694+ // Strip from LLVM::LoopUnrollPass::ApproximateLoopSize
695+ unsigned getLoopSize (const Loop *L, const TargetTransformInfo &TTI) {
696+ SmallPtrSet<const Value *, 32 > EphValues;
697+
698+ CodeMetrics Metrics;
699+ for (BasicBlock *BB : L->blocks ())
700+ Metrics.analyzeBasicBlock (BB, TTI, EphValues);
701+
702+ InstructionCost LoopSize;
703+ LoopSize = Metrics.NumInsts ;
704+
705+ LoopSize = (LoopSize > 3 /* BEInsns + 1*/ ) ? LoopSize : 3 ;
706+ return *LoopSize.getValue ();
707+ }
708+
682709} // namespace llvm
0 commit comments