@@ -80,7 +80,6 @@ SPDX-License-Identifier: MIT
8080#include " llvmWrapper/Support/TypeSize.h"
8181#include " llvmWrapper/Transforms/Utils/Local.h"
8282
83- #include " vc/Utils/GenX/Intrinsics.h"
8483#include " vc/Utils/General/InstRebuilder.h"
8584
8685#include < functional>
@@ -2815,8 +2814,6 @@ enum class DivRemOptimize {
28152814 Not,
28162815 // Power of 2 case optimization.
28172816 Pow2,
2818- // Not power of 2 case optimization.
2819- NotPow2,
28202817};
28212818
28222819// Check if unsigned UDiv/URem can be optimized,
@@ -2834,17 +2831,7 @@ static DivRemOptimize isSuitableUDivURemOperand(Value *Operand) {
28342831 return DivRemOptimize::Not;
28352832 if (PatternMatch::match (Operand, PatternMatch::m_Power2 ()))
28362833 return DivRemOptimize::Pow2;
2837- // Not power of 2 case.
2838- // For now we expect type to be i32 or vector of i32.
2839- // TODO: support i8, i16 case by creating zext cast to i32.
2840- // TODO: support i64 case by using the same pattern, as for i32 case.
2841- // Now can not be done, as mulh i64 is not supported.
2842- if (OperandTy->getScalarSizeInBits () != 32 )
2843- return DivRemOptimize::Not;
2844- // Expect to be splat.
2845- if (OperandTy->isVectorTy () && !(cast<Constant>(Operand)->getSplatValue ()))
2846- return DivRemOptimize::Not;
2847- return DivRemOptimize::NotPow2;
2834+ return DivRemOptimize::Not;
28482835}
28492836
28502837// Check if unsigned SDiv/URem can be optimized,
@@ -2927,93 +2914,12 @@ static void decomposeUDivPow2(BinaryOperator &UDivOp) {
29272914 Res->takeName (&UDivOp);
29282915}
29292916
2930- // Unsigned optimization if x / y, y positive not power of 2:
2931- // floor ( x / y ) = floor( x / 2^k * 2^k / y )
2932- // m = ceil( 2^k / y ) = (2^k + e ) / y, e - error, e = y - (2^k % y )
2933- // floor( m * x / 2^k) = floor( x / y + e / y * x / 2^k )
2934- // k should be big enough, to( e * x ) / (y * 2^k) < 1 / d,
2935- // as if reminder was d - 1, with error greater the result will be bigger
2936- // x <= 2^32, e / d ~ 1, => k = 32 + ceil( log2( d) )
2937- // Note: m is 33 bit number, so need some additional work:
2938- // m * x = ( m - 2^32 ) * x + 2^32 * x
2939- // Note: another hack: ( a + b) / 2^k = [ (a - b) / 2 + b ] / 2 ^ ( k -1 ).
2940- // Usual algorithm, for every unsigned number:
2941- // p = ceil ( log2(y) )
2942- // m = ceil [ 2^(32 + p) / y ] - save low 32 bits
2943- // q = ( m * x ) >> 32 = mulh( m, x )
2944- // t = ( x - q ) / 2 + q // it is a save variant of ( x + q ) / 2
2945- // ans = t >> ( p - 1 )
2946- // It can be simplified, if the value is near power of 2, so
2947- // if m is <= 2^32 we can remove addition that was added as a hack for multiply
2948- // additional shift for p - 1 also could be removed.
2949- // See Hacker's Delight 10-10.
2950- static void decomposeUDivNotPow2 (BinaryOperator &UDivOp) {
2951- IGC_ASSERT (UDivOp.getOpcode () == Instruction::UDiv);
2952- Value *Dividend = UDivOp.getOperand (0 );
2953- IGC_ASSERT (isSuitableUDivURemOperand (UDivOp.getOperand (1 )) ==
2954- DivRemOptimize::NotPow2);
2955- Constant *Divisor = cast<Constant>(UDivOp.getOperand (1 ));
2956- IRBuilder<> Builder{&UDivOp};
2957- Type *OperationTy = Dividend->getType ();
2958- bool IsVector = OperationTy->isVectorTy ();
2959-
2960- IGC_ASSERT (!IsVector || cast<ConstantDataVector>(Divisor)->isSplat ());
2961- const APInt &DivisorVal =
2962- (IsVector ? cast<ConstantInt>(Divisor->getSplatValue ())
2963- : cast<ConstantInt>(Divisor))
2964- ->getValue ();
2965-
2966- APInt::mu MagicStruct = DivisorVal.magicu ();
2967- const int ElementBitWidth =
2968- Divisor->getType ()->getScalarType ()->getIntegerBitWidth ();
2969- // Even divisors, can pre-shift the dividend to avoid
2970- // extra work at the end.
2971- Value *ShiftedDividend = Dividend;
2972- // Need addition and y is 2 * y'.
2973- if (MagicStruct.a && !DivisorVal[0 ]) {
2974- unsigned ShiftSizeRaw = DivisorVal.countTrailingZeros ();
2975- Constant *ShiftSize =
2976- Constant::getIntegerValue (OperationTy, APInt{32 , ShiftSizeRaw});
2977- ShiftedDividend = Builder.CreateLShr (ShiftedDividend, ShiftSize);
2978- MagicStruct = DivisorVal.lshr (ShiftSizeRaw).magicu (ShiftSizeRaw);
2979-
2980- // Should not change addition quality.
2981- IGC_ASSERT_MESSAGE (!MagicStruct.a , " expected to subtract now" );
2982- IGC_ASSERT_MESSAGE (MagicStruct.s < DivisorVal.getBitWidth (),
2983- " undefined shift" );
2984- }
2985- Constant *MagicConst = Constant::getIntegerValue (OperationTy, MagicStruct.m );
2986- Value *MulH = vc::createAnyIntrinsic (Builder, {ShiftedDividend, MagicConst},
2987- GenXIntrinsic::genx_umulh,
2988- {OperationTy, OperationTy}, " opt" );
2989-
2990- Value *Res = nullptr ;
2991- if (!MagicStruct.a ) {
2992- Constant *Shift =
2993- Constant::getIntegerValue (OperationTy, APInt{32 , MagicStruct.s });
2994- Res = Builder.CreateLShr (MulH, Shift);
2995- } else {
2996- Value *Fixup = Builder.CreateSub (Dividend, MulH, " q_appx" );
2997- Constant *One = Constant::getIntegerValue (OperationTy, APInt{32 , 1 });
2998- Fixup = Builder.CreateLShr (Fixup, One);
2999- Value *Addition = Builder.CreateAdd (Fixup, MulH, " q_appx_add" );
3000- Constant *Shift =
3001- Constant::getIntegerValue (OperationTy, APInt{32 , MagicStruct.s - 1 });
3002- Res = Builder.CreateLShr (Addition, Shift);
3003- }
3004- IGC_ASSERT (Res);
3005- UDivOp.replaceAllUsesWith (Res);
3006- Res->takeName (&UDivOp);
3007- }
3008-
30092917void GenXPatternMatch::visitUDiv (BinaryOperator &I) {
30102918 auto CheckRes = isSuitableUDivURemOperand (I.getOperand (1 ));
30112919 if (CheckRes == DivRemOptimize::Not)
30122920 return ;
3013- Changed = true ;
3014- if (CheckRes == DivRemOptimize::NotPow2)
3015- return decomposeUDivNotPow2 (I);
30162921 IGC_ASSERT (CheckRes == DivRemOptimize::Pow2);
2922+ Changed = true ;
30172923 return decomposeUDivPow2 (I);
30182924}
30192925
@@ -3074,33 +2980,12 @@ static void decomposeURemPow2(BinaryOperator &URemOp) {
30742980 Res->takeName (&URemOp);
30752981}
30762982
3077- // Optimization for unsigned x % y, y is not power of 2 value.
3078- // x % y = x - x * (x / y)
3079- static void decomposeURemNotPow2 (BinaryOperator &URemOp) {
3080- IGC_ASSERT (URemOp.getOpcode () == Instruction::URem);
3081- Value *Dividend = URemOp.getOperand (0 );
3082- IGC_ASSERT (isSuitableUDivURemOperand (URemOp.getOperand (1 )) ==
3083- DivRemOptimize::NotPow2);
3084- Constant *Divisor = cast<Constant>(URemOp.getOperand (1 ));
3085- IRBuilder Builder{&URemOp};
3086- Value *UDivOp = Builder.CreateUDiv (Dividend, Divisor, " udiv" );
3087- Value *Res = Builder.CreateSub (Dividend,
3088- Builder.CreateMul (Dividend, UDivOp, " tmpRes" ));
3089-
3090- decomposeUDivNotPow2 (*cast<BinaryOperator>(UDivOp));
3091-
3092- URemOp.replaceAllUsesWith (cast<BinaryOperator>(Res));
3093- Res->takeName (&URemOp);
3094- }
3095-
30962983void GenXPatternMatch::visitURem (BinaryOperator &I) {
30972984 auto CheckRes = isSuitableUDivURemOperand (I.getOperand (1 ));
30982985 if (CheckRes == DivRemOptimize::Not)
30992986 return ;
3100- Changed = true ;
3101- if (CheckRes == DivRemOptimize::NotPow2)
3102- return decomposeURemNotPow2 (I);
31032987 IGC_ASSERT (CheckRes == DivRemOptimize::Pow2);
2988+ Changed = true ;
31042989 return decomposeURemPow2 (I);
31052990}
31062991
0 commit comments