Skip to content

Commit 63f1cc8

Browse files
itetyush-inteligcbot
authored and committed
[Autobackout][FuncReg]Revert of change: 7d07ef5
Routine for optimizing i32 udiv and urem in VC PatternMatch. It should be used when the divisor is not a power of 2. The routine replaces: x / y = x * M / 2^32, where M is a special constant, and x % y = x - (x / y) * y.
1 parent 2b2e08b commit 63f1cc8

File tree

1 file changed

+3
-118
lines changed

1 file changed

+3
-118
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXPatternMatch.cpp

Lines changed: 3 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@ SPDX-License-Identifier: MIT
8080
#include "llvmWrapper/Support/TypeSize.h"
8181
#include "llvmWrapper/Transforms/Utils/Local.h"
8282

83-
#include "vc/Utils/GenX/Intrinsics.h"
8483
#include "vc/Utils/General/InstRebuilder.h"
8584

8685
#include <functional>
@@ -2815,8 +2814,6 @@ enum class DivRemOptimize {
28152814
Not,
28162815
// Power of 2 case optimization.
28172816
Pow2,
2818-
// Not power of 2 case optimization.
2819-
NotPow2,
28202817
};
28212818

28222819
// Check if unsigned UDiv/URem can be optimized,
@@ -2834,17 +2831,7 @@ static DivRemOptimize isSuitableUDivURemOperand(Value *Operand) {
28342831
return DivRemOptimize::Not;
28352832
if (PatternMatch::match(Operand, PatternMatch::m_Power2()))
28362833
return DivRemOptimize::Pow2;
2837-
// Not power of 2 case.
2838-
// For now we expect type to be i32 or vector of i32.
2839-
// TODO: support i8, i16 case by creating zext cast to i32.
2840-
// TODO: support i64 case by using the same pattern, as for i32 case.
2841-
// Now can not be done, as mulh i64 is not supported.
2842-
if (OperandTy->getScalarSizeInBits() != 32)
2843-
return DivRemOptimize::Not;
2844-
// Expect to be splat.
2845-
if (OperandTy->isVectorTy() && !(cast<Constant>(Operand)->getSplatValue()))
2846-
return DivRemOptimize::Not;
2847-
return DivRemOptimize::NotPow2;
2834+
return DivRemOptimize::Not;
28482835
}
28492836

28502837
// Check if unsigned SDiv/URem can be optimized,
@@ -2927,93 +2914,12 @@ static void decomposeUDivPow2(BinaryOperator &UDivOp) {
29272914
Res->takeName(&UDivOp);
29282915
}
29292916

2930-
// Unsigned optimization if x / y, y positive not power of 2:
2931-
// floor ( x / y ) = floor( x / 2^k * 2^k / y )
2932-
// m = ceil( 2^k / y ) = (2^k + e ) / y, e - error, e = y - (2^k % y )
2933-
// floor( m * x / 2^k) = floor( x / y + e / y * x / 2^k )
2934-
// k should be big enough, to( e * x ) / (y * 2^k) < 1 / d,
2935-
// as if reminder was d - 1, with error greater the result will be bigger
2936-
// x <= 2^32, e / d ~ 1, => k = 32 + ceil( log2( d) )
2937-
// Note: m is 33 bit number, so need some additional work:
2938-
// m * x = ( m - 2^32 ) * x + 2^32 * x
2939-
// Note: another hack: ( a + b) / 2^k = [ (a - b) / 2 + b ] / 2 ^ ( k -1 ).
2940-
// Usual algorithm, for every unsigned number:
2941-
// p = ceil ( log2(y) )
2942-
// m = ceil [ 2^(32 + p) / y ] - save low 32 bits
2943-
// q = ( m * x ) >> 32 = mulh( m, x )
2944-
// t = ( x - q ) / 2 + q // it is a save variant of ( x + q ) / 2
2945-
// ans = t >> ( p - 1 )
2946-
// It can be simplified, if the value is near power of 2, so
2947-
// if m is <= 2^32 we can remove addition that was added as a hack for multiply
2948-
// additional shift for p - 1 also could be removed.
2949-
// See Hacker's Delight 10-10.
2950-
static void decomposeUDivNotPow2(BinaryOperator &UDivOp) {
2951-
IGC_ASSERT(UDivOp.getOpcode() == Instruction::UDiv);
2952-
Value *Dividend = UDivOp.getOperand(0);
2953-
IGC_ASSERT(isSuitableUDivURemOperand(UDivOp.getOperand(1)) ==
2954-
DivRemOptimize::NotPow2);
2955-
Constant *Divisor = cast<Constant>(UDivOp.getOperand(1));
2956-
IRBuilder<> Builder{&UDivOp};
2957-
Type *OperationTy = Dividend->getType();
2958-
bool IsVector = OperationTy->isVectorTy();
2959-
2960-
IGC_ASSERT(!IsVector || cast<ConstantDataVector>(Divisor)->isSplat());
2961-
const APInt &DivisorVal =
2962-
(IsVector ? cast<ConstantInt>(Divisor->getSplatValue())
2963-
: cast<ConstantInt>(Divisor))
2964-
->getValue();
2965-
2966-
APInt::mu MagicStruct = DivisorVal.magicu();
2967-
const int ElementBitWidth =
2968-
Divisor->getType()->getScalarType()->getIntegerBitWidth();
2969-
// Even divisors, can pre-shift the dividend to avoid
2970-
// extra work at the end.
2971-
Value *ShiftedDividend = Dividend;
2972-
// Need addition and y is 2 * y'.
2973-
if (MagicStruct.a && !DivisorVal[0]) {
2974-
unsigned ShiftSizeRaw = DivisorVal.countTrailingZeros();
2975-
Constant *ShiftSize =
2976-
Constant::getIntegerValue(OperationTy, APInt{32, ShiftSizeRaw});
2977-
ShiftedDividend = Builder.CreateLShr(ShiftedDividend, ShiftSize);
2978-
MagicStruct = DivisorVal.lshr(ShiftSizeRaw).magicu(ShiftSizeRaw);
2979-
2980-
// Should not change addition quality.
2981-
IGC_ASSERT_MESSAGE(!MagicStruct.a, "expected to subtract now");
2982-
IGC_ASSERT_MESSAGE(MagicStruct.s < DivisorVal.getBitWidth(),
2983-
"undefined shift");
2984-
}
2985-
Constant *MagicConst = Constant::getIntegerValue(OperationTy, MagicStruct.m);
2986-
Value *MulH = vc::createAnyIntrinsic(Builder, {ShiftedDividend, MagicConst},
2987-
GenXIntrinsic::genx_umulh,
2988-
{OperationTy, OperationTy}, "opt");
2989-
2990-
Value *Res = nullptr;
2991-
if (!MagicStruct.a) {
2992-
Constant *Shift =
2993-
Constant::getIntegerValue(OperationTy, APInt{32, MagicStruct.s});
2994-
Res = Builder.CreateLShr(MulH, Shift);
2995-
} else {
2996-
Value *Fixup = Builder.CreateSub(Dividend, MulH, "q_appx");
2997-
Constant *One = Constant::getIntegerValue(OperationTy, APInt{32, 1});
2998-
Fixup = Builder.CreateLShr(Fixup, One);
2999-
Value *Addition = Builder.CreateAdd(Fixup, MulH, "q_appx_add");
3000-
Constant *Shift =
3001-
Constant::getIntegerValue(OperationTy, APInt{32, MagicStruct.s - 1});
3002-
Res = Builder.CreateLShr(Addition, Shift);
3003-
}
3004-
IGC_ASSERT(Res);
3005-
UDivOp.replaceAllUsesWith(Res);
3006-
Res->takeName(&UDivOp);
3007-
}
3008-
30092917
void GenXPatternMatch::visitUDiv(BinaryOperator &I) {
30102918
auto CheckRes = isSuitableUDivURemOperand(I.getOperand(1));
30112919
if (CheckRes == DivRemOptimize::Not)
30122920
return;
3013-
Changed = true;
3014-
if (CheckRes == DivRemOptimize::NotPow2)
3015-
return decomposeUDivNotPow2(I);
30162921
IGC_ASSERT(CheckRes == DivRemOptimize::Pow2);
2922+
Changed = true;
30172923
return decomposeUDivPow2(I);
30182924
}
30192925

@@ -3074,33 +2980,12 @@ static void decomposeURemPow2(BinaryOperator &URemOp) {
30742980
Res->takeName(&URemOp);
30752981
}
30762982

3077-
// Optimization for unsigned x % y, y is not power of 2 value.
3078-
// x % y = x - x * (x / y)
3079-
static void decomposeURemNotPow2(BinaryOperator &URemOp) {
3080-
IGC_ASSERT(URemOp.getOpcode() == Instruction::URem);
3081-
Value *Dividend = URemOp.getOperand(0);
3082-
IGC_ASSERT(isSuitableUDivURemOperand(URemOp.getOperand(1)) ==
3083-
DivRemOptimize::NotPow2);
3084-
Constant *Divisor = cast<Constant>(URemOp.getOperand(1));
3085-
IRBuilder Builder{&URemOp};
3086-
Value *UDivOp = Builder.CreateUDiv(Dividend, Divisor, "udiv");
3087-
Value *Res = Builder.CreateSub(Dividend,
3088-
Builder.CreateMul(Dividend, UDivOp, "tmpRes"));
3089-
3090-
decomposeUDivNotPow2(*cast<BinaryOperator>(UDivOp));
3091-
3092-
URemOp.replaceAllUsesWith(cast<BinaryOperator>(Res));
3093-
Res->takeName(&URemOp);
3094-
}
3095-
30962983
void GenXPatternMatch::visitURem(BinaryOperator &I) {
30972984
auto CheckRes = isSuitableUDivURemOperand(I.getOperand(1));
30982985
if (CheckRes == DivRemOptimize::Not)
30992986
return;
3100-
Changed = true;
3101-
if (CheckRes == DivRemOptimize::NotPow2)
3102-
return decomposeURemNotPow2(I);
31032987
IGC_ASSERT(CheckRes == DivRemOptimize::Pow2);
2988+
Changed = true;
31042989
return decomposeURemPow2(I);
31052990
}
31062991

0 commit comments

Comments
 (0)