Skip to content

Commit d03500d

Browse files
matborzyszkowski authored and igcbot committed
Add visa dependent code for CRI
.
1 parent 54d4779 commit d03500d

File tree

3 files changed

+39
-0
lines changed

3 files changed

+39
-0
lines changed

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1499,6 +1499,8 @@ void CEncoder::ShflIdx4(CVariable *dst, CVariable *src0, CVariable *src1) {
14991499
VISA_RawOpnd *dstOpnd = GetRawDestination(dst);
15001500
VISA_PredOpnd *predOpnd = GetFlagOperand(m_encoderState.m_flag);
15011501

1502+
V(vKernel->AppendVISAShflIdx4Inst(ISA_SHFL_IDX4, predOpnd, GetAluEMask(dst), GetAluExecSize(dst), dstOpnd, src0Opnd,
1503+
src1Opnd));
15021504
}
15031505
// We allow H1 to be nullptr for the common case of adding 64-bit variable
15041506
// with 32-bit imm
@@ -4363,6 +4365,9 @@ void CEncoder::InitVISABuilderOptions(TARGET_PLATFORM VISAPlatform, SIMDMode sim
43634365
if (IGC_IS_FLAG_ENABLED(CopyA0ToDBG0)) {
43644366
SaveOption(vISA_CopyA0ToDBG0, true);
43654367
}
4368+
if (IGC_IS_FLAG_ENABLED(CopyMsg0ToDbg0)) {
4369+
SaveOption(vISA_CopyMsg0ToDbg0, true);
4370+
}
43664371

43674372
if (VISAPlatform == Xe_XeHPSDV && IGC_IS_FLAG_ENABLED(DPASTokenReduction)) {
43684373
SaveOption(vISA_EnableDPASTokenReduction, true);
@@ -4581,6 +4586,15 @@ void CEncoder::InitVISABuilderOptions(TARGET_PLATFORM VISAPlatform, SIMDMode sim
45814586
SaveOption(vISA_EnableProgrammableOffsetsMessageBitInHeader, true);
45824587
}
45834588

4589+
if (m_program->m_Platform->hasEfficient64bEnabled()) {
4590+
SaveOption(vISA_enableEfficient64b, true);
4591+
}
4592+
if (!m_program->m_Platform->supportStatefulScaleFolding()) {
4593+
// vISA_supportLSCImmScale has value 3 by default, which enables address
4594+
// offset scaling in all cases. Set value to 0 to disable UGM-scratch and
4595+
// SLM cases.
4596+
SaveOption(vISA_supportLSCImmScale, (uint32_t)0);
4597+
}
45844598
if (uint32_t Val = IGC_GET_FLAG_VALUE(EnableScalarPipe)) {
45854599
SaveOption(vISA_ScalarPipe, Val);
45864600
}
@@ -7056,6 +7070,8 @@ void CEncoder::bdpas(CVariable *Dst, CVariable *Acc, CVariable *B, PrecisionType
70567070
VISA_VectorOpnd *srcOpnd3 = GetSourceOperand(scalingPartsB[partIndex], noMod);
70577071
VISA_VectorOpnd *srcOpnd4 = GetSourceOperand(scalingPartsA[partIndex], noMod);
70587072

7073+
V(vKernel->AppendVISABdpasInst(ISA_BDPAS, splitExecMask, toExecSize, dstOpnd, srcOpnd0, srcOpnd1, srcOpnd2,
7074+
srcOpnd3, srcOpnd4, src2Precision, src1Precision, systolicDepth, repeatCount));
70597075
}
70607076
uint32_t dstOfstBytes = m_encoderState.m_dstOperand.subVar * getGRFSize() + Dst->GetAliasOffset();
70617077
MergePayloadToHigherSIMD(partsDst[0], partsDst[1], repeatCount, Dst, dstOfstBytes, visaNumLanes(fromExecSize));
@@ -7080,6 +7096,8 @@ void CEncoder::bdpas(CVariable *Dst, CVariable *Acc, CVariable *B, PrecisionType
70807096
VISA_VectorOpnd *srcOpnd3 = GetSourceOperand(BScaling, noMod);
70817097
VISA_VectorOpnd *srcOpnd4 = GetSourceOperand(AScaling, noMod);
70827098

7099+
V(vKernel->AppendVISABdpasInst(ISA_BDPAS, execMask, execSize, dstOpnd, srcOpnd0, srcOpnd1, srcOpnd2, srcOpnd3,
7100+
srcOpnd4, src2Precision, src1Precision, systolicDepth, repeatCount));
70837101
}
70847102
}
70857103

@@ -7241,6 +7259,9 @@ void CEncoder::lfsr(CVariable *dst, CVariable *src0, CVariable *src1, LFSR_FC fu
72417259
VISA_VectorOpnd *srcOpnd0 = GetSourceOperand(src0, m_encoderState.m_srcOperand[0]);
72427260
VISA_VectorOpnd *srcOpnd1 = GetSourceOperand(src1, m_encoderState.m_srcOperand[1]);
72437261

7262+
V(vKernel->AppendVISALfsrInst(predOpnd, GetAluEMask(dst),
7263+
visaExecSize(dst->IsUniform() ? m_encoderState.m_uniformSIMDSize : simdMode), funcCtrl,
7264+
dstOpnd, srcOpnd0, srcOpnd1));
72447265
}
72457266

72467267
void CEncoder::srnd(CVariable *D, CVariable *S0, CVariable *R) {
@@ -7264,6 +7285,10 @@ void CEncoder::emitDnscl(CVariable *dst, CVariable *src0, CVariable *src1, CVari
72647285
VISA_RawOpnd *srcOpnd1 = GetRawSource(src1);
72657286
VISA_RawOpnd *srcOpnd2 = GetRawSource(bias);
72667287

7288+
V(vKernel->AppendVISADnsclInst(
7289+
predOpnd, GetAluEMask(dst),
7290+
visaExecSize(dst->IsUniform() ? m_encoderState.m_uniformSIMDSize : m_encoderState.m_simdSize), convType, packMode,
7291+
roundMode, dstOpnd, srcOpnd0, srcOpnd1, srcOpnd2));
72677292
}
72687293

72697294
std::string CEncoder::GetVariableName(CVariable *var) {

IGC/Compiler/CISACodeGen/CISABuilder.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,8 @@ class CEncoder {
338338
inline void USubB(CVariable *dst, CVariable *dstCarryBorrow, CVariable *src0, CVariable *src1);
339339
inline void IEEESqrt(CVariable *dst, CVariable *src0);
340340
inline void IEEEDivide(CVariable *dst, CVariable *src0, CVariable *src1);
341+
inline void Tanh(CVariable *dst, CVariable *src0);
342+
inline void Sigm(CVariable *dst, CVariable *src0);
341343
void AddPair(CVariable *Lo, CVariable *Hi, CVariable *L0, CVariable *H0, CVariable *L1, CVariable *H1 = nullptr);
342344
void SubPair(CVariable *Lo, CVariable *Hi, CVariable *L0, CVariable *H0, CVariable *L1, CVariable *H1);
343345
inline void dp4a(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2);
@@ -897,6 +899,9 @@ inline void CEncoder::IEEESqrt(CVariable *dst, CVariable *src0) { Arithmetic(ISA
897899
inline void CEncoder::IEEEDivide(CVariable *dst, CVariable *src0, CVariable *src1) {
898900
Arithmetic(ISA_DIVM, dst, src0, src1);
899901
}
902+
// Tanh helper: issues the ISA_TANH opcode through Arithmetic(), passing `dst`
// and `src0` through unchanged.
inline void CEncoder::Tanh(CVariable *dst, CVariable *src0) {
  Arithmetic(ISA_TANH, dst, src0);
}
903+
904+
// Sigm helper: issues the ISA_SIGM opcode through Arithmetic(), passing `dst`
// and `src0` through unchanged.
inline void CEncoder::Sigm(CVariable *dst, CVariable *src0) {
  Arithmetic(ISA_SIGM, dst, src0);
}
900905

901906
inline void CEncoder::dp4a(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2) {
902907
Arithmetic(ISA_DP4A, dst, src0, src1, src2);

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2299,8 +2299,10 @@ void EmitPass::EmitSimpleAlu(EOPCODE opCode, CVariable *dst, CVariable *src0, CV
22992299
m_encoder->Inv(dst, src0);
23002300
break;
23012301
case llvm_tanh:
2302+
m_encoder->Tanh(dst, src0);
23022303
break;
23032304
case llvm_sigm:
2305+
m_encoder->Sigm(dst, src0);
23042306
break;
23052307
default:
23062308
// need support
@@ -11397,6 +11399,7 @@ void EmitPass::emitStackCall(llvm::CallInst *inst) {
1139711399
IGC_ASSERT_MESSAGE(IGCLLVM::getNumArgOperands(inst) == 0, "Arguments for non-returning call are not implemented");
1139811400

1139911401
auto *funcAddr = GetSymbol(IGCLLVM::getCalledValue(inst));
11402+
if (!m_currShader->m_Platform->hasEfficient64bEnabled())
1140011403
funcAddr = TruncatePointer(funcAddr);
1140111404
IGC_ASSERT_MESSAGE(funcAddr->IsUniform(), "Function address must be uniform for non-returning stack call");
1140211405
m_encoder->IndirectStackCall(nullptr, funcAddr, 0, 0);
@@ -22846,23 +22849,28 @@ void EmitPass::emitLscIntrinsicTypedLoadStatus(llvm::GenIntrinsicInst *inst) {
2284622849
CVariable *flag = nullptr;
2284722850
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
2284822851
CVariable *prevResult = nullptr;
22852+
if (!isEfficient64bEnabled && needLoop) {
2284922853
prevResult = m_currShader->GetNewVariable(numElements, ISA_TYPE_D, EALIGN_DWORD, "typedLoadStatusResult");
2285022854
CVariable *initVal = m_currShader->ImmToVariable(0, ISA_TYPE_D);
2285122855
m_encoder->SetSimdSize(SIMDMode::SIMD1);
2285222856
m_encoder->SetNoMask();
2285322857
m_encoder->Copy(m_destination, initVal);
2285422858
m_encoder->Push();
22859+
}
2285522860
ResourceLoopSubIteration(resource, flag, label);
2285622861
m_encoder->SetPredicate(flag);
2285722862
m_encoder->LSC_TypedReadWrite(lscOp, &resource, pU, pV, pR, pLODorSampleIdx,
2285822863
needLoop && prevResult ? prevResult : m_destination, eltBitSize, numElements,
2285922864
addrSize, writeMask.getEM(), cacheOpts);
2286022865
m_encoder->Push();
22866+
if (!isEfficient64bEnabled && needLoop) {
2286122867
m_encoder->SetSimdSize(SIMDMode::SIMD1);
2286222868
m_encoder->SetNoMask();
2286322869
m_encoder->Or(m_destination, prevResult, m_destination);
2286422870
m_encoder->Push();
22871+
}
2286522872
ResourceLoopBackEdge(needLoop, flag, label);
22873+
if (!isEfficient64bEnabled) {
2286622874
// Extract per-lane status from the packed status register.
2286722875
// The hardware returns a single dw register with one status bit per lane.
2286822876
// We need to broadcast the bit result into per-lane values.
@@ -22880,6 +22888,7 @@ void EmitPass::emitLscIntrinsicTypedLoadStatus(llvm::GenIntrinsicInst *inst) {
2288022888
CVariable *nonResident = m_currShader->ImmToVariable(0x0, m_destination->GetType());
2288122889
m_encoder->Select(statusFlag, m_destination, resident, nonResident);
2288222890
m_encoder->Push();
22891+
}
2288322892
}
2288422893
m_currShader->m_State.isMessageTargetDataCacheDataPort = true;
2288522894
}

0 commit comments

Comments
 (0)