Skip to content

Commit 88fab90

Browse files
Jay-Jiewu-Lu authored and gfxbot committed
Revert of:
Pack threadID to short for non-OCL APIs Change-Id: I29ed499ed6485c570110403195c750a7dab0a55d
1 parent f1d9fd7 commit 88fab90

File tree

8 files changed

+21
-186
lines changed

8 files changed

+21
-186
lines changed

IGC/Compiler/CISACodeGen/CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,6 @@ set(IGC_BUILD__SRC__CISACodeGen_Common
7676
"${CMAKE_CURRENT_SOURCE_DIR}/VectorProcess.cpp"
7777
"${CMAKE_CURRENT_SOURCE_DIR}/VertexShaderCodeGen.cpp"
7878
"${CMAKE_CURRENT_SOURCE_DIR}/VertexShaderLowering.cpp"
79-
"${CMAKE_CURRENT_SOURCE_DIR}/ComputeShaderLowering.cpp"
8079
"${CMAKE_CURRENT_SOURCE_DIR}/WIAnalysis.cpp"
8180
"${CMAKE_CURRENT_SOURCE_DIR}/SLMConstProp.cpp"
8281
"${CMAKE_CURRENT_SOURCE_DIR}/POSH_RemoveNonPositionOutput.cpp"
@@ -163,7 +162,6 @@ set(IGC_BUILD__HDR__CISACodeGen_Common
163162
"${CMAKE_CURRENT_SOURCE_DIR}/VectorProcess.hpp"
164163
"${CMAKE_CURRENT_SOURCE_DIR}/VertexShaderCodeGen.hpp"
165164
"${CMAKE_CURRENT_SOURCE_DIR}/VertexShaderLowering.hpp"
166-
"${CMAKE_CURRENT_SOURCE_DIR}/ComputeShaderLowering.hpp"
167165
"${CMAKE_CURRENT_SOURCE_DIR}/WIAnalysis.hpp"
168166
"${CMAKE_CURRENT_SOURCE_DIR}/SLMConstProp.hpp"
169167
"${CMAKE_CURRENT_SOURCE_DIR}/POSH_RemoveNonPositionOutput.h"

IGC/Compiler/CISACodeGen/ComputeShaderCodeGen.cpp

Lines changed: 19 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -125,31 +125,27 @@ void CComputeShader::ParseShaderSpecificOpcode(llvm::Instruction* inst)
125125
}
126126
}
127127

128-
void CComputeShader::CreateThreadPayloadData(void* & pThreadPayload, uint& curbeTotalDataLength, uint& curbeReadLength)
128+
void CComputeShader::CreateThreadPayloadData(void* & pThreadPayload, uint& threadPayloadSize)
129129
{
130-
typedef uint16_t ThreadPayloadEntry;
131-
132130
// Find the max thread group dimension
133131
const OctEltUnit SIZE_OF_DQWORD = OctEltUnit(2);
134-
const OctEltUnit SIZE_OF_OWORD = OctEltUnit(1);
135132
uint numberOfId = GetNumberOfId();
136133
uint dimX = numLanes(m_dispatchSize);
137-
// dimX must align to alignment_X bytes (one GRF)
138-
uint alignment_X = EltUnit(SIZE_OF_OWORD).Count() * sizeof(DWORD);
139-
uint dimX_aligned = iSTD::Align(dimX * sizeof(ThreadPayloadEntry), alignment_X) / sizeof(ThreadPayloadEntry);
140-
uint dimY = (iSTD::Align(m_threadGroupSize, dimX) / dimX) * numberOfId;
141-
curbeReadLength = dimX_aligned * numberOfId * sizeof(ThreadPayloadEntry) / alignment_X;
134+
uint dimY = (iSTD::Align(m_threadGroupSize, dimX)/dimX) * numberOfId;
135+
136+
typedef uint ThreadPayloadEntry;
137+
138+
uint alignedVal = EltUnit(SIZE_OF_DQWORD).Count() * sizeof(DWORD); // Oct Element is 8 DWORDS
142139

143-
uint alignedVal = EltUnit(SIZE_OF_DQWORD).Count() * sizeof(ThreadPayloadEntry); // Oct Element is 8 Entries
144140
// m_NOSBufferSize is the additional space for cross-thread constant data (constants set by driver).
145-
curbeTotalDataLength = iSTD::Align(dimX_aligned * dimY * sizeof(ThreadPayloadEntry) + m_NOSBufferSize, alignedVal);
141+
threadPayloadSize = iSTD::Align( dimX * dimY * sizeof( ThreadPayloadEntry ) + m_NOSBufferSize, alignedVal );
146142

147143
assert(pThreadPayload == nullptr && "Thread payload should be a null variable");
148144

149-
unsigned threadPayloadEntries = curbeTotalDataLength / sizeof(ThreadPayloadEntry);
145+
unsigned threadPayloadEntries = threadPayloadSize / sizeof(ThreadPayloadEntry);
150146

151-
ThreadPayloadEntry* pThreadPayloadMem =
152-
(ThreadPayloadEntry*)IGC::aligned_malloc(threadPayloadEntries * sizeof(ThreadPayloadEntry), 16);
147+
ThreadPayloadEntry* pThreadPayloadMem =
148+
(ThreadPayloadEntry*)IGC::aligned_malloc(threadPayloadEntries* sizeof(ThreadPayloadEntry), 16);
153149
std::fill(pThreadPayloadMem, pThreadPayloadMem + threadPayloadEntries, 0);
154150

155151
pThreadPayload = pThreadPayloadMem;
@@ -173,17 +169,17 @@ void CComputeShader::CreateThreadPayloadData(void* & pThreadPayload, uint& curbe
173169
uint lane = 0;
174170
if(m_pThread_ID_in_Group_X)
175171
{
176-
pThreadPayloadMem[(y + lane) * dimX_aligned + x] = currThreadX;
172+
pThreadPayloadMem[(y + lane) * dimX + x] = currThreadX;
177173
lane++;
178174
}
179175
if(m_pThread_ID_in_Group_Y)
180176
{
181-
pThreadPayloadMem[(y + lane) * dimX_aligned + x] = currThreadY;
177+
pThreadPayloadMem[(y + lane) * dimX + x] = currThreadY;
182178
lane++;
183179
}
184180
if(m_pThread_ID_in_Group_Z)
185181
{
186-
pThreadPayloadMem[(y + lane) * dimX_aligned + x] = currThreadZ;
182+
pThreadPayloadMem[(y + lane) * dimX + x] = currThreadZ;
187183
lane++;
188184
}
189185

@@ -263,19 +259,19 @@ CVariable* CComputeShader::CreateThreadIDinGroup(uint channelNum)
263259
case 0:
264260
if(m_pThread_ID_in_Group_X == nullptr)
265261
{
266-
m_pThread_ID_in_Group_X = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_W, EALIGN_GRF, false, m_numberInstance);
262+
m_pThread_ID_in_Group_X = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_D, EALIGN_GRF, false, m_numberInstance);
267263
}
268264
return m_pThread_ID_in_Group_X;
269265
case 1:
270266
if(m_pThread_ID_in_Group_Y == nullptr)
271267
{
272-
m_pThread_ID_in_Group_Y = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_W, EALIGN_GRF, false, m_numberInstance);
268+
m_pThread_ID_in_Group_Y = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_D, EALIGN_GRF, false, m_numberInstance);
273269
}
274270
return m_pThread_ID_in_Group_Y;
275271
case 2:
276272
if(m_pThread_ID_in_Group_Z == nullptr)
277273
{
278-
m_pThread_ID_in_Group_Z = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_W, EALIGN_GRF, false, m_numberInstance);
274+
m_pThread_ID_in_Group_Z = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_D, EALIGN_GRF, false, m_numberInstance);
279275
}
280276
return m_pThread_ID_in_Group_Z;
281277
default:
@@ -339,7 +335,6 @@ void CComputeShader::AllocatePayload()
339335
{
340336
AllocateInput(m_pThread_ID_in_Group_X, offset, i);
341337
offset += m_pThread_ID_in_Group_X->GetSize();
342-
offset = iSTD::Round(offset, alignmentSize[m_pThread_ID_in_Group_X->GetAlign()]);
343338
}
344339
}
345340

@@ -349,7 +344,6 @@ void CComputeShader::AllocatePayload()
349344
{
350345
AllocateInput(m_pThread_ID_in_Group_Y, offset, i);
351346
offset += m_pThread_ID_in_Group_Y->GetSize();
352-
offset = iSTD::Round(offset, alignmentSize[m_pThread_ID_in_Group_Y->GetAlign()]);
353347
}
354348
}
355349

@@ -359,7 +353,6 @@ void CComputeShader::AllocatePayload()
359353
{
360354
AllocateInput(m_pThread_ID_in_Group_Z, offset, i);
361355
offset += m_pThread_ID_in_Group_Z->GetSize();
362-
offset = iSTD::Round(offset, alignmentSize[m_pThread_ID_in_Group_Z->GetAlign()]);
363356
}
364357
}
365358

@@ -473,6 +466,8 @@ void CComputeShader::FillProgram(SComputeShaderKernelProgram* pKernelProgram)
473466
pKernelProgram->FloatingPointMode = USC::GFX3DSTATE_FLOATING_POINT_IEEE_754;
474467
pKernelProgram->SingleProgramFlow = USC::GFX3DSTATE_PROGRAM_FLOW_MULTIPLE;
475468
pKernelProgram->CurbeReadOffset = 0;
469+
pKernelProgram->CurbeReadLength = GetNumberOfId() * (numLanes(m_dispatchSize) / numLanes(SIMDMode::SIMD8));
470+
476471
pKernelProgram->PhysicalThreadsInGroup = static_cast<int>(
477472
std::ceil((static_cast<float>(m_threadGroupSize) /
478473
static_cast<float>((numLanes(m_dispatchSize))))));
@@ -492,8 +487,7 @@ void CComputeShader::FillProgram(SComputeShaderKernelProgram* pKernelProgram)
492487
pKernelProgram->ThreadPayloadData = nullptr;
493488
CreateThreadPayloadData(
494489
pKernelProgram->ThreadPayloadData,
495-
pKernelProgram->CurbeTotalDataLength,
496-
pKernelProgram->CurbeReadLength);
490+
pKernelProgram->CurbeTotalDataLength);
497491

498492
pKernelProgram->ThreadGroupSize = m_threadGroupSize;
499493

IGC/Compiler/CISACodeGen/ComputeShaderCodeGen.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ class CComputeShader : public CShader
4343
void FillProgram(SComputeShaderKernelProgram* pKernelProgram);
4444
void PreCompile() override;
4545
void ExtractGlobalVariables() override;
46-
void CreateThreadPayloadData(void* & pThreadPayload, uint& curbeTotalDataLength, uint& curbeReadLength);
46+
void CreateThreadPayloadData(void* & pThreadPayload, uint& threadPayloadSize);
4747
uint GetNumberOfId();
4848
void ParseShaderSpecificOpcode(llvm::Instruction* inst) override;
4949

IGC/Compiler/CISACodeGen/ComputeShaderLowering.cpp

Lines changed: 0 additions & 116 deletions
This file was deleted.

IGC/Compiler/CISACodeGen/ComputeShaderLowering.hpp

Lines changed: 0 additions & 34 deletions
This file was deleted.

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6670,21 +6670,18 @@ void EmitPass::emitCSSGV(GenIntrinsicInst* inst)
66706670
}
66716671
case THREAD_ID_IN_GROUP_X:
66726672
{
6673-
assert(inst->getType() == Type::getInt16Ty(inst->getContext()) && "only 16bit ThreadID is supported now.");
66746673
pThreadIdInGroup = csProgram->CreateThreadIDinGroup(0);
66756674
m_currShader->CopyVariable(m_destination, pThreadIdInGroup);
66766675
break;
66776676
}
66786677
case THREAD_ID_IN_GROUP_Y:
66796678
{
6680-
assert(inst->getType() == Type::getInt16Ty(inst->getContext()) && "only 16bit ThreadID is supported now.");
66816679
pThreadIdInGroup = csProgram->CreateThreadIDinGroup(1);
66826680
m_currShader->CopyVariable(m_destination, pThreadIdInGroup);
66836681
break;
66846682
}
66856683
case THREAD_ID_IN_GROUP_Z:
66866684
{
6687-
assert(inst->getType() == Type::getInt16Ty(inst->getContext()) && "only 16bit ThreadID is supported now.");
66886685
pThreadIdInGroup = csProgram->CreateThreadIDinGroup(2);
66896686
m_currShader->CopyVariable(m_destination, pThreadIdInGroup);
66906687
break;

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
7070
#include "Compiler/CISACodeGen/LowerGEPForPrivMem.hpp"
7171
#include "Compiler/CISACodeGen/POSH_RemoveNonPositionOutput.h"
7272
#include "Compiler/CISACodeGen/RegisterEstimator.hpp"
73-
#include "Compiler/CISACodeGen/ComputeShaderLowering.hpp"
7473

7574
#include "Compiler/CISACodeGen/SLMConstProp.hpp"
7675
#include "Compiler/Optimizer/OpenCLPasses/GenericAddressResolution/GenericAddressDynamicResolution.hpp"
@@ -616,9 +615,6 @@ inline void AddLegalizationPasses(CodeGenContext &ctx, IGCPassManager& mpm)
616615
case ShaderType::DOMAIN_SHADER:
617616
mpm.add(createDomainShaderLoweringPass());
618617
break;
619-
case ShaderType::COMPUTE_SHADER:
620-
mpm.add(CreateComputeShaderLowering());
621-
break;
622618
default:
623619
break;
624620
}

IGC/GenISAIntrinsics/Intrinsic_definitions.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -142,7 +142,7 @@
142142
"GenISA_DCL_inputVec": ["anyfloat",["int","int"],"NoMem"],
143143
"GenISA_DCL_ShaderInputVec": ["anyvector",["int","int"],"NoMem"],
144144
"GenISA_DCL_GSinputVec": ["float4",["int","int"],"NoMem"],
145-
"GenISA_DCL_SystemValue": ["any:float",["int"],"NoMem"],
145+
"GenISA_DCL_SystemValue": ["anyfloat",["int"],"NoMem"],
146146
"GenISA_SampleOffsetX": ["float",["int"],"NoMem"],
147147
"GenISA_SampleOffsetY": ["float",["int"],"NoMem"],
148148
"GenISA_PixelPositionX": ["short",[],"NoMem"],

0 commit comments

Comments
 (0)