Skip to content

Commit 240839f

Browse files
mmerecki authored and igcbot committed
Fix calculation of URB read entry size and max number of input registers in pull model.
1 parent d588cf8 commit 240839f

File tree

4 files changed

+78
-26
lines changed

4 files changed

+78
-26
lines changed

IGC/Compiler/CISACodeGen/VertexShaderCodeGen.cpp

Lines changed: 42 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -246,31 +246,33 @@ namespace IGC
246246
return m_R1;
247247
}
248248

249-
250249
/// Returns VS URB allocation size.
251250
/// This is the size of VS URB entry consisting of the header data and attribute data.
252251
OctEltUnit CVertexShader::GetURBAllocationSize() const
253252
{
254-
const size_t payloadSize = m_Platform->getWATable().Wa_16011983264 ?
255-
std::max(setup.size(), size_t(1)) :
256-
setup.size();
257-
// max index of the variables in the payload
258-
const EltUnit maxSetupVarNum(isInputsPulled ? size_t(132) : payloadSize);
259-
const OctEltUnit maxSetupOct = round_up<OctElement>(maxSetupVarNum);
260-
// URB allocation size is the maximum of the input and ouput entry size.
261-
return std::max(round_up<OctElement>(m_properties.m_URBOutputLength), maxSetupOct);
253+
QuadEltUnit urbInputLength = isInputsPulled ?
254+
GetMaxNumInputRegister() : GetNumInputRegistersPushed();
255+
if (m_Platform->getWATable().Wa_16011983264)
256+
{
257+
const QuadEltUnit one(1);
258+
urbInputLength = std::max(urbInputLength, one);
259+
}
260+
261+
// URB allocation size is the maximum of the input and output entry size.
262+
return round_up<OctElement>(
263+
std::max(m_properties.m_URBOutputLength, urbInputLength));
262264
}
263265

264266
OctEltUnit CVertexShader::GetVertexURBEntryReadLength() const
265267
{
266-
// max index of the variables in the payload
267-
const EltUnit maxSetupVarNum(
268-
m_Platform->getWATable().Wa_16011983264 ?
269-
std::max(setup.size(), size_t(1)) :
270-
setup.size());
271-
272-
// rounded up to 8-element size
273-
return round_up<OctElement>(maxSetupVarNum);
268+
QuadEltUnit numInputsPushed = GetNumInputRegistersPushed();
269+
if (m_Platform->getWATable().Wa_16011983264)
270+
{
271+
const QuadEltUnit one(1);
272+
numInputsPushed = std::max(numInputsPushed, one);
273+
}
274+
275+
return round_up<OctElement>(numInputsPushed);
274276
}
275277

276278
OctEltUnit CVertexShader::GetVertexURBEntryReadOffset() const
@@ -300,9 +302,29 @@ namespace IGC
300302

301303
QuadEltUnit CVertexShader::GetMaxNumInputRegister() const
302304
{
303-
// max index of the variables in the payload
304-
// if there are any pulled inputs set max num input register to max possible inputs 33 * 4
305-
const EltUnit maxSetupVarNum(isInputsPulled ? 132 : setup.size());
305+
const EltUnit maxSetupVarNum(isInputsPulled ?
306+
m_properties.m_MaxUsedInputSlots : setup.size());
307+
return round_up<QuadElement>(maxSetupVarNum);
308+
}
309+
310+
QuadEltUnit CVertexShader::GetNumInputRegistersPushed() const
311+
{
312+
uint numInputComponents = 0;
313+
if (m_ElementComponentPackingEnabled)
314+
{
315+
// This code does not expect inputs with index >= 32 to be pushed.
316+
// see PushAnalysis pass.
317+
IGC_ASSERT(setup.size() <= MAX_VSHADER_INPUT_REGISTERS_PACKAGEABLE);
318+
for (int i = 0; i < MAX_VSHADER_INPUT_REGISTERS_PACKAGEABLE; ++i)
319+
{
320+
numInputComponents += iSTD::BitCount(m_ElementComponentEnableMask[i]);
321+
}
322+
}
323+
else
324+
{
325+
numInputComponents = setup.size();
326+
}
327+
const EltUnit maxSetupVarNum(numInputComponents);
306328
return round_up<QuadElement>(maxSetupVarNum);
307329
}
308330

IGC/Compiler/CISACodeGen/VertexShaderCodeGen.hpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,12 @@ namespace IGC
6464

6565
/// Returns the offset that SBE or GS should apply when reading the URB entries
6666
/// output by Vertex Shader.
67-
/// This value is used to set the corresponding field in 3DSTATE_GS.
67+
/// This value is used to set the corresponding field in 3DSTATE_VS.
6868
OctEltUnit GetVertexURBEntryOutputReadOffset() const;
6969

70-
/// Returns the size of the vertex entry read used to load payload registers.
70+
/// Returns the size of the vertex entry read used to load payload
71+
/// registers. The value is calculated with vertex element component
72+
/// packing applied.
7173
OctEltUnit GetVertexURBEntryReadLength() const;
7274

7375
/// Returns the offset that should be applied to vertex entry read used to load payload registers.
@@ -76,9 +78,17 @@ namespace IGC
7678
/// Returns the overall URB Allocation Size that should be used for handling of vertex data.
7779
OctEltUnit GetURBAllocationSize() const;
7880

79-
/// Returns
81+
/// Returns the maximum input register index (vertex element) accessed
82+
/// in the shader plus 1. The value returned is calculated without
83+
/// vertex element component packing applied. Driver uses this value to
84+
/// configure VF.
8085
QuadEltUnit GetMaxNumInputRegister() const;
8186

87+
/// Returns the number of pushed input registers, in 4*DWORD units.
88+
/// The value returned is calculated with vertex element component
89+
/// packing applied.
90+
QuadEltUnit GetNumInputRegistersPushed() const;
91+
8292
void AddEpilogue(llvm::ReturnInst* pRet);
8393

8494
/// Helper to compact inputs

IGC/Compiler/CISACodeGen/VertexShaderLowering.cpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ namespace IGC
331331
case XP0:
332332
case XP1:
333333
case XP2:
334-
IGC_ASSERT(m_context->platform.supportsDrawParametersSGVs() ||
334+
IGC_ASSERT(m_context->platform.supportsDrawParametersSGVs() ||
335335
m_context->m_DriverInfo.UsesVertexBuffersToSendShaderDrawParameters());
336336
vertexFetchSGVExtendedParameters.at(usage - XP0) = inst;
337337
break;
@@ -355,7 +355,7 @@ namespace IGC
355355

356356
if (baseVertex || baseInstance || drawIndex || vertexId || InstanceId)
357357
{
358-
// Find first free location at the end i.e. after all user inputs.
358+
// Find first free location at the end i.e. after all user inputs.
359359
unsigned int drawParametersIndex = (ARRAY_COUNT(m_inputUsed) - 1);
360360
for (int index = drawParametersIndex; index >= 0; --index)
361361
{
@@ -370,7 +370,7 @@ namespace IGC
370370
}
371371
}
372372

373-
/// UMD has to limit the number of user inputs in order
373+
/// UMD has to limit the number of user inputs in order
374374
/// to be sure there are 2 free inputs at the end.
375375
assert(drawParametersIndex < (ARRAY_COUNT(m_inputUsed) - 8));
376376

@@ -442,6 +442,16 @@ namespace IGC
442442
}
443443
}
444444

445+
for (unsigned int slot = 0;
446+
slot < ARRAY_COUNT(m_inputUsed);
447+
++slot)
448+
{
449+
if (m_inputUsed[slot])
450+
{
451+
m_vsPropsPass->SetInputSlotUsed(slot);
452+
}
453+
}
454+
445455
//URB padding to 32Byte offsets
446456
bool addURBPaddingTo32Bytes = true;
447457
for (unsigned int i = 0; addURBPaddingTo32Bytes && i < MaxNumOfOutput + m_headerSize.Count(); i++)
@@ -696,6 +706,7 @@ namespace IGC
696706
}
697707

698708
VertexShaderProperties::VertexShaderProperties() :
709+
m_MaxUsedInputSlots(0),
699710
m_HasVertexID(false),
700711
m_VID(0),
701712
m_HasInstanceID(false),
@@ -750,6 +761,12 @@ namespace IGC
750761
m_vsProps.m_VID = VIDSlot;
751762
}
752763

764+
void CollectVertexShaderProperties::SetInputSlotUsed(unsigned int slot)
765+
{
766+
m_vsProps.m_MaxUsedInputSlots =
767+
std::max(m_vsProps.m_MaxUsedInputSlots, slot + 1);
768+
}
769+
753770
void CollectVertexShaderProperties::SetShaderDrawParameter(size_t paramIndex, unsigned int slot)
754771
{
755772
IGC_ASSERT(paramIndex < ARRAY_COUNT(m_vsProps.m_VertexFetchSGVExtendedParameters.extendedParameters));

IGC/Compiler/CISACodeGen/VertexShaderLowering.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ namespace IGC
3737
{
3838
public:
3939
VertexShaderProperties();
40+
// Index of the highest used input vertex element component, plus 1 (0 if none is used).
41+
unsigned int m_MaxUsedInputSlots;
4042
bool m_HasVertexID;
4143
unsigned int m_VID;
4244
bool m_HasInstanceID;
@@ -53,6 +55,7 @@ namespace IGC
5355
public:
5456
CollectVertexShaderProperties();
5557
static char ID;
58+
void SetInputSlotUsed(unsigned int slot);
5659
void SetVertexIdSlot(unsigned int VIDslot);
5760
void SetInstanceIdSlot(unsigned int IIDslot);
5861
void SetShaderDrawParameter(size_t paramIndex, unsigned int slot);

0 commit comments

Comments
 (0)