Skip to content

Commit cf22c10

Browse files
scottp101 authored and igcbot committed
generate HW local IDs with forced linear walk order
generate HW local IDs with forced linear walk order
1 parent 0d6e06d commit cf22c10

File tree

4 files changed

+127
-36
lines changed

4 files changed

+127
-36
lines changed

IGC/Compiler/CISACodeGen/ComputeShaderBase.cpp

Lines changed: 75 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -161,40 +161,98 @@ namespace IGC
161161
m_walkOrder = WO_YXZ;
162162
}
163163

164-
uint UNDEF = 999;
164+
auto order = selectBestWalkOrder(
165+
m_ThreadIDLayout, is_pow2_x, is_pow2_y, is_pow2_z);
166+
167+
if (order) {
168+
m_walkOrder = *order;
169+
m_enableHWGenerateLID = true;
170+
} else {
171+
// Is 2D or 3D dispatch and isn't pow2, so the HW doesn't support it
172+
m_enableHWGenerateLID = false;
173+
m_ThreadIDLayout = ThreadIDLayout::X;
174+
m_walkOrder = WO_XYZ;
175+
return;
176+
}
177+
}
178+
179+
// Checks whether an explicitly requested work-group walk order can be used
// with HW-generated local IDs.
//
// Dims - thread-group sizes, indexed X = 0, Y = 1, Z = 2.
// WO   - requested walk order; dim0/dim1/dim2 hold the walk position
//        assigned to X/Y/Z respectively (position 0 is the internal walk
//        dim, position 1 the intermediate one).
//
// Returns the WALK_ORDER produced by getWalkOrder() when the internal and
// intermediate dimensions are distinct and both have power-of-two sizes;
// returns None otherwise.
Optional<CComputeShaderBase::WALK_ORDER>
CComputeShaderBase::checkLegalWalkOrder(
    const std::array<uint32_t, 3>& Dims,
    const WorkGroupWalkOrderMD& WO)
{
    // Walk positions requested for X and Y. Z is never inspected: it is the
    // fallback whenever neither X nor Y claims the position being looked up.
    const int positionOfX = WO.dim0;
    const int positionOfY = WO.dim1;

    // Maps a walk position back to the dimension index occupying it.
    const auto dimAtPosition = [positionOfX, positionOfY](int position) -> uint {
        if (positionOfX == position)
            return 0;
        if (positionOfY == position)
            return 1;
        return 2;
    };

    const uint internalDim = dimAtPosition(0);
    const uint intermediateDim = dimAtPosition(1);

    // Both lookups fall through to Z when neither X nor Y holds position
    // 0 or 1; such an order cannot be expressed as a walk order.
    if (internalDim == intermediateDim)
    {
        return None;
    }

    // The sizes of the internal and the intermediate dimension must both be
    // powers of two for this order to be accepted.
    if (!iSTD::IsPowerOfTwo(Dims[internalDim]))
    {
        return None;
    }
    if (!iSTD::IsPowerOfTwo(Dims[intermediateDim]))
    {
        return None;
    }

    // Legal walk order for HW auto-gen
    return getWalkOrder(internalDim, intermediateDim);
}
214+
215+
Optional<CComputeShaderBase::WALK_ORDER>
216+
CComputeShaderBase::selectBestWalkOrder(
217+
ThreadIDLayout Layout,
218+
bool is_pow2_x, bool is_pow2_y, bool is_pow2_z)
219+
{
220+
constexpr uint UNDEF = std::numeric_limits<uint>::max();
165221
uint order0 = UNDEF;
166222
uint order1 = UNDEF;
167-
if (m_ThreadIDLayout == ThreadIDLayout::TileY) {
223+
if (Layout == ThreadIDLayout::TileY)
224+
{
168225
IGC_ASSERT(is_pow2_y);
169226
order0 = 1;
170227
order1 = (is_pow2_x ? 0 : (is_pow2_z ? 2 : UNDEF));
171-
}else {
228+
}
229+
else
230+
{
172231
//below is from HAS p-code except tileY
173232
//try to find walk_order so that HW can generate LID
174-
if (is_pow2_x) {
233+
if (is_pow2_x)
234+
{
175235
// (pow2,pow2,z) or (pow2,y,pow2) or illegal
176236
order0 = 0;
177237
order1 = (is_pow2_y ? 1 : (is_pow2_z ? 2 : UNDEF));
178-
}else if (is_pow2_y) {
238+
}
239+
else if (is_pow2_y)
240+
{
179241
// (x,pow2,pow2) or illegal
180242
order0 = 1;
181243
order1 = (is_pow2_z ? 2 : UNDEF);
182244
}
183245
}
184246

185-
if (order1 != UNDEF) {
247+
if (order1 != UNDEF)
248+
{
186249
// select walkorder
187-
m_walkOrder = getWalkOrder(order0, order1);
188-
m_enableHWGenerateLID = true;
189-
190-
}else {
191-
// Is 2D or 3D dispatch and isnt pow2, so the HW doesn't support it
192-
m_enableHWGenerateLID = false;
193-
m_ThreadIDLayout = ThreadIDLayout::X;
194-
m_walkOrder = WO_XYZ;
195-
return;
250+
return getWalkOrder(order0, order1);
196251
}
252+
253+
return None;
197254
}
255+
198256
//order0: the internal walk dim
199257
//order1: the intermediate walk dim
200258
//e.g.: 1, 0 means, YXZ walkorder
@@ -209,8 +267,8 @@ namespace IGC
209267
case getWalkOrderValue(0, 1): return WALK_ORDER::WO_XYZ; //012
210268
case getWalkOrderValue(0, 2): return WALK_ORDER::WO_XZY; //021
211269
case getWalkOrderValue(1, 0): return WALK_ORDER::WO_YXZ; //102
212-
case getWalkOrderValue(1, 2): return WALK_ORDER::WO_YZX; //201
213-
case getWalkOrderValue(2, 0): return WALK_ORDER::WO_ZXY; //120
270+
case getWalkOrderValue(1, 2): return WALK_ORDER::WO_YZX; //120
271+
case getWalkOrderValue(2, 0): return WALK_ORDER::WO_ZXY; //201
214272
case getWalkOrderValue(2, 1): return WALK_ORDER::WO_ZYX; //210
215273
default:
216274
IGC_ASSERT_MESSAGE(0, "unhandled case!");

IGC/Compiler/CISACodeGen/ComputeShaderBase.hpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ SPDX-License-Identifier: MIT
1010
#pragma once
1111

1212
#include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
13+
#include "common/LLVMWarningsPush.hpp"
14+
#include "llvm/ADT/Optional.h"
15+
#include "common/LLVMWarningsPop.hpp"
1316

1417
namespace IGC
1518
{
@@ -53,7 +56,14 @@ namespace IGC
5356
bool m_enableHWGenerateLID = false;
5457

5558
void setEmitLocalMask(SGVUsage channelNum);
56-
private:
59+
static llvm::Optional<WALK_ORDER> selectBestWalkOrder(
60+
ThreadIDLayout Layout,
61+
bool is_pow2_x, bool is_pow2_y, bool is_pow2_z);
62+
// Determines if HW can handle auto generating local IDs with this
63+
// order
64+
static llvm::Optional<WALK_ORDER> checkLegalWalkOrder(
65+
const std::array<uint32_t, 3>& Dims,
66+
const WorkGroupWalkOrderMD& WO);
5767
static WALK_ORDER getWalkOrder(uint order0, uint order1);
5868
};
5969
}

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -159,51 +159,74 @@ namespace IGC
159159
}
160160
void COpenCLKernel::tryHWGenerateLocalIDs()
161161
{
162-
if (hasWorkGroupWalkOrder())
163-
return;
164-
165162
auto Dims = IGCMetaDataHelper::getThreadGroupDims(
166163
*m_pMdUtils, entry);
167164

168165
if (!Dims)
169166
return;
170167

168+
auto WO = getWorkGroupWalkOrder();
169+
bool ForcedWalkOrder = false;
170+
if (WO.dim0 != 0 || WO.dim1 != 0 || WO.dim2 != 0)
171+
{
172+
if (auto Order = checkLegalWalkOrder(*Dims, WO))
173+
{
174+
ForcedWalkOrder = true;
175+
// Don't do TileY if forced in this way.
176+
m_ThreadIDLayout = ThreadIDLayout::X;
177+
m_walkOrder = *Order;
178+
}
179+
else
180+
{
181+
auto WalkOrder = getWalkOrder(WO.dim0, WO.dim1);
182+
if (WalkOrder != WO_XYZ)
183+
{
184+
IGC_ASSERT_MESSAGE(0, "unhandled walk order!");
185+
}
186+
return;
187+
}
188+
}
189+
171190
// OpenCL currently emits all local IDs even if only one dimension
172191
// is requested. Let's mirror that for now.
173192
ImplicitArgs implicitArgs(*entry, m_pMdUtils);
174193
if (implicitArgs.isImplicitArgExist(ImplicitArg::LOCAL_ID_X) ||
175194
implicitArgs.isImplicitArgExist(ImplicitArg::LOCAL_ID_Y) ||
176195
implicitArgs.isImplicitArgExist(ImplicitArg::LOCAL_ID_Z))
177196
{
197+
if (ForcedWalkOrder)
198+
m_enableHWGenerateLID = true;
178199
setEmitLocalMask(THREAD_ID_IN_GROUP_Z);
179200
}
180201

181-
selectWalkOrder(
182-
false,
183-
0,
184-
0,
185-
0, /* dummy 1D accesses */
186-
0, /* dummy 2D accesses */
187-
0, /* dummy SLM accessed */
188-
(*Dims)[0],
189-
(*Dims)[1],
190-
(*Dims)[2]);
202+
if (!ForcedWalkOrder)
203+
{
204+
selectWalkOrder(
205+
false,
206+
0,
207+
0,
208+
0, /* dummy 1D accesses */
209+
0, /* dummy 2D accesses */
210+
0, /* dummy SLM accessed */
211+
(*Dims)[0],
212+
(*Dims)[1],
213+
(*Dims)[2]);
214+
}
191215
encoder.GetVISABuilder()->SetOption(vISA_autoLoadLocalID, m_enableHWGenerateLID);
192216
}
193217

194-
bool COpenCLKernel::hasWorkGroupWalkOrder()
218+
WorkGroupWalkOrderMD COpenCLKernel::getWorkGroupWalkOrder()
195219
{
196220
const CodeGenContext* pCtx = GetContext();
197221
const ModuleMetaData* MMD = pCtx->getModuleMetaData();
198222
if (auto I = MMD->FuncMD.find(entry); I != MMD->FuncMD.end())
199223
{
200224
auto& FMD = I->second;
201225
auto& Order = FMD.workGroupWalkOrder;
202-
if (Order.dim0 != 0 || Order.dim1 != 0 || Order.dim2 != 0)
203-
return true;
226+
return Order;
204227
}
205228

206-
return false;
229+
return {};
207230
}
208231

209232
SOpenCLKernelInfo::SResourceInfo COpenCLKernel::getResourceInfo(int argNo)

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ namespace IGC
126126

127127
void ClearKernelInfo();
128128
private:
129-
bool hasWorkGroupWalkOrder();
129+
WorkGroupWalkOrderMD getWorkGroupWalkOrder();
130130
void tryHWGenerateLocalIDs();
131131
};
132132

0 commit comments

Comments
 (0)