Skip to content

Commit a2e1acd

Browse files
vsemenov368igcbot
authored andcommitted
Enable Xe3p device support in VC
.
1 parent c06c782 commit a2e1acd

File tree

139 files changed

+8445
-191
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

139 files changed

+8445
-191
lines changed

IGC/VectorCompiler/CMCL/lib/Headers/cm-cl/detail/builtins.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2021-2024 Intel Corporation
3+
Copyright (C) 2021-2025 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -185,6 +185,9 @@ uint32_t __cm_cl_group_id_z();
185185

186186
int __cm_cl_hw_thread_id();
187187

188+
template <typename T, typename U>
189+
T __cm_cl_packed_4bit_upconvert_lut(T lut, U src);
190+
188191
void __cm_cl_barrier();
189192
void __cm_cl_sbarrier(uint8_t);
190193
void __cm_cl_fence(memory_order semantics, memory_scope scope);

IGC/VectorCompiler/CMCL/lib/Headers/cm-cl/math.h

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2021-2024 Intel Corporation
3+
Copyright (C) 2021-2025 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -379,6 +379,27 @@ vector<T, width> cosine(vector<T, width> src, cm::tag::fast_t) {
379379

380380
/*==========================================================*/
381381

382+
// Wrapper around the __cm_cl_packed_4bit_upconvert_lut builtin.
//
// Template parameters:
//   Index   - argument forwarded to Src.select<Width, Stride>(); presumably the
//             starting element offset of the strided selection (TODO confirm
//             against the vector::select contract).
//   T       - element type of the source vector.
//   NumElts - number of T elements in the source vector.
//
// Parameters:
//   Lut - look-up table of 16 uint32_t entries.
//   Src - source vector; only the elements picked by the strided select are
//         passed to the builtin.
//
// Returns a vector<uint32_t, Width>, where Width = NumElts / Stride and
// Stride = sizeof(uint32_t) / sizeof(T). Width must be 16 or 32; any other
// value fails the static_assert below.
template <int Index, typename T, int NumElts>
383+
auto upconvert_4bit_lut(vector<uint32_t, 16> Lut, vector<T, NumElts> Src) {
384+
// Number of T elements that fit into one 32-bit result element.
constexpr int Stride = sizeof(uint32_t) / sizeof(T);
385+
// Number of result elements (and of source elements actually consumed).
constexpr int Width = NumElts / Stride;
386+
// Width expressed in units of 16 elements: 1 for Width == 16, 2 for Width == 32.
constexpr int VWidth = Width / 16;
387+
388+
static_assert(Width == 16 || Width == 32,
389+
"upconvert_4bit_lut expects 16 or 32 elements");
390+
391+
vector<uint32_t, Width> Res;
392+
393+
// Pick Width elements out of Src using stride Stride and the Index argument.
vector<T, Width> _Src = Src.template select<Width, Stride>(Index);
394+
395+
// NOTE(review): with a vertical stride of 0 this presumably repeats the
// 16-entry table VWidth times so that the LUT operand matches the result
// width - confirm against detail::read_region.
auto _Lut =
396+
detail::read_region<VWidth, 0, 16, 1, uint32_t, 16>(Lut.cl_vector(), 0);
397+
398+
Res = detail::__cm_cl_packed_4bit_upconvert_lut(_Lut, _Src.cl_vector());
399+
400+
return Res;
401+
}
402+
382403
} // namespace math
383404
} // namespace cm
384405

IGC/VectorCompiler/CMCL/lib/Support/TranslationDescription.json

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"copyright": [
33
"============================ begin_copyright_notice ============================",
44
"",
5-
"Copyright (C) 2021-2024 Intel Corporation",
5+
"Copyright (C) 2021-2025 Intel Corporation",
66
"",
77
"SPDX-License-Identifier: MIT",
88
"",
@@ -910,6 +910,21 @@
910910
]
911911
}
912912
},
913+
"UpconvertLut": {
914+
"Name": "packed_4bit_upconvert_lut",
915+
"Operands": [
916+
{"Name": "Lut", "Kind": "Input"},
917+
{"Name": "Source", "Kind": "Input"}
918+
],
919+
"TranslateInto": {
920+
"VC-Intrinsic": "packed_4bit_upconvert_lut",
921+
"ReturnType": {"GetBuiltinReturnType": []},
922+
"Operands": [
923+
{"GetBuiltinOperand": ["Lut"]},
924+
{"GetBuiltinOperand": ["Source"]}
925+
]
926+
}
927+
},
913928
"AtomicRMW": {
914929
"Name": "atomicrmw",
915930
"Operands": [

IGC/VectorCompiler/cmake/supported_platforms_list.cmake

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#=========================== begin_copyright_notice ============================
22
#
3-
# Copyright (C) 2020-2022 Intel Corporation
3+
# Copyright (C) 2020-2025 Intel Corporation
44
#
55
# SPDX-License-Identifier: MIT
66
#
@@ -20,4 +20,5 @@ set(SUPPORTED_VC_PLATFORMS
2020
"XeHPCVG"
2121
"Xe2"
2222
"Xe3"
23+
"Xe3P"
2324
)

IGC/VectorCompiler/igcdeps/include/vc/igcdeps/cmc.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2019-2024 Intel Corporation
3+
Copyright (C) 2019-2025 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -108,6 +108,8 @@ class CMKernel {
108108
unsigned Size, unsigned ArgOffset);
109109

110110
void createImplArgsBufferAnnotation(unsigned Size, unsigned ArgOffset);
111+
void createIndirectDataBufferAnnotation(unsigned Size, unsigned ArgOffset);
112+
void createScratchBufferAnnotation(unsigned Size, unsigned ArgOffset);
111113

112114
void RecomputeBTLayout(int numUAVs, int numResources);
113115
};

IGC/VectorCompiler/igcdeps/src/TranslationInterface.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,10 @@ getPlatformName(const PLATFORM &Platform) {
182182
if (Product == IGFX_NVL_XE3G)
183183
return {"Xe3", RevId};
184184
LLVM_FALLTHROUGH;
185+
case IGFX_XE3P_CORE:
186+
if (Product == IGFX_CRI)
187+
return {"Xe3P", RevId};
188+
break;
185189
default:
186190
break;
187191
}
@@ -203,6 +207,12 @@ static void adjustPlatform(const IGC::CPlatform &IGCPlatform,
203207
IGCPlatform.hasL3FlushOnGPUScopeInvalidate();
204208
Opts.HasHalfSIMDLSC = IGCPlatform.hasHalfSIMDLSC();
205209
Opts.WATable = &IGCPlatform.getWATable();
210+
211+
if (IGCPlatform.hasEfficient64bEnabled()) {
212+
if (!Opts.FeaturesString.empty())
213+
Opts.FeaturesString.append(",");
214+
Opts.FeaturesString.append("+efficient_64b_enabled");
215+
}
206216
}
207217

208218
static void adjustFileType(TC::TB_DATA_FORMAT DataFormat,

IGC/VectorCompiler/igcdeps/src/cmc.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,20 @@ void CMKernel::createImplArgsBufferAnnotation(unsigned Size,
307307
zebin::PreDefinedAttrGetter::ArgType::implicit_arg_buffer, ArgOffset,
308308
Size);
309309
}
310+
// Adds an implicit payload argument of type indirect_data_pointer to the
// kernel's zeinfo payload argument list (m_kernelInfo.m_zePayloadArgs).
//   Size      - size value forwarded to the zeinfo builder (callers in this
//               file pass Arg.getSizeInBytes()).
//   ArgOffset - offset value forwarded to the zeinfo builder.
void CMKernel::createIndirectDataBufferAnnotation(unsigned Size,
311+
unsigned ArgOffset) {
312+
zebin::ZEInfoBuilder::addPayloadArgumentImplicit(
313+
m_kernelInfo.m_zePayloadArgs,
314+
zebin::PreDefinedAttrGetter::ArgType::indirect_data_pointer, ArgOffset,
315+
Size);
316+
}
317+
318+
// Adds an implicit payload argument of type scratch_pointer to the kernel's
// zeinfo payload argument list (m_kernelInfo.m_zePayloadArgs).
//   Size      - size value forwarded to the zeinfo builder (callers in this
//               file pass Arg.getSizeInBytes()).
//   ArgOffset - offset value forwarded to the zeinfo builder.
void CMKernel::createScratchBufferAnnotation(unsigned Size,
319+
unsigned ArgOffset) {
320+
zebin::ZEInfoBuilder::addPayloadArgumentImplicit(
321+
m_kernelInfo.m_zePayloadArgs,
322+
zebin::PreDefinedAttrGetter::ArgType::scratch_pointer, ArgOffset, Size);
323+
}
310324

311325
// TODO: refactor this function with the OCL part.
312326
void CMKernel::RecomputeBTLayout(int numUAVs, int numResources) {
@@ -466,6 +480,13 @@ static void setArgumentsInfo(const GenXOCLRuntimeInfo::KernelInfo &Info,
466480
case ArgKind::ImplicitArgsBuffer:
467481
Kernel.createImplArgsBufferAnnotation(Arg.getSizeInBytes(), ArgOffset);
468482
break;
483+
case ArgKind::IndirectDataBuffer:
484+
Kernel.createIndirectDataBufferAnnotation(Arg.getSizeInBytes(),
485+
ArgOffset);
486+
break;
487+
case ArgKind::ScratchBuffer:
488+
Kernel.createScratchBufferAnnotation(Arg.getSizeInBytes(), ArgOffset);
489+
break;
469490
}
470491
}
471492

@@ -522,6 +543,8 @@ static void setExecutionInfo(const GenXOCLRuntimeInfo::KernelInfo &BackendInfo,
522543
ExecEnv.HasLscStoresWithNonDefaultL1CacheControls =
523544
BackendInfo.hasLscStoresWithNonDefaultL1CacheControls();
524545

546+
auto &ThreadPayload = Kernel.m_kernelInfo.m_threadPayload;
547+
ThreadPayload.PassInlineDataSize = BackendInfo.getInlineDataPayloadSize();
525548

526549
// Allocate spill-fill buffer
527550
if (JitterInfo.hasStackcalls) {

IGC/VectorCompiler/include/GenX.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2024 Intel Corporation
3+
Copyright (C) 2017-2025 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -159,6 +159,7 @@ FunctionPass *createGenXLegacyToLscTranslatorPass();
159159
ModulePass *createGenXSLMResolution();
160160
FunctionPass *createGenXLscAddrCalcFoldingPass();
161161
ModulePass *createGenXDetectPointerArgPass();
162+
ModulePass *createGenXPropagateSurfaceStatePass();
162163
FunctionPass *createGenXLCECalculationPass();
163164
FunctionPass *createGenXFloatControlPass();
164165
ModulePass *createGenXCountIndirectStatelessPass();

IGC/VectorCompiler/include/GenXPassRegistry.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,13 @@ MODULE_PASS("CMABI", CMABIPass())
1616
MODULE_PASS("CMImpParam", CMImpParamPass())
1717
MODULE_PASS("CMKernelArgOffset",
1818
CMKernelArgOffsetPass(GTM->getGenXSubtarget().getGRFByteSize(),
19+
GTM->getGenXSubtarget().hasEfficient64b(),
1920
BC->useBindlessImages()))
2021
MODULE_PASS("GenXPacketize", GenXPacketizePass())
2122
MODULE_PASS("GenXBIFFlagCtrlResolution", GenXBIFFlagCtrlResolutionPass())
22-
MODULE_PASS("GenXBTIAssignment", GenXBTIAssignmentPass(BC->getResult()))
23+
MODULE_PASS("GenXBTIAssignment",
24+
GenXBTIAssignmentPass(BC->getResult(),
25+
GTM->getGenXSubtarget().hasEfficient64b()))
2326

2427
MODULE_PASS("GenXImportOCLBiF", GenXImportOCLBiFPass())
2528
MODULE_PASS("GenXLegalizeGVLoadUses", GenXLegalizeGVLoadUsesPass())
@@ -42,6 +45,7 @@ MODULE_PASS("GenXVerify", GenXVerifyPass())
4245
#endif
4346

4447
FUNCTION_PASS("GenXSimplify", GenXSimplifyPass())
48+
FUNCTION_PASS("GenXStatePointerFence", GenXStatePointerFencePass())
4549
FUNCTION_PASS("CMLowerVLoadVStore", CMLowerVLoadVStorePass())
4650
FUNCTION_PASS("GenXTypeLegalization", GenXTypeLegalizationPass())
4751
FUNCTION_PASS("GenXTranslateIntrinsics", GenXTranslateIntrinsicsPass())

IGC/VectorCompiler/include/GenXSubtarget.h

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2024 Intel Corporation
3+
Copyright (C) 2017-2025 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -63,6 +63,7 @@ class GenXSubtarget final : public GenXGenSubtargetInfo {
6363
XeHPCVG,
6464
Xe2,
6565
Xe3,
66+
Xe3P,
6667
Invalid,
6768
};
6869

@@ -79,6 +80,12 @@ class GenXSubtarget final : public GenXGenSubtargetInfo {
7980
// HasFP64 - True if subtarget supports double type
8081
bool HasFP64 = false;
8182

83+
// HasNativeBFloat16 - True if subtarget supports bfloat16 arithmetic
84+
bool HasNativeBFloat16 = false;
85+
86+
// HasMxfp - True if subtarget supports mxfp* operations
87+
bool HasMxfp = false;
88+
8289
// HasIEEEDivSqrt - True if subtarget supports IEEE-754 div and sqrt
8390
bool HasIEEEDivSqrt = false;
8491

@@ -123,6 +130,10 @@ class GenXSubtarget final : public GenXGenSubtargetInfo {
123130
// True if it is profitable to use native DxD->Q multiplication
124131
bool UseMulDDQ = false;
125132

133+
// True if it is profitable to use native DxD+D->Q and DxD+Q->Q multiply-add
134+
// operations
135+
bool UseMadDDQ = false;
136+
126137
// True if codegenerating for OCL runtime (set by default since CMRT removed)
127138
bool OCLRuntime = true;
128139

@@ -175,6 +186,15 @@ class GenXSubtarget final : public GenXGenSubtargetInfo {
175186
/// True if subtarget supports half SIMD LSC messages
176187
bool HasHalfSIMDLSC = false;
177188

189+
/// True if subtarget supports efficient 64-bit addressing mode
190+
bool HasEfficient64b = false;
191+
192+
/// True if efficient 64-bit mode is enabled
193+
bool EnabledEfficient64b = false;
194+
195+
/// Number of supported cache levels
196+
unsigned NumCacheLevels = 2;
197+
178198
/// True if subtarget supports sampler messages
179199
bool HasSampler = false;
180200

@@ -235,12 +255,21 @@ class GenXSubtarget final : public GenXGenSubtargetInfo {
235255
// True if target supports global double precision atomic add/sub
236256
bool HasGlobalAtomicAddF64 = false;
237257

258+
// True if target supports half precision atomics
259+
bool HasInstrAtomicHF16 = false;
260+
261+
// True if target supports local single precision atomic add/sub
262+
bool HasInstrLocalAtomicAddF32 = false;
263+
238264
/// Max supported SLM size (in kbytes)
239265
int MaxSLMSize = 64;
240266

241267
// Number of elements in Address Register
242268
unsigned AddressRegisterElements = 16;
243269

270+
// True if subtarget supports SIMD32 programming model
271+
bool HasEfficientSIMD32 = false;
272+
244273
// Shows which surface should we use for stack
245274
PreDefined_Surface StackSurf;
246275

@@ -303,6 +332,18 @@ class GenXSubtarget final : public GenXGenSubtargetInfo {
303332

304333
bool hasLSCOffset() const { return HasLSCOffset; }
305334

335+
// * efficient 64-bit addressing is supported
336+
bool supportEfficient64b() const { return HasEfficient64b; }
337+
338+
// * efficient 64-bit addressing is supported and enabled
339+
bool hasEfficient64b() const {
340+
return HasEfficient64b && EnabledEfficient64b;
341+
}
342+
343+
// * true exactly when hasEfficient64b() is true, i.e. efficient 64-bit
//   addressing is both supported and enabled
bool hasLSCBase() const { return hasEfficient64b(); }
344+
345+
// * returns 32 when hasEfficient64b() is true and 1 otherwise
//   NOTE(review): presumably the largest address scale factor accepted by LSC
//   messages - confirm against the users of this accessor.
unsigned getLSCScaleMax() const { return hasEfficient64b() ? 32 : 1; }
346+
306347
bool translateLegacyMessages() const {
307348
return HasLSCMessages && TranslateLegacyMessages;
308349
}
@@ -332,6 +373,12 @@ class GenXSubtarget final : public GenXGenSubtargetInfo {
332373
/// * hasFP64 - true if target supports double fp
333374
bool hasFP64() const { return HasFP64; }
334375

376+
/// * hasNativeBFloat16 - true if target supports bfloat16 arithmetic
377+
bool hasNativeBFloat16() const { return HasNativeBFloat16; }
378+
379+
/// * hasMxfp - true if target supports mxfp* operations
380+
bool hasMxfp() const { return HasMxfp; }
381+
335382
/// * hasIEEEDivSqrt - true if target supports IEEE-754 div and sqrt
336383
bool hasIEEEDivSqrt() const { return HasIEEEDivSqrt; }
337384

@@ -344,6 +391,10 @@ class GenXSubtarget final : public GenXGenSubtargetInfo {
344391
/// * useMulDDQ - true if it is desired to emit DxD->Q mul instruction
345392
bool useMulDDQ() const { return UseMulDDQ; }
346393

394+
/// * useMadDDQ - true if it is desired to emit DxD+Q->Q and DxD+D->Q mad
395+
/// instruction
396+
bool useMadDDQ() const { return UseMadDDQ; }
397+
347398
/// * disableJmpi - true if jmpi is disabled.
348399
bool disableJmpi() const { return DisableJmpi; }
349400

@@ -411,6 +462,9 @@ class GenXSubtarget final : public GenXGenSubtargetInfo {
411462

412463
bool hasGlobalAtomicAddF64() const { return HasGlobalAtomicAddF64; }
413464

465+
bool hasInstrAtomicHF16() const { return HasInstrAtomicHF16; }
466+
bool hasInstrLocalAtomicAddF32() const { return HasInstrLocalAtomicAddF32; }
467+
414468
bool hasL1ReadOnlyCache() const { return HasL1ReadOnlyCache; }
415469
bool hasLocalMemFenceSupress() const { return HasLocalMemFenceSupress; }
416470
bool hasMultiTile() const { return HasMultiTile; };
@@ -447,13 +501,19 @@ class GenXSubtarget final : public GenXGenSubtargetInfo {
447501
/// bit fields for ThreadID (from lsb to msb).
448502
ArrayRef<std::pair<int, int>> getThreadIdBits() const;
449503

450-
unsigned getNumCacheLevels() const { return 2; }
504+
/// Number of cache levels: the NumCacheLevels member when hasEfficient64b()
/// is true, otherwise the fixed value 2.
unsigned getNumCacheLevels() const {
505+
if (hasEfficient64b())
506+
return NumCacheLevels;
507+
return 2;
508+
}
451509

452510
// Address Register size in elements.
453511
unsigned getAddressRegisterElements() const {
454512
return AddressRegisterElements;
455513
}
456514

515+
bool hasEfficientSIMD32() const { return HasEfficientSIMD32; }
516+
457517
// Generic helper functions...
458518
const Triple &getTargetTriple() const { return TargetTriple; }
459519

0 commit comments

Comments
 (0)