@@ -1742,6 +1742,10 @@ namespace IGC
17421742 // Local IDs are non-uniform and may have two instances in SIMD32 mode
17431743 int numAllocInstances = arg.getArgType () == KernelArg::ArgType::IMPLICIT_LOCAL_IDS ? m_numberInstance : 1 ;
17441744
1745+ if (arg.getArgType () == KernelArg::ArgType::RT_STACK_ID) {
1746+ numAllocInstances = m_numberInstance;
1747+ }
1748+
17451749 auto allocSize = arg.getAllocateSize ();
17461750
17471751 if (!IsUnusedArg && !isRuntimeValue)
@@ -1751,6 +1755,22 @@ namespace IGC
17511755 // Align on the desired alignment for this argument
17521756 auto alignment = arg.getAlignment ();
17531757
1758+ // FIXME: move alignment checks to implicit arg creation
1759+ if ((arg.getArgType () == KernelArg::ArgType::IMPLICIT_LOCAL_IDS ||
1760+ arg.getArgType () == KernelArg::ArgType::RT_STACK_ID) &&
1761+ m_Platform->getGRFSize () == 64 )
1762+ {
1763+ alignment = 64 ;
1764+ // generate a single SIMD32 variable in this case
1765+ if (m_dispatchSize == SIMDMode::SIMD16 && m_Platform->getGRFSize () == 64 )
1766+ {
1767+ allocSize = 64 ;
1768+ }
1769+ else
1770+ {
1771+ allocSize = PVCLSCEnabled () ? 64 : 32 ;
1772+ }
1773+ }
17541774 offset = iSTD::Align (offset, alignment);
17551775
17561776 // Arguments larger than a GRF must be at least GRF-aligned.
@@ -1788,6 +1808,7 @@ namespace IGC
17881808
17891809 if (useInlineData && !inlineDataProcessed &&
17901810 arg.getArgType () != KernelArg::ArgType::IMPLICIT_LOCAL_IDS &&
1811+ arg.getArgType () != KernelArg::ArgType::RT_STACK_ID &&
17911812 arg.getArgType () != KernelArg::ArgType::IMPLICIT_R0)
17921813 {
17931814 // Calc if we can fit this arg in inlinedata:
@@ -1809,6 +1830,7 @@ namespace IGC
18091830
18101831 // numAllocInstances can be greater than 1, only when:
18111832 // argtype == IMPLICIT_LOCAL_IDS
1833+ // or argtype == RT_STACK_ID,
18121834 // so there is no need to handle it here
18131835
18141836 // current arg is first to be loaded (it does not come in inlinedata)
@@ -2049,6 +2071,7 @@ namespace IGC
20492071
20502072 m_kernelInfo.m_executionEnivronment .CompiledSubGroupsNumber = funcMD.CompiledSubGroupsNumber ;
20512073
2074+ m_kernelInfo.m_executionEnivronment .HasRTCalls = funcMD.hasSyncRTCalls ;
20522075 }
20532076
20542077 m_kernelInfo.m_executionEnivronment .HasGlobalAtomics = GetHasGlobalAtomics ();
@@ -2491,6 +2514,19 @@ namespace IGC
24912514 m_Context->SetSIMDInfo (SIMD_RETRY, simdMode, ShaderDispatchMode::NOT_APPLICABLE);
24922515 }
24932516
2517+ // Currently the FunctionMetaData is looked up solely to get hasSyncRTCalls.
2518+ // If we ever need some non-raytracing-related field from the FunctionMetaData,
2519+ // then we can move the lookup out of the #if and just leave the bool hasSyncRTCalls inside.
2520+ auto & FuncMap = m_Context->getModuleMetaData ()->FuncMD ;
2521+ // we want to check the setting for the associated kernel
2522+ auto FuncIter = FuncMap.find (entry);
2523+ if (FuncIter == FuncMap.end ()) { // wasn't able to find the meta data for the passed in llvm::Function!
2524+ // All of the kernels should have an entry in the map.
2525+ IGC_ASSERT (0 );
2526+ return false ;
2527+ }
2528+ const FunctionMetaData& funcMD = FuncIter->second ;
2529+ bool hasSyncRTCalls = funcMD.hasSyncRTCalls ; // if the function/kernel has sync raytracing calls
24942530
24952531 // If forced SIMD Mode (by driver or regkey), then:
24962532 // 1. Compile only that SIMD mode and nothing else
@@ -2504,15 +2540,16 @@ namespace IGC
25042540 // These statements are basically equivalent to (simdMode == forcedSIMDSize)
25052541 (simdMode == SIMDMode::SIMD8 && m_Context->getModuleMetaData ()->csInfo .forcedSIMDSize == 8 ) ||
25062542 (simdMode == SIMDMode::SIMD16 && m_Context->getModuleMetaData ()->csInfo .forcedSIMDSize == 16 ) ||
2507- (simdMode == SIMDMode::SIMD32 && m_Context->getModuleMetaData ()->csInfo .forcedSIMDSize == 32 )
2543+ // if we want to compile SIMD32, we need to be lacking any raytracing calls; raytracing doesn't support SIMD32
2544+ (simdMode == SIMDMode::SIMD32 && m_Context->getModuleMetaData ()->csInfo .forcedSIMDSize == 32 && !hasSyncRTCalls)
25082545 );
25092546 }
25102547
2511- SIMDStatus simdStatus = checkSIMDCompileConds (simdMode, EP, F);
2548+ SIMDStatus simdStatus = checkSIMDCompileConds (simdMode, EP, F, hasSyncRTCalls );
25122549
25132550 if (m_Context->platform .getMinDispatchMode () == SIMDMode::SIMD16)
25142551 {
2515- simdStatus = checkSIMDCompileCondsPVC (simdMode, EP, F);
2552+ simdStatus = checkSIMDCompileCondsPVC (simdMode, EP, F, hasSyncRTCalls );
25162553 }
25172554
25182555 // Func and Perf checks pass, compile this SIMD
@@ -2535,7 +2572,7 @@ namespace IGC
25352572 return simdStatus == SIMDStatus::SIMD_PASS;
25362573 }
25372574
2538- SIMDStatus COpenCLKernel::checkSIMDCompileCondsPVC (SIMDMode simdMode, EmitPass& EP, llvm::Function& F)
2575+ SIMDStatus COpenCLKernel::checkSIMDCompileCondsPVC (SIMDMode simdMode, EmitPass& EP, llvm::Function& F, bool hasSyncRTCalls )
25392576 {
25402577 if (simdMode == SIMDMode::SIMD8)
25412578 {
@@ -2590,6 +2627,12 @@ namespace IGC
25902627 pCtx->getModuleMetaData ()->csInfo .forcedSIMDSize = (unsigned char )numLanes (SIMDMode::SIMD16);
25912628 }
25922629
2630+ if (simdMode == SIMDMode::SIMD32 && hasSyncRTCalls) {
2631+ return SIMDStatus::SIMD_FUNC_FAIL;
2632+ }
2633+ else if (simdMode == SIMDMode::SIMD16 && hasSyncRTCalls) {
2634+ return SIMDStatus::SIMD_PASS;
2635+ }
25932636
25942637 if (simd_size)
25952638 {
@@ -2657,7 +2700,7 @@ namespace IGC
26572700 return m_annotatedNumThreads;
26582701 }
26592702
2660- SIMDStatus COpenCLKernel::checkSIMDCompileConds (SIMDMode simdMode, EmitPass& EP, llvm::Function& F)
2703+ SIMDStatus COpenCLKernel::checkSIMDCompileConds (SIMDMode simdMode, EmitPass& EP, llvm::Function& F, bool hasSyncRTCalls )
26612704 {
26622705 CShader* simd8Program = m_parent->GetShader (SIMDMode::SIMD8);
26632706 CShader* simd16Program = m_parent->GetShader (SIMDMode::SIMD16);
@@ -2773,7 +2816,12 @@ namespace IGC
27732816 return SIMDStatus::SIMD_FUNC_FAIL;
27742817 }
27752818 else {
2776- EP.m_canAbortOnSpill = false ;
2819+ if (hasSyncRTCalls) {
2820+ return SIMDStatus::SIMD_FUNC_FAIL; // SIMD32 unsupported with raytracing calls
2821+ }
2822+ else { // simdMode == SIMDMode::SIMD32 && !hasSyncRTCalls
2823+ EP.m_canAbortOnSpill = false ;
2824+ }
27772825 }
27782826 break ;
27792827 default :
@@ -2802,6 +2850,12 @@ namespace IGC
28022850 return SIMDStatus::SIMD_PASS;
28032851 }
28042852
2853+ if (hasSyncRTCalls) {
2854+ // If we get all the way to here, then set it to the preferred SIMD size for Ray Tracing.
2855+ SIMDMode mode = SIMDMode::UNKNOWN;
2856+ mode = m_Context->platform .getPreferredRayTracingSIMDSize ();
2857+ return (mode == simdMode) ? SIMDStatus::SIMD_PASS : SIMDStatus::SIMD_FUNC_FAIL;
2858+ }
28052859
28062860 if (groupSize != 0 && groupSize <= 16 )
28072861 {
0 commit comments