Skip to content

Commit 62c05d6

Browse files
Wei-Chen-Intel authored and igcbot committed
(NFCI) Avoid passing each basic block's scheduling stats in FINALIZER_INFO, second try.
The vISA FINALIZER_INFO struct holds each basic block's scheduling stats (weighted and unweighted stall cycles), which are used by IGC both for stat reporting and in its SIMD size selection heuristics. This requires a dynamically allocated array to store the information. Avoid this by having vISA sum up the stats for the whole shader instead. Also move the basic block count information in FINALIZER_INFO out of the post-RA scheduler pass, since that pass is not always enabled. The first submission was backed out due to test regressions, but the test appears to be unstable and to fail sporadically.
1 parent aca9cb0 commit 62c05d6

File tree

6 files changed

+49
-37
lines changed

6 files changed

+49
-37
lines changed

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 4 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -6074,23 +6074,10 @@ namespace IGC
60746074
SimdSize32++;
60756075
}
60766076

6077-
uint sendStallCycle = 0;
6078-
uint staticCycle = 0;
6079-
uint loopNestedStallCycle = 0;
6080-
uint loopNestedCycle = 0;
6081-
for (uint i = 0; i < jitInfo->BBNum; i++)
6082-
{
6083-
sendStallCycle += jitInfo->BBInfo[i].sendStallCycle;
6084-
staticCycle += jitInfo->BBInfo[i].staticCycle;
6085-
// expects that a loop runs 16 iterations
6086-
auto nestingfactor = (jitInfo->BBInfo[i].loopNestLevel * 4);
6087-
loopNestedStallCycle += (jitInfo->BBInfo[i].sendStallCycle << nestingfactor);
6088-
loopNestedCycle += (jitInfo->BBInfo[i].staticCycle << nestingfactor);
6089-
}
6090-
m_program->m_sendStallCycle = sendStallCycle;
6091-
m_program->m_staticCycle = staticCycle;
6092-
m_program->m_loopNestedStallCycle = loopNestedStallCycle;
6093-
m_program->m_loopNestedCycle = loopNestedCycle;
6077+
m_program->m_sendStallCycle = jitInfo->stats.sendStallCycle;
6078+
m_program->m_staticCycle = jitInfo->stats.staticCycle;
6079+
m_program->m_loopNestedStallCycle = jitInfo->stats.loopNestedStallCycle;
6080+
m_program->m_loopNestedCycle = jitInfo->stats.loopNestedCycle;
60946081

60956082
bool isStackCallProgram = m_program->HasStackCalls() || m_program->IsIntelSymbolTableVoidProgram();
60966083
bool noRetry = jitInfo->avoidRetry;

visa/LocalScheduler/LocalScheduler_G4IR.cpp

Lines changed: 23 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -17,23 +17,21 @@ SPDX-License-Identifier: MIT
1717
#include <functional>
1818
#include <queue>
1919
#include <sstream>
20+
#include <vector>
2021

2122
using namespace vISA;
2223

2324
/* Entry to the local scheduling. */
2425
void LocalScheduler::localScheduling() {
2526
// This is controlled by options for debugging
26-
if (!fg.getKernel()->isLocalSheduleable()) {
27+
if (!fg.getKernel()->isLocalSheduleable())
2728
return;
28-
}
2929

30-
DEBUG_VERBOSE("[Scheduling]: Starting...");
30+
VISA_DEBUG_VERBOSE(std::cout << "[Scheduling]: Starting...");
3131
BB_LIST_ITER ib(fg.begin()), bend(fg.end());
3232
vISA_ASSERT(ib != bend, ERROR_SCHEDULER);
3333

34-
VISA_BB_INFO *bbInfo =
35-
(VISA_BB_INFO *)mem.alloc(fg.size() * sizeof(VISA_BB_INFO));
36-
memset(bbInfo, 0, fg.size() * sizeof(VISA_BB_INFO));
34+
std::vector<VISA_BB_INFO> bbInfo(fg.size());
3735
int i = 0;
3836

3937
const Options *m_options = fg.builder->getOptions();
@@ -118,9 +116,26 @@ void LocalScheduler::localScheduling() {
118116

119117
i++;
120118
}
119+
120+
// Sum up the cycles for each BB.
121+
unsigned sendStallCycle = 0;
122+
unsigned staticCycle = 0;
123+
unsigned loopNestedStallCycle = 0;
124+
unsigned loopNestedCycle = 0;
125+
for (auto &bbStat : bbInfo) {
126+
sendStallCycle += bbStat.sendStallCycle;
127+
staticCycle += bbStat.staticCycle;
128+
// Expect that a loop runs 16 iterations.
129+
auto nestingfactor = (bbStat.loopNestLevel * 4);
130+
loopNestedStallCycle += (bbStat.sendStallCycle << nestingfactor);
131+
loopNestedCycle += (bbStat.staticCycle << nestingfactor);
132+
}
133+
121134
FINALIZER_INFO *jitInfo = fg.builder->getJitInfo();
122-
jitInfo->BBInfo = bbInfo;
123-
jitInfo->BBNum = i;
135+
jitInfo->stats.sendStallCycle = sendStallCycle;
136+
jitInfo->stats.staticCycle = staticCycle;
137+
jitInfo->stats.loopNestedStallCycle = loopNestedStallCycle;
138+
jitInfo->stats.loopNestedCycle = loopNestedCycle;
124139
jitInfo->stats.numCycles = totalCycles;
125140
}
126141

visa/LocalScheduler/LocalScheduler_G4IR.h

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -330,14 +330,12 @@ class G4_BB_Schedule {
330330

331331
class LocalScheduler {
332332
FlowGraph &fg;
333-
Mem_Manager &mem;
334333

335334
// send latencies are now defined in FFLatency in LIR.cpp
336335
void EmitNode(Node *);
337336

338337
public:
339-
LocalScheduler(FlowGraph &flowgraph, Mem_Manager &m)
340-
: fg(flowgraph), mem(m) {}
338+
LocalScheduler(FlowGraph &flowgraph) : fg(flowgraph) {}
341339
void localScheduling();
342340
};
343341

visa/Optimizer.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -157,7 +157,7 @@ class Optimizer {
157157
}
158158
}
159159
void localSchedule() {
160-
LocalScheduler lSched(kernel.fg, mem);
160+
LocalScheduler lSched(kernel.fg);
161161
lSched.localScheduling();
162162
}
163163

visa/VISAKernelImpl.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -661,10 +661,12 @@ void VISAKernelImpl::dumpPerfStatsInJson(const std::string &filename) {
661661
}
662662

663663
void VISAKernelImpl::recordFinalizerInfo() {
664-
if (m_builder->getJitInfo()) {
665-
m_builder->getJitInfo()->stats.numAsmCountUnweighted = m_kernel->getAsmCount();
666-
m_builder->getJitInfo()->stats.numGRFTotal = m_kernel->getNumRegTotal();
667-
m_builder->getJitInfo()->stats.numThreads = m_kernel->getNumThreads();
664+
auto jitInfo = m_builder->getJitInfo();
665+
if (jitInfo) {
666+
jitInfo->stats.numAsmCountUnweighted = m_kernel->getAsmCount();
667+
jitInfo->stats.numGRFTotal = m_kernel->getNumRegTotal();
668+
jitInfo->stats.numThreads = m_kernel->getNumThreads();
669+
jitInfo->BBNum = static_cast<uint32_t>(m_kernel->fg.size());
668670
}
669671
}
670672

visa/include/JitterDataStruct.h

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,16 @@ struct PERF_STATS {
6161

6262
uint32_t maxGRFPressure = 0;
6363

64+
// These fields are currently used by IGC.
65+
// The first two are unweighted (i.e., just a sum of each basic block's
66+
// estimated cycles), while the last two are weighted by loop (16 iterations
67+
// per loop).
68+
// Note that these stats are valid only if post-RA scheduling is enabled.
69+
uint32_t sendStallCycle = 0;
70+
uint32_t staticCycle = 0;
71+
uint32_t loopNestedStallCycle = 0;
72+
uint32_t loopNestedCycle = 0;
73+
6474
public:
6575
llvm::json::Value toJSON();
6676
};
@@ -70,10 +80,10 @@ struct PERF_STATS {
7080
// queried (vISA_DumpPerfStatsVerbose)
7181
// TODO: This set will be disable completely in the Release build.
7282
struct PERF_STATS_VERBOSE {
73-
// The number of bank conflict.
83+
// The number of bank conflict.
7484
unsigned BCNum = 0;
7585

76-
// counting the number of read-modify-write
86+
// counting the number of read-modify-write
7787
unsigned numRMWs = 0;
7888

7989
// For the static profiling of acc regsiter substituion ratio
@@ -129,8 +139,9 @@ struct FINALIZER_INFO {
129139
// This information is used by legacy CMRT as well as OpenCL/L0 runtime.
130140
uint32_t numBarriers = 0;
131141

132-
// Unweighted BB cycles counts. Used by IGC for SIMD width selection.
142+
// Number of basic blocks in the kernel, used by IGC for stat reporting.
133143
uint32_t BBNum = 0;
144+
// TODO: this is no longer used, can we remove them without breaking stuff?
134145
VISA_BB_INFO *BBInfo = nullptr;
135146

136147
// Whether kernel recompilation should be avoided. vISA hint for IGC.
@@ -160,6 +171,5 @@ struct FINALIZER_INFO {
160171
PERF_STATS_VERBOSE statsVerbose;
161172
};
162173

163-
164174
} // namespace vISA
165175
#endif // JITTERDATASTRUCT_

0 commit comments

Comments
 (0)