Skip to content

Commit abbe98b

Browse files
Fix allocating graphics memory for patterns in appendMemoryFill
Signed-off-by: Kacper Nowak <kacper.nowak@intel.com>
1 parent 9ff91de commit abbe98b

File tree

16 files changed

+190
-62
lines changed

16 files changed

+190
-62
lines changed

level_zero/core/source/cmdlist/cmdlist.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ struct CommandList : _ze_command_list_handle_t {
257257

258258
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
259259
std::vector<NEO::GraphicsAllocation *> ownedPrivateAllocations;
260+
std::vector<NEO::GraphicsAllocation *> patternAllocations;
260261

261262
NEO::StreamProperties requiredStreamState{};
262263
NEO::StreamProperties finalStreamState{};

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ CommandListCoreFamily<gfxCoreFamily>::~CommandListCoreFamily() {
6464
device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(alloc);
6565
}
6666
this->ownedPrivateAllocations.clear();
67+
for (auto &patternAlloc : this->patternAllocations) {
68+
device->storeReusableAllocation(*patternAlloc);
69+
}
70+
this->patternAllocations.clear();
6771
}
6872

6973
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -1468,16 +1472,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
14681472
size_t patternAllocationSize = alignUp(patternSize, MemoryConstants::cacheLineSize);
14691473
uint32_t patternSizeInEls = static_cast<uint32_t>(patternAllocationSize / middleElSize);
14701474

1471-
auto patternGfxAlloc = getAllocationFromHostPtrMap(pattern, patternAllocationSize);
1475+
auto patternGfxAlloc = device->obtainReusableAllocation(patternAllocationSize, NEO::GraphicsAllocation::AllocationType::FILL_PATTERN);
14721476
if (patternGfxAlloc == nullptr) {
14731477
patternGfxAlloc = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getNEODevice()->getRootDeviceIndex(),
14741478
patternAllocationSize,
14751479
NEO::GraphicsAllocation::AllocationType::FILL_PATTERN,
14761480
device->getNEODevice()->getDeviceBitfield()});
1477-
hostPtrMap.insert(std::make_pair(pattern, patternGfxAlloc));
14781481
}
14791482
void *patternGfxAllocPtr = patternGfxAlloc->getUnderlyingBuffer();
1480-
1483+
patternAllocations.push_back(patternGfxAlloc);
14811484
uint64_t patternAllocPtr = reinterpret_cast<uintptr_t>(patternGfxAllocPtr);
14821485
uint64_t patternAllocOffset = 0;
14831486
uint64_t patternSizeToCopy = patternSize;

level_zero/core/source/device/device.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ struct Device : _ze_device_handle_t {
133133
virtual ze_result_t getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver **csr, uint32_t ordinal, uint32_t index) = 0;
134134
virtual ze_result_t getCsrForLowPriority(NEO::CommandStreamReceiver **csr) = 0;
135135
virtual ze_result_t mapOrdinalForAvailableEngineGroup(uint32_t *ordinal) = 0;
136+
virtual NEO::GraphicsAllocation *obtainReusableAllocation(size_t requiredSize, NEO::GraphicsAllocation::AllocationType type) = 0;
137+
virtual void storeReusableAllocation(NEO::GraphicsAllocation &alloc) = 0;
136138
};
137139

138140
} // namespace L0

level_zero/core/source/device/device_imp.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -800,6 +800,7 @@ void DeviceImp::releaseResources() {
800800
metricContext.reset();
801801
builtins.reset();
802802
cacheReservation.reset();
803+
allocationsForReuse.freeAllGraphicsAllocations(neoDevice);
803804

804805
if (getSourceLevelDebugger()) {
805806
getSourceLevelDebugger()->notifyDeviceDestruction();
@@ -918,6 +919,18 @@ NEO::GraphicsAllocation *DeviceImp::allocateMemoryFromHostPtr(const void *buffer
918919
return allocation;
919920
}
920921

922+
// Tries to take an allocation out of allocationsForReuse.
//
// requiredSize and type are forwarded unchanged to detachAllocation(); the two
// middle arguments are always passed as nullptr here.
// Returns the raw pointer released from the detached handle, or nullptr when
// detachAllocation() produced nothing.
NEO::GraphicsAllocation *DeviceImp::obtainReusableAllocation(size_t requiredSize, NEO::GraphicsAllocation::AllocationType type) {
    auto detached = allocationsForReuse.detachAllocation(requiredSize, nullptr, nullptr, type);
    if (detached != nullptr) {
        // NOTE(review): release() presumably transfers ownership to the caller,
        // who is then expected to free it or store it again - confirm.
        return detached.release();
    }
    return nullptr;
}
929+
930+
// Puts the given allocation at the front of allocationsForReuse so that a later
// obtainReusableAllocation() call may pick it up again.
void DeviceImp::storeReusableAllocation(NEO::GraphicsAllocation &alloc) {
    this->allocationsForReuse.pushFrontOne(alloc);
}
933+
921934
ze_result_t DeviceImp::getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver **csr, uint32_t ordinal, uint32_t index) {
922935
if (ordinal >= CommonConstants::engineGroupCount) {
923936
return ZE_RESULT_ERROR_INVALID_ARGUMENT;

level_zero/core/source/device/device_imp.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#pragma once
99

1010
#include "shared/source/helpers/topology_map.h"
11+
#include "shared/source/memory_manager/allocations_list.h"
1112
#include "shared/source/page_fault_manager/cpu_page_fault_manager.h"
1213
#include "shared/source/utilities/spinlock.h"
1314

@@ -110,6 +111,8 @@ struct DeviceImp : public Device {
110111
ze_result_t getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver **csr, uint32_t ordinal, uint32_t index) override;
111112
ze_result_t getCsrForLowPriority(NEO::CommandStreamReceiver **csr) override;
112113
ze_result_t mapOrdinalForAvailableEngineGroup(uint32_t *ordinal) override;
114+
NEO::GraphicsAllocation *obtainReusableAllocation(size_t requiredSize, NEO::GraphicsAllocation::AllocationType type) override;
115+
void storeReusableAllocation(NEO::GraphicsAllocation &alloc) override;
113116
NEO::Device *getActiveDevice() const;
114117

115118
bool toPhysicalSliceId(const NEO::TopologyMap &topologyMap, uint32_t &slice, uint32_t &deviceIndex);
@@ -133,6 +136,7 @@ struct DeviceImp : public Device {
133136
NEO::SVMAllocsManager::MapBasedAllocationTracker peerAllocations;
134137
NEO::SpinLock peerAllocationsMutex;
135138
std::map<NEO::SvmAllocationData *, MemAdviseFlags> memAdviseSharedAllocations;
139+
NEO::AllocationsList allocationsForReuse;
136140

137141
protected:
138142
NEO::GraphicsAllocation *debugSurface = nullptr;

level_zero/core/test/unit_tests/mocks/mock_cmdlist.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
5151
using BaseClass::hostPtrMap;
5252
using BaseClass::indirectAllocationsAllowed;
5353
using BaseClass::initialize;
54+
using BaseClass::patternAllocations;
5455
using BaseClass::requiredStreamState;
5556
using BaseClass::unifiedMemoryControls;
5657
using BaseClass::updateStreamProperties;

level_zero/core/test/unit_tests/mocks/mock_device.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,12 @@ struct Mock<Device> : public Device {
273273
}
274274

275275
void removeDebugSession() override {}
276+
277+
// Mock stub: ignores both arguments and always reports that nothing is
// available for reuse.
NEO::GraphicsAllocation *obtainReusableAllocation(size_t requiredSize, NEO::GraphicsAllocation::AllocationType type) override {
    return nullptr;
}
280+
281+
// Mock stub: intentionally does nothing with the allocation it is given.
void storeReusableAllocation(NEO::GraphicsAllocation &alloc) override {
}
276282
};
277283

278284
template <>

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenGettingAllocInRangeThenAlloc
9898
size_t allocSize = 0x1000;
9999
NEO::MockGraphicsAllocation alloc(const_cast<void *>(cpuPtr), gpuAddress, allocSize);
100100
commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc));
101+
EXPECT_EQ(commandList->getHostPtrMap().size(), 1u);
101102

102103
auto newBufferPtr = ptrOffset(cpuPtr, 0x10);
103104
auto newBufferSize = allocSize - 0x20;
@@ -114,6 +115,7 @@ HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenSizeIsOutOfRangeThenNullPtrR
114115
size_t allocSize = 0x1000;
115116
NEO::MockGraphicsAllocation alloc(const_cast<void *>(cpuPtr), gpuAddress, allocSize);
116117
commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc));
118+
EXPECT_EQ(commandList->getHostPtrMap().size(), 1u);
117119

118120
auto newBufferPtr = ptrOffset(cpuPtr, 0x10);
119121
auto newBufferSize = allocSize + 0x20;
@@ -130,6 +132,7 @@ HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenPtrIsOutOfRangeThenNullPtrRe
130132
size_t allocSize = 0x1000;
131133
NEO::MockGraphicsAllocation alloc(const_cast<void *>(cpuPtr), gpuAddress, allocSize);
132134
commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc));
135+
EXPECT_EQ(commandList->getHostPtrMap().size(), 1u);
133136

134137
auto newBufferPtr = reinterpret_cast<const void *>(gpuAddress - 0x100);
135138
auto newBufferSize = allocSize - 0x200;
@@ -146,6 +149,8 @@ HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenGetHostPtrAllocCalledThenCor
146149
size_t allocSize = 0x1000;
147150
NEO::MockGraphicsAllocation alloc(const_cast<void *>(cpuPtr), gpuAddress, allocSize);
148151
commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc));
152+
EXPECT_EQ(commandList->getHostPtrMap().size(), 1u);
153+
149154
size_t expectedOffset = 0x10;
150155
auto newBufferPtr = ptrOffset(cpuPtr, expectedOffset);
151156
auto newBufferSize = allocSize - 0x20;
@@ -162,6 +167,7 @@ HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenPtrIsInMapThenAllocationRetu
162167
size_t allocSize = 0x1000;
163168
NEO::MockGraphicsAllocation alloc(const_cast<void *>(cpuPtr), gpuAddress, allocSize);
164169
commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc));
170+
EXPECT_EQ(commandList->getHostPtrMap().size(), 1u);
165171

166172
auto newBufferPtr = cpuPtr;
167173
auto newBufferSize = allocSize - 0x20;
@@ -178,6 +184,7 @@ HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenPtrIsInMapButWithBiggerSizeT
178184
size_t allocSize = 0x1000;
179185
NEO::MockGraphicsAllocation alloc(const_cast<void *>(cpuPtr), gpuAddress, allocSize);
180186
commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc));
187+
EXPECT_EQ(commandList->getHostPtrMap().size(), 1u);
181188

182189
auto newBufferPtr = cpuPtr;
183190
auto newBufferSize = allocSize + 0x20;
@@ -194,6 +201,7 @@ HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenPtrLowerThanAnyInMapThenNull
194201
size_t allocSize = 0x1000;
195202
NEO::MockGraphicsAllocation alloc(const_cast<void *>(cpuPtr), gpuAddress, allocSize);
196203
commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc));
204+
EXPECT_EQ(commandList->getHostPtrMap().size(), 1u);
197205

198206
auto newBufferPtr = reinterpret_cast<const void *>(gpuAddress - 0x10);
199207
auto newBufferSize = allocSize - 0x20;

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -139,45 +139,45 @@ HWTEST2_F(AppendFillFixture,
139139
}
140140

141141
HWTEST2_F(AppendFillFixture,
142-
givenTwoCallsToAppendMemoryFillWithSamePatternThenAllocationIsAddedtoHostPtrMapOnlyOnce, Platforms) {
142+
givenTwoCallsToAppendMemoryFillWithSamePatternThenAllocationIsCreatedForEachCall, Platforms) {
143143
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
144144

145145
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
146146
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
147147

148148
ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, 4, allocSize, nullptr, 0, nullptr);
149149
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
150-
size_t hostPtrMapSize = commandList->getHostPtrMap().size();
151-
EXPECT_EQ(hostPtrMapSize, 1u);
150+
size_t patternAllocationsVectorSize = commandList->patternAllocations.size();
151+
EXPECT_EQ(patternAllocationsVectorSize, 1u);
152152

153153
uint8_t *newDstPtr = new uint8_t[allocSize];
154154
result = commandList->appendMemoryFill(newDstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr);
155155
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
156-
size_t newHostPtrMapSize = commandList->getHostPtrMap().size();
156+
size_t newPatternAllocationsVectorSize = commandList->patternAllocations.size();
157157

158-
EXPECT_EQ(hostPtrMapSize, newHostPtrMapSize);
158+
EXPECT_GT(newPatternAllocationsVectorSize, patternAllocationsVectorSize);
159159

160160
delete[] newDstPtr;
161161
}
162162

163163
HWTEST2_F(AppendFillFixture,
164-
givenTwoCallsToAppendMemoryFillWithDifferentPatternsThenHostPtrSizeIncrementsByOne, Platforms) {
164+
givenTwoCallsToAppendMemoryFillWithDifferentPatternsThenAllocationIsCreatedForEachPattern, Platforms) {
165165
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
166166

167167
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
168168
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
169169

170170
ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, 4, allocSize, nullptr, 0, nullptr);
171171
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
172-
size_t hostPtrMapSize = commandList->getHostPtrMap().size();
173-
EXPECT_EQ(hostPtrMapSize, 1u);
172+
size_t patternAllocationsVectorSize = commandList->patternAllocations.size();
173+
EXPECT_EQ(patternAllocationsVectorSize, 1u);
174174

175175
uint8_t newPattern[patternSize] = {1, 2, 3, 4};
176176
result = commandList->appendMemoryFill(dstPtr, newPattern, patternSize, allocSize, nullptr, 0, nullptr);
177177
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
178-
size_t newHostPtrMapSize = commandList->getHostPtrMap().size();
178+
size_t newPatternAllocationsVectorSize = commandList->patternAllocations.size();
179179

180-
EXPECT_EQ(hostPtrMapSize + 1u, newHostPtrMapSize);
180+
EXPECT_EQ(patternAllocationsVectorSize + 1u, newPatternAllocationsVectorSize);
181181
}
182182

183183
HWTEST2_F(AppendFillFixture,

level_zero/core/test/unit_tests/sources/device/test_device.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,62 @@ TEST_F(DeviceTest, givenEmptySVmAllocStorageWhenAllocateMemoryFromHostPtrThenVal
424424
neoDevice->getMemoryManager()->freeGraphicsMemory(allocation);
425425
}
426426

427+
// Stores one FILL_PATTERN allocation for reuse and checks that a request of the
// same type asking for fewer bytes gets exactly that allocation back, leaving
// the reuse list empty.
TEST_F(DeviceTest, givenNonEmptyAllocationsListWhenRequestingAllocationSmallerOrEqualInSizeThenAllocationFromListIsReturned) {
    auto deviceImp = static_cast<DeviceImp *>(device);
    constexpr auto dataSize = 1024u;
    constexpr auto allocationSize = sizeof(int) * dataSize;

    auto allocation = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getNEODevice()->getRootDeviceIndex(),
                                                                                                           allocationSize,
                                                                                                           NEO::GraphicsAllocation::AllocationType::FILL_PATTERN,
                                                                                                           neoDevice->getDeviceBitfield()});
    // Stop here on allocation failure instead of dereferencing a null pointer below.
    ASSERT_NE(nullptr, allocation);

    device->storeReusableAllocation(*allocation);
    EXPECT_FALSE(deviceImp->allocationsForReuse.peekIsEmpty());

    // dataSize is smaller than allocationSize, so the stored allocation is big enough.
    auto obtainedAllocation = device->obtainReusableAllocation(dataSize, NEO::GraphicsAllocation::AllocationType::FILL_PATTERN);
    EXPECT_TRUE(deviceImp->allocationsForReuse.peekIsEmpty());
    EXPECT_NE(nullptr, obtainedAllocation);
    EXPECT_EQ(allocation, obtainedAllocation);

    // The allocation was taken back out of the list, so the test frees it itself.
    neoDevice->getMemoryManager()->freeGraphicsMemory(allocation);
}
446+
447+
// Stores one FILL_PATTERN allocation for reuse and checks that a request of the
// same type asking for more bytes than were allocated gets nullptr, with the
// stored allocation left in the list.
TEST_F(DeviceTest, givenNonEmptyAllocationsListWhenRequestingAllocationBiggerInSizeThenNullptrIsReturned) {
    auto deviceImp = static_cast<DeviceImp *>(device);
    constexpr auto dataSize = 1024u;
    constexpr auto allocationSize = sizeof(int) * dataSize;

    auto allocation = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getNEODevice()->getRootDeviceIndex(),
                                                                                                           allocationSize,
                                                                                                           NEO::GraphicsAllocation::AllocationType::FILL_PATTERN,
                                                                                                           neoDevice->getDeviceBitfield()});
    // Stop here on allocation failure instead of dereferencing a null pointer below.
    ASSERT_NE(nullptr, allocation);

    device->storeReusableAllocation(*allocation);
    EXPECT_FALSE(deviceImp->allocationsForReuse.peekIsEmpty());

    // One byte more than was allocated; expressed via allocationSize so the
    // request does not silently depend on sizeof(int) being 4.
    auto obtainedAllocation = device->obtainReusableAllocation(allocationSize + 1u, NEO::GraphicsAllocation::AllocationType::FILL_PATTERN);
    EXPECT_EQ(nullptr, obtainedAllocation);

    // The allocation stays in allocationsForReuse; DeviceImp::releaseResources()
    // frees whatever is still in that list.
    EXPECT_FALSE(deviceImp->allocationsForReuse.peekIsEmpty());
}
464+
465+
// Stores one BUFFER allocation for reuse and checks that a FILL_PATTERN request
// gets nullptr even though the stored allocation is large enough, with the
// stored allocation left in the list.
TEST_F(DeviceTest, givenNonEmptyAllocationsListAndUnproperAllocationTypeWhenRequestingAllocationThenNullptrIsReturned) {
    auto deviceImp = static_cast<DeviceImp *>(device);
    constexpr auto dataSize = 1024u;
    constexpr auto allocationSize = sizeof(int) * dataSize;

    auto allocation = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getNEODevice()->getRootDeviceIndex(),
                                                                                                           allocationSize,
                                                                                                           NEO::GraphicsAllocation::AllocationType::BUFFER,
                                                                                                           neoDevice->getDeviceBitfield()});
    // Stop here on allocation failure instead of dereferencing a null pointer below.
    ASSERT_NE(nullptr, allocation);

    device->storeReusableAllocation(*allocation);
    EXPECT_FALSE(deviceImp->allocationsForReuse.peekIsEmpty());

    // Ask for a size the stored allocation satisfies, so the allocation type is
    // the only thing that differs. A larger request would be rejected on size
    // alone and the type check would never be exercised.
    auto obtainedAllocation = device->obtainReusableAllocation(allocationSize, NEO::GraphicsAllocation::AllocationType::FILL_PATTERN);
    EXPECT_EQ(nullptr, obtainedAllocation);

    // The allocation stays in allocationsForReuse; DeviceImp::releaseResources()
    // frees whatever is still in that list.
    EXPECT_FALSE(deviceImp->allocationsForReuse.peekIsEmpty());
}
482+
427483
struct DeviceHostPointerTest : public ::testing::Test {
428484
void SetUp() override {
429485
executionEnvironment = new NEO::ExecutionEnvironment();

0 commit comments

Comments
 (0)