Skip to content

Commit 7abdec4

Browse files
committed
Update NCU profile
1 parent 6626591 commit 7abdec4

File tree

8 files changed

+16
-15
lines changed

8 files changed

+16
-15
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,5 +161,5 @@ cython_debug/
161161
/pmpp/_torch_ops
162162
vcpkg_installed
163163
/tmp
164-
/output/*
164+
/outputs/*
165165
!.gitkeep

configs/lib-tests.yml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
OpTest:
22
VecAdd:
3-
- nElems: 64
3+
- nElems: 32
44
- nElems: 1024
55
- nElems: 2048
6+
- nElems: 1048576 # 1024*1024
7+
- nElems: 8388608 # 8*1024*1024
68
Conv2D:
79
- inputHeight: 32
810
inputWidth: 32
@@ -14,10 +16,6 @@ OpTest:
1416
divider: 4
1517
MulReduction:
1618
- nInputs: 32
17-
# # [NOTE] The following calculation results would be wrong?
18-
# - nInputs: 64
19-
# - nInputs: 128
20-
# - nInputs: 512
2119
PrefixSum:
2220
- nInputs: 32
2321
- nInputs: 64

csrc/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,12 @@ include(${PROJECT_SOURCE_DIR}/cmake/utils/common.cmake)
99

1010
log_info("CMake Tookchain File Path: ${CMAKE_TOOLCHAIN_FILE}")
1111

12+
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
13+
1214
set_default_values(
1315
PROJECT_NAMESPACE "pmpp::"
1416
STDOUT_IS_TERMINAL OFF
17+
CMAKE_EXECUTABLE_SUFFIX ""
1518
)
1619

1720
# Utility functions for installing libraries:

csrc/cmake/compilers/cxx-compiler-configs.cmake

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ include(${CMAKE_CURRENT_LIST_DIR}/../utils/common.cmake)
1515

1616
enable_language(CXX)
1717

18-
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
1918
set(CMAKE_CXX_STANDARD_REQUIRED ON)
2019

2120
set_default_values(

csrc/lib/ops/vecAdd/op.cuh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,15 @@ __global__ void vecAddKernel(const fp32_t* a, const fp32_t* b, fp32_t* c,
1212

1313
int gtid = threadIdx.x + blockDim.x * blockIdx.x;
1414
if (gtid < n) {
15-
// [GM] 2 load, 1 store
15+
// [GM] 2 load, 1 store, 3 inst
1616
c[gtid] = a[gtid] + b[gtid];
1717
}
1818
}
1919

2020
void launchVecAdd(const fp32_t* d_A, const fp32_t* d_B, fp32_t* d_C, size_t n)
2121
{
22-
dim3 blockSize = 256;
23-
dim3 gridSize = ceilDiv(n, 256);
22+
dim3 blockSize = {std::min<uint32_t>(n, 1024), 1, 1};
23+
dim3 gridSize = {ceilDiv<uint32_t>(n, blockSize.x), 1, 1};
2424

2525
vecAddKernel<<<gridSize, blockSize>>>(d_A, d_B, d_C, int32_t(n));
2626

File renamed without changes.

scripts/build.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Env Variables: CC, CXX, NVCC_CCBIN
1+
# Env Variables: CC, CXX, NVCC_CCBIN, VCPKG_HOME
22

33
set -e # Exit on error
44

@@ -50,8 +50,8 @@ done
5050

5151
cmake -S $SOURCE_DIR -B $BUILD_DIR -G Ninja \
5252
-DCMAKE_TOOLCHAIN_FILE="$VCPKG_HOME/scripts/buildsystems/vcpkg.cmake" \
53-
-DVCPKG_TARGET_TRIPLET="x64-linux" \
5453
-DVCPKG_OVERLAY_TRIPLETS="csrc/cmake/vcpkg-triplets" \
54+
-DVCPKG_TARGET_TRIPLET="x64-linux" \
5555
-DSTDOUT_IS_TERMINAL=$STDOUT_IS_TERMINAL \
5656
-DCMAKE_BUILD_TYPE=$BUILD_TYPE \
5757
-DCMAKE_CXX_STANDARD=$CXX_STANDARD \

scripts/nsight-profile.sh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
TEST_FILE="./build/test/pmpp_test"
22
GTEST_FILTER="OpTest.VecAdd"
3-
OUTPUT_FILE="output/nsight_profile.ncu-rep"
3+
OUTPUT_FILE="outputs/nsight_profile.ncu-rep"
44

55
while [[ $# -gt 0 ]]; do
66
case $1 in
@@ -17,5 +17,6 @@ while [[ $# -gt 0 ]]; do
1717
done
1818

1919

20-
ncu --export $OUTPUT_FILE --force-overwrite $TEST_FILE \
21-
--gtest_filter=$GTEST_FILTER
20+
ncu --export $OUTPUT_FILE --force-overwrite \
21+
--set "full" \
22+
$TEST_FILE --gtest_filter=$GTEST_FILTER

0 commit comments

Comments
 (0)