From 2d8c99a89e487942ec14ae440d4c68bd250b69c6 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 12 Jan 2026 13:54:50 -0800 Subject: [PATCH 01/10] initial localized test --- .../cuda/bindings/_internal/nvfatbin.pxd | 22 ++ .../bindings/_internal/nvfatbin_linux.pyx | 242 ++++++++++++++++++ .../bindings/_internal/nvfatbin_windows.pyx | 233 +++++++++++++++++ cuda_bindings/cuda/bindings/cynvfatbin.pxd | 53 ++++ cuda_bindings/cuda/bindings/cynvfatbin.pyx | 38 +++ cuda_bindings/cuda/bindings/nvfatbin.pxd | 37 +++ cuda_bindings/cuda/bindings/nvfatbin.pyx | 194 ++++++++++++++ cuda_bindings/tests/test_nvfatbin.py | 89 +++++++ 8 files changed, 908 insertions(+) create mode 100644 cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd create mode 100644 cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx create mode 100644 cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx create mode 100644 cuda_bindings/cuda/bindings/cynvfatbin.pxd create mode 100644 cuda_bindings/cuda/bindings/cynvfatbin.pyx create mode 100644 cuda_bindings/cuda/bindings/nvfatbin.pxd create mode 100644 cuda_bindings/cuda/bindings/nvfatbin.pyx create mode 100644 cuda_bindings/tests/test_nvfatbin.py diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd new file mode 100644 index 0000000000..14a8a6d608 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from ..cynvfatbin cimport * + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil + + + diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx new file mode 100644 index 0000000000..06143d9031 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx @@ -0,0 +1,242 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from libc.stdint cimport intptr_t, uintptr_t + +import threading +from .utils import FunctionNotFoundError, NotSupportedError + +from cuda.pathfinder import load_nvidia_dynamic_lib + + +############################################################################### +# Extern +############################################################################### + +# You must 'from .utils import NotSupportedError' before using this template + +cdef extern from "" nogil: + void* dlopen(const char*, int) + char* dlerror() + void* dlsym(void*, const char*) + int dlclose(void*) + + enum: + RTLD_LAZY + RTLD_NOW + RTLD_GLOBAL + RTLD_LOCAL + + const void* RTLD_DEFAULT 'RTLD_DEFAULT' + +cdef int get_cuda_version(): + cdef void* handle = NULL + cdef int err, driver_ver = 0 + + # Load driver to check version + handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL) + if handle == NULL: + err_msg = dlerror() + raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})') + cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion") + if cuDriverGetVersion == NULL: + raise RuntimeError('Did not find cuDriverGetVersion symbol in libcuda.so.1') + err = (cuDriverGetVersion)(&driver_ver) + if err != 0: + raise RuntimeError(f'cuDriverGetVersion returned error code {err}') + + return driver_ver + + + +############################################################################### +# Wrapper init +############################################################################### + +cdef object __symbol_lock = threading.Lock() +cdef bint __py_nvfatbin_init = False + +cdef void* __nvFatbinCreate = NULL +cdef void* __nvFatbinDestroy = NULL +cdef void* __nvFatbinAddPTX = NULL +cdef void* __nvFatbinSize = NULL +cdef void* __nvFatbinGet = NULL +cdef void* __nvFatbinVersion = NULL + + +cdef void* load_library() except* with gil: + cdef uintptr_t handle = load_nvidia_dynamic_lib("nvfatbin")._handle_uint + return handle + + +cdef int _init_nvfatbin() except -1 nogil: + global __py_nvfatbin_init + + cdef void* handle = NULL + + with gil, __symbol_lock: + # Recheck the flag after obtaining the locks + if __py_nvfatbin_init: + return 0 + + # Load function + global __nvFatbinCreate + __nvFatbinCreate = dlsym(RTLD_DEFAULT, 'nvFatbinCreate') + if __nvFatbinCreate == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinCreate = dlsym(handle, 'nvFatbinCreate') + + global __nvFatbinDestroy + __nvFatbinDestroy = dlsym(RTLD_DEFAULT, 'nvFatbinDestroy') + if __nvFatbinDestroy == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinDestroy = dlsym(handle, 'nvFatbinDestroy') + + global __nvFatbinAddPTX + __nvFatbinAddPTX = dlsym(RTLD_DEFAULT, 'nvFatbinAddPTX') + if __nvFatbinAddPTX == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddPTX = dlsym(handle, 'nvFatbinAddPTX') + + global __nvFatbinSize + __nvFatbinSize = dlsym(RTLD_DEFAULT, 'nvFatbinSize') + if __nvFatbinSize == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinSize = dlsym(handle, 'nvFatbinSize') + + global __nvFatbinGet + __nvFatbinGet = dlsym(RTLD_DEFAULT, 'nvFatbinGet') + if __nvFatbinGet == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinGet = dlsym(handle, 'nvFatbinGet') + + global __nvFatbinVersion + __nvFatbinVersion = dlsym(RTLD_DEFAULT, 'nvFatbinVersion') + if __nvFatbinVersion == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinVersion = dlsym(handle, 'nvFatbinVersion') + + __py_nvfatbin_init = True + return 0 + + +cdef inline int _check_or_init_nvfatbin() except -1 nogil: + if __py_nvfatbin_init: + return 0 + + return _init_nvfatbin() + +cdef dict func_ptrs = None + + +cpdef dict _inspect_function_pointers(): + global func_ptrs + if func_ptrs is not None: + return func_ptrs + + _check_or_init_nvfatbin() + cdef dict data = {} + + global __nvFatbinCreate + data["__nvFatbinCreate"] = __nvFatbinCreate + + global __nvFatbinDestroy + data["__nvFatbinDestroy"] = __nvFatbinDestroy + + global __nvFatbinAddPTX + data["__nvFatbinAddPTX"] = __nvFatbinAddPTX + + global __nvFatbinSize + data["__nvFatbinSize"] = __nvFatbinSize + + global __nvFatbinGet + data["__nvFatbinGet"] = __nvFatbinGet + + global __nvFatbinVersion + data["__nvFatbinVersion"] = __nvFatbinVersion + + func_ptrs = data + return data + + +cpdef _inspect_function_pointer(str name): + global func_ptrs + if func_ptrs is None: + func_ptrs = _inspect_function_pointers() + return func_ptrs[name] + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinCreate + _check_or_init_nvfatbin() + if __nvFatbinCreate == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinCreate is not found") + return (__nvFatbinCreate)( + handle_indirect, options, optionsCount) + + +cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinDestroy + _check_or_init_nvfatbin() + if __nvFatbinDestroy == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinDestroy is not found") + return (__nvFatbinDestroy)( + handle_indirect) + + +cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddPTX + _check_or_init_nvfatbin() + if __nvFatbinAddPTX == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddPTX is not found") + return (__nvFatbinAddPTX)( + handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinSize + _check_or_init_nvfatbin() + if __nvFatbinSize == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinSize is not found") + return (__nvFatbinSize)( + handle, size) + + +cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinGet + _check_or_init_nvfatbin() + if __nvFatbinGet == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinGet is not found") + return (__nvFatbinGet)( + handle, buffer) + + +cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinVersion + _check_or_init_nvfatbin() + if __nvFatbinVersion == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinVersion is not found") + return (__nvFatbinVersion)( + major, minor) + + + diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx new file mode 100644 index 0000000000..cc1824bf43 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx @@ -0,0 +1,233 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from libc.stdint cimport intptr_t + +import threading +from .utils import FunctionNotFoundError, NotSupportedError + +from cuda.pathfinder import load_nvidia_dynamic_lib + +from libc.stddef cimport wchar_t +from libc.stdint cimport uintptr_t +from cpython cimport PyUnicode_AsWideCharString, PyMem_Free + +# You must 'from .utils import NotSupportedError' before using this template + +cdef extern from "windows.h" nogil: + ctypedef void* HMODULE + ctypedef void* HANDLE + ctypedef void* FARPROC + ctypedef unsigned long DWORD + ctypedef const wchar_t *LPCWSTR + ctypedef const char *LPCSTR + + cdef DWORD LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 + cdef DWORD LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 + cdef DWORD LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 + + HMODULE _LoadLibraryExW "LoadLibraryExW"( + LPCWSTR lpLibFileName, + HANDLE hFile, + DWORD dwFlags + ) + + FARPROC _GetProcAddress "GetProcAddress"(HMODULE hModule, LPCSTR lpProcName) + +cdef inline uintptr_t LoadLibraryExW(str path, HANDLE hFile, DWORD dwFlags): + cdef uintptr_t result + cdef wchar_t* wpath = PyUnicode_AsWideCharString(path, NULL) + with nogil: + result = _LoadLibraryExW( + wpath, + hFile, + dwFlags + ) + PyMem_Free(wpath) + return result + +cdef inline void *GetProcAddress(uintptr_t hModule, const char* lpProcName) nogil: + return _GetProcAddress(hModule, lpProcName) + +cdef int get_cuda_version(): + cdef int err, driver_ver = 0 + + # Load driver to check version + handle = LoadLibraryExW("nvcuda.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32) + if handle == 0: + raise NotSupportedError('CUDA driver is not found') + cuDriverGetVersion = GetProcAddress(handle, 'cuDriverGetVersion') + if cuDriverGetVersion == NULL: + raise RuntimeError('Did not find cuDriverGetVersion symbol in nvcuda.dll') + err = (cuDriverGetVersion)(&driver_ver) + if err != 0: + raise RuntimeError(f'cuDriverGetVersion returned error code {err}') + + return driver_ver + + + +############################################################################### +# Wrapper init +############################################################################### + +cdef object __symbol_lock = threading.Lock() +cdef bint __py_nvfatbin_init = False + +cdef void* __nvFatbinCreate = NULL +cdef void* __nvFatbinDestroy = NULL +cdef void* __nvFatbinAddPTX = NULL +cdef void* __nvFatbinSize = NULL +cdef void* __nvFatbinGet = NULL +cdef void* __nvFatbinVersion = NULL + + +cdef int _init_nvfatbin() except -1 nogil: + global __py_nvfatbin_init + + with gil, __symbol_lock: + # Recheck the flag after obtaining the locks + if __py_nvfatbin_init: + return 0 + + # Load library + handle = load_nvidia_dynamic_lib("nvfatbin")._handle_uint + + # Load function + global __nvFatbinCreate + __nvFatbinCreate = GetProcAddress(handle, 'nvFatbinCreate') + + global __nvFatbinDestroy + __nvFatbinDestroy = GetProcAddress(handle, 'nvFatbinDestroy') + + global __nvFatbinAddPTX + __nvFatbinAddPTX = GetProcAddress(handle, 'nvFatbinAddPTX') + + global __nvFatbinSize + __nvFatbinSize = GetProcAddress(handle, 'nvFatbinSize') + + global __nvFatbinGet + __nvFatbinGet = GetProcAddress(handle, 'nvFatbinGet') + + global __nvFatbinVersion + __nvFatbinVersion = GetProcAddress(handle, 'nvFatbinVersion') + + __py_nvfatbin_init = True + return 0 + + +cdef inline int _check_or_init_nvfatbin() except -1 nogil: + if __py_nvfatbin_init: + return 0 + + return _init_nvfatbin() + + +cdef dict func_ptrs = None + + +cpdef dict _inspect_function_pointers(): + global func_ptrs + if func_ptrs is not None: + return func_ptrs + + _check_or_init_nvfatbin() + cdef dict data = {} + + global __nvFatbinCreate + data["__nvFatbinCreate"] = __nvFatbinCreate + + global __nvFatbinDestroy + data["__nvFatbinDestroy"] = __nvFatbinDestroy + + global __nvFatbinAddPTX + data["__nvFatbinAddPTX"] = __nvFatbinAddPTX + + global __nvFatbinSize + data["__nvFatbinSize"] = __nvFatbinSize + + global __nvFatbinGet + data["__nvFatbinGet"] = __nvFatbinGet + + global __nvFatbinVersion + data["__nvFatbinVersion"] = __nvFatbinVersion + + func_ptrs = data + return data + + +cpdef _inspect_function_pointer(str name): + global func_ptrs + if func_ptrs is None: + func_ptrs = _inspect_function_pointers() + return func_ptrs[name] + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinCreate + _check_or_init_nvfatbin() + if __nvFatbinCreate == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinCreate is not found") + return (__nvFatbinCreate)( + handle_indirect, options, optionsCount) + + +cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinDestroy + _check_or_init_nvfatbin() + if __nvFatbinDestroy == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinDestroy is not found") + return (__nvFatbinDestroy)( + handle_indirect) + + +cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddPTX + _check_or_init_nvfatbin() + if __nvFatbinAddPTX == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddPTX is not found") + return (__nvFatbinAddPTX)( + handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinSize + _check_or_init_nvfatbin() + if __nvFatbinSize == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinSize is not found") + return (__nvFatbinSize)( + handle, size) + + +cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinGet + _check_or_init_nvfatbin() + if __nvFatbinGet == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinGet is not found") + return (__nvFatbinGet)( + handle, buffer) + + +cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinVersion + _check_or_init_nvfatbin() + if __nvFatbinVersion == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinVersion is not found") + return (__nvFatbinVersion)( + major, minor) + + + diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pxd b/cuda_bindings/cuda/bindings/cynvfatbin.pxd new file mode 100644 index 0000000000..651aa27152 --- /dev/null +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pxd @@ -0,0 +1,53 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from libc.stdint cimport intptr_t, uint32_t + + +############################################################################### +# Types (structs, enums, ...) +############################################################################### + +# enums +ctypedef enum nvFatbinResult "nvFatbinResult": + NVFATBIN_SUCCESS "NVFATBIN_SUCCESS" = 0 + NVFATBIN_ERROR_INTERNAL "NVFATBIN_ERROR_INTERNAL" + NVFATBIN_ERROR_ELF_ARCH_MISMATCH "NVFATBIN_ERROR_ELF_ARCH_MISMATCH" + NVFATBIN_ERROR_ELF_SIZE_MISMATCH "NVFATBIN_ERROR_ELF_SIZE_MISMATCH" + NVFATBIN_ERROR_MISSING_PTX_VERSION "NVFATBIN_ERROR_MISSING_PTX_VERSION" + NVFATBIN_ERROR_NULL_POINTER "NVFATBIN_ERROR_NULL_POINTER" + NVFATBIN_ERROR_COMPRESSION_FAILED "NVFATBIN_ERROR_COMPRESSION_FAILED" + NVFATBIN_ERROR_COMPRESSED_SIZE_EXCEEDED "NVFATBIN_ERROR_COMPRESSED_SIZE_EXCEEDED" + NVFATBIN_ERROR_UNRECOGNIZED_OPTION "NVFATBIN_ERROR_UNRECOGNIZED_OPTION" + NVFATBIN_ERROR_INVALID_ARCH "NVFATBIN_ERROR_INVALID_ARCH" + NVFATBIN_ERROR_INVALID_NVVM "NVFATBIN_ERROR_INVALID_NVVM" + NVFATBIN_ERROR_EMPTY_INPUT "NVFATBIN_ERROR_EMPTY_INPUT" + NVFATBIN_ERROR_MISSING_PTX_ARCH "NVFATBIN_ERROR_MISSING_PTX_ARCH" + NVFATBIN_ERROR_PTX_ARCH_MISMATCH "NVFATBIN_ERROR_PTX_ARCH_MISMATCH" + NVFATBIN_ERROR_MISSING_FATBIN "NVFATBIN_ERROR_MISSING_FATBIN" + NVFATBIN_ERROR_INVALID_INDEX "NVFATBIN_ERROR_INVALID_INDEX" + NVFATBIN_ERROR_IDENTIFIER_REUSE "NVFATBIN_ERROR_IDENTIFIER_REUSE" + NVFATBIN_ERROR_INTERNAL_PTX_OPTION "NVFATBIN_ERROR_INTERNAL_PTX_OPTION" + _NVFATBINRESULT_INTERNAL_LOADING_ERROR "_NVFATBINRESULT_INTERNAL_LOADING_ERROR" = -42 + + +# types +ctypedef void* nvFatbinHandle 'nvFatbinHandle' + + +############################################################################### +# Functions +############################################################################### + +cdef nvFatbinResult nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil + + + diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pyx b/cuda_bindings/cuda/bindings/cynvfatbin.pyx new file mode 100644 index 0000000000..13c9ac2cc1 --- /dev/null +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pyx @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from ._internal cimport nvfatbin as _nvfatbin + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvFatbinResult nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinCreate(handle_indirect, options, optionsCount) + + +cdef nvFatbinResult nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinDestroy(handle_indirect) + + +cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddPTX(handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinSize(handle, size) + + +cdef nvFatbinResult nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinGet(handle, buffer) + + +cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinVersion(major, minor) + + + diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pxd b/cuda_bindings/cuda/bindings/nvfatbin.pxd new file mode 100644 index 0000000000..1350d0ed52 --- /dev/null +++ b/cuda_bindings/cuda/bindings/nvfatbin.pxd @@ -0,0 +1,37 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from libc.stdint cimport intptr_t, uint32_t + +from .cynvfatbin cimport * + + +############################################################################### +# Types +############################################################################### + +ctypedef nvFatbinHandle Handle + + +############################################################################### +# Enum +############################################################################### + +ctypedef nvFatbinResult _Result + + +############################################################################### +# Functions +############################################################################### + +cpdef intptr_t create(options, size_t options_count) except -1 +cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line) +cpdef size_t size(intptr_t handle) except? 0 +cpdef get(intptr_t handle, buffer) +cpdef tuple version() + + + diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx new file mode 100644 index 0000000000..dcc669797e --- /dev/null +++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx @@ -0,0 +1,194 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +cimport cython # NOQA + +from ._internal.utils cimport (get_resource_ptr, get_nested_resource_ptr, nested_resource, nullable_unique_ptr, + get_buffer_pointer, get_resource_ptrs) + +from enum import IntEnum as _IntEnum +from libcpp.vector cimport vector + + +############################################################################### +# Enum +############################################################################### + +class Result(_IntEnum): + """See `nvFatbinResult`.""" + SUCCESS = NVFATBIN_SUCCESS + ERROR_INTERNAL = NVFATBIN_ERROR_INTERNAL + ERROR_ELF_ARCH_MISMATCH = NVFATBIN_ERROR_ELF_ARCH_MISMATCH + ERROR_ELF_SIZE_MISMATCH = NVFATBIN_ERROR_ELF_SIZE_MISMATCH + ERROR_MISSING_PTX_VERSION = NVFATBIN_ERROR_MISSING_PTX_VERSION + ERROR_NULL_POINTER = NVFATBIN_ERROR_NULL_POINTER + ERROR_COMPRESSION_FAILED = NVFATBIN_ERROR_COMPRESSION_FAILED + ERROR_COMPRESSED_SIZE_EXCEEDED = NVFATBIN_ERROR_COMPRESSED_SIZE_EXCEEDED + ERROR_UNRECOGNIZED_OPTION = NVFATBIN_ERROR_UNRECOGNIZED_OPTION + ERROR_INVALID_ARCH = NVFATBIN_ERROR_INVALID_ARCH + ERROR_INVALID_NVVM = NVFATBIN_ERROR_INVALID_NVVM + ERROR_EMPTY_INPUT = NVFATBIN_ERROR_EMPTY_INPUT + ERROR_MISSING_PTX_ARCH = NVFATBIN_ERROR_MISSING_PTX_ARCH + ERROR_PTX_ARCH_MISMATCH = NVFATBIN_ERROR_PTX_ARCH_MISMATCH + ERROR_MISSING_FATBIN = NVFATBIN_ERROR_MISSING_FATBIN + ERROR_INVALID_INDEX = NVFATBIN_ERROR_INVALID_INDEX + ERROR_IDENTIFIER_REUSE = NVFATBIN_ERROR_IDENTIFIER_REUSE + ERROR_INTERNAL_PTX_OPTION = NVFATBIN_ERROR_INTERNAL_PTX_OPTION + + +############################################################################### +# Error handling +############################################################################### + +class nvfatbinError(Exception): + + def __init__(self, status): + self.status = status + s = Result(status) + cdef str err = f"{s.name} ({s.value})" + super(nvfatbinError, self).__init__(err) + + def __reduce__(self): + return (type(self), (self.status,)) + + +@cython.profile(False) +cdef int check_status(int status) except 1 nogil: + if status != 0: + with gil: + raise nvfatbinError(status) + return status + + +############################################################################### +# Wrapper functions +############################################################################### + +cpdef destroy(intptr_t handle): + """nvFatbinDestroy frees the memory associated with the given handle. + + Args: + handle (intptr_t): nvFatbin handle. + + .. seealso:: `nvFatbinDestroy` + """ + cdef Handle h = handle + with nogil: + status = nvFatbinDestroy(&h) + check_status(status) + + +cpdef intptr_t create(options, size_t options_count) except -1: + """nvFatbinCreate creates a new handle. + + Args: + options (object): An array of strings, each containing a single option. It can be: + + - an :class:`int` as the pointer address to the nested sequence, or + - a Python sequence of :class:`int`\s, each of which is a pointer address + to a valid sequence of 'char', or + - a nested Python sequence of ``str``. + + options_count (size_t): Number of options. + + Returns: + intptr_t: Address of nvFatbin handle. + + .. seealso:: `nvFatbinCreate` + """ + cdef nested_resource[ char ] _options_ + get_nested_resource_ptr[char](_options_, options, NULL) + cdef Handle handle_indirect + with nogil: + __status__ = nvFatbinCreate(&handle_indirect, (_options_.ptrs.data()), options_count) + check_status(__status__) + return handle_indirect + + +cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line): + """nvFatbinAddPTX adds PTX to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The PTX code. + size (size_t): The size of the PTX code. + arch (str): The numerical architecture that this PTX is for (the XX of any sm_XX, lto_XX, or compute_XX). + identifier (str): Name of the PTX, useful when extracting the fatbin with tools like cuobjdump. + options_cmd_line (str): Options used during JIT compilation. + + .. seealso:: `nvFatbinAddPTX` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + if not isinstance(arch, str): + raise TypeError("arch must be a Python str") + cdef bytes _temp_arch_ = (arch).encode() + cdef char* _arch_ = _temp_arch_ + if not isinstance(identifier, str): + raise TypeError("identifier must be a Python str") + cdef bytes _temp_identifier_ = (identifier).encode() + cdef char* _identifier_ = _temp_identifier_ + if not isinstance(options_cmd_line, str): + raise TypeError("options_cmd_line must be a Python str") + cdef bytes _temp_options_cmd_line_ = (options_cmd_line).encode() + cdef char* _options_cmd_line_ = _temp_options_cmd_line_ + with nogil: + __status__ = nvFatbinAddPTX(handle, _code_, size, _arch_, _identifier_, _options_cmd_line_) + check_status(__status__) + + +cpdef size_t size(intptr_t handle) except? 0: + """nvFatbinSize returns the fatbinary's size. + + Args: + handle (intptr_t): nvFatbin handle. + + Returns: + size_t: The fatbinary's size. + + .. seealso:: `nvFatbinSize` + """ + cdef size_t size + with nogil: + __status__ = nvFatbinSize(handle, &size) + check_status(__status__) + return size + + +cpdef get(intptr_t handle, buffer): + """nvFatbinGet returns the completed fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + buffer (bytes): memory to store fatbinary. + + .. seealso:: `nvFatbinGet` + """ + cdef void* _buffer_ = get_buffer_pointer(buffer, -1, readonly=False) + with nogil: + __status__ = nvFatbinGet(handle, _buffer_) + check_status(__status__) + + +cpdef tuple version(): + """nvFatbinVersion returns the current version of nvFatbin. + + Returns: + A 2-tuple containing: + + - unsigned int: The major version. + - unsigned int: The minor version. + + .. seealso:: `nvFatbinVersion` + """ + cdef unsigned int major + cdef unsigned int minor + with nogil: + __status__ = nvFatbinVersion(&major, &minor) + check_status(__status__) + return (major, minor) + + + diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py new file mode 100644 index 0000000000..627bd300e4 --- /dev/null +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -0,0 +1,89 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +from cuda.bindings import nvfatbin + +import pytest + +ARCHITECTURES = ["sm_75", "sm_80", "sm_90", "sm_100"] +PTX_VERSIONS = ["6.4", "7.0", "8.5", "8.8"] + +PTX_TEMPLATE = """ +.version {PTX_VERSION} +.target {ARCH} +.address_size 64 + + // .globl _Z6kernelPi + +.visible .entry _Z6kernelPi( + .param .u64 _Z6kernelPi_param_0 +) +{{ + .reg .b32 %r<7>; + .reg .b64 %rd<5>; + + + ld.param.u64 %rd1, [_Z6kernelPi_param_0]; + cvta.to.global.u64 %rd2, %rd1; + mov.u32 %r1, %tid.x; + mov.u32 %r2, %ctaid.x; + mov.u32 %r3, %ntid.x; + mad.lo.s32 %r4, %r2, %r3, %r1; + mul.wide.s32 %rd3, %r4, 4; + add.s64 %rd4, %rd2, %rd3; + ld.global.u32 %r5, [%rd4]; + add.s32 %r6, %r5, 1; + st.global.u32 [%rd4], %r6; + ret; + +}} +""" + +@pytest.fixture(params=ARCHITECTURES) +def arch(request): + return request.param + +@pytest.fixture(params=PTX_VERSIONS) +def ptx_version(request): + return request.param + +@pytest.fixture +def PTX(arch, ptx_version): + return PTX_TEMPLATE.format(PTX_VERSION=ptx_version, ARCH=arch) + +def test_nvfatbin_get_version(): + major, minor = nvfatbin.version() + assert major is not None + assert minor is not None + +def test_nvfatbin_empty_create_and_destroy(): + handle = nvfatbin.create([], 0) + assert handle is not None + nvfatbin.destroy(handle) + +def test_nvfatbin_invalid_input_create(): + with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_UNRECOGNIZED_OPTION"): + nvfatbin.create(["--unsupported_option"], 1) + + +def test_nvfatbin_get_empty(): + handle = nvfatbin.create([], 0) + size = nvfatbin.size(handle) + + buffer = bytearray(size) + nvfatbin.get(handle, buffer) + + nvfatbin.destroy(handle) + + +def test_nvfatbin_add_ptx(PTX, arch): + arch_numeric = arch.split("_")[1] + + handle = nvfatbin.create([], 0) + nvfatbin.add_ptx(handle, PTX.encode(), len(PTX), arch_numeric, "add", f"-arch={arch}") + + buffer = bytearray(nvfatbin.size(handle)) + + nvfatbin.get(handle, buffer) + nvfatbin.destroy(handle) + From 9b1a5590a1b4f34e91cbd78991154707615a7171 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 12 Jan 2026 20:08:30 -0800 Subject: [PATCH 02/10] add rest of APIs --- .../cuda/bindings/_internal/nvfatbin.pxd | 5 + .../bindings/_internal/nvfatbin_linux.pyx | 65 +++++++++ .../bindings/_internal/nvfatbin_windows.pyx | 53 ++++++++ cuda_bindings/cuda/bindings/cynvfatbin.pxd | 5 + cuda_bindings/cuda/bindings/cynvfatbin.pyx | 14 ++ cuda_bindings/cuda/bindings/nvfatbin.pxd | 5 + cuda_bindings/cuda/bindings/nvfatbin.pyx | 75 ++++++++++ cuda_bindings/tests/test_nvfatbin.py | 128 +++++++++++++++++- 8 files changed, 349 insertions(+), 1 deletion(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd index 14a8a6d608..d421e8c21e 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd @@ -14,9 +14,14 @@ from ..cynvfatbin cimport * cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil + + diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx index 06143d9031..097043f69a 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx @@ -62,6 +62,9 @@ cdef bint __py_nvfatbin_init = False cdef void* __nvFatbinCreate = NULL cdef void* __nvFatbinDestroy = NULL cdef void* __nvFatbinAddPTX = NULL +cdef void* __nvFatbinAddCubin = NULL +cdef void* __nvFatbinAddLTOIR = NULL +cdef void* __nvFatbinAddReloc = NULL cdef void* __nvFatbinSize = NULL cdef void* __nvFatbinGet = NULL cdef void* __nvFatbinVersion = NULL @@ -104,6 +107,27 @@ cdef int _init_nvfatbin() except -1 nogil: handle = load_library() __nvFatbinAddPTX = dlsym(handle, 'nvFatbinAddPTX') + global __nvFatbinAddCubin + __nvFatbinAddCubin = dlsym(RTLD_DEFAULT, 'nvFatbinAddCubin') + if __nvFatbinAddCubin == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddCubin = dlsym(handle, 'nvFatbinAddCubin') + + global __nvFatbinAddLTOIR + __nvFatbinAddLTOIR = dlsym(RTLD_DEFAULT, 'nvFatbinAddLTOIR') + if __nvFatbinAddLTOIR == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddLTOIR = dlsym(handle, 'nvFatbinAddLTOIR') + + global __nvFatbinAddReloc + __nvFatbinAddReloc = dlsym(RTLD_DEFAULT, 'nvFatbinAddReloc') + if __nvFatbinAddReloc == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddReloc = dlsym(handle, 'nvFatbinAddReloc') + global __nvFatbinSize __nvFatbinSize = dlsym(RTLD_DEFAULT, 'nvFatbinSize') if __nvFatbinSize == NULL: @@ -155,6 +179,15 @@ cpdef dict _inspect_function_pointers(): global __nvFatbinAddPTX data["__nvFatbinAddPTX"] = __nvFatbinAddPTX + global __nvFatbinAddCubin + data["__nvFatbinAddCubin"] = __nvFatbinAddCubin + + global __nvFatbinAddLTOIR + data["__nvFatbinAddLTOIR"] = __nvFatbinAddLTOIR + + global __nvFatbinAddReloc + data["__nvFatbinAddReloc"] = __nvFatbinAddReloc + global __nvFatbinSize data["__nvFatbinSize"] = __nvFatbinSize @@ -209,6 +242,36 @@ cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, siz handle, code, size, arch, identifier, optionsCmdLine) +cdef nvFatbinResult _nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddCubin + _check_or_init_nvfatbin() + if __nvFatbinAddCubin == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddCubin is not found") + return (__nvFatbinAddCubin)( + handle, code, size, arch, identifier) + + +cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddLTOIR + _check_or_init_nvfatbin() + if __nvFatbinAddLTOIR == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddLTOIR is not found") + return (__nvFatbinAddLTOIR)( + handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddReloc + _check_or_init_nvfatbin() + if __nvFatbinAddReloc == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddReloc is not found") + return (__nvFatbinAddReloc)( + handle, code, size) + + cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: global __nvFatbinSize _check_or_init_nvfatbin() @@ -240,3 +303,5 @@ cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) e + + diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx index cc1824bf43..a499637f0d 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx @@ -80,6 +80,9 @@ cdef bint __py_nvfatbin_init = False cdef void* __nvFatbinCreate = NULL cdef void* __nvFatbinDestroy = NULL cdef void* __nvFatbinAddPTX = NULL +cdef void* __nvFatbinAddCubin = NULL +cdef void* __nvFatbinAddLTOIR = NULL +cdef void* __nvFatbinAddReloc = NULL cdef void* __nvFatbinSize = NULL cdef void* __nvFatbinGet = NULL cdef void* __nvFatbinVersion = NULL @@ -106,6 +109,15 @@ cdef int _init_nvfatbin() except -1 nogil: global __nvFatbinAddPTX __nvFatbinAddPTX = GetProcAddress(handle, 'nvFatbinAddPTX') + global __nvFatbinAddCubin + __nvFatbinAddCubin = GetProcAddress(handle, 'nvFatbinAddCubin') + + global __nvFatbinAddLTOIR + __nvFatbinAddLTOIR = GetProcAddress(handle, 'nvFatbinAddLTOIR') + + global __nvFatbinAddReloc + __nvFatbinAddReloc = GetProcAddress(handle, 'nvFatbinAddReloc') + global __nvFatbinSize __nvFatbinSize = GetProcAddress(handle, 'nvFatbinSize') @@ -146,6 +158,15 @@ cpdef dict _inspect_function_pointers(): global __nvFatbinAddPTX data["__nvFatbinAddPTX"] = __nvFatbinAddPTX + global __nvFatbinAddCubin + data["__nvFatbinAddCubin"] = __nvFatbinAddCubin + + global __nvFatbinAddLTOIR + data["__nvFatbinAddLTOIR"] = __nvFatbinAddLTOIR + + global __nvFatbinAddReloc + data["__nvFatbinAddReloc"] = __nvFatbinAddReloc + global __nvFatbinSize data["__nvFatbinSize"] = __nvFatbinSize @@ -200,6 +221,36 @@ cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, siz handle, code, size, arch, identifier, optionsCmdLine) +cdef nvFatbinResult _nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddCubin + _check_or_init_nvfatbin() + if __nvFatbinAddCubin == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddCubin is not found") + return (__nvFatbinAddCubin)( + handle, code, size, arch, identifier) + + +cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddLTOIR + _check_or_init_nvfatbin() + if __nvFatbinAddLTOIR == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddLTOIR is not found") + return (__nvFatbinAddLTOIR)( + handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddReloc + _check_or_init_nvfatbin() + if __nvFatbinAddReloc == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddReloc is not found") + return (__nvFatbinAddReloc)( + handle, code, size) + + cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: global __nvFatbinSize _check_or_init_nvfatbin() @@ -231,3 +282,5 @@ cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) e + + diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pxd b/cuda_bindings/cuda/bindings/cynvfatbin.pxd index 651aa27152..55d8c83c1a 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pxd @@ -45,9 +45,14 @@ ctypedef void* nvFatbinHandle 'nvFatbinHandle' cdef nvFatbinResult nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil + + diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pyx b/cuda_bindings/cuda/bindings/cynvfatbin.pyx index 13c9ac2cc1..142f374c1b 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pyx @@ -23,6 +23,18 @@ cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size return _nvfatbin._nvFatbinAddPTX(handle, code, size, arch, identifier, optionsCmdLine) +cdef nvFatbinResult nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddCubin(handle, code, size, arch, identifier) + + +cdef nvFatbinResult nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddLTOIR(handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddReloc(handle, code, size) + + cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: return _nvfatbin._nvFatbinSize(handle, size) @@ -36,3 +48,5 @@ cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) ex + + diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pxd b/cuda_bindings/cuda/bindings/nvfatbin.pxd index 1350d0ed52..54c793962b 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/nvfatbin.pxd @@ -29,9 +29,14 @@ ctypedef nvFatbinResult _Result cpdef intptr_t create(options, size_t options_count) except -1 cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line) +cpdef add_cubin(intptr_t handle, code, size_t size, arch, identifier) +cpdef add_ltoir(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line) +cpdef add_reloc(intptr_t handle, code, size_t size) cpdef size_t size(intptr_t handle) except? 0 cpdef get(intptr_t handle, buffer) cpdef tuple version() + + diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx index dcc669797e..92db285f8a 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx @@ -139,6 +139,79 @@ cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_ check_status(__status__) +cpdef add_cubin(intptr_t handle, code, size_t size, arch, identifier): + """nvFatbinAddCubin adds a CUDA binary to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The cubin. + size (size_t): The size of the cubin. + arch (str): The numerical architecture that this cubin is for (the XX of any sm_XX, lto_XX, or compute_XX). + identifier (str): Name of the cubin, useful when extracting the fatbin with tools like cuobjdump. + + .. seealso:: `nvFatbinAddCubin` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + if not isinstance(arch, str): + raise TypeError("arch must be a Python str") + cdef bytes _temp_arch_ = (arch).encode() + cdef char* _arch_ = _temp_arch_ + if not isinstance(identifier, str): + raise TypeError("identifier must be a Python str") + cdef bytes _temp_identifier_ = (identifier).encode() + cdef char* _identifier_ = _temp_identifier_ + with nogil: + __status__ = nvFatbinAddCubin(handle, _code_, size, _arch_, _identifier_) + check_status(__status__) + + +cpdef add_ltoir(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line): + """nvFatbinAddLTOIR adds LTOIR to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The LTOIR code. + size (size_t): The size of the LTOIR code. + arch (str): The numerical architecture that this LTOIR is for (the XX of any sm_XX, lto_XX, or compute_XX). + identifier (str): Name of the LTOIR, useful when extracting the fatbin with tools like cuobjdump. + options_cmd_line (str): Options used during JIT compilation. + + .. seealso:: `nvFatbinAddLTOIR` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + if not isinstance(arch, str): + raise TypeError("arch must be a Python str") + cdef bytes _temp_arch_ = (arch).encode() + cdef char* _arch_ = _temp_arch_ + if not isinstance(identifier, str): + raise TypeError("identifier must be a Python str") + cdef bytes _temp_identifier_ = (identifier).encode() + cdef char* _identifier_ = _temp_identifier_ + if not isinstance(options_cmd_line, str): + raise TypeError("options_cmd_line must be a Python str") + cdef bytes _temp_options_cmd_line_ = (options_cmd_line).encode() + cdef char* _options_cmd_line_ = _temp_options_cmd_line_ + with nogil: + __status__ = nvFatbinAddLTOIR(handle, _code_, size, _arch_, _identifier_, _options_cmd_line_) + check_status(__status__) + + +cpdef add_reloc(intptr_t handle, code, size_t size): + """nvFatbinAddReloc adds relocatable PTX entries from a host object to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The host object image. + size (size_t): The size of the host object image code. + + .. seealso:: `nvFatbinAddReloc` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + with nogil: + __status__ = nvFatbinAddReloc(handle, _code_, size) + check_status(__status__) + + cpdef size_t size(intptr_t handle) except? 0: """nvFatbinSize returns the fatbinary's size. @@ -192,3 +265,5 @@ cpdef tuple version(): + + diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py index 627bd300e4..3e893852b5 100644 --- a/cuda_bindings/tests/test_nvfatbin.py +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -1,7 +1,9 @@ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -from cuda.bindings import nvfatbin +import subprocess + +from cuda.bindings import nvfatbin, nvrtc import pytest @@ -39,6 +41,12 @@ }} """ +CODE = """ +int __device__ inc(int x) { + return x + 1; +} +""" + @pytest.fixture(params=ARCHITECTURES) def arch(request): return request.param @@ -51,6 +59,63 @@ def ptx_version(request): def PTX(arch, ptx_version): return PTX_TEMPLATE.format(PTX_VERSION=ptx_version, ARCH=arch) +@pytest.fixture +def CUBIN(arch): + def CHECK_NVRTC(err): + if err != nvrtc.nvrtcResult.NVRTC_SUCCESS: + raise RuntimeError(repr(err)) + + err, program_handle = nvrtc.nvrtcCreateProgram(CODE.encode(), b"", 0, [], []) + CHECK_NVRTC(err) + err = nvrtc.nvrtcCompileProgram(program_handle, 1, [f"-arch={arch}".encode()])[0] + CHECK_NVRTC(err) + err, size = nvrtc.nvrtcGetCUBINSize(program_handle) + CHECK_NVRTC(err) + cubin = b" " * size + (err,) = nvrtc.nvrtcGetCUBIN(program_handle, cubin) + CHECK_NVRTC(err) + (err,) = nvrtc.nvrtcDestroyProgram(program_handle) + CHECK_NVRTC(err) + return cubin + +# create a valid LTOIR input for testing +@pytest.fixture +def LTOIR(arch): + arch = arch.replace("sm", "compute") + def CHECK_NVRTC(err): + if err != nvrtc.nvrtcResult.NVRTC_SUCCESS: + raise RuntimeError(repr(err)) + + empty_cplusplus_kernel = "__global__ void A() {}" + err, program_handle = nvrtc.nvrtcCreateProgram(empty_cplusplus_kernel.encode(), b"", 0, [], []) + CHECK_NVRTC(err) + err = nvrtc.nvrtcCompileProgram(program_handle, 1, [b"-dlto", f"-arch={arch}".encode()])[0] + CHECK_NVRTC(err) + err, size = nvrtc.nvrtcGetLTOIRSize(program_handle) + CHECK_NVRTC(err) + empty_kernel_ltoir = b" " * size + (err,) = nvrtc.nvrtcGetLTOIR(program_handle, empty_kernel_ltoir) + CHECK_NVRTC(err) + (err,) = nvrtc.nvrtcDestroyProgram(program_handle) + CHECK_NVRTC(err) + return empty_kernel_ltoir + +@pytest.fixture +def OBJECT(arch, tmpdir): + if arch == "sm_100": + pytest.skip("sm_100 is not supported on local system.") + + empty_cplusplus_kernel = "__global__ void A() {} int main() { return 0; }" + with open(tmpdir / "object.cu", "w") as f: + f.write(empty_cplusplus_kernel) + + subprocess.check_output(["nvcc", "-arch", arch, "-o", str(tmpdir / "object.o"), str(tmpdir / "object.cu")]) + with open(tmpdir / "object.o", "rb") as f: + object = f.read() + + return object + + def test_nvfatbin_get_version(): major, minor = nvfatbin.version() assert major is not None @@ -87,3 +152,64 @@ def test_nvfatbin_add_ptx(PTX, arch): nvfatbin.get(handle, buffer) nvfatbin.destroy(handle) + +@pytest.mark.parametrize("arch", ["sm_80"], indirect=True) +def test_nvfatbin_add_cubin_ELF_SIZE_MISMATCH(CUBIN, arch): + handle = nvfatbin.create([], 0) + with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"): + nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), "75", "inc") + + nvfatbin.destroy(handle) + + +def test_nvfatbin_add_cubin(CUBIN, arch): + arch_numeric = arch.split("_")[1] + + handle = nvfatbin.create([], 0) + nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), arch_numeric, "inc") + + buffer = bytearray(nvfatbin.size(handle)) + + nvfatbin.get(handle, buffer) + nvfatbin.destroy(handle) + + +@pytest.mark.parametrize("arch", ["sm_80"], indirect=True) +def test_nvfatbin_add_cubin_ELF_ARCH_MISMATCH(CUBIN, arch): + handle = nvfatbin.create([], 0) + with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"): + nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), "75", "inc") + + nvfatbin.destroy(handle) + + +def test_nvdfatbin_add_ltoir(LTOIR, arch): + arch_numeric = arch.split("_")[1] + + handle = nvfatbin.create([], 0) + nvfatbin.add_ltoir(handle, LTOIR, len(LTOIR), arch_numeric, "inc", "") + + buffer = bytearray(nvfatbin.size(handle)) + + nvfatbin.get(handle, buffer) + nvfatbin.destroy(handle) + + +@pytest.mark.parametrize("arch", ["sm_80"], indirect=True) +def test_nvdfatbin_add_ltoir_ELF_ARCH_MISMATCH(LTOIR, arch): + pytest.skip() + handle = nvfatbin.create([], 0) + with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"): + nvfatbin.add_ltoir(handle, LTOIR, len(LTOIR), "75", "inc", "") + + nvfatbin.destroy(handle) + + +def test_nvfatbin_add_reloc(OBJECT): + handle = nvfatbin.create([], 0) + nvfatbin.add_reloc(handle, OBJECT, len(OBJECT)) + + buffer = bytearray(nvfatbin.size(handle)) + + nvfatbin.get(handle, buffer) + nvfatbin.destroy(handle) \ No newline at end of file From a979dd08988ab86efff816e139903c50e8081a11 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 13 Jan 2026 10:30:23 -0800 Subject: [PATCH 03/10] remove local skips --- cuda_bindings/tests/test_nvfatbin.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py index 3e893852b5..ae29b3c5ae 100644 --- a/cuda_bindings/tests/test_nvfatbin.py +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -102,9 +102,6 @@ def CHECK_NVRTC(err): @pytest.fixture def OBJECT(arch, tmpdir): - if arch == "sm_100": - pytest.skip("sm_100 is not supported on local system.") - empty_cplusplus_kernel = "__global__ void A() {} int main() { return 0; }" with open(tmpdir / "object.cu", "w") as f: f.write(empty_cplusplus_kernel) @@ -197,7 +194,6 @@ def test_nvdfatbin_add_ltoir(LTOIR, arch): @pytest.mark.parametrize("arch", ["sm_80"], indirect=True) def test_nvdfatbin_add_ltoir_ELF_ARCH_MISMATCH(LTOIR, arch): - pytest.skip() handle = nvfatbin.create([], 0) with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"): nvfatbin.add_ltoir(handle, LTOIR, len(LTOIR), "75", "inc", "") From fc4203ec0060257bb9d136866f31db2f3fdeeb35 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 13 Jan 2026 10:55:37 -0800 Subject: [PATCH 04/10] regenerate for CUDA 13.1 and with tile IR API --- .../cuda/bindings/_internal/nvfatbin.pxd | 6 +- .../bindings/_internal/nvfatbin_linux.pyx | 66 ++++++++++++------- .../bindings/_internal/nvfatbin_windows.pyx | 54 ++++++++++----- cuda_bindings/cuda/bindings/cynvfatbin.pxd | 6 +- cuda_bindings/cuda/bindings/cynvfatbin.pyx | 15 +++-- cuda_bindings/cuda/bindings/nvfatbin.pxd | 6 +- cuda_bindings/cuda/bindings/nvfatbin.pyx | 61 ++++++++++++----- 7 files changed, 146 insertions(+), 68 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd index d421e8c21e..fed9968e39 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated with version 13.0.0. Do not modify it directly. +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. from ..cynvfatbin cimport * @@ -16,10 +16,12 @@ cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NV cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil -cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil + diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx index 097043f69a..b78f15e73b 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated with version 13.0.0. Do not modify it directly. +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. from libc.stdint cimport intptr_t, uintptr_t @@ -64,10 +64,11 @@ cdef void* __nvFatbinDestroy = NULL cdef void* __nvFatbinAddPTX = NULL cdef void* __nvFatbinAddCubin = NULL cdef void* __nvFatbinAddLTOIR = NULL -cdef void* __nvFatbinAddReloc = NULL cdef void* __nvFatbinSize = NULL cdef void* __nvFatbinGet = NULL cdef void* __nvFatbinVersion = NULL +cdef void* __nvFatbinAddReloc = NULL +cdef void* __nvFatbinAddTileIR = NULL cdef void* load_library() except* with gil: @@ -121,13 +122,6 @@ cdef int _init_nvfatbin() except -1 nogil: handle = load_library() __nvFatbinAddLTOIR = dlsym(handle, 'nvFatbinAddLTOIR') - global __nvFatbinAddReloc - __nvFatbinAddReloc = dlsym(RTLD_DEFAULT, 'nvFatbinAddReloc') - if __nvFatbinAddReloc == NULL: - if handle == NULL: - handle = load_library() - __nvFatbinAddReloc = dlsym(handle, 'nvFatbinAddReloc') - global __nvFatbinSize __nvFatbinSize = dlsym(RTLD_DEFAULT, 'nvFatbinSize') if __nvFatbinSize == NULL: @@ -149,6 +143,20 @@ cdef int _init_nvfatbin() except -1 nogil: handle = load_library() __nvFatbinVersion = dlsym(handle, 'nvFatbinVersion') + global __nvFatbinAddReloc + __nvFatbinAddReloc = dlsym(RTLD_DEFAULT, 'nvFatbinAddReloc') + if __nvFatbinAddReloc == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddReloc = dlsym(handle, 'nvFatbinAddReloc') + + global __nvFatbinAddTileIR + __nvFatbinAddTileIR = dlsym(RTLD_DEFAULT, 'nvFatbinAddTileIR') + if __nvFatbinAddTileIR == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddTileIR = dlsym(handle, 'nvFatbinAddTileIR') + __py_nvfatbin_init = True return 0 @@ -185,9 +193,6 @@ cpdef dict _inspect_function_pointers(): global __nvFatbinAddLTOIR data["__nvFatbinAddLTOIR"] = __nvFatbinAddLTOIR - global __nvFatbinAddReloc - data["__nvFatbinAddReloc"] = __nvFatbinAddReloc - global __nvFatbinSize data["__nvFatbinSize"] = __nvFatbinSize @@ -197,6 +202,12 @@ cpdef dict _inspect_function_pointers(): global __nvFatbinVersion data["__nvFatbinVersion"] = __nvFatbinVersion + global __nvFatbinAddReloc + data["__nvFatbinAddReloc"] = __nvFatbinAddReloc + + global __nvFatbinAddTileIR + data["__nvFatbinAddTileIR"] = __nvFatbinAddTileIR + func_ptrs = data return data @@ -262,16 +273,6 @@ cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, s handle, code, size, arch, identifier, optionsCmdLine) -cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: - global __nvFatbinAddReloc - _check_or_init_nvfatbin() - if __nvFatbinAddReloc == NULL: - with gil: - raise FunctionNotFoundError("function nvFatbinAddReloc is not found") - return (__nvFatbinAddReloc)( - handle, code, size) - - cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: global __nvFatbinSize _check_or_init_nvfatbin() @@ -302,6 +303,27 @@ cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) e major, minor) +cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddReloc + _check_or_init_nvfatbin() + if __nvFatbinAddReloc == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddReloc is not found") + return (__nvFatbinAddReloc)( + handle, code, size) + + +cdef nvFatbinResult _nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddTileIR + _check_or_init_nvfatbin() + if __nvFatbinAddTileIR == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddTileIR is not found") + return (__nvFatbinAddTileIR)( + handle, code, size, identifier, optionsCmdLine) + + + diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx index a499637f0d..2a4ff825ab 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated with version 13.0.0. Do not modify it directly. +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. from libc.stdint cimport intptr_t @@ -82,10 +82,11 @@ cdef void* __nvFatbinDestroy = NULL cdef void* __nvFatbinAddPTX = NULL cdef void* __nvFatbinAddCubin = NULL cdef void* __nvFatbinAddLTOIR = NULL -cdef void* __nvFatbinAddReloc = NULL cdef void* __nvFatbinSize = NULL cdef void* __nvFatbinGet = NULL cdef void* __nvFatbinVersion = NULL +cdef void* __nvFatbinAddReloc = NULL +cdef void* __nvFatbinAddTileIR = NULL cdef int _init_nvfatbin() except -1 nogil: @@ -115,9 +116,6 @@ cdef int _init_nvfatbin() except -1 nogil: global __nvFatbinAddLTOIR __nvFatbinAddLTOIR = GetProcAddress(handle, 'nvFatbinAddLTOIR') - global __nvFatbinAddReloc - __nvFatbinAddReloc = GetProcAddress(handle, 'nvFatbinAddReloc') - global __nvFatbinSize __nvFatbinSize = GetProcAddress(handle, 'nvFatbinSize') @@ -127,6 +125,12 @@ cdef int _init_nvfatbin() except -1 nogil: global __nvFatbinVersion __nvFatbinVersion = GetProcAddress(handle, 'nvFatbinVersion') + global __nvFatbinAddReloc + __nvFatbinAddReloc = GetProcAddress(handle, 'nvFatbinAddReloc') + + global __nvFatbinAddTileIR + __nvFatbinAddTileIR = GetProcAddress(handle, 'nvFatbinAddTileIR') + __py_nvfatbin_init = True return 0 @@ -164,9 +168,6 @@ cpdef dict _inspect_function_pointers(): global __nvFatbinAddLTOIR data["__nvFatbinAddLTOIR"] = __nvFatbinAddLTOIR - global __nvFatbinAddReloc - data["__nvFatbinAddReloc"] = __nvFatbinAddReloc - global __nvFatbinSize data["__nvFatbinSize"] = __nvFatbinSize @@ -176,6 +177,12 @@ cpdef dict _inspect_function_pointers(): global __nvFatbinVersion data["__nvFatbinVersion"] = __nvFatbinVersion + global __nvFatbinAddReloc + data["__nvFatbinAddReloc"] = __nvFatbinAddReloc + + global __nvFatbinAddTileIR + data["__nvFatbinAddTileIR"] = __nvFatbinAddTileIR + func_ptrs = data return data @@ -241,16 +248,6 @@ cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, s handle, code, size, arch, identifier, optionsCmdLine) -cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: - global __nvFatbinAddReloc - _check_or_init_nvfatbin() - if __nvFatbinAddReloc == NULL: - with gil: - raise FunctionNotFoundError("function nvFatbinAddReloc is not found") - return (__nvFatbinAddReloc)( - handle, code, size) - - cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: global __nvFatbinSize _check_or_init_nvfatbin() @@ -281,6 +278,27 @@ cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) e major, minor) +cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddReloc + _check_or_init_nvfatbin() + if __nvFatbinAddReloc == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddReloc is not found") + return (__nvFatbinAddReloc)( + handle, code, size) + + +cdef nvFatbinResult _nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddTileIR + _check_or_init_nvfatbin() + if __nvFatbinAddTileIR == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddTileIR is not found") + return (__nvFatbinAddTileIR)( + handle, code, size, identifier, optionsCmdLine) + + + diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pxd b/cuda_bindings/cuda/bindings/cynvfatbin.pxd index 55d8c83c1a..03262af56b 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pxd @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated with version 13.0.0. Do not modify it directly. +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. from libc.stdint cimport intptr_t, uint32_t @@ -47,10 +47,12 @@ cdef nvFatbinResult nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVF cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil -cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil + diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pyx b/cuda_bindings/cuda/bindings/cynvfatbin.pyx index 142f374c1b..98a2c63412 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pyx @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated with version 13.0.0. Do not modify it directly. +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. from ._internal cimport nvfatbin as _nvfatbin @@ -31,10 +31,6 @@ cdef nvFatbinResult nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, si return _nvfatbin._nvFatbinAddLTOIR(handle, code, size, arch, identifier, optionsCmdLine) -cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: - return _nvfatbin._nvFatbinAddReloc(handle, code, size) - - cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: return _nvfatbin._nvFatbinSize(handle, size) @@ -47,6 +43,15 @@ cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) ex return _nvfatbin._nvFatbinVersion(major, minor) +cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddReloc(handle, code, size) + + +cdef nvFatbinResult nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddTileIR(handle, code, size, identifier, optionsCmdLine) + + + diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pxd b/cuda_bindings/cuda/bindings/nvfatbin.pxd index 54c793962b..ae987e3a7d 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/nvfatbin.pxd @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated with version 13.0.0. Do not modify it directly. +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. from libc.stdint cimport intptr_t, uint32_t @@ -31,10 +31,12 @@ cpdef intptr_t create(options, size_t options_count) except -1 cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line) cpdef add_cubin(intptr_t handle, code, size_t size, arch, identifier) cpdef add_ltoir(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line) -cpdef add_reloc(intptr_t handle, code, size_t size) cpdef size_t size(intptr_t handle) except? 0 cpdef get(intptr_t handle, buffer) cpdef tuple version() +cpdef add_reloc(intptr_t handle, code, size_t size) +cpdef add_tile_ir(intptr_t handle, code, size_t size, identifier, options_cmd_line) + diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx index 92db285f8a..9470745dee 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated with version 13.0.0. Do not modify it directly. +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. cimport cython # NOQA @@ -196,22 +196,6 @@ cpdef add_ltoir(intptr_t handle, code, size_t size, arch, identifier, options_cm check_status(__status__) -cpdef add_reloc(intptr_t handle, code, size_t size): - """nvFatbinAddReloc adds relocatable PTX entries from a host object to the fatbinary. - - Args: - handle (intptr_t): nvFatbin handle. - code (bytes): The host object image. - size (size_t): The size of the host object image code. - - .. seealso:: `nvFatbinAddReloc` - """ - cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) - with nogil: - __status__ = nvFatbinAddReloc(handle, _code_, size) - check_status(__status__) - - cpdef size_t size(intptr_t handle) except? 0: """nvFatbinSize returns the fatbinary's size. @@ -264,6 +248,49 @@ cpdef tuple version(): return (major, minor) +cpdef add_reloc(intptr_t handle, code, size_t size): + """nvFatbinAddReloc adds relocatable PTX entries from a host object to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The host object image. + size (size_t): The size of the host object image code. + + .. seealso:: `nvFatbinAddReloc` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + with nogil: + __status__ = nvFatbinAddReloc(handle, _code_, size) + check_status(__status__) + + +cpdef add_tile_ir(intptr_t handle, code, size_t size, identifier, options_cmd_line): + """nvFatbinAddTileIR adds Tile IR to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The Tile IR. + size (size_t): The size of the Tile IR. + identifier (str): Name of the Tile IR, useful when extracting the fatbin with tools like cuobjdump. + options_cmd_line (str): Options used during JIT compilation. + + .. seealso:: `nvFatbinAddTileIR` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + if not isinstance(identifier, str): + raise TypeError("identifier must be a Python str") + cdef bytes _temp_identifier_ = (identifier).encode() + cdef char* _identifier_ = _temp_identifier_ + if not isinstance(options_cmd_line, str): + raise TypeError("options_cmd_line must be a Python str") + cdef bytes _temp_options_cmd_line_ = (options_cmd_line).encode() + cdef char* _options_cmd_line_ = _temp_options_cmd_line_ + with nogil: + __status__ = nvFatbinAddTileIR(handle, _code_, size, _identifier_, _options_cmd_line_) + check_status(__status__) + + + From e7ace35c50105a8e2029ebd41de90df6a0947f10 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 13 Jan 2026 13:21:34 -0800 Subject: [PATCH 05/10] nvfatbinError -> nvFatbinError --- cuda_bindings/cuda/bindings/nvfatbin.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx index 9470745dee..527b7846b0 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx @@ -43,13 +43,13 @@ class Result(_IntEnum): # Error handling ############################################################################### -class nvfatbinError(Exception): +class nvFatbinError(Exception): def __init__(self, status): self.status = status s = Result(status) cdef str err = f"{s.name} ({s.value})" - super(nvfatbinError, self).__init__(err) + super(nvFatbinError, self).__init__(err) def __reduce__(self): return (type(self), (self.status,)) @@ -59,7 +59,7 @@ class nvfatbinError(Exception): cdef int check_status(int status) except 1 nogil: if status != 0: with gil: - raise nvfatbinError(status) + raise nvFatbinError(status) return status From d962f612da2d55d26f5d434d1baf14b96862ddb2 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 14 Jan 2026 11:55:15 -0800 Subject: [PATCH 06/10] regenerate for get_error_string --- .../cuda/bindings/_internal/nvfatbin.pxd | 7 +-- .../bindings/_internal/nvfatbin_linux.pyx | 27 +++++++--- .../bindings/_internal/nvfatbin_windows.pyx | 23 +++++--- cuda_bindings/cuda/bindings/cynvfatbin.pxd | 7 +-- cuda_bindings/cuda/bindings/cynvfatbin.pyx | 10 ++-- cuda_bindings/cuda/bindings/nvfatbin.pxd | 6 --- cuda_bindings/cuda/bindings/nvfatbin.pyx | 25 ++++++--- cuda_bindings/tests/test_nvfatbin.py | 52 +++++++++++++------ 8 files changed, 98 insertions(+), 59 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd index fed9968e39..843759b32c 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd @@ -11,6 +11,7 @@ from ..cynvfatbin cimport * # Wrapper functions ############################################################################### +cdef const char* _nvFatbinGetErrorString(nvFatbinResult result) except?NULL nogil cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil @@ -21,9 +22,3 @@ cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NV cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil - - - - - - diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx index b78f15e73b..b50efe02b7 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx @@ -59,6 +59,7 @@ cdef int get_cuda_version(): cdef object __symbol_lock = threading.Lock() cdef bint __py_nvfatbin_init = False +cdef void* __nvFatbinGetErrorString = NULL cdef void* __nvFatbinCreate = NULL cdef void* __nvFatbinDestroy = NULL cdef void* __nvFatbinAddPTX = NULL @@ -87,6 +88,13 @@ cdef int _init_nvfatbin() except -1 nogil: return 0 # Load function + global __nvFatbinGetErrorString + __nvFatbinGetErrorString = dlsym(RTLD_DEFAULT, 'nvFatbinGetErrorString') + if __nvFatbinGetErrorString == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinGetErrorString = dlsym(handle, 'nvFatbinGetErrorString') + global __nvFatbinCreate __nvFatbinCreate = dlsym(RTLD_DEFAULT, 'nvFatbinCreate') if __nvFatbinCreate == NULL: @@ -178,6 +186,9 @@ cpdef dict _inspect_function_pointers(): _check_or_init_nvfatbin() cdef dict data = {} + global __nvFatbinGetErrorString + data["__nvFatbinGetErrorString"] = __nvFatbinGetErrorString + global __nvFatbinCreate data["__nvFatbinCreate"] = __nvFatbinCreate @@ -223,6 +234,16 @@ cpdef _inspect_function_pointer(str name): # Wrapper functions ############################################################################### +cdef const char* _nvFatbinGetErrorString(nvFatbinResult result) except?NULL nogil: + global __nvFatbinGetErrorString + _check_or_init_nvfatbin() + if __nvFatbinGetErrorString == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinGetErrorString is not found") + return (__nvFatbinGetErrorString)( + result) + + cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: global __nvFatbinCreate _check_or_init_nvfatbin() @@ -321,9 +342,3 @@ cdef nvFatbinResult _nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, raise FunctionNotFoundError("function nvFatbinAddTileIR is not found") return (__nvFatbinAddTileIR)( handle, code, size, identifier, optionsCmdLine) - - - - - - diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx index 2a4ff825ab..e40777a7e4 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx @@ -77,6 +77,7 @@ cdef int get_cuda_version(): cdef object __symbol_lock = threading.Lock() cdef bint __py_nvfatbin_init = False +cdef void* __nvFatbinGetErrorString = NULL cdef void* __nvFatbinCreate = NULL cdef void* __nvFatbinDestroy = NULL cdef void* __nvFatbinAddPTX = NULL @@ -101,6 +102,9 @@ cdef int _init_nvfatbin() except -1 nogil: handle = load_nvidia_dynamic_lib("nvfatbin")._handle_uint # Load function + global __nvFatbinGetErrorString + __nvFatbinGetErrorString = GetProcAddress(handle, 'nvFatbinGetErrorString') + global __nvFatbinCreate __nvFatbinCreate = GetProcAddress(handle, 'nvFatbinCreate') @@ -153,6 +157,9 @@ cpdef dict _inspect_function_pointers(): _check_or_init_nvfatbin() cdef dict data = {} + global __nvFatbinGetErrorString + data["__nvFatbinGetErrorString"] = __nvFatbinGetErrorString + global __nvFatbinCreate data["__nvFatbinCreate"] = __nvFatbinCreate @@ -198,6 +205,16 @@ cpdef _inspect_function_pointer(str name): # Wrapper functions ############################################################################### +cdef const char* _nvFatbinGetErrorString(nvFatbinResult result) except?NULL nogil: + global __nvFatbinGetErrorString + _check_or_init_nvfatbin() + if __nvFatbinGetErrorString == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinGetErrorString is not found") + return (__nvFatbinGetErrorString)( + result) + + cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: global __nvFatbinCreate _check_or_init_nvfatbin() @@ -296,9 +313,3 @@ cdef nvFatbinResult _nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, raise FunctionNotFoundError("function nvFatbinAddTileIR is not found") return (__nvFatbinAddTileIR)( handle, code, size, identifier, optionsCmdLine) - - - - - - diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pxd b/cuda_bindings/cuda/bindings/cynvfatbin.pxd index 03262af56b..a01a3ea1d9 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pxd @@ -42,6 +42,7 @@ ctypedef void* nvFatbinHandle 'nvFatbinHandle' # Functions ############################################################################### +cdef const char* nvFatbinGetErrorString(nvFatbinResult result) except?NULL nogil cdef nvFatbinResult nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil @@ -52,9 +53,3 @@ cdef nvFatbinResult nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVF cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil - - - - - - diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pyx b/cuda_bindings/cuda/bindings/cynvfatbin.pyx index 98a2c63412..7cd77c2d75 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pyx @@ -11,6 +11,10 @@ from ._internal cimport nvfatbin as _nvfatbin # Wrapper functions ############################################################################### +cdef const char* nvFatbinGetErrorString(nvFatbinResult result) except?NULL nogil: + return _nvfatbin._nvFatbinGetErrorString(result) + + cdef nvFatbinResult nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: return _nvfatbin._nvFatbinCreate(handle_indirect, options, optionsCount) @@ -49,9 +53,3 @@ cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, si cdef nvFatbinResult nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: return _nvfatbin._nvFatbinAddTileIR(handle, code, size, identifier, optionsCmdLine) - - - - - - diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pxd b/cuda_bindings/cuda/bindings/nvfatbin.pxd index ae987e3a7d..545d30dc0e 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/nvfatbin.pxd @@ -36,9 +36,3 @@ cpdef get(intptr_t handle, buffer) cpdef tuple version() cpdef add_reloc(intptr_t handle, code, size_t size) cpdef add_tile_ir(intptr_t handle, code, size_t size, identifier, options_cmd_line) - - - - - - diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx index 527b7846b0..701d2dd502 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx @@ -81,6 +81,25 @@ cpdef destroy(intptr_t handle): check_status(status) +cpdef str get_error_string(int result): + """nvFatbinGetErrorString returns an error description string for each error code. + + Args: + result (Result): error code. + + .. seealso:: `nvFatbinGetErrorString` + """ + cdef char* _output_ + cdef bytes _output_bytes_ + _output_ = nvFatbinGetErrorString(<_Result>result) + + if _output_ == NULL: + return "" + + _output_bytes_ = _output_ + return _output_bytes_.decode() + + cpdef intptr_t create(options, size_t options_count) except -1: """nvFatbinCreate creates a new handle. @@ -288,9 +307,3 @@ cpdef add_tile_ir(intptr_t handle, code, size_t size, identifier, options_cmd_li with nogil: __status__ = nvFatbinAddTileIR(handle, _code_, size, _identifier_, _options_cmd_line_) check_status(__status__) - - - - - - diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py index ae29b3c5ae..440305d51b 100644 --- a/cuda_bindings/tests/test_nvfatbin.py +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -1,11 +1,9 @@ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -import subprocess - -from cuda.bindings import nvfatbin, nvrtc import pytest +from cuda.bindings import nvfatbin, nvrtc ARCHITECTURES = ["sm_75", "sm_80", "sm_90", "sm_100"] PTX_VERSIONS = ["6.4", "7.0", "8.5", "8.8"] @@ -47,18 +45,22 @@ } """ + @pytest.fixture(params=ARCHITECTURES) def arch(request): return request.param + @pytest.fixture(params=PTX_VERSIONS) def ptx_version(request): return request.param + @pytest.fixture def PTX(arch, ptx_version): return PTX_TEMPLATE.format(PTX_VERSION=ptx_version, ARCH=arch) + @pytest.fixture def CUBIN(arch): def CHECK_NVRTC(err): @@ -78,10 +80,12 @@ def CHECK_NVRTC(err): CHECK_NVRTC(err) return cubin + # create a valid LTOIR input for testing @pytest.fixture def LTOIR(arch): arch = arch.replace("sm", "compute") + def CHECK_NVRTC(err): if err != nvrtc.nvrtcResult.NVRTC_SUCCESS: raise RuntimeError(repr(err)) @@ -100,17 +104,28 @@ def CHECK_NVRTC(err): CHECK_NVRTC(err) return empty_kernel_ltoir -@pytest.fixture -def OBJECT(arch, tmpdir): - empty_cplusplus_kernel = "__global__ void A() {} int main() { return 0; }" - with open(tmpdir / "object.cu", "w") as f: - f.write(empty_cplusplus_kernel) - subprocess.check_output(["nvcc", "-arch", arch, "-o", str(tmpdir / "object.o"), str(tmpdir / "object.cu")]) - with open(tmpdir / "object.o", "rb") as f: - object = f.read() +# @pytest.fixture +# def OBJECT(arch, tmpdir): +# empty_cplusplus_kernel = "__global__ void A() {} int main() { return 0; }" +# with open(tmpdir / "object.cu", "w") as f: +# f.write(empty_cplusplus_kernel) + +# subprocess.check_output(["nvcc", "-arch", arch, "-o", str(tmpdir / "object.o"), str(tmpdir / "object.cu")]) +# with open(tmpdir / "object.o", "rb") as f: +# object = f.read() + +# return object + - return object +@pytest.mark.parametrize("error_enum", nvfatbin.Result) +def test_get_error_string(error_enum): + es = nvfatbin.get_error_string(error_enum) + + if error_enum is nvfatbin.Result.SUCCESS: + assert es == "" + else: + assert "error" in es def test_nvfatbin_get_version(): @@ -118,13 +133,15 @@ def test_nvfatbin_get_version(): assert major is not None assert minor is not None + def test_nvfatbin_empty_create_and_destroy(): handle = nvfatbin.create([], 0) assert handle is not None nvfatbin.destroy(handle) + def test_nvfatbin_invalid_input_create(): - with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_UNRECOGNIZED_OPTION"): + with pytest.raises(nvfatbin.nvFatbinError, match="ERROR_UNRECOGNIZED_OPTION"): nvfatbin.create(["--unsupported_option"], 1) @@ -153,7 +170,7 @@ def test_nvfatbin_add_ptx(PTX, arch): @pytest.mark.parametrize("arch", ["sm_80"], indirect=True) def test_nvfatbin_add_cubin_ELF_SIZE_MISMATCH(CUBIN, arch): handle = nvfatbin.create([], 0) - with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"): + with pytest.raises(nvfatbin.nvFatbinError, match="ERROR_ELF_ARCH_MISMATCH"): nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), "75", "inc") nvfatbin.destroy(handle) @@ -174,7 +191,7 @@ def test_nvfatbin_add_cubin(CUBIN, arch): @pytest.mark.parametrize("arch", ["sm_80"], indirect=True) def test_nvfatbin_add_cubin_ELF_ARCH_MISMATCH(CUBIN, arch): handle = nvfatbin.create([], 0) - with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"): + with pytest.raises(nvfatbin.nvFatbinError, match="ERROR_ELF_ARCH_MISMATCH"): nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), "75", "inc") nvfatbin.destroy(handle) @@ -194,8 +211,9 @@ def test_nvdfatbin_add_ltoir(LTOIR, arch): @pytest.mark.parametrize("arch", ["sm_80"], indirect=True) def test_nvdfatbin_add_ltoir_ELF_ARCH_MISMATCH(LTOIR, arch): + pytest.skip() handle = nvfatbin.create([], 0) - with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"): + with pytest.raises(nvfatbin.nvFatbinError, match="ERROR_ELF_ARCH_MISMATCH"): nvfatbin.add_ltoir(handle, LTOIR, len(LTOIR), "75", "inc", "") nvfatbin.destroy(handle) @@ -208,4 +226,4 @@ def test_nvfatbin_add_reloc(OBJECT): buffer = bytearray(nvfatbin.size(handle)) nvfatbin.get(handle, buffer) - nvfatbin.destroy(handle) \ No newline at end of file + nvfatbin.destroy(handle) From bd868bdf7f523dbf6e52ff234966831844aa6032 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 14 Jan 2026 12:20:20 -0800 Subject: [PATCH 07/10] enable object creation and testing --- cuda_bindings/tests/test_nvfatbin.py | 33 +++++++++++++++++++--------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py index 440305d51b..1779c8ab43 100644 --- a/cuda_bindings/tests/test_nvfatbin.py +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -2,6 +2,9 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +import shutil +import subprocess + import pytest from cuda.bindings import nvfatbin, nvrtc @@ -105,17 +108,27 @@ def CHECK_NVRTC(err): return empty_kernel_ltoir -# @pytest.fixture -# def OBJECT(arch, tmpdir): -# empty_cplusplus_kernel = "__global__ void A() {} int main() { return 0; }" -# with open(tmpdir / "object.cu", "w") as f: -# f.write(empty_cplusplus_kernel) +@pytest.fixture +def OBJECT(arch, tmpdir): + empty_cplusplus_kernel = "__global__ void A() {} int main() { return 0; }" + with open(tmpdir / "object.cu", "w") as f: + f.write(empty_cplusplus_kernel) + + nvcc = shutil.which("nvcc") + if nvcc is None: + pytest.skip("nvcc not found on PATH") -# subprocess.check_output(["nvcc", "-arch", arch, "-o", str(tmpdir / "object.o"), str(tmpdir / "object.cu")]) -# with open(tmpdir / "object.o", "rb") as f: -# object = f.read() + # This is a test fixture that intentionally invokes a trusted tool (`nvcc`) to + # compile a temporary CUDA translation unit. + subprocess.run( # noqa: S603 + [nvcc, "-arch", arch, "-o", str(tmpdir / "object.o"), str(tmpdir / "object.cu")], + check=True, + capture_output=True, + ) + with open(tmpdir / "object.o", "rb") as f: + object = f.read() -# return object + return object @pytest.mark.parametrize("error_enum", nvfatbin.Result) @@ -125,7 +138,7 @@ def test_get_error_string(error_enum): if error_enum is nvfatbin.Result.SUCCESS: assert es == "" else: - assert "error" in es + assert es != "" def test_nvfatbin_get_version(): From d11cf026d583e0e4255b1a94a45e3108fc1c27f1 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 14 Jan 2026 16:38:02 -0800 Subject: [PATCH 08/10] remove the LTOIR mismatching arch failure test --- cuda_bindings/tests/test_nvfatbin.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py index 1779c8ab43..07118032f4 100644 --- a/cuda_bindings/tests/test_nvfatbin.py +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -48,6 +48,14 @@ } """ +TILEIR = """ +cuda_tile.module @hello_world_module { + entry @hello_world_kernel() { + print "Hello World!\n" + } +} +""" + @pytest.fixture(params=ARCHITECTURES) def arch(request): @@ -222,19 +230,20 @@ def test_nvdfatbin_add_ltoir(LTOIR, arch): nvfatbin.destroy(handle) -@pytest.mark.parametrize("arch", ["sm_80"], indirect=True) -def test_nvdfatbin_add_ltoir_ELF_ARCH_MISMATCH(LTOIR, arch): - pytest.skip() +def test_nvfatbin_add_reloc(OBJECT): handle = nvfatbin.create([], 0) - with pytest.raises(nvfatbin.nvFatbinError, match="ERROR_ELF_ARCH_MISMATCH"): - nvfatbin.add_ltoir(handle, LTOIR, len(LTOIR), "75", "inc", "") + nvfatbin.add_reloc(handle, OBJECT, len(OBJECT)) + + buffer = bytearray(nvfatbin.size(handle)) + nvfatbin.get(handle, buffer) nvfatbin.destroy(handle) -def test_nvfatbin_add_reloc(OBJECT): +def test_nvfatbin_add_tile_ir(): + pytest.skip() handle = nvfatbin.create([], 0) - nvfatbin.add_reloc(handle, OBJECT, len(OBJECT)) + nvfatbin.add_tile_ir(handle, TILEIR.encode(), len(TILEIR), "hello_world_module", "") buffer = bytearray(nvfatbin.size(handle)) From d995809e4111f412e1e6d0068af846f8227be259 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 14 Jan 2026 20:49:47 -0800 Subject: [PATCH 09/10] encode a legal tileIR into test --- cuda_bindings/tests/test_nvfatbin.py | 57 +++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py index 07118032f4..826279d832 100644 --- a/cuda_bindings/tests/test_nvfatbin.py +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +import base64 import shutil import subprocess @@ -48,13 +49,44 @@ } """ -TILEIR = """ -cuda_tile.module @hello_world_module { - entry @hello_world_kernel() { - print "Hello World!\n" - } -} -""" +# This is the base64 encoded TileIR from sample VectorAddition program +TILEIR_b64 = ( + "f1RpbGVJUgANAQAAgssDCAECBgYBCwEECgC/A0QHEAUAEAUBBgQIEAAABgUMAQABBgUIEAAVBgUMAQAC" + "BgUMAQADBgUIBAAYBgQIEAAFBgUMAQAGBgUIEAAbBgUMAQAHBgUMAQAIBgUIBAAeBgQIEAAKBgUMAQAL" + "BgUIEAAhBgUMAQAMBgUMAQANBgUIBAAkMAUFBTAFBQVOBQAmEjoIWwgsAwgALi1OBQAqEzoJWwgwCwky" + "AwkAMzFbCi9bCzQlDQE1Cw43JQ8BFlsNOQsOOg8QAgA4OyUPARlbDT0LDj5ODgA4PyUOATYlDwEXWw1C" + "Cw5DDxACAEFEBBA8RQMOAEBBWxEUCxJIURJJRxATAlsUSwsVTD0VBxwASkZNESUNATULDlAlDwEcWw1S" + "Cw5TDxACAFFUJQ8BH1sNVgsOV04OAFFYJQ4BNiUPAR1bDVsLDlwPEAIAWl0EEFVeAw4AWVpbERoLEmFR" + "EmJgEBMCWxRkCxVlPRUHHABjX2YRAhUAAE5nJQ0BNQsOaiUPASJbDWwLDm0PEAIAa24lDwElWw1wCw5x" + "Tg4Aa3IlDgE2JQ8BI1sNdQsOdg8QAgB0dwQQb3gDDgBzdFsRIAsSe1ESfHplBwwAfWl5EVwAAIQvCMvL" + "A8vLy8vLy8sAAAAAAAAAAAUAAAAAAAAACgAAAAAAAAAEAQAAAAQABAAABAAAAACD/QcIy8vLy8sBy8vL" + "AAAAAGrLy8vLy8vLBAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAA" + "BAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAA" + "AAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAA" + "BAAAAAAAAAAFAAAAAAAAAAYAAAAAAAAABwAAAAAAAAAIAAAAAAAAAAcAAAAAAAAABwAAAAAAAAAJAAAA" + "AAAAAAoAAAAAAAAACQAAAAAAAAAJAAAAAAAAAAkAAAAAAAAACwAAAAAAAAAMAAAAAAAAAA0AAAAAAAAA" + "DQAAAAAAAAANAAAAAAAAAA0AAAAAAAAADQAAAAAAAAANAAAAAAAAAA0AAAAAAAAADQAAAAAAAAANAAAA" + "AAAAAA0AAAAAAAAADQAAAAAAAAANAAAAAAAAAA0AAAAAAAAADQAAAAAAAAANAAAAAAAAAA0AAAAAAAAA" + "DQAAAAAAAAANAAAAAAAAAA0AAAAAAAAADQAAAAAAAAANAAAAAAAAAA0AAAAAAAAADQAAAAAAAAANAAAA" + "AAAAAA4AAAAAAAAADgAAAAAAAAAOAAAAAAAAAA4AAAAAAAAADgAAAAAAAAAOAAAAAAAAAA4AAAAAAAAA" + "DgAAAAAAAAAOAAAAAAAAAA4AAAAAAAAADgAAAAAAAAAOAAAAAAAAAA4AAAAAAAAADgAAAAAAAAAOAAAA" + "AAAAAA4AAAAAAAAADgAAAAAAAAAOAAAAAAAAAA4AAAAAAAAADgAAAAAAAAAOAAAAAAAAAA4AAAAAAAAA" + "DgAAAAAAAAAOAAAAAAAAAA8AAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAA" + "AAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAA" + "EAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAA" + "AAAAABAAAAAAAAAABAAAAAAAAAAQy8vLAAAAAAMAAAAFAAAADAAAABEAAAAXAAAAHQAAACMAAAApAAAA" + "LwAAADUAAAA7AAAAQQAAAEcAAABNAAAAUwAAAAIAAQEBBQF9AgICfQQDA34ABAMDkQEMBAMDkgEMBAMD" + "lQEIBAMDlQEZBAMDlgEIBAMDlgEZBAMDmgEIBAMDmwEIBAMDnwENBAMDoAENBAMDowEPBAMDpwEEhcQC" + "BMvLyxbLy8sAAAAAAQAAAAIAAAADAAAABQAAAAgAAAALAAAAHwAAACAAAAArAAAANgAAAEkAAABcAAAA" + "XQAAAHAAAACDAAAAhgAAAJkAAACsAAAAvwAAAMIAAADVAAAAAAMHDAINAwANAQAQEQQFBQUFBAUFBQUE" + "BQUFBQUFABENAQEBAAAAAAAAAA0BAQAEAAAAAAAADQECAQAAAAAAAAABAAAAAAAAAA0BAgEAAAAAAAAA" + "AAQAAAAAAAAEDQwCAQAAAAAAAAABAAAAAAAAAA0MAgEAAAAAAAAAAAQAAAAAAAANDAANAAIBAAAAAAAA" + "AAAEAAAAAAAADQMCAQAAAAAAAAABAAAAAAAAAA0DAgEAAAAAAAAAAAQAAAAAAAANAgANAgIBAAAAAAAA" + "AAEAAAAAAAAADQICAQAAAAAAAAAABAAAAAAAAIGxAQQFy8vLAAAAABEAAAA9AAAAVQAAAJMAAABWZWN0" + "b3JBZGRpdGlvbi5weS9sb2NhbGhvbWUvbG9jYWwtd2FuZ20vY3V0aWxlLXB5dGhvbi9zYW1wbGVzdmVj" + "X2FkZF9rZXJuZWxfMmRfZ2F0aGVyL2xvY2FsaG9tZS9sb2NhbC13YW5nbS9jdXRpbGUtcHl0aG9uL3Nh" + "bXBsZXMvVmVjdG9yQWRkaXRpb24ucHlzbV8xMjAA" +) @pytest.fixture(params=ARCHITECTURES) @@ -139,6 +171,12 @@ def OBJECT(arch, tmpdir): return object +@pytest.fixture +def TILEIR(tmpdir): + binary_data = base64.b64decode(TILEIR_b64) + return binary_data + + @pytest.mark.parametrize("error_enum", nvfatbin.Result) def test_get_error_string(error_enum): es = nvfatbin.get_error_string(error_enum) @@ -240,10 +278,9 @@ def test_nvfatbin_add_reloc(OBJECT): nvfatbin.destroy(handle) -def test_nvfatbin_add_tile_ir(): - pytest.skip() +def test_nvfatbin_add_tile_ir(TILEIR): handle = nvfatbin.create([], 0) - nvfatbin.add_tile_ir(handle, TILEIR.encode(), len(TILEIR), "hello_world_module", "") + nvfatbin.add_tile_ir(handle, TILEIR, len(TILEIR), "VectorAdd", "") buffer = bytearray(nvfatbin.size(handle)) From 8e75d8acc73b66bb2c3fa0c80f9b35128d2a9f00 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 14 Jan 2026 21:29:01 -0800 Subject: [PATCH 10/10] update the license year --- cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd | 2 +- cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx | 2 +- cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx | 2 +- cuda_bindings/cuda/bindings/cynvfatbin.pxd | 2 +- cuda_bindings/cuda/bindings/cynvfatbin.pyx | 2 +- cuda_bindings/cuda/bindings/nvfatbin.pxd | 2 +- cuda_bindings/cuda/bindings/nvfatbin.pyx | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd index 843759b32c..9ed97e016b 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx index b50efe02b7..6305e49f8b 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx index e40777a7e4..1592b752a9 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pxd b/cuda_bindings/cuda/bindings/cynvfatbin.pxd index a01a3ea1d9..b75f866ae4 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pyx b/cuda_bindings/cuda/bindings/cynvfatbin.pyx index 7cd77c2d75..028440a444 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pxd b/cuda_bindings/cuda/bindings/nvfatbin.pxd index 545d30dc0e..d21cca3508 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/nvfatbin.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx index 701d2dd502..92b571a26f 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE #