From bec95056de50cec098541dbcae83ed01318b959d Mon Sep 17 00:00:00 2001 From: Jonathan Roose Date: Fri, 16 May 2025 14:35:47 -0700 Subject: [PATCH 1/8] Changing py_* functions to use result.keepAliveHost and changing keepAliveHost to properly handle the situation where the DualView.host_view() is an unmanaged alias of DualView.device_view(), as is the case when executing in Kokkos in SERIAL mode. --- mlir/lib/Target/KokkosCpp/LAPISSupport.hpp | 24 ++++++++++++++++++- .../KokkosCpp/LAPISSupportFormatted.hpp | 24 ++++++++++++++++++- .../Target/KokkosCpp/TranslateToKokkosCpp.cpp | 7 +++--- 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Target/KokkosCpp/LAPISSupport.hpp b/mlir/lib/Target/KokkosCpp/LAPISSupport.hpp index b4242e6a..ba945d4b 100644 --- a/mlir/lib/Target/KokkosCpp/LAPISSupport.hpp +++ b/mlir/lib/Target/KokkosCpp/LAPISSupport.hpp @@ -135,6 +135,14 @@ namespace LAPIS struct DualViewBase { + enum AliasStatus + { + ALIAS_STATUS_UNKNOWN = 0, + HOST_IS_ALIAS = 1, + DEVICE_IS_ALIAS = 2, + NEITHER_IS_ALIAS = 3 + }; + virtual ~DualViewBase() {} virtual void syncHost() = 0; virtual void syncDevice() = 0; @@ -142,6 +150,7 @@ namespace LAPIS bool modified_host = false; bool modified_device = false; std::shared_ptr parent; + AliasStatus alias_status; void setParent(const std::shared_ptr& parent_) { @@ -202,9 +211,11 @@ namespace LAPIS modified_device = true; if constexpr(deviceAccessesHost) { host_view = HostView(v.data(), v.layout()); + alias_status = AliasStatus::HOST_IS_ALIAS; } else { host_view = HostView(Kokkos::view_alloc(Kokkos::WithoutInitializing, v.label() + "_host"), v.layout()); + alias_status = AliasStatus::NEITHER_IS_ALIAS; } device_view = v; } @@ -212,9 +223,11 @@ namespace LAPIS modified_host = true; if constexpr(deviceAccessesHost) { device_view = DeviceView(v.data(), v.layout()); + alias_status = AliasStatus::DEVICE_IS_ALIAS; } else { device_view = DeviceView(Kokkos::view_alloc(Kokkos::WithoutInitializing, v.label() + "_dev"), v.layout()); + alias_status = AliasStatus::NEITHER_IS_ALIAS; } host_view = v; } @@ -287,7 +300,16 @@ namespace LAPIS // It is assumed to be either managed, // or unmanaged but references memory (e.g. from numpy) // with a longer lifetime that any result from the current LAPIS function. - keepAlive(host_view); + // + // However, if it's unmanaged because of aliasing during initialization, + // then keep alive the device_view instead to avoid reference counting + // issues in Kokkos::View. + if(alias_status != AliasStatus::HOST_IS_ALIAS) + { + keepAlive(host_view); + }else{ + keepAlive(device_view); + } } void deallocate() { diff --git a/mlir/lib/Target/KokkosCpp/LAPISSupportFormatted.hpp b/mlir/lib/Target/KokkosCpp/LAPISSupportFormatted.hpp index 350ecc7a..033c4ec4 100644 --- a/mlir/lib/Target/KokkosCpp/LAPISSupportFormatted.hpp +++ b/mlir/lib/Target/KokkosCpp/LAPISSupportFormatted.hpp @@ -135,6 +135,14 @@ "\n" " struct DualViewBase\n" " {\n" +" enum AliasStatus\n" +" {\n" +" ALIAS_STATUS_UNKNOWN = 0,\n" +" HOST_IS_ALIAS = 1,\n" +" DEVICE_IS_ALIAS = 2,\n" +" NEITHER_IS_ALIAS = 3\n" +" };\n" +"\n" " virtual ~DualViewBase() {}\n" " virtual void syncHost() = 0;\n" " virtual void syncDevice() = 0;\n" @@ -142,6 +150,7 @@ " bool modified_host = false;\n" " bool modified_device = false;\n" " std::shared_ptr parent;\n" +" AliasStatus alias_status;\n" "\n" " void setParent(const std::shared_ptr& parent_)\n" " {\n" @@ -202,9 +211,11 @@ " modified_device = true;\n" " if constexpr(deviceAccessesHost) {\n" " host_view = HostView(v.data(), v.layout());\n" +" alias_status = AliasStatus::HOST_IS_ALIAS;\n" " }\n" " else {\n" " host_view = HostView(Kokkos::view_alloc(Kokkos::WithoutInitializing, v.label() + \"_host\"), v.layout());\n" +" alias_status = AliasStatus::NEITHER_IS_ALIAS;\n" " }\n" " device_view = v;\n" " }\n" @@ -212,9 +223,11 @@ " modified_host = true;\n" " if constexpr(deviceAccessesHost) {\n" " device_view = DeviceView(v.data(), v.layout());\n" +" alias_status = AliasStatus::DEVICE_IS_ALIAS;\n" " }\n" " else {\n" " device_view = DeviceView(Kokkos::view_alloc(Kokkos::WithoutInitializing, v.label() + \"_dev\"), v.layout());\n" +" alias_status = AliasStatus::NEITHER_IS_ALIAS;\n" " }\n" " host_view = v;\n" " }\n" @@ -287,7 +300,16 @@ " // It is assumed to be either managed,\n" " // or unmanaged but references memory (e.g. from numpy)\n" " // with a longer lifetime that any result from the current LAPIS function.\n" -" keepAlive(host_view);\n" +" //\n" +" // However, if it\'s unmanaged because of aliasing during initialization,\n" +" // then keep alive the device_view instead to avoid reference counting\n" +" // issues in Kokkos::View.\n" +" if(alias_status != AliasStatus::HOST_IS_ALIAS)\n" +" {\n" +" keepAlive(host_view);\n" +" }else{\n" +" keepAlive(device_view);\n" +" }\n" " }\n" "\n" " void deallocate() {\n" diff --git a/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp b/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp index e849ba14..1984243b 100644 --- a/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp +++ b/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp @@ -2384,12 +2384,11 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func os << ".host_view());\n"; // Keep the host view alive until lapis_finalize() is called. // Otherwise it would be deallocated as soon as this function returns. - os << "LAPIS::keepAlive("; + if(numResults == size_t(1)) - os << "results"; + os << "results.keepAliveHost();\n"; else - os << "std::get<" << i << ">(results)"; - os << ".host_view());\n"; + os << "std::get<" << i << ">(results).keepAliveHost();\n"; } else { From a44f7991fbf6cf236240256ab7623b4f2cbf12e4 Mon Sep 17 00:00:00 2001 From: Jonathan Roose Date: Tue, 20 May 2025 13:13:50 -0700 Subject: [PATCH 2/8] Implementing LAPIS Memref class to handle keepAlives using reference counting of numpy arrays --- mlir/lib/Target/KokkosCpp/LAPISSupport.hpp | 111 +++++++++--------- .../KokkosCpp/LAPISSupportFormatted.hpp | 111 +++++++++--------- .../Target/KokkosCpp/TranslateToKokkosCpp.cpp | 77 ++++++++---- 3 files changed, 168 insertions(+), 131 deletions(-) diff --git a/mlir/lib/Target/KokkosCpp/LAPISSupport.hpp b/mlir/lib/Target/KokkosCpp/LAPISSupport.hpp index ba945d4b..3d81a939 100644 --- a/mlir/lib/Target/KokkosCpp/LAPISSupport.hpp +++ b/mlir/lib/Target/KokkosCpp/LAPISSupport.hpp @@ -15,6 +15,40 @@ struct StridedMemRefType { namespace LAPIS { + // KeepAlive structure keeps a reference to Kokkos::Views which + // are returned to Python. Since it's difficult to transfer ownership of a + // Kokkos::View's memory to numpy, we just have the Kokkos::View maintain ownership + // and return an unmanaged numpy array to Python. + // + // All these views will be deallocated during lapis_finalize to avoid leaking. + // The downside is that if a function is called many times, + // all its results are kept in memory at the same time. + struct KeepAlive + { + virtual ~KeepAlive() {} + }; + + template + struct KeepAliveT : public KeepAlive + { + // Make a shallow-copy of val + KeepAliveT(const T& val) : p(new T(val)) {} + std::unique_ptr p; + }; + + template + KeepAlive* keepAlive(const T& val) + { + return new KeepAliveT(val); + } + + template + struct MemRefType + { + StridedMemRefType smr; + KeepAlive* keepAliveHandle; + }; + using TeamPolicy = Kokkos::TeamPolicy<>; using TeamMember = typename TeamPolicy::member_type; @@ -33,6 +67,15 @@ namespace LAPIS return smr; } + template + MemRefType viewToLapisMemref(const V& v, KeepAlive* keepAliveHandle) + { + MemRefType lmr; + lmr.smr = viewToStridedMemref(v); + lmr.keepAliveHandle = keepAliveHandle; + return lmr; + } + template V stridedMemrefToView(const StridedMemRefType& smr) { @@ -90,35 +133,6 @@ namespace LAPIS return V(&smr.data[smr.offset], layout); } - // KeepAlive structure keeps a reference to Kokkos::Views which - // are returned to Python. Since it's difficult to transfer ownership of a - // Kokkos::View's memory to numpy, we just have the Kokkos::View maintain ownership - // and return an unmanaged numpy array to Python. - // - // All these views will be deallocated during lapis_finalize to avoid leaking. - // The downside is that if a function is called many times, - // all its results are kept in memory at the same time. - struct KeepAlive - { - virtual ~KeepAlive() {} - }; - - template - struct KeepAliveT : public KeepAlive - { - // Make a shallow-copy of val - KeepAliveT(const T& val) : p(new T(val)) {} - std::unique_ptr p; - }; - - static std::vector> alives; - - template - void keepAlive(const T& val) - { - alives.emplace_back(new KeepAliveT(val)); - } - // DualView design // - DualView is a shallow object with a shared_ptr to a DualViewImpl. // - DualViewImpl has the actual host and device views as members @@ -130,9 +144,6 @@ namespace LAPIS // - Assume that any DualView's parent is contiguous, and can be deep-copied between h and d // - All DualViews with the same parent share the parent's modify flags // - // DualViewBase can also "keepAliveHost" to keep its host view alive until lapis_finalize is called. - // This is used to safely return host views to python for numpy arrays to alias. - struct DualViewBase { enum AliasStatus @@ -146,7 +157,6 @@ namespace LAPIS virtual ~DualViewBase() {} virtual void syncHost() = 0; virtual void syncDevice() = 0; - virtual void keepAliveHost() = 0; bool modified_host = false; bool modified_device = false; std::shared_ptr parent; @@ -294,24 +304,6 @@ namespace LAPIS } } - void keepAliveHost() override - { - // keep the parent's host view alive. - // It is assumed to be either managed, - // or unmanaged but references memory (e.g. from numpy) - // with a longer lifetime that any result from the current LAPIS function. - // - // However, if it's unmanaged because of aliasing during initialization, - // then keep alive the device_view instead to avoid reference counting - // issues in Kokkos::View. - if(alias_status != AliasStatus::HOST_IS_ALIAS) - { - keepAlive(host_view); - }else{ - keepAlive(device_view); - } - } - void deallocate() { device_view = DeviceView(); host_view = HostView(); @@ -325,6 +317,14 @@ namespace LAPIS return device_view.stride(dim); } + KeepAlive* keepAliveHost() { + if(alias_status != AliasStatus::HOST_IS_ALIAS) { + return keepAlive(host_view); + }else{ + return keepAlive(device_view); + } + } + DeviceView device_view; HostView host_view; }; @@ -437,13 +437,14 @@ namespace LAPIS return impl->stride(dim); } - void keepAliveHost() const { - impl->parent->keepAliveHost(); - } - void syncHostOnDestroy() { syncHostWhenDestroyed = true; } + + KeepAlive* keepAliveHost() + { + return impl->keepAliveHost(); + } }; inline int threadParallelVectorLength(int par) { diff --git a/mlir/lib/Target/KokkosCpp/LAPISSupportFormatted.hpp b/mlir/lib/Target/KokkosCpp/LAPISSupportFormatted.hpp index 033c4ec4..4e147c2d 100644 --- a/mlir/lib/Target/KokkosCpp/LAPISSupportFormatted.hpp +++ b/mlir/lib/Target/KokkosCpp/LAPISSupportFormatted.hpp @@ -15,6 +15,40 @@ "\n" "namespace LAPIS\n" "{\n" +" // KeepAlive structure keeps a reference to Kokkos::Views which\n" +" // are returned to Python. Since it\'s difficult to transfer ownership of a\n" +" // Kokkos::View\'s memory to numpy, we just have the Kokkos::View maintain ownership\n" +" // and return an unmanaged numpy array to Python.\n" +" //\n" +" // All these views will be deallocated during lapis_finalize to avoid leaking.\n" +" // The downside is that if a function is called many times,\n" +" // all its results are kept in memory at the same time.\n" +" struct KeepAlive\n" +" {\n" +" virtual ~KeepAlive() {}\n" +" };\n" +"\n" +" template\n" +" struct KeepAliveT : public KeepAlive\n" +" {\n" +" // Make a shallow-copy of val\n" +" KeepAliveT(const T& val) : p(new T(val)) {}\n" +" std::unique_ptr p;\n" +" };\n" +"\n" +" template\n" +" KeepAlive* keepAlive(const T& val)\n" +" {\n" +" return new KeepAliveT(val);\n" +" }\n" +"\n" +" template \n" +" struct MemRefType\n" +" {\n" +" StridedMemRefType smr;\n" +" KeepAlive* keepAliveHandle;\n" +" };\n" +"\n" " using TeamPolicy = Kokkos::TeamPolicy<>;\n" " using TeamMember = typename TeamPolicy::member_type;\n" "\n" @@ -34,6 +68,15 @@ " }\n" "\n" " template\n" +" MemRefType viewToLapisMemref(const V& v, KeepAlive* keepAliveHandle)\n" +" {\n" +" MemRefType lmr;\n" +" lmr.smr = viewToStridedMemref(v);\n" +" lmr.keepAliveHandle = keepAliveHandle;\n" +" return lmr;\n" +" }\n" +"\n" +" template\n" " V stridedMemrefToView(const StridedMemRefType& smr)\n" " {\n" " using Layout = typename V::array_layout;\n" @@ -90,35 +133,6 @@ " return V(&smr.data[smr.offset], layout);\n" " }\n" "\n" -" // KeepAlive structure keeps a reference to Kokkos::Views which\n" -" // are returned to Python. Since it\'s difficult to transfer ownership of a\n" -" // Kokkos::View\'s memory to numpy, we just have the Kokkos::View maintain ownership\n" -" // and return an unmanaged numpy array to Python.\n" -" //\n" -" // All these views will be deallocated during lapis_finalize to avoid leaking.\n" -" // The downside is that if a function is called many times,\n" -" // all its results are kept in memory at the same time.\n" -" struct KeepAlive\n" -" {\n" -" virtual ~KeepAlive() {}\n" -" };\n" -"\n" -" template\n" -" struct KeepAliveT : public KeepAlive\n" -" {\n" -" // Make a shallow-copy of val\n" -" KeepAliveT(const T& val) : p(new T(val)) {}\n" -" std::unique_ptr p;\n" -" };\n" -"\n" -" static std::vector> alives;\n" -"\n" -" template\n" -" void keepAlive(const T& val)\n" -" {\n" -" alives.emplace_back(new KeepAliveT(val));\n" -" }\n" -"\n" " // DualView design\n" " // - DualView is a shallow object with a shared_ptr to a DualViewImpl.\n" " // - DualViewImpl has the actual host and device views as members\n" @@ -130,9 +144,6 @@ " // - Assume that any DualView\'s parent is contiguous, and can be deep-copied between h and d\n" " // - All DualViews with the same parent share the parent\'s modify flags\n" " //\n" -" // DualViewBase can also \"keepAliveHost\" to keep its host view alive until lapis_finalize is called.\n" -" // This is used to safely return host views to python for numpy arrays to alias.\n" -"\n" " struct DualViewBase\n" " {\n" " enum AliasStatus\n" @@ -146,7 +157,6 @@ " virtual ~DualViewBase() {}\n" " virtual void syncHost() = 0;\n" " virtual void syncDevice() = 0;\n" -" virtual void keepAliveHost() = 0;\n" " bool modified_host = false;\n" " bool modified_device = false;\n" " std::shared_ptr parent;\n" @@ -294,24 +304,6 @@ " }\n" " }\n" "\n" -" void keepAliveHost() override\n" -" {\n" -" // keep the parent\'s host view alive.\n" -" // It is assumed to be either managed,\n" -" // or unmanaged but references memory (e.g. from numpy)\n" -" // with a longer lifetime that any result from the current LAPIS function.\n" -" //\n" -" // However, if it\'s unmanaged because of aliasing during initialization,\n" -" // then keep alive the device_view instead to avoid reference counting\n" -" // issues in Kokkos::View.\n" -" if(alias_status != AliasStatus::HOST_IS_ALIAS)\n" -" {\n" -" keepAlive(host_view);\n" -" }else{\n" -" keepAlive(device_view);\n" -" }\n" -" }\n" -"\n" " void deallocate() {\n" " device_view = DeviceView();\n" " host_view = HostView();\n" @@ -325,6 +317,14 @@ " return device_view.stride(dim);\n" " }\n" "\n" +" KeepAlive* keepAliveHost() {\n" +" if(alias_status != AliasStatus::HOST_IS_ALIAS) {\n" +" return keepAlive(host_view);\n" +" }else{\n" +" return keepAlive(device_view);\n" +" }\n" +" }\n" +"\n" " DeviceView device_view;\n" " HostView host_view;\n" " };\n" @@ -437,13 +437,14 @@ " return impl->stride(dim);\n" " }\n" "\n" -" void keepAliveHost() const {\n" -" impl->parent->keepAliveHost();\n" -" }\n" -"\n" " void syncHostOnDestroy() {\n" " syncHostWhenDestroyed = true;\n" " }\n" +"\n" +" KeepAlive* keepAliveHost()\n" +" {\n" +" return impl->keepAliveHost();\n" +" }\n" " };\n" "\n" " inline int threadParallelVectorLength(int par) {\n" diff --git a/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp b/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp index 1984243b..3bcf79b5 100644 --- a/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp +++ b/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp @@ -2278,9 +2278,9 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func auto retType = ftype.getResult(i); if(auto memrefType = dyn_cast(retType)) { - os << "StridedMemRefType<"; + os << "LAPIS::MemRefType<"; if (failed(emitter.emitType(loc, memrefType.getElementType()))) - return functionOp.emitError("Failed to emit result type as StridedMemRefType"); + return functionOp.emitError("Failed to emit result type as LAPIS::MemRefType"); os << ", " << memrefType.getShape().size() << ">** ret" << i; } else @@ -2376,19 +2376,14 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func os << "results.syncHost();\n"; else os << "std::get<" << i << ">(results).syncHost();\n"; - os << "**ret" << i << " = LAPIS::viewToStridedMemref("; + os << "**ret" << i << " = LAPIS::viewToLapisMemref("; if(numResults == size_t(1)) os << "results"; else os << "std::get<" << i << ">(results)"; - os << ".host_view());\n"; + os << ".host_view(), results.keepAliveHost());\n"; // Keep the host view alive until lapis_finalize() is called. // Otherwise it would be deallocated as soon as this function returns. - - if(numResults == size_t(1)) - os << "results.keepAliveHost();\n"; - else - os << "std::get<" << i << ">(results).keepAliveHost();\n"; } else { @@ -2589,7 +2584,7 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func if(auto memrefType = dyn_cast(retType)) { int rank = memrefType.hasRank() ? memrefType.getShape().size() : 1; - py_os << "ret" << i << " = ctypes.pointer(ctypes.pointer(rt.make_nd_memref_descriptor(" << rank << ", " << getCtypesType(memrefType.getElementType()) << ")()))\n"; + py_os << "ret" << i << " = _make_lapis_memref(self, " << rank << ", " << getCtypesType(memrefType.getElementType()) << ")()\n"; } else if(isa(retType)) { @@ -2630,7 +2625,7 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func } else if(isa(retType)) { - py_os << "ret" << i; + py_os << "ctypes.pointer(ctypes.pointer(ret" << i << "))"; } else { @@ -2666,7 +2661,7 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func } py_os << ")\n"; // Finally, generate the return statement. - // Note that in Python, a 1-elem tuple is equivalent to scalar. + // Note that we return a scalar if a single result is returned. if(numResults) { py_os << "return ("; @@ -2681,7 +2676,7 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func } else if(isa(retType)) { - py_os << "rt.ranked_memref_to_numpy(ret" << i << "[0])"; + py_os << "ret" << i << ".asnumpy()"; } else if(auto structType = dyn_cast(retType)) { int idx = 0; @@ -2693,7 +2688,7 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func py_os << "ret" << i << "[0]"; } } - py_os << ")\n"; + py_os << ")\n\n"; } py_os.unindent(); return success(); @@ -3745,11 +3740,15 @@ LogicalResult KokkosCppEmitter::emitInitAndFinalize(bool finalizeKokkos = true) } } // Free views returned to Python - os << "LAPIS::alives.clear();\n"; if(finalizeKokkos) os << "Kokkos::finalize();\n"; os.unindent(); + os << "}\n\n"; + os << "extern \"C\" void freeKeepAlive(LAPIS::KeepAlive* handle)\n"; + os << "{\n"; + os << " delete handle;\n"; os << "}\n"; + return success(); } @@ -3766,22 +3765,58 @@ void KokkosCppEmitter::emitCppBoilerplate() ; } - void KokkosCppEmitter::emitPythonBoilerplate() { *py_os << "import ctypes\n"; + *py_os << "import types\n"; *py_os << "import numpy\n"; + *py_os << "import functools\n"; + *py_os << "import sys\n"; *py_os << "from mlir import runtime as rt\n"; + *py_os << "\n"; + *py_os << "@functools.cache\n"; + *py_os << "def _make_lapis_memref(module, rank, ctypes_type):\n"; + *py_os << " mlir_memref = rt.make_nd_memref_descriptor(rank, ctypes_type)\n"; + *py_os << "\n"; + *py_os << " class LapisMemref(ctypes.Structure):\n"; + *py_os << " _ctype = ctypes_type\n"; + *py_os << "\n"; + *py_os << " _fields_ = [\n"; + *py_os << " ('smr', mlir_memref),\n"; + *py_os << " ('keepAliveHandle', ctypes.c_void_p)\n"; + *py_os << " ]\n"; + *py_os << "\n"; + *py_os << " def asctypes(self):\n"; + *py_os << " size = sum((size-1) for size in self.smr.shape) + self.smr.offset\n"; + *py_os << " buffer_type = self._ctype * size\n"; + *py_os << " ret = ctypes.cast(self.smr.aligned, ctypes.POINTER(buffer_type)).contents\n"; + *py_os << " ret.base = self\n"; + *py_os << " return ret\n"; + *py_os << "\n"; + *py_os << " def asnumpy(self):\n"; + *py_os << " carray = self.asctypes()\n"; + *py_os << " obj = numpy.frombuffer(carray, dtype=self._ctype, offset=self.smr.offset * ctypes.sizeof(self._ctype))\n"; + *py_os << " ret = numpy.lib.stride_tricks.as_strided(\n"; + *py_os << " obj[self.smr.offset:],\n"; + *py_os << " shape=numpy.ctypeslib.as_array(self.smr.shape),\n"; + *py_os << " strides=numpy.ctypeslib.as_array(self.smr.strides) * obj.itemsize\n"; + *py_os << " )\n"; + *py_os << " return ret\n"; + *py_os << "\n"; + *py_os << " def __del__(self):\n"; + *py_os << " module.libHandle.freeKeepAlive(ctypes.c_void_p(self.keepAliveHandle))\n"; + *py_os << "\n"; + *py_os << " return LapisMemref\n"; + *py_os << "\n"; *py_os << "class LAPISModule:\n"; *py_os << " def __init__(self, libPath):\n"; - //*py_os << " print('Hello from LAPISModule.__init__!')\n"; *py_os << " self.libHandle = ctypes.CDLL(libPath)\n"; - // Do all initialization immediately - //*py_os << " print('Initializing module.')\n"; *py_os << " self.libHandle.lapis_initialize()\n"; - //*py_os << " print('Done initializing module.')\n"; + *py_os << "\n"; *py_os << " def __del__(self):\n"; - *py_os << " self.libHandle.lapis_finalize()\n"; + *py_os << " self.libHandle.lapis_finalize()\n"; + *py_os << "\n"; + //From here, only function wrappers are emitted. //These are class members so indent all of them now. py_os->indent(); From af7ac34a2cbc83e27c3aeb8a50acbcdc9d6e749b Mon Sep 17 00:00:00 2001 From: Jonathan Roose Date: Wed, 11 Jun 2025 07:15:44 -0700 Subject: [PATCH 3/8] Implemented dual views in Python. Still need to check struct compatibility. --- mlir/lib/Target/KokkosCpp/LAPISSupport.hpp | 194 +++++++++++------- .../KokkosCpp/LAPISSupportFormatted.hpp | 194 +++++++++++------- .../Target/KokkosCpp/TranslateToKokkosCpp.cpp | 190 ++++++++++------- python/lapis/KokkosBackend.py | 5 +- 4 files changed, 349 insertions(+), 234 deletions(-) diff --git a/mlir/lib/Target/KokkosCpp/LAPISSupport.hpp b/mlir/lib/Target/KokkosCpp/LAPISSupport.hpp index 3d81a939..b1bdf047 100644 --- a/mlir/lib/Target/KokkosCpp/LAPISSupport.hpp +++ b/mlir/lib/Target/KokkosCpp/LAPISSupport.hpp @@ -4,8 +4,12 @@ #include #include +struct StridedMemRefTypeBase +{ +}; + template -struct StridedMemRefType { +struct StridedMemRefType : public StridedMemRefTypeBase { T *basePtr; T *data; int64_t offset; @@ -15,67 +19,6 @@ struct StridedMemRefType { namespace LAPIS { - // KeepAlive structure keeps a reference to Kokkos::Views which - // are returned to Python. Since it's difficult to transfer ownership of a - // Kokkos::View's memory to numpy, we just have the Kokkos::View maintain ownership - // and return an unmanaged numpy array to Python. - // - // All these views will be deallocated during lapis_finalize to avoid leaking. - // The downside is that if a function is called many times, - // all its results are kept in memory at the same time. - struct KeepAlive - { - virtual ~KeepAlive() {} - }; - - template - struct KeepAliveT : public KeepAlive - { - // Make a shallow-copy of val - KeepAliveT(const T& val) : p(new T(val)) {} - std::unique_ptr p; - }; - - template - KeepAlive* keepAlive(const T& val) - { - return new KeepAliveT(val); - } - - template - struct MemRefType - { - StridedMemRefType smr; - KeepAlive* keepAliveHandle; - }; - - using TeamPolicy = Kokkos::TeamPolicy<>; - using TeamMember = typename TeamPolicy::member_type; - - template - StridedMemRefType viewToStridedMemref(const V& v) - { - StridedMemRefType smr; - smr.basePtr = v.data(); - smr.data = v.data(); - smr.offset = 0; - for(int i = 0; i < int(V::rank); i++) - { - smr.sizes[i] = v.extent(i); - smr.strides[i] = v.stride(i); - } - return smr; - } - - template - MemRefType viewToLapisMemref(const V& v, KeepAlive* keepAliveHandle) - { - MemRefType lmr; - lmr.smr = viewToStridedMemref(v); - lmr.keepAliveHandle = keepAliveHandle; - return lmr; - } - template V stridedMemrefToView(const StridedMemRefType& smr) { @@ -133,6 +76,41 @@ namespace LAPIS return V(&smr.data[smr.offset], layout); } + struct PythonParameterBase + { + enum WrapperType : int32_t { + EMPTY_TYPE = 0, + STRIDED_MEMREF_TYPE = 1, + DUALVIEW_TYPE = 2 + }; + + WrapperType wrapper_type; + int32_t rank; + + union { + struct StridedMemRefTypeBase* smr; + struct DualViewBase* view; + }; + }; + + using TeamPolicy = Kokkos::TeamPolicy<>; + using TeamMember = typename TeamPolicy::member_type; + + template + StridedMemRefType viewToStridedMemref(const V& v) + { + StridedMemRefType smr; + smr.basePtr = v.data(); + smr.data = v.data(); + smr.offset = 0; + for(int i = 0; i < int(V::rank); i++) + { + smr.sizes[i] = v.extent(i); + smr.strides[i] = v.stride(i); + } + return smr; + } + // DualView design // - DualView is a shallow object with a shared_ptr to a DualViewImpl. // - DualViewImpl has the actual host and device views as members @@ -144,7 +122,7 @@ namespace LAPIS // - Assume that any DualView's parent is contiguous, and can be deep-copied between h and d // - All DualViews with the same parent share the parent's modify flags // - struct DualViewBase + struct DualViewImplBase { enum AliasStatus { @@ -154,25 +132,27 @@ namespace LAPIS NEITHER_IS_ALIAS = 3 }; - virtual ~DualViewBase() {} + virtual ~DualViewImplBase() {} virtual void syncHost() = 0; virtual void syncDevice() = 0; + virtual void toStridedMemRef(StridedMemRefTypeBase* vp_out) = 0; bool modified_host = false; bool modified_device = false; - std::shared_ptr parent; + std::shared_ptr parent; AliasStatus alias_status; - void setParent(const std::shared_ptr& parent_) + void setParent(const std::shared_ptr& parent_) { this->parent = parent_; } }; template - struct DualViewImpl : public DualViewBase + struct DualViewImpl : public DualViewImplBase { using HostView = Kokkos::View; using DeviceView = Kokkos::View; + using HostMemRefType = StridedMemRefType; static constexpr bool deviceAccessesHost = Kokkos::SpaceAccessibility::accessible; static constexpr bool hostAccessesDevice = Kokkos::SpaceAccessibility::accessible; @@ -317,24 +297,28 @@ namespace LAPIS return device_view.stride(dim); } - KeepAlive* keepAliveHost() { - if(alias_status != AliasStatus::HOST_IS_ALIAS) { - return keepAlive(host_view); - }else{ - return keepAlive(device_view); - } + void toStridedMemRef(StridedMemRefTypeBase* out) { + syncHost(); + *static_cast(out) = viewToStridedMemref(host_view); } DeviceView device_view; HostView host_view; }; + struct DualViewBase + { + virtual void toStridedMemRef(StridedMemRefTypeBase* out) = 0; + virtual ~DualViewBase() {} + }; + template - struct DualView + struct DualView : public DualViewBase { using ImplType = DualViewImpl; using DeviceView = typename ImplType::DeviceView; using HostView = typename ImplType::HostView; + using HostMemRefType = typename ImplType::HostMemRefType; std::shared_ptr impl; bool syncHostWhenDestroyed = false; @@ -354,6 +338,11 @@ namespace LAPIS impl->setParent(impl); } + void toStridedMemRef(StridedMemRefTypeBase* out) + { + impl->toStridedMemRef(out); + } + template DualView(const V& v) { static_assert(std::is_same_v, @@ -371,11 +360,11 @@ namespace LAPIS impl->setParent(parent.impl->parent); } - ~DualView() { + virtual ~DualView() { if(syncHostWhenDestroyed) syncHost(); - DualViewBase* parent = impl->parent.get(); + DualViewImplBase* parent = impl->parent.get(); impl.reset(); - // All DualViewBases keep a shared reference to themselves, so + // All DualViewImplBases keep a shared reference to themselves, so // parent always keeps a shared_ptr to itself. This would normally // prevent the parent destructor ever being called. // @@ -440,10 +429,57 @@ namespace LAPIS void syncHostOnDestroy() { syncHostWhenDestroyed = true; } + }; + + template + struct PythonParameter : public PythonParameterBase + { + DV toView() { + switch(wrapper_type) + { + case STRIDED_MEMREF_TYPE: + return stridedMemrefToView(*static_cast(smr)); + break; + + case DUALVIEW_TYPE: + return *dynamic_cast(view); + break; + + default: + assert(false); + + // In case asserts are turned off, initialize to nullptr to make it easier to debug + DV* ret = nullptr; + return *ret; + }; + } + + PythonParameter(const DV& dv) + { + wrapper_type = DUALVIEW_TYPE; + rank = DV::HostView::rank; + view = new DV(dv); + } - KeepAlive* keepAliveHost() + PythonParameter(const PythonParameter& other) { - return impl->keepAliveHost(); + wrapper_type = other.wrapper_type; + rank = other.rank; + if(wrapper_type == DUALVIEW_TYPE) { + view = new DV(other.view); + }else if(wrapper_type == STRIDED_MEMREF_TYPE) { + smr = new typename DV::HostMemRefType(static_cast(other.smr)); + } + } + + ~PythonParameter() + { + if(wrapper_type == DUALVIEW_TYPE) + { + delete static_cast(view); + }else if(wrapper_type == STRIDED_MEMREF_TYPE) { + delete static_cast(smr); + } } }; diff --git a/mlir/lib/Target/KokkosCpp/LAPISSupportFormatted.hpp b/mlir/lib/Target/KokkosCpp/LAPISSupportFormatted.hpp index 4e147c2d..7b4a5222 100644 --- a/mlir/lib/Target/KokkosCpp/LAPISSupportFormatted.hpp +++ b/mlir/lib/Target/KokkosCpp/LAPISSupportFormatted.hpp @@ -4,8 +4,12 @@ "#include \n" "#include \n" "\n" +"struct StridedMemRefTypeBase\n" +"{\n" +"};\n" +"\n" "template \n" -"struct StridedMemRefType {\n" +"struct StridedMemRefType : public StridedMemRefTypeBase {\n" " T *basePtr;\n" " T *data;\n" " int64_t offset;\n" @@ -15,67 +19,6 @@ "\n" "namespace LAPIS\n" "{\n" -" // KeepAlive structure keeps a reference to Kokkos::Views which\n" -" // are returned to Python. Since it\'s difficult to transfer ownership of a\n" -" // Kokkos::View\'s memory to numpy, we just have the Kokkos::View maintain ownership\n" -" // and return an unmanaged numpy array to Python.\n" -" //\n" -" // All these views will be deallocated during lapis_finalize to avoid leaking.\n" -" // The downside is that if a function is called many times,\n" -" // all its results are kept in memory at the same time.\n" -" struct KeepAlive\n" -" {\n" -" virtual ~KeepAlive() {}\n" -" };\n" -"\n" -" template\n" -" struct KeepAliveT : public KeepAlive\n" -" {\n" -" // Make a shallow-copy of val\n" -" KeepAliveT(const T& val) : p(new T(val)) {}\n" -" std::unique_ptr p;\n" -" };\n" -"\n" -" template\n" -" KeepAlive* keepAlive(const T& val)\n" -" {\n" -" return new KeepAliveT(val);\n" -" }\n" -"\n" -" template \n" -" struct MemRefType\n" -" {\n" -" StridedMemRefType smr;\n" -" KeepAlive* keepAliveHandle;\n" -" };\n" -"\n" -" using TeamPolicy = Kokkos::TeamPolicy<>;\n" -" using TeamMember = typename TeamPolicy::member_type;\n" -"\n" -" template\n" -" StridedMemRefType viewToStridedMemref(const V& v)\n" -" {\n" -" StridedMemRefType smr;\n" -" smr.basePtr = v.data();\n" -" smr.data = v.data();\n" -" smr.offset = 0;\n" -" for(int i = 0; i < int(V::rank); i++)\n" -" {\n" -" smr.sizes[i] = v.extent(i);\n" -" smr.strides[i] = v.stride(i);\n" -" }\n" -" return smr;\n" -" }\n" -"\n" -" template\n" -" MemRefType viewToLapisMemref(const V& v, KeepAlive* keepAliveHandle)\n" -" {\n" -" MemRefType lmr;\n" -" lmr.smr = viewToStridedMemref(v);\n" -" lmr.keepAliveHandle = keepAliveHandle;\n" -" return lmr;\n" -" }\n" -"\n" " template\n" " V stridedMemrefToView(const StridedMemRefType& smr)\n" " {\n" @@ -133,6 +76,41 @@ " return V(&smr.data[smr.offset], layout);\n" " }\n" "\n" +" struct PythonParameterBase\n" +" {\n" +" enum WrapperType : int32_t {\n" +" EMPTY_TYPE = 0,\n" +" STRIDED_MEMREF_TYPE = 1,\n" +" DUALVIEW_TYPE = 2\n" +" };\n" +"\n" +" WrapperType wrapper_type;\n" +" int32_t rank;\n" +"\n" +" union {\n" +" struct StridedMemRefTypeBase* smr;\n" +" struct DualViewBase* view;\n" +" };\n" +" };\n" +"\n" +" using TeamPolicy = Kokkos::TeamPolicy<>;\n" +" using TeamMember = typename TeamPolicy::member_type;\n" +"\n" +" template\n" +" StridedMemRefType viewToStridedMemref(const V& v)\n" +" {\n" +" StridedMemRefType smr;\n" +" smr.basePtr = v.data();\n" +" smr.data = v.data();\n" +" smr.offset = 0;\n" +" for(int i = 0; i < int(V::rank); i++)\n" +" {\n" +" smr.sizes[i] = v.extent(i);\n" +" smr.strides[i] = v.stride(i);\n" +" }\n" +" return smr;\n" +" }\n" +"\n" " // DualView design\n" " // - DualView is a shallow object with a shared_ptr to a DualViewImpl.\n" " // - DualViewImpl has the actual host and device views as members\n" @@ -144,7 +122,7 @@ " // - Assume that any DualView\'s parent is contiguous, and can be deep-copied between h and d\n" " // - All DualViews with the same parent share the parent\'s modify flags\n" " //\n" -" struct DualViewBase\n" +" struct DualViewImplBase\n" " {\n" " enum AliasStatus\n" " {\n" @@ -154,25 +132,27 @@ " NEITHER_IS_ALIAS = 3\n" " };\n" "\n" -" virtual ~DualViewBase() {}\n" +" virtual ~DualViewImplBase() {}\n" " virtual void syncHost() = 0;\n" " virtual void syncDevice() = 0;\n" +" virtual void toStridedMemRef(StridedMemRefTypeBase* vp_out) = 0;\n" " bool modified_host = false;\n" " bool modified_device = false;\n" -" std::shared_ptr parent;\n" +" std::shared_ptr parent;\n" " AliasStatus alias_status;\n" "\n" -" void setParent(const std::shared_ptr& parent_)\n" +" void setParent(const std::shared_ptr& parent_)\n" " {\n" " this->parent = parent_;\n" " }\n" " };\n" "\n" " template\n" -" struct DualViewImpl : public DualViewBase\n" +" struct DualViewImpl : public DualViewImplBase\n" " {\n" " using HostView = Kokkos::View;\n" " using DeviceView = Kokkos::View;\n" +" using HostMemRefType = StridedMemRefType;\n" "\n" " static constexpr bool deviceAccessesHost = Kokkos::SpaceAccessibility::accessible;\n" " static constexpr bool hostAccessesDevice = Kokkos::SpaceAccessibility::accessible;\n" @@ -317,24 +297,28 @@ " return device_view.stride(dim);\n" " }\n" "\n" -" KeepAlive* keepAliveHost() {\n" -" if(alias_status != AliasStatus::HOST_IS_ALIAS) {\n" -" return keepAlive(host_view);\n" -" }else{\n" -" return keepAlive(device_view);\n" -" }\n" +" void toStridedMemRef(StridedMemRefTypeBase* out) {\n" +" syncHost();\n" +" *static_cast(out) = viewToStridedMemref(host_view);\n" " }\n" "\n" " DeviceView device_view;\n" " HostView host_view;\n" " };\n" "\n" +" struct DualViewBase\n" +" {\n" +" virtual void toStridedMemRef(StridedMemRefTypeBase* out) = 0;\n" +" virtual ~DualViewBase() {}\n" +" };\n" +"\n" " template\n" -" struct DualView\n" +" struct DualView : public DualViewBase\n" " {\n" " using ImplType = DualViewImpl;\n" " using DeviceView = typename ImplType::DeviceView;\n" " using HostView = typename ImplType::HostView;\n" +" using HostMemRefType = typename ImplType::HostMemRefType;\n" "\n" " std::shared_ptr impl;\n" " bool syncHostWhenDestroyed = false;\n" @@ -354,6 +338,11 @@ " impl->setParent(impl);\n" " }\n" "\n" +" void toStridedMemRef(StridedMemRefTypeBase* out)\n" +" {\n" +" impl->toStridedMemRef(out);\n" +" }\n" +"\n" " template\n" " DualView(const V& v) {\n" " static_assert(std::is_same_v,\n" @@ -371,11 +360,11 @@ " impl->setParent(parent.impl->parent);\n" " }\n" "\n" -" ~DualView() {\n" +" virtual ~DualView() {\n" " if(syncHostWhenDestroyed) syncHost();\n" -" DualViewBase* parent = impl->parent.get();\n" +" DualViewImplBase* parent = impl->parent.get();\n" " impl.reset();\n" -" // All DualViewBases keep a shared reference to themselves, so\n" +" // All DualViewImplBases keep a shared reference to themselves, so\n" " // parent always keeps a shared_ptr to itself. This would normally\n" " // prevent the parent destructor ever being called.\n" " //\n" @@ -440,10 +429,57 @@ " void syncHostOnDestroy() {\n" " syncHostWhenDestroyed = true;\n" " }\n" +" };\n" +"\n" +" template\n" +" struct PythonParameter : public PythonParameterBase\n" +" {\n" +" DV toView() {\n" +" switch(wrapper_type)\n" +" {\n" +" case STRIDED_MEMREF_TYPE:\n" +" return stridedMemrefToView(*static_cast(smr));\n" +" break;\n" +"\n" +" case DUALVIEW_TYPE:\n" +" return *dynamic_cast(view);\n" +" break;\n" +"\n" +" default:\n" +" assert(false);\n" +"\n" +" // In case asserts are turned off, initialize to nullptr to make it easier to debug\n" +" DV* ret = nullptr;\n" +" return *ret;\n" +" };\n" +" }\n" +"\n" +" PythonParameter(const DV& dv)\n" +" {\n" +" wrapper_type = DUALVIEW_TYPE;\n" +" rank = DV::HostView::rank;\n" +" view = new DV(dv);\n" +" }\n" "\n" -" KeepAlive* keepAliveHost()\n" +" PythonParameter(const PythonParameter& other)\n" " {\n" -" return impl->keepAliveHost();\n" +" wrapper_type = other.wrapper_type;\n" +" rank = other.rank;\n" +" if(wrapper_type == DUALVIEW_TYPE) {\n" +" view = new DV(other.view);\n" +" }else if(wrapper_type == STRIDED_MEMREF_TYPE) {\n" +" smr = new typename DV::HostMemRefType(static_cast(other.smr));\n" +" }\n" +" }\n" +"\n" +" ~PythonParameter()\n" +" {\n" +" if(wrapper_type == DUALVIEW_TYPE)\n" +" {\n" +" delete static_cast(view);\n" +" }else if(wrapper_type == STRIDED_MEMREF_TYPE) {\n" +" delete static_cast(smr);\n" +" }\n" " }\n" " };\n" "\n" diff --git a/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp b/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp index 3bcf79b5..3cd6d876 100644 --- a/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp +++ b/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp @@ -2278,10 +2278,10 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func auto retType = ftype.getResult(i); if(auto memrefType = dyn_cast(retType)) { - os << "LAPIS::MemRefType<"; - if (failed(emitter.emitType(loc, memrefType.getElementType()))) - return functionOp.emitError("Failed to emit result type as LAPIS::MemRefType"); - os << ", " << memrefType.getShape().size() << ">** ret" << i; + os << "LAPIS::PythonParameter<"; + if (failed(emitter.emitMemrefType(loc, memrefType, kokkos::MemorySpace::DualView))) + return functionOp.emitError("Failed to emit result type as DualView"); + os << ">** ret" << i; } else { @@ -2299,13 +2299,14 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func auto paramType = ftype.getInput(i); if(auto memrefType = dyn_cast(paramType)) { - os << "StridedMemRefType<"; - if (failed(emitter.emitType(loc, memrefType.getElementType()))) - return functionOp.emitError("Failed to emit param type as StridedMemRefType"); - os << ", " << memrefType.getShape().size() << ">* param" << i; + os << "LAPIS::PythonParameter<"; + if (failed(emitter.emitMemrefType(loc, memrefType, kokkos::MemorySpace::DualView))) + return functionOp.emitError("Failed to emit param type as DualView"); + os << ">* param" << i << "_wrapper"; } else { + //TODO: Handle structs appropriately bool isStruct = isa(paramType); // Structs are passed by const reference if(isStruct) { @@ -2327,7 +2328,11 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func //os << "std::cout << \"Starting MLIR function on process \" << getpid() << '\\n';\n"; //os << "std::cout << \"Optionally attach debugger now, then press to continue: \";\n"; //os << "std::cin.get();\n"; - //Construct an unmanaged, LayoutRight Kokkos::View for each memref input parameter. + //Wrap each parameter in a PythonParameter wrapper. If the parameter is a + //numpy array, the functions that use the parameters will create an unmanaged + //Kokkos::view. If the parameter was already a PythonParameter wrapper, it + //will be passed through. + // //Note: stridedMemrefToView with LayoutRight will check the strides at runtime, //and the python wrapper will use numpy.require to deep-copy the data to the correct //layout if it's not already. @@ -2337,10 +2342,7 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func auto memrefType = dyn_cast(paramType); if(memrefType) { - os << "auto param" << i << "_smr = LAPIS::stridedMemrefToView<"; - if(failed(emitter.emitMemrefType(loc, memrefType, kokkos::MemorySpace::Host))) - return functionOp.emitError("Failed to emit memref type as host view"); - os << ">(*param" << i << ");\n"; + os << "auto param" << i << " = param" << i << "_wrapper->toView();\n"; } } // Emit the call @@ -2355,7 +2357,7 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func auto memrefType = dyn_cast(paramType); if(memrefType) { - os << "param" << i << "_smr"; + os << "param" << i; } else { @@ -2372,18 +2374,12 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func auto memrefType = dyn_cast(retType); if(memrefType) { - if(numResults == size_t(1)) - os << "results.syncHost();\n"; - else - os << "std::get<" << i << ">(results).syncHost();\n"; - os << "**ret" << i << " = LAPIS::viewToLapisMemref("; + os << "new (*ret" << i << ") LAPIS::PythonParameter("; if(numResults == size_t(1)) os << "results"; else os << "std::get<" << i << ">(results)"; - os << ".host_view(), results.keepAliveHost());\n"; - // Keep the host view alive until lapis_finalize() is called. - // Otherwise it would be deallocated as soon as this function returns. + os << ");\n"; } else { @@ -2472,8 +2468,7 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func // NOTE: numpy.zeros(shape, dtype=...) already defaults to LayoutRight (and probably most other functions) // so in practice this shouldn't usually trigger a deep-copy. auto& py_os = emitter.py_ostream(); - //NOTE: this function is a member of the module's class, but py_os is already indented to write methods. - py_os << "def " << funcName << "(self, "; + py_os << "def " << funcName << "("; for(size_t i = 0; i < numParams; i++) { if(i != 0) @@ -2550,7 +2545,7 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func std::string numpyDType = getNumpyType(memrefType.getElementType()); if(!numpyDType.size()) return functionOp.emitError("Could not determine corresponding numpy type for memref element type"); - py_os << "param" << i << " = numpy.require(param" << i << ", dtype=" << numpyDType << ", requirements=['C'])\n"; + py_os << "param" << i << " = wrap_array_parameter(param" << i << ", dtype=" << numpyDType << ")\n"; } else if(auto structType = dyn_cast(paramType)) { // Expect this parameter to be a tuple with the correct structure. Flatten it to a numpy array. @@ -2565,7 +2560,7 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func int flatIdx = 0; genStructFlatten("param" + std::to_string(i), "param_flat" + std::to_string(i), flatIdx, structType); // Replace original param with flattened version, as we don't need original anymore - py_os << "param" << i << " = param_flat" << i << "\n"; + py_os << "param" << i << " = wrap_array_parameter(param_flat" << i << ", dtype=" << numpyDType << ")\n"; } else { // Ensure scalars have the correct type. @@ -2584,7 +2579,7 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func if(auto memrefType = dyn_cast(retType)) { int rank = memrefType.hasRank() ? memrefType.getShape().size() : 1; - py_os << "ret" << i << " = _make_lapis_memref(self, " << rank << ", " << getCtypesType(memrefType.getElementType()) << ")()\n"; + py_os << "ret" << i << " = ParameterWrapper.empty(" << getCtypesType(memrefType.getElementType()) << ")\n"; } else if(isa(retType)) { @@ -2599,7 +2594,7 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func std::string numpyDType = getNumpyType(elem); if(!numpyDType.size()) return functionOp.emitError("Could not determine corresponding numpy type for result scalar type"); - py_os << "ret" << i << " = numpy.zeros(" << size << ", dtype=" << numpyDType << ")\n"; + py_os << "ret" << i << " = ParameterWrapper.empty(" << numpyDType << ")\n"; } else { @@ -2611,7 +2606,7 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func } } // Generate the native call. It always returns void. - py_os << "self.libHandle.py_" << funcName << "("; + py_os << "libHandle.py_" << funcName << "("; // Outputs go first for(size_t i = 0; i < numResults; i++) { @@ -2647,7 +2642,7 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func else if(isa(paramType)) { //Numpy array (or a scalar from a numpy array) - py_os << "ctypes.pointer(rt.get_ranked_memref_descriptor(param" << i << "))"; + py_os << "ctypes.pointer(param" << i << ")"; } else if(isa(paramType)) { @@ -2676,7 +2671,7 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, func::FuncOp func } else if(isa(retType)) { - py_os << "ret" << i << ".asnumpy()"; + py_os << "ret" << i; } else if(auto structType = dyn_cast(retType)) { int idx = 0; @@ -3646,6 +3641,8 @@ LogicalResult KokkosCppEmitter::emitOperation(Operation &op, bool trailingSemico if(auto memrefType = dyn_cast(result.getType())) { if(kokkos::getMemSpace(result) == kokkos::MemorySpace::DualView) { if(skipPrint || !trailingSemicolon) { + std::cerr << "skipPrint=" << skipPrint << std::endl; + std::cerr << "trailingSemicolon=" << trailingSemicolon << std::endl; return op.emitOpError("op produced at least one DualView, but op was emitted in a context where we can't declare v_d and v_h views"); } declareDeviceHostViews(result); @@ -3657,6 +3654,19 @@ LogicalResult KokkosCppEmitter::emitOperation(Operation &op, bool trailingSemico LogicalResult KokkosCppEmitter::emitInitAndFinalize(bool finalizeKokkos = true) { + os << "extern \"C\" void getHostData(StridedMemRefTypeBase* out, LAPIS::PythonParameterBase* in)\n"; + os << "{\n"; + os << " assert(in->wrapper_type == LAPIS::PythonParameterBase::DUALVIEW_TYPE);\n"; + os << " in->view->toStridedMemRef(out);\n"; + os << "}\n"; + os << "\n"; + + os << "extern \"C\" void freeDualView(LAPIS::DualViewBase* handle)\n"; + os << "{\n"; + os << " delete handle;\n"; + os << "}\n"; + os << "\n"; + os << "extern \"C\" void lapis_initialize()\n"; os << "{\n"; os.indent(); @@ -3744,10 +3754,6 @@ LogicalResult KokkosCppEmitter::emitInitAndFinalize(bool finalizeKokkos = true) os << "Kokkos::finalize();\n"; os.unindent(); os << "}\n\n"; - os << "extern \"C\" void freeKeepAlive(LAPIS::KeepAlive* handle)\n"; - os << "{\n"; - os << " delete handle;\n"; - os << "}\n"; return success(); } @@ -3767,59 +3773,99 @@ void KokkosCppEmitter::emitCppBoilerplate() void KokkosCppEmitter::emitPythonBoilerplate() { + *py_os << "import atexit\n"; *py_os << "import ctypes\n"; - *py_os << "import types\n"; - *py_os << "import numpy\n"; + *py_os << "import enum\n"; *py_os << "import functools\n"; + *py_os << "import os.path\n"; *py_os << "import sys\n"; + *py_os << "import types\n\n"; + *py_os << "import numpy\n"; *py_os << "from mlir import runtime as rt\n"; + *py_os << "import os.path\n"; *py_os << "\n"; - *py_os << "@functools.cache\n"; - *py_os << "def _make_lapis_memref(module, rank, ctypes_type):\n"; - *py_os << " mlir_memref = rt.make_nd_memref_descriptor(rank, ctypes_type)\n"; + *py_os << "dirpath = os.path.dirname(os.path.abspath(__file__))\n"; + *py_os << "modpath = os.path.join(dirpath, \"build\", f\"lib{__name__}_module.so\")\n"; + *py_os << "if not os.path.isfile(modpath):\n"; + *py_os << " modpath = os.path.join(dirpath, \"build\", f\"lib{__name__}_module.dylib\")\n"; + *py_os << "libHandle = ctypes.CDLL(modpath)\n"; + *py_os << "libHandle.lapis_initialize()\n"; + *py_os << "atexit.register(libHandle.lapis_finalize)\n"; *py_os << "\n"; - *py_os << " class LapisMemref(ctypes.Structure):\n"; - *py_os << " _ctype = ctypes_type\n"; + *py_os << "class ParameterWrapperType(enum.Enum):\n"; + *py_os << " EMPTY_TYPE = 0\n"; + *py_os << " STRIDED_MEMREF_TYPE = 1\n"; + *py_os << " DUALVIEW_TYPE = 2\n"; *py_os << "\n"; - *py_os << " _fields_ = [\n"; - *py_os << " ('smr', mlir_memref),\n"; - *py_os << " ('keepAliveHandle', ctypes.c_void_p)\n"; - *py_os << " ]\n"; + *py_os << "class ParameterWrapper(ctypes.Structure):\n"; + *py_os << " _fields_ = [\n"; + *py_os << " ('wrapper_type', ctypes.c_int32),\n"; + *py_os << " ('rank', ctypes.c_int32),\n"; + *py_os << " ('ptr', ctypes.c_void_p),\n"; + *py_os << " ]\n"; *py_os << "\n"; - *py_os << " def asctypes(self):\n"; - *py_os << " size = sum((size-1) for size in self.smr.shape) + self.smr.offset\n"; - *py_os << " buffer_type = self._ctype * size\n"; - *py_os << " ret = ctypes.cast(self.smr.aligned, ctypes.POINTER(buffer_type)).contents\n"; - *py_os << " ret.base = self\n"; - *py_os << " return ret\n"; + *py_os << " @classmethod\n"; + *py_os << " def build(cls, wrapper_type, ptr, dtype, rank, base=None):\n"; + *py_os << " ret = cls()\n"; + *py_os << " ret.wrapper_type = wrapper_type.value\n"; + *py_os << " ret.ptr = ctypes.cast(ptr, ctypes.c_void_p)\n"; + *py_os << " ret.rank = rank\n"; + *py_os << " ret.base = base #ties lifespan of base to this object\n"; + *py_os << " ret._ctype = numpy.ctypeslib.as_ctypes_type(dtype)\n"; + *py_os << " return ret\n"; *py_os << "\n"; - *py_os << " def asnumpy(self):\n"; - *py_os << " carray = self.asctypes()\n"; - *py_os << " obj = numpy.frombuffer(carray, dtype=self._ctype, offset=self.smr.offset * ctypes.sizeof(self._ctype))\n"; - *py_os << " ret = numpy.lib.stride_tricks.as_strided(\n"; - *py_os << " obj[self.smr.offset:],\n"; - *py_os << " shape=numpy.ctypeslib.as_array(self.smr.shape),\n"; - *py_os << " strides=numpy.ctypeslib.as_array(self.smr.strides) * obj.itemsize\n"; - *py_os << " )\n"; - *py_os << " return ret\n"; + *py_os << " @classmethod\n"; + *py_os << " def empty(cls, dtype, rank=0):\n"; + *py_os << " ret = cls()\n"; + *py_os << " ret.wrapper_type = ParameterWrapperType.EMPTY_TYPE.value\n"; + *py_os << " ret.ptr = ctypes.c_void_p(0)\n"; + *py_os << " ret.rank = rank\n"; + *py_os << " ret._ctype = numpy.ctypeslib.as_ctypes_type(dtype)\n"; + *py_os << " return ret\n"; *py_os << "\n"; - *py_os << " def __del__(self):\n"; - *py_os << " module.libHandle.freeKeepAlive(ctypes.c_void_p(self.keepAliveHandle))\n"; + *py_os << " def asmemref(self):\n"; + *py_os << " ret_type = rt.make_nd_memref_descriptor(self.rank, self._ctype)\n"; + *py_os << " if self.wrapper_type == ParameterWrapperType.STRIDED_MEMREF_TYPE.value:\n"; + *py_os << " ret = ctypes.cast(self.ptr, ctypes.POINTER(ret_type)).contents\n"; + *py_os << " elif self.wrapper_type == ParameterWrapperType.DUALVIEW_TYPE.value:\n"; + *py_os << " ret = ret_type()\n"; + *py_os << " libHandle.getHostData(ctypes.pointer(ret), ctypes.pointer(self))\n"; + *py_os << " ret.base = self # ties lifespan of this object to strided memref ret\n"; + *py_os << " return ret\n"; *py_os << "\n"; - *py_os << " return LapisMemref\n"; + *py_os << " def asctypes(self):\n"; + *py_os << " smr = self.asmemref()\n"; + *py_os << " size = sum((size-1) for size in smr.shape) + smr.offset\n"; + *py_os << " buffer_type = self._ctype * size\n"; + *py_os << " ret = ctypes.cast(smr.aligned, ctypes.POINTER(buffer_type)).contents\n"; + *py_os << " ret.base = self # ties lifespan of this object to ctypes array ret\n"; + *py_os << " return ret\n"; *py_os << "\n"; - *py_os << "class LAPISModule:\n"; - *py_os << " def __init__(self, libPath):\n"; - *py_os << " self.libHandle = ctypes.CDLL(libPath)\n"; - *py_os << " self.libHandle.lapis_initialize()\n"; + *py_os << " def asnumpy(self):\n"; + *py_os << " smr = self.asmemref()\n"; + *py_os << " carray = self.asctypes()\n"; + *py_os << " # numpy ties lifespan of carray to numpy arrays created by frombuffer\n"; + *py_os << " obj = numpy.frombuffer(carray, dtype=self._ctype, offset=smr.offset * ctypes.sizeof(self._ctype))\n"; + *py_os << " ret = numpy.lib.stride_tricks.as_strided(\n"; + *py_os << " obj[smr.offset:],\n"; + *py_os << " shape=numpy.ctypeslib.as_array(smr.shape),\n"; + *py_os << " strides=numpy.ctypeslib.as_array(smr.strides) * obj.itemsize\n"; + *py_os << " )\n"; + *py_os << " return ret\n"; *py_os << "\n"; *py_os << " def __del__(self):\n"; - *py_os << " self.libHandle.lapis_finalize()\n"; + *py_os << " if self.wrapper_type == ParameterWrapperType.DUALVIEW_TYPE.value:\n"; + *py_os << " libHandle.freeDualView(ctypes.c_void_p(self.ptr))\n"; + *py_os << "\n"; + *py_os << "def wrap_array_parameter(param, dtype):\n"; + *py_os << " if isinstance(param, numpy.ndarray):\n"; + *py_os << " param = numpy.require(param, dtype=dtype, requirements=['C'])\n"; + *py_os << " ptr = ctypes.pointer(rt.get_ranked_memref_descriptor(param))\n"; + *py_os << " return ParameterWrapper.build(ParameterWrapperType.STRIDED_MEMREF_TYPE, ptr, dtype, param.ndim, base=param)\n"; + *py_os << " else:\n"; + *py_os << " return param\n"; *py_os << "\n"; - //From here, only function wrappers are emitted. - //These are class members so indent all of them now. - py_os->indent(); } LogicalResult KokkosCppEmitter::emitType(Location loc, Type type, bool forSparseRuntime) { diff --git a/python/lapis/KokkosBackend.py b/python/lapis/KokkosBackend.py index 9eaf4e79..db107a89 100644 --- a/python/lapis/KokkosBackend.py +++ b/python/lapis/KokkosBackend.py @@ -61,10 +61,7 @@ def compile_kokkos_to_native(self, moduleRoot, linkSparseSupportLib): buildOut = subprocess.run(['make'], cwd=buildDir, shell=True) sys.path.insert(0, moduleRoot) lapis = __import__(self.package_name) - if os.path.isfile(buildDir + "/lib" + self.package_name + "_module.so"): - return lapis.LAPISModule(buildDir + "/lib" + self.package_name + "_module.so") - if os.path.isfile(buildDir + "/lib" + self.package_name + "_module.dylib"): - return lapis.LAPISModule(buildDir + "/lib" + self.package_name + "_module.dylib") + return lapis def run_cli(self, app, flags, stdin): appAbsolute = which(app) From 1f4d57cf22b08e372c254bc21ad5070382d152ec Mon Sep 17 00:00:00 2001 From: Jonathan Roose Date: Wed, 23 Jul 2025 13:16:34 -0700 Subject: [PATCH 4/8] Fixing device_view () problem --- mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp b/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp index 3cd6d876..14626cfa 100644 --- a/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp +++ b/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp @@ -823,7 +823,7 @@ static LogicalResult printOperation(KokkosCppEmitter &emitter, emitter << "Kokkos::deep_copy("; if(failed(emitter.emitValue(op.getTarget()))) return failure(); - emitter << "." << (isDevice ? "device" : "host") << "_view, "; + emitter << "." << (isDevice ? "device" : "host") << "_view(), "; if(failed(emitter.emitValue(op.getSource()))) return failure(); emitter << ");"; From aeb010c85c02836eb20cb3fb687bf47db1c9a2aa Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Mon, 4 Aug 2025 15:01:02 -0600 Subject: [PATCH 5/8] Revert removing declareHostDeviceViews Signed-off-by: Brian Kelley --- mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp b/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp index 9129367c..2cb9db6c 100644 --- a/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp +++ b/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp @@ -4044,6 +4044,18 @@ LogicalResult KokkosCppEmitter::emitOperation(Operation &op, bool trailingSemico if(!skipPrint) { *this << (trailingSemicolon ? ";\n" : "\n"); } + // If op produced any DualView typed memrefs, + // declare variables for its host and device views + for(auto result : op.getResults()) { + if(auto memrefType = dyn_cast(result.getType())) { + if(kokkos::getMemSpace(result) == kokkos::MemorySpace::DualView) { + if(skipPrint || !trailingSemicolon) { + return op.emitOpError("op produced at least one DualView, but op was emitted in a context where we can't declare v_d and v_h views"); + } + declareDeviceHostViews(result); + } + } + } return success(); } From 88fd5390dc82fb7680f0a33c6cea40051deddb56 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Mon, 4 Aug 2025 15:06:32 -0600 Subject: [PATCH 6/8] Uncomment check for impl == nullptr in ~DualView This change still applies since DualView::deallocate still resets impl. Signed-off-by: Brian Kelley --- mlir/lib/Target/KokkosCpp/LAPISSupport.hpp | 2 +- mlir/lib/Target/KokkosCpp/LAPISSupportFormatted.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Target/KokkosCpp/LAPISSupport.hpp b/mlir/lib/Target/KokkosCpp/LAPISSupport.hpp index fc2be167..a803bd9b 100644 --- a/mlir/lib/Target/KokkosCpp/LAPISSupport.hpp +++ b/mlir/lib/Target/KokkosCpp/LAPISSupport.hpp @@ -361,7 +361,7 @@ namespace LAPIS } virtual ~DualView() { - //if(!impl) return; + if(!impl) return; if(syncHostWhenDestroyed) syncHost(); DualViewImplBase* parent = impl->parent.get(); impl.reset(); diff --git a/mlir/lib/Target/KokkosCpp/LAPISSupportFormatted.hpp b/mlir/lib/Target/KokkosCpp/LAPISSupportFormatted.hpp index ccf9344f..a53f16e1 100644 --- a/mlir/lib/Target/KokkosCpp/LAPISSupportFormatted.hpp +++ b/mlir/lib/Target/KokkosCpp/LAPISSupportFormatted.hpp @@ -361,7 +361,7 @@ " }\n" "\n" " virtual ~DualView() {\n" -" //if(!impl) return;\n" +" if(!impl) return;\n" " if(syncHostWhenDestroyed) syncHost();\n" " DualViewImplBase* parent = impl->parent.get();\n" " impl.reset();\n" From 901d48e68572855e0077fa1ed7953e61b89964f7 Mon Sep 17 00:00:00 2001 From: Jonathan Roose Date: Thu, 7 Aug 2025 11:34:53 -0700 Subject: [PATCH 7/8] Fixing bug in sparse_axpy caused by dual views --- examples/sparse_axpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/sparse_axpy.py b/examples/sparse_axpy.py index 79715fd9..a21a0dde 100644 --- a/examples/sparse_axpy.py +++ b/examples/sparse_axpy.py @@ -60,7 +60,7 @@ def check_axpy(module_kokkos, v1_pos, v1_inds, v1_vals, v2_pos, v2_inds, v2_vals [result, actual_nnz] = module_kokkos.sparse_axpy(v1, v2) print("Test case result:") module_kokkos.print_sparse_vec(result) - result = module_kokkos.sparse_to_dense(result) + result = module_kokkos.sparse_to_dense(result).asnumpy() if correct_nnz != actual_nnz: print("Failed: result nonzero count incorrect") return False From 04906e2a8e3884b8bc1fa0ce8dd0d793a64f1b6f Mon Sep 17 00:00:00 2001 From: Jonathan Roose Date: Thu, 7 Aug 2025 12:09:18 -0700 Subject: [PATCH 8/8] Fixing issues with asnumpy in new examples and merging missed fix for issue76 --- examples/bspmm.py | 2 +- examples/csrv_softmax.py | 2 +- examples/hitting_times.py | 1 + examples/issue76.py | 2 +- .../Target/KokkosCpp/TranslateToKokkosCpp.cpp | 18 +++++++++--------- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/examples/bspmm.py b/examples/bspmm.py index 0f51b24c..e101e47f 100644 --- a/examples/bspmm.py +++ b/examples/bspmm.py @@ -100,7 +100,7 @@ def main(): backend = KokkosBackend.KokkosBackend(decompose_tensors=True, parallel_strategy=par, index_instance=instance, num_instances=len(parStrats)) instance += 1 module_kokkos = backend.compile(moduleText) - C_kokkos = module_kokkos.pte_local_bspmm(rowptrs, colinds, values, ((m, n, b), (m+1, nnz, nnz*b)), B) + C_kokkos = module_kokkos.pte_local_bspmm(rowptrs, colinds, values, ((m, n, b), (m+1, nnz, nnz*b)), B).asnumpy() # For debugging: print the CSRV formatted matrix # module_kokkos.print_csrv(rowptrs, colinds, values, ((m, n, b), (m+1, nnz, nnz*b))) if np.allclose(C_gold, C_kokkos): diff --git a/examples/csrv_softmax.py b/examples/csrv_softmax.py index 31e459f7..8647aba3 100644 --- a/examples/csrv_softmax.py +++ b/examples/csrv_softmax.py @@ -155,7 +155,7 @@ def main(): # For debugging: print the CSRV formatted matrix #module_kokkos.print_csrv(A) result = module_kokkos.pte_softmax(A) - resultDense = module_kokkos.csrv_to_dense(result) + resultDense = module_kokkos.csrv_to_dense(result).asnumpy() print("Result (converted to dense): ") print(resultDense) if checkResult is None: diff --git a/examples/hitting_times.py b/examples/hitting_times.py index d8074a2d..4c5a0712 100644 --- a/examples/hitting_times.py +++ b/examples/hitting_times.py @@ -212,6 +212,7 @@ def main(): (mom1, mom2) = module_kokkos.mht(A.indptr, A.indices, A.data, ((n, n), (n + 1, nnz, nnz)), mask, D, 0.99, 1e-10, 20) # Normalize 2nd moment mom2_norm = module_kokkos.normalize_mom2(mom1, mom2) + mom1, mom2_norm = mom1.asnumpy(), mom2_norm.asnumpy() print("1st moment:", mom1) print("2nd moment:", mom2_norm) mom1_gold = [9.9999999999999957e+01, 9.9999999999999957e+01, 1.9947875961498600e+01, 0, 1.8542105566249749e+01, 1.9909372734041774e+01, 9.9999999999999957e+01, 1.9970947033662114e+01, 2.0196661010469668e+01, 1.4624345260746775e+01, 1.8252877036565582e+01, 1.9213516388393590e+01] diff --git a/examples/issue76.py b/examples/issue76.py index e9476c48..e2841329 100644 --- a/examples/issue76.py +++ b/examples/issue76.py @@ -69,7 +69,7 @@ def main(): module_kokkos.print_dcsr(A_dcsr) [result, rank, nnz] = module_kokkos.column_sums(A_dcsr) # Convert the sparse vector result to dense to check the output - result_dense = module_kokkos.sparse_vec_to_dense(result) + result_dense = module_kokkos.sparse_vec_to_dense(result).asnumpy() print("Results: ", rank, nnz, result_dense) print("Correct result: ", 1, gold_nnz, gold) diff --git a/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp b/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp index eb67676a..31927cba 100644 --- a/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp +++ b/mlir/lib/Target/KokkosCpp/TranslateToKokkosCpp.cpp @@ -4070,16 +4070,16 @@ LogicalResult KokkosCppEmitter::emitOperation(Operation &op, bool trailingSemico return failure(); if(!skipPrint) { *this << (trailingSemicolon ? ";\n" : "\n"); - } - // If op produced any DualView typed memrefs, - // declare variables for its host and device views - for(auto result : op.getResults()) { - if(auto memrefType = dyn_cast(result.getType())) { - if(kokkos::getMemSpace(result) == kokkos::MemorySpace::DualView) { - if(skipPrint || !trailingSemicolon) { - return op.emitOpError("op produced at least one DualView, but op was emitted in a context where we can't declare v_d and v_h views"); + // If op produced any DualView typed memrefs, + // declare variables for its host and device views + for(auto result : op.getResults()) { + if(auto memrefType = dyn_cast(result.getType())) { + if(kokkos::getMemSpace(result) == kokkos::MemorySpace::DualView) { + if(skipPrint || !trailingSemicolon) { + return op.emitOpError("op produced at least one DualView, but op was emitted in a context where we can't declare v_d and v_h views"); + } + declareDeviceHostViews(result); } - declareDeviceHostViews(result); } } }