diff --git a/clang/runtime/dpct-rt/include/dpct/math.hpp b/clang/runtime/dpct-rt/include/dpct/math.hpp index e71f09c82af5..cbff39e477e3 100644 --- a/clang/runtime/dpct-rt/include/dpct/math.hpp +++ b/clang/runtime/dpct-rt/include/dpct/math.hpp @@ -2088,7 +2088,7 @@ class joint_matrix { /// \param [in] trans Indicates whether the matrix to be loaded transposed /// \param [in] mat The matrix index to be loaded template -void ldmatrix(uintptr_t addr, T *m, bool trans = false, unsigned mat = 0) { +inline void ldmatrix(uintptr_t addr, T *m, bool trans = false, unsigned mat = 0) { auto sg = sycl::ext::oneapi::this_work_item::get_sub_group(); int lane = sg.get_local_linear_id(); @@ -2165,7 +2165,7 @@ void ldmatrix(uintptr_t addr, T *m, bool trans = false, unsigned mat = 0) { /// to 2 b16 type elements. /// \param [in] trans Indicates whether the matrix to be loaded transposed template -void ldmatrix(uintptr_t addr, T *m1, T *m2, bool trans = false) { +inline void ldmatrix(uintptr_t addr, T *m1, T *m2, bool trans = false) { // Load 1st matrix ldmatrix(addr, m1, trans, 0); // Load 2nd matrix @@ -2207,7 +2207,7 @@ void ldmatrix(uintptr_t addr, T *m1, T *m2, bool trans = false) { /// to 2 b16 type elements. /// \param [in] trans Indicates whether the matrix to be loaded transposed template -void ldmatrix(uintptr_t addr, T *m1, T *m2, T *m3, T *m4, bool trans = false) { +inline void ldmatrix(uintptr_t addr, T *m1, T *m2, T *m3, T *m4, bool trans = false) { // Load 1st matrix ldmatrix(addr, m1, trans, 0); // Load 2nd matrix @@ -2248,7 +2248,7 @@ void ldmatrix(uintptr_t addr, T *m1, T *m2, T *m3, T *m4, bool trans = false) { /// \param [in] trans Indicates whether the matrix to be stored transposed /// \param [in] mat The matrix index to be stored template -void stmatrix(uintptr_t addr, T m, bool trans = false, unsigned mat = 0) { +inline void stmatrix(uintptr_t addr, T m, bool trans = false, unsigned mat = 0) { auto sg = sycl::ext::oneapi::this_work_item::get_sub_group(); int lane = sg.get_local_linear_id(); @@ -2325,7 +2325,7 @@ void stmatrix(uintptr_t addr, T m, bool trans = false, unsigned mat = 0) { /// to 2 b16 type elements. /// \param [in] trans Indicates whether the matrix to be stored transposed template -void stmatrix(uintptr_t addr, T m1, T m2, bool trans = false) { +inline void stmatrix(uintptr_t addr, T m1, T m2, bool trans = false) { // Store 1st matrix stmatrix(addr, m1, trans, 0); // Store 2nd matrix @@ -2367,7 +2367,7 @@ void stmatrix(uintptr_t addr, T m1, T m2, bool trans = false) { /// to 2 b16 type elements. /// \param [in] trans Indicates whether the matrix to be stored transposed template -void stmatrix(uintptr_t addr, T m1, T m2, T m3, T m4, bool trans = false) { +inline void stmatrix(uintptr_t addr, T m1, T m2, T m3, T m4, bool trans = false) { // Store 1st matrix stmatrix(addr, m1, trans, 0); // Store 2nd matrix @@ -2412,7 +2412,7 @@ template struct MMAType { /// \param [in] c_mat_frag The fragment of the input matrix C to be added with /// the result of A * B fragments template -void mma(volatile void **d_mat_frag, void *a_mat_frag, void *b_mat_frag, +inline void mma(volatile void **d_mat_frag, void *a_mat_frag, void *b_mat_frag, void *c_mat_frag) { auto d = reinterpret_cast(d_mat_frag); auto a = reinterpret_cast::PackType *>(a_mat_frag); @@ -2865,7 +2865,7 @@ void mma(volatile void **d_mat_frag, void *a_mat_frag, void *b_mat_frag, /// b16 type elements. /// \param [in] input: The register to store the matrix fragment. It refers to 2 b16 /// type elements. -void movmatrix(uint32_t &output, uint32_t &input) { +inline void movmatrix(uint32_t &output, uint32_t &input) { auto sg = sycl::ext::oneapi::this_work_item::get_sub_group(); int laneid = sg.get_local_linear_id();