From 5ee5dd83bb04b98bf622af3619f66ab3362a143b Mon Sep 17 00:00:00 2001 From: skc7 Date: Mon, 8 Dec 2025 22:17:14 +0530 Subject: [PATCH] [CIR][AMDGPU] Add lowering for amdgcn div fixup builtins --- clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp | 3 +- .../CIR/CodeGen/HIP/builtins-amdgcn-vi.hip | 67 +++++++++++++++++++ .../test/CIR/CodeGen/HIP/builtins-amdgcn.hip | 20 ++++++ .../CIR/CodeGen/OpenCL/builtins-amdgcn-vi.cl | 67 +++++++++++++++++++ .../CIR/CodeGen/OpenCL/builtins_amdgcn.cl | 22 ++++++ 5 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 clang/test/CIR/CodeGen/HIP/builtins-amdgcn-vi.hip create mode 100644 clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-vi.cl diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp index d4ac3251169b..7d40fb2717c3 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp @@ -159,7 +159,8 @@ mlir::Value CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId, case AMDGPU::BI__builtin_amdgcn_div_fixup: case AMDGPU::BI__builtin_amdgcn_div_fixupf: case AMDGPU::BI__builtin_amdgcn_div_fixuph: { - llvm_unreachable("div_fixup_* NYI"); + return emitBuiltinWithOneOverloadedType<3>(expr, "amdgcn.div.fixup") + .getScalarVal(); } case AMDGPU::BI__builtin_amdgcn_trig_preop: case AMDGPU::BI__builtin_amdgcn_trig_preopf: { diff --git a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-vi.hip b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-vi.hip new file mode 100644 index 000000000000..4a915d245f72 --- /dev/null +++ b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-vi.hip @@ -0,0 +1,67 @@ +#include "../Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu tonga -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx900 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1010 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1012 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test AMDGPU builtins +//===----------------------------------------------------------------------===// + +// CIR-LABEL: @_Z18test_div_fixup_f16PDF16_DF16_DF16_DF16_ +// CIR: cir.llvm.intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.f16, !cir.f16, !cir.f16) -> !cir.f16 +// LLVM: define{{.*}} void @_Z18test_div_fixup_f16PDF16_DF16_DF16_DF16_ +// LLVM: call{{.*}} half @llvm.amdgcn.div.fixup.f16(half %{{.+}}, half %{{.+}}, half %{{.+}}) +// OGCG: define{{.*}} void @_Z18test_div_fixup_f16PDF16_DF16_DF16_DF16_ +// OGCG: call{{.*}} half @llvm.amdgcn.div.fixup.f16(half %{{.+}}, half %{{.+}}, half %{{.+}}) +__device__ void test_div_fixup_f16(_Float16* out, _Float16 a, _Float16 b, _Float16 c) { + *out = __builtin_amdgcn_div_fixuph(a, b, c); +} diff --git a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip index b70f64e87c7d..5f3655d7d8da 100644 --- a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip +++ b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip @@ -305,3 +305,23 @@ __device__ void test_readlane(int* out, int a, int b) { __device__ void test_readfirstlane(int* out, int a) { *out = __builtin_amdgcn_readfirstlane(a); } + +// CIR-LABEL: @_Z18test_div_fixup_f32Pffff +// CIR: cir.llvm.intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.float, !cir.float, !cir.float) -> !cir.float +// LLVM: define{{.*}} void @_Z18test_div_fixup_f32Pffff +// LLVM: call{{.*}} float @llvm.amdgcn.div.fixup.f32(float %{{.+}}, float %{{.+}}, float %{{.+}}) +// OGCG: define{{.*}} void @_Z18test_div_fixup_f32Pffff +// OGCG: call{{.*}} float @llvm.amdgcn.div.fixup.f32(float %{{.+}}, float %{{.+}}, float %{{.+}}) +__device__ void test_div_fixup_f32(float* out, float a, float b, float c) { + *out = __builtin_amdgcn_div_fixupf(a, b, c); +} + +// CIR-LABEL: @_Z18test_div_fixup_f64Pdddd +// CIR: cir.llvm.intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.double, !cir.double, !cir.double) -> !cir.double +// LLVM: define{{.*}} void @_Z18test_div_fixup_f64Pdddd +// LLVM: call{{.*}} double @llvm.amdgcn.div.fixup.f64(double %{{.+}}, double %{{.+}}, double %{{.+}}) +// OGCG: define{{.*}} void @_Z18test_div_fixup_f64Pdddd +// OGCG: call{{.*}} double @llvm.amdgcn.div.fixup.f64(double %{{.+}}, double %{{.+}}, double %{{.+}}) +__device__ void test_div_fixup_f64(double* out, double a, double b, double c) { + *out = __builtin_amdgcn_div_fixup(a, b, c); +} diff --git a/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-vi.cl b/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-vi.cl new file mode 100644 index 000000000000..8e447a94f347 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-vi.cl @@ -0,0 +1,67 @@ +#include "../Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu tonga -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx900 -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1010 -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1012 -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu tonga -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx900 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1010 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1012 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \ +// RUN: -target-cpu tonga -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \ +// RUN: -target-cpu gfx900 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \ +// RUN: -target-cpu gfx1010 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \ +// RUN: -target-cpu gfx1012 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test AMDGPU builtins +//===----------------------------------------------------------------------===// + +// CIR-LABEL: @test_div_fixup_f16 +// CIR: cir.llvm.intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.f16, !cir.f16, !cir.f16) -> !cir.f16 +// LLVM: define{{.*}} void @test_div_fixup_f16 +// LLVM: call{{.*}} half @llvm.amdgcn.div.fixup.f16(half %{{.+}}, half %{{.+}}, half %{{.+}}) +// OGCG: define{{.*}} void @test_div_fixup_f16 +// OGCG: call{{.*}} half @llvm.amdgcn.div.fixup.f16(half %{{.+}}, half %{{.+}}, half %{{.+}}) +__device__ void test_div_fixup_f16(_Float16* out, _Float16 a, _Float16 b, _Float16 c) { + *out = __builtin_amdgcn_div_fixuph(a, b, c); +} diff --git a/clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl b/clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl index c602b7405155..691334345409 100644 --- a/clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl +++ b/clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl @@ -318,3 +318,25 @@ void test_readlane(global int* out, int a, int b) { void test_readfirstlane(global int* out, int a) { *out = __builtin_amdgcn_readfirstlane(a); } + +// CIR-LABEL: @test_div_fixup_f32 +// CIR: cir.llvm.intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.float, !cir.float, !cir.float) -> !cir.float +// LLVM: define{{.*}} void @test_div_fixup_f32 +// LLVM: call{{.*}} float @llvm.amdgcn.div.fixup.f32(float %{{.+}}, float %{{.+}}, float %{{.+}}) +// OGCG: define{{.*}} void @test_div_fixup_f32 +// OGCG: call{{.*}} float @llvm.amdgcn.div.fixup.f32(float %{{.+}}, float %{{.+}}, float %{{.+}}) +void test_div_fixup_f32(global float* out, float a, float b, float c) +{ + *out = __builtin_amdgcn_div_fixupf(a, b, c); +} + +// CIR-LABEL: @test_div_fixup_f64 +// CIR: cir.llvm.intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.double, !cir.double, !cir.double) -> !cir.double +// LLVM: define{{.*}} void @test_div_fixup_f64 +// LLVM: call{{.*}} double @llvm.amdgcn.div.fixup.f64(double %{{.+}}, double %{{.+}}, double %{{.+}}) +// OGCG: define{{.*}} void @test_div_fixup_f64 +// OGCG: call{{.*}} double @llvm.amdgcn.div.fixup.f64(double %{{.+}}, double %{{.+}}, double %{{.+}}) +void test_div_fixup_f64(global double* out, double a, double b, double c) +{ + *out = __builtin_amdgcn_div_fixup(a, b, c); +}