[CIR][AMDGPU] Add lowering for amdgcn div fixup builtins

skc7 · skc7 · commit 5ee5dd83bb04 · 2025-12-09T12:31:17.000+05:30
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
@@ -159,7 +159,8 @@ mlir::Value CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
   case AMDGPU::BI__builtin_amdgcn_div_fixup:
   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
   case AMDGPU::BI__builtin_amdgcn_div_fixuph: {
-    llvm_unreachable("div_fixup_* NYI");
+    return emitBuiltinWithOneOverloadedType<3>(expr, "amdgcn.div.fixup")
+        .getScalarVal();
   }
   case AMDGPU::BI__builtin_amdgcn_trig_preop:
   case AMDGPU::BI__builtin_amdgcn_trig_preopf: {
diff --git a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-vi.hip b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-vi.hip
@@ -0,0 +1,67 @@
+#include "../Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu tonga -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx900 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1010 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU builtins
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: @_Z18test_div_fixup_f16PDF16_DF16_DF16_DF16_
+// CIR: cir.llvm.intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.f16, !cir.f16, !cir.f16) -> !cir.f16
+// LLVM: define{{.*}} void @_Z18test_div_fixup_f16PDF16_DF16_DF16_DF16_
+// LLVM: call{{.*}} half @llvm.amdgcn.div.fixup.f16(half %{{.+}}, half %{{.+}}, half %{{.+}})
+// OGCG: define{{.*}} void @_Z18test_div_fixup_f16PDF16_DF16_DF16_DF16_
+// OGCG: call{{.*}} half @llvm.amdgcn.div.fixup.f16(half %{{.+}}, half %{{.+}}, half %{{.+}})
+__device__ void test_div_fixup_f16(_Float16* out, _Float16 a, _Float16 b, _Float16 c) {
+  *out = __builtin_amdgcn_div_fixuph(a, b, c);
+}
diff --git a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip
@@ -305,3 +305,23 @@ __device__ void test_readlane(int* out, int a, int b) {
 __device__ void test_readfirstlane(int* out, int a) {
   *out = __builtin_amdgcn_readfirstlane(a);
 }
+
+// CIR-LABEL: @_Z18test_div_fixup_f32Pffff
+// CIR: cir.llvm.intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.float, !cir.float, !cir.float) -> !cir.float
+// LLVM: define{{.*}} void @_Z18test_div_fixup_f32Pffff
+// LLVM: call{{.*}} float @llvm.amdgcn.div.fixup.f32(float %{{.+}}, float %{{.+}}, float %{{.+}})
+// OGCG: define{{.*}} void @_Z18test_div_fixup_f32Pffff
+// OGCG: call{{.*}} float @llvm.amdgcn.div.fixup.f32(float %{{.+}}, float %{{.+}}, float %{{.+}})
+__device__ void test_div_fixup_f32(float* out, float a, float b, float c) {
+  *out = __builtin_amdgcn_div_fixupf(a, b, c);
+}
+
+// CIR-LABEL: @_Z18test_div_fixup_f64Pdddd
+// CIR: cir.llvm.intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.double, !cir.double, !cir.double) -> !cir.double
+// LLVM: define{{.*}} void @_Z18test_div_fixup_f64Pdddd
+// LLVM: call{{.*}} double @llvm.amdgcn.div.fixup.f64(double %{{.+}}, double %{{.+}}, double %{{.+}})
+// OGCG: define{{.*}} void @_Z18test_div_fixup_f64Pdddd
+// OGCG: call{{.*}} double @llvm.amdgcn.div.fixup.f64(double %{{.+}}, double %{{.+}}, double %{{.+}})
+__device__ void test_div_fixup_f64(double* out, double a, double b, double c) {
+  *out = __builtin_amdgcn_div_fixup(a, b, c);
+}
diff --git a/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-vi.cl b/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-vi.cl
@@ -0,0 +1,67 @@
+#include "../Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu tonga -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx900 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1010 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1012 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu tonga -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx900 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1010 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1012 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu tonga -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx900 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx1010 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx1012 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU builtins
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: @test_div_fixup_f16
+// CIR: cir.llvm.intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.f16, !cir.f16, !cir.f16) -> !cir.f16
+// LLVM: define{{.*}} void @test_div_fixup_f16
+// LLVM: call{{.*}} half @llvm.amdgcn.div.fixup.f16(half %{{.+}}, half %{{.+}}, half %{{.+}})
+// OGCG: define{{.*}} void @test_div_fixup_f16
+// OGCG: call{{.*}} half @llvm.amdgcn.div.fixup.f16(half %{{.+}}, half %{{.+}}, half %{{.+}})
+__device__ void test_div_fixup_f16(_Float16* out, _Float16 a, _Float16 b, _Float16 c) {
+  *out = __builtin_amdgcn_div_fixuph(a, b, c);
+}
diff --git a/clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl b/clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl
@@ -318,3 +318,25 @@ void test_readlane(global int* out, int a, int b) {
 void test_readfirstlane(global int* out, int a) {
   *out = __builtin_amdgcn_readfirstlane(a);
 }
+
+// CIR-LABEL: @test_div_fixup_f32
+// CIR: cir.llvm.intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.float, !cir.float, !cir.float) -> !cir.float
+// LLVM: define{{.*}} void @test_div_fixup_f32
+// LLVM: call{{.*}} float @llvm.amdgcn.div.fixup.f32(float %{{.+}}, float %{{.+}}, float %{{.+}})
+// OGCG: define{{.*}} void @test_div_fixup_f32
+// OGCG: call{{.*}} float @llvm.amdgcn.div.fixup.f32(float %{{.+}}, float %{{.+}}, float %{{.+}})
+void test_div_fixup_f32(global float* out, float a, float b, float c)
+{
+  *out = __builtin_amdgcn_div_fixupf(a, b, c);
+}
+
+// CIR-LABEL: @test_div_fixup_f64
+// CIR: cir.llvm.intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.double, !cir.double, !cir.double) -> !cir.double
+// LLVM: define{{.*}} void @test_div_fixup_f64
+// LLVM: call{{.*}} double @llvm.amdgcn.div.fixup.f64(double %{{.+}}, double %{{.+}}, double %{{.+}})
+// OGCG: define{{.*}} void @test_div_fixup_f64
+// OGCG: call{{.*}} double @llvm.amdgcn.div.fixup.f64(double %{{.+}}, double %{{.+}}, double %{{.+}})
+void test_div_fixup_f64(global double* out, double a, double b, double c)
+{
+  *out = __builtin_amdgcn_div_fixup(a, b, c);
+}