Skip to content

Conversation

@nhaehnle
Copy link
Collaborator

@nhaehnle nhaehnle commented Dec 6, 2025

Create more canonical code that may even lead to slightly better
codegen.


Stack:

⚠️ Part of a stack created by spr. Merging this PR using the GitHub UI may have unexpected results.

@llvmbot
Copy link
Member

llvmbot commented Dec 6, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Nicolai Hähnle (nhaehnle)

Changes

Create more canonical code that may even lead to slightly better
codegen.


Stack:

  • [2/2] #170956 ⬅
  • [1/2] #170955

⚠️ Part of a stack created by spr. Merging this PR using the GitHub UI may have unexpected results.


Full diff: https://github.com/llvm/llvm-project/pull/170956.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp (+7-6)
  • (modified) llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll (+15-13)
  • (modified) llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll (+8-4)
  • (modified) llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll (+1-1)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index d8e8c8f024518..b79689c39ef84 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -461,13 +461,15 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
     return nullptr;
 
   Value *Offset = VarOffset.first;
-  auto *OffsetType = dyn_cast<IntegerType>(Offset->getType());
-  if (!OffsetType)
+  if (!isa<IntegerType>(Offset->getType()))
     return nullptr;
 
+  Offset = Builder.CreateSExtOrTrunc(Offset, Builder.getIntNTy(BW));
+  if (Offset != VarOffset.first)
+    NewInsts.push_back(cast<Instruction>(Offset));
+
   if (!OffsetQuot.isOne()) {
-    ConstantInt *ConstMul =
-        ConstantInt::get(Ctx, OffsetQuot.sext(OffsetType->getBitWidth()));
+    ConstantInt *ConstMul = ConstantInt::get(Ctx, OffsetQuot.sextOrTrunc(BW));
     Offset = Builder.CreateMul(Offset, ConstMul);
     if (Instruction *NewInst = dyn_cast<Instruction>(Offset))
       NewInsts.push_back(NewInst);
@@ -475,8 +477,7 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
   if (ConstOffset.isZero())
     return Offset;
 
-  ConstantInt *ConstIndex =
-      ConstantInt::get(Ctx, IndexQuot.sext(OffsetType->getBitWidth()));
+  ConstantInt *ConstIndex = ConstantInt::get(Ctx, IndexQuot.sextOrTrunc(BW));
   Value *IndexAdd = Builder.CreateAdd(Offset, ConstIndex);
   if (Instruction *NewInst = dyn_cast<Instruction>(IndexAdd))
     NewInsts.push_back(NewInst);
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
index 63622e67e7d0b..7b64d8728cc24 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
@@ -262,14 +262,15 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset(ptr %out) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <6 x i64> [[TMP12]], i64 3, i32 3
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <6 x i64> [[TMP13]], i64 4, i32 4
 ; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <6 x i64> [[TMP14]], i64 5, i32 5
-; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[SEL3]], 3
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <6 x i64> [[TMP15]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = trunc i64 [[SEL3]] to i32
+; CHECK-NEXT:    [[TMP16:%.*]] = mul i32 [[TMP7]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <6 x i64> [[TMP15]], i32 [[TMP16]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <3 x i64> poison, i64 [[TMP2]], i64 0
-; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP1]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <6 x i64> [[TMP15]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[TMP16]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <6 x i64> [[TMP15]], i32 [[TMP17]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <3 x i64> [[TMP3]], i64 [[TMP5]], i64 1
-; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP1]], 2
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <6 x i64> [[TMP15]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP16]], 2
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <6 x i64> [[TMP15]], i32 [[TMP18]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <3 x i64> [[TMP6]], i64 [[TMP8]], i64 2
 ; CHECK-NEXT:    [[ELEM:%.*]] = extractelement <3 x i64> [[TMP9]], i32 2
 ; CHECK-NEXT:    store i64 [[ELEM]], ptr [[OUT]], align 8
@@ -311,15 +312,16 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset_index(ptr %out)
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <6 x i64> [[TMP13]], i64 3, i32 3
 ; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <6 x i64> [[TMP14]], i64 4, i32 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <6 x i64> [[TMP15]], i64 5, i32 5
-; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[SEL3]], 3
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], 6
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP17:%.*]] = trunc i64 [[SEL3]] to i32
+; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP17]], 3
+; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP8]], 6
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <6 x i64> [[TMP16]], i32 [[TMP18]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <3 x i64> poison, i64 [[TMP3]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP2]], 1
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP19:%.*]] = add i32 [[TMP18]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <6 x i64> [[TMP16]], i32 [[TMP19]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <3 x i64> [[TMP4]], i64 [[TMP6]], i64 1
-; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[TMP2]], 2
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP20:%.*]] = add i32 [[TMP18]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <6 x i64> [[TMP16]], i32 [[TMP20]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <3 x i64> [[TMP7]], i64 [[TMP9]], i64 2
 ; CHECK-NEXT:    [[ELEM:%.*]] = extractelement <3 x i64> [[TMP10]], i32 2
 ; CHECK-NEXT:    store i64 [[ELEM]], ptr [[OUT]], align 8
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll
index a865bf5058d6a..7da441f2e79d2 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll
@@ -11,8 +11,10 @@ define amdgpu_kernel void @negative_index_byte(ptr %out, i64 %offset) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i8> [[TMP1]], i8 1, i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 2, i32 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i8> [[TMP3]], i8 3, i32 3
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET:%.*]], -1
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i8> [[TMP4]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[OFFSET:%.*]] to i32
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[OFFSET]] to i32
+; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[TMP8]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i8> [[TMP4]], i32 [[TMP7]]
 ; CHECK-NEXT:    store i8 [[TMP6]], ptr [[OUT:%.*]], align 1
 ; CHECK-NEXT:    ret void
 ;
@@ -39,8 +41,10 @@ define amdgpu_kernel void @negative_index_word(ptr %out, i64 %offset) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 1, i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 2, i32 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 3, i32 3
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET:%.*]], -1
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[OFFSET:%.*]] to i32
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[OFFSET]] to i32
+; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[TMP8]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 [[TMP7]]
 ; CHECK-NEXT:    store i32 [[TMP6]], ptr [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll
index 9fb73963153a2..aaec725f85890 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; RUN: opt -S -mtriple=amdgcn-- -data-layout=A5 -mcpu=fiji -passes=sroa,amdgpu-promote-alloca < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=fiji -passes=sroa,amdgpu-promote-alloca < %s | FileCheck -check-prefix=OPT %s
 
 ; GCN-LABEL: {{^}}float4_alloca_store4:
 ; OPT-LABEL: define amdgpu_kernel void @float4_alloca_store4

…)"

The second pass of promotion to vector can be quite simple. Reflect that
simplicity in the code for better maintainability.

v2:
- don't put placeholders into the SSAUpdater, and add a test that shows
  the problem

commit-id:d6d2255a
Create more canonical code that may even lead to slightly better
codegen.

commit-id:a3832fee
@nhaehnle nhaehnle force-pushed the users/nhaehnle/spr/main/a3832fee branch from 06702f0 to 67b208e Compare December 6, 2025 00:40
Base automatically changed from users/nhaehnle/spr/main/d6d2255a to main December 6, 2025 01:15
@nhaehnle nhaehnle merged commit 8dee997 into main Dec 6, 2025
10 checks passed
@nhaehnle nhaehnle deleted the users/nhaehnle/spr/main/a3832fee branch December 6, 2025 16:54
@llvm-ci
Copy link
Collaborator

llvm-ci commented Dec 6, 2025

LLVM Buildbot has detected a new failure on builder lldb-remote-linux-win running on as-builder-10 while building llvm at step 17 "test-check-lldb-api".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/197/builds/11605

Here is the relevant piece of the build log for the reference
Step 17 (test-check-lldb-api) failure: Test just built components: check-lldb-api completed (failure)
******************** TEST 'lldb-api :: functionalities/data-formatter/data-formatter-stl/generic/vector/TestDataFormatterStdVector.py' FAILED ********************
Script:
--
C:/Python312/python.exe C:/buildbot/as-builder-10/lldb-x-aarch64/llvm-project/lldb\test\API\dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=C:/buildbot/as-builder-10/lldb-x-aarch64/build/./lib --env LLVM_INCLUDE_DIR=C:/buildbot/as-builder-10/lldb-x-aarch64/build/include --env LLVM_TOOLS_DIR=C:/buildbot/as-builder-10/lldb-x-aarch64/build/./bin --arch aarch64 --build-dir C:/buildbot/as-builder-10/lldb-x-aarch64/build/lldb-test-build.noindex --lldb-module-cache-dir C:/buildbot/as-builder-10/lldb-x-aarch64/build/lldb-test-build.noindex/module-cache-lldb\lldb-api --clang-module-cache-dir C:/buildbot/as-builder-10/lldb-x-aarch64/build/lldb-test-build.noindex/module-cache-clang\lldb-api --executable C:/buildbot/as-builder-10/lldb-x-aarch64/build/./bin/lldb.exe --compiler C:/buildbot/as-builder-10/lldb-x-aarch64/build/./bin/clang.exe --dsymutil C:/buildbot/as-builder-10/lldb-x-aarch64/build/./bin/dsymutil.exe --make C:/ninja/make.exe --llvm-tools-dir C:/buildbot/as-builder-10/lldb-x-aarch64/build/./bin --lldb-obj-root C:/buildbot/as-builder-10/lldb-x-aarch64/build/tools/lldb --lldb-libs-dir C:/buildbot/as-builder-10/lldb-x-aarch64/build/./lib --cmake-build-type Release --platform-url connect://jetson-agx-0086.lab.llvm.org:1234 --platform-working-dir /home/ubuntu/lldb-tests --sysroot c:/buildbot/fs/jetson-agx-ubuntu --env ARCH_CFLAGS=-mcpu=cortex-a78 --platform-name remote-linux --skip-category=lldb-server C:\buildbot\as-builder-10\lldb-x-aarch64\llvm-project\lldb\test\API\functionalities\data-formatter\data-formatter-stl\generic\vector -p TestDataFormatterStdVector.py
--
Exit Code: 3221226356

Command Output (stdout):
--
lldb version 22.0.0git (https://github.com/llvm/llvm-project.git revision 8dee997a8558b460b82b23fb43b197d68258baac)
  clang revision 8dee997a8558b460b82b23fb43b197d68258baac
  llvm revision 8dee997a8558b460b82b23fb43b197d68258baac
Setting up remote platform 'remote-linux'

Connecting to remote platform 'remote-linux' at 'connect://jetson-agx-0086.lab.llvm.org:1234'...

Connected.

Setting remote platform working directory to '/home/ubuntu/lldb-tests'...

Skipping the following test categories: lldb-server, libc++, msvcstl, dsym, pdb, gmodules, debugserver, objc, lldb-dap


--
Command Output (stderr):
--
UNSUPPORTED: LLDB (C:\buildbot\as-builder-10\lldb-x-aarch64\build\bin\clang.exe-aarch64) :: test_libcxx_dsym (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_libcxx_dsym) (test case does not fall in any category of interest for this run) 

UNSUPPORTED: LLDB (C:\buildbot\as-builder-10\lldb-x-aarch64\build\bin\clang.exe-aarch64) :: test_libcxx_dwarf (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_libcxx_dwarf) (test case does not fall in any category of interest for this run) 

UNSUPPORTED: LLDB (C:\buildbot\as-builder-10\lldb-x-aarch64\build\bin\clang.exe-aarch64) :: test_libcxx_dwo (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_libcxx_dwo) (test case does not fall in any category of interest for this run) 

UNSUPPORTED: LLDB (C:\buildbot\as-builder-10\lldb-x-aarch64\build\bin\clang.exe-aarch64) :: test_libstdcxx_debug_dsym (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_libstdcxx_debug_dsym) (test case does not fall in any category of interest for this run) 

PASS: LLDB (C:\buildbot\as-builder-10\lldb-x-aarch64\build\bin\clang.exe-aarch64) :: test_libstdcxx_debug_dwarf (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_libstdcxx_debug_dwarf)

PASS: LLDB (C:\buildbot\as-builder-10\lldb-x-aarch64\build\bin\clang.exe-aarch64) :: test_libstdcxx_debug_dwo (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_libstdcxx_debug_dwo)

UNSUPPORTED: LLDB (C:\buildbot\as-builder-10\lldb-x-aarch64\build\bin\clang.exe-aarch64) :: test_libstdcxx_dsym (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_libstdcxx_dsym) (test case does not fall in any category of interest for this run) 

PASS: LLDB (C:\buildbot\as-builder-10\lldb-x-aarch64\build\bin\clang.exe-aarch64) :: test_libstdcxx_dwarf (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_libstdcxx_dwarf)

PASS: LLDB (C:\buildbot\as-builder-10\lldb-x-aarch64\build\bin\clang.exe-aarch64) :: test_libstdcxx_dwo (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_libstdcxx_dwo)

UNSUPPORTED: LLDB (C:\buildbot\as-builder-10\lldb-x-aarch64\build\bin\clang.exe-aarch64) :: test_msvcstl_dsym (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_msvcstl_dsym) (test case does not fall in any category of interest for this run) 

UNSUPPORTED: LLDB (C:\buildbot\as-builder-10\lldb-x-aarch64\build\bin\clang.exe-aarch64) :: test_msvcstl_dwarf (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_msvcstl_dwarf) (test case does not fall in any category of interest for this run) 

UNSUPPORTED: LLDB (C:\buildbot\as-builder-10\lldb-x-aarch64\build\bin\clang.exe-aarch64) :: test_msvcstl_dwo (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_msvcstl_dwo) (test case does not fall in any category of interest for this run) 

...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants