Skip to content

Commit 751f98f

Browse files
artagnonfhahn
authored andcommitted
[VPlan] Introduce m_Cmp; match more compares (llvm#154771)
Extend [Specific]Cmp_match to handle floating-point compares, and introduce m_Cmp that matches both integer and floating-point compares. Use it in simplifyRecipe to match and simplify the general case of compares. The change has necessitated a bugfix in VPReplicateRecipe::execute. (cherry picked from commit 66be00d)
1 parent 8f44611 commit 751f98f

File tree

6 files changed

+153
-73
lines changed

6 files changed

+153
-73
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7032,12 +7032,12 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
70327032
if (Instruction *UI = GetInstructionForCost(&R)) {
70337033
// If we adjusted the predicate of the recipe, the cost in the legacy
70347034
// cost model may be different.
7035-
if (auto *WidenCmp = dyn_cast<VPWidenRecipe>(&R)) {
7036-
if ((WidenCmp->getOpcode() == Instruction::ICmp ||
7037-
WidenCmp->getOpcode() == Instruction::FCmp) &&
7038-
WidenCmp->getPredicate() != cast<CmpInst>(UI)->getPredicate())
7039-
return true;
7040-
}
7035+
using namespace VPlanPatternMatch;
7036+
CmpPredicate Pred;
7037+
if (match(&R, m_Cmp(Pred, m_VPValue(), m_VPValue())) &&
7038+
cast<VPRecipeWithIRFlags>(R).getPredicate() !=
7039+
cast<CmpInst>(UI)->getPredicate())
7040+
return true;
70417041
SeenInstrs.insert(UI);
70427042
}
70437043
}

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -806,6 +806,9 @@ class VPIRFlags {
806806

807807
GEPNoWrapFlags getGEPNoWrapFlags() const { return GEPFlags; }
808808

809+
/// Returns true if the recipe has a comparison predicate.
810+
bool hasPredicate() const { return OpType == OperationType::Cmp; }
811+
809812
/// Returns true if the recipe has fast-math flags.
810813
bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
811814

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,99 @@ m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1) {
461461
return m_BinaryOr<Op0_t, Op1_t, /*Commutative*/ true>(Op0, Op1);
462462
}
463463

464+
/// Cmp_match is a variant of BinaryRecipe_match that also binds the comparison
465+
/// predicate. Opcodes must either be Instruction::ICmp or Instruction::FCmp, or
466+
/// both.
467+
template <typename Op0_t, typename Op1_t, unsigned... Opcodes>
468+
struct Cmp_match {
469+
static_assert((sizeof...(Opcodes) == 1 || sizeof...(Opcodes) == 2) &&
470+
"Expected one or two opcodes");
471+
static_assert(
472+
((Opcodes == Instruction::ICmp || Opcodes == Instruction::FCmp) && ...) &&
473+
"Expected a compare instruction opcode");
474+
475+
CmpPredicate *Predicate = nullptr;
476+
Op0_t Op0;
477+
Op1_t Op1;
478+
479+
Cmp_match(CmpPredicate &Pred, const Op0_t &Op0, const Op1_t &Op1)
480+
: Predicate(&Pred), Op0(Op0), Op1(Op1) {}
481+
Cmp_match(const Op0_t &Op0, const Op1_t &Op1) : Op0(Op0), Op1(Op1) {}
482+
483+
bool match(const VPValue *V) const {
484+
auto *DefR = V->getDefiningRecipe();
485+
return DefR && match(DefR);
486+
}
487+
488+
bool match(const VPRecipeBase *V) const {
489+
if ((m_Binary<Opcodes>(Op0, Op1).match(V) || ...)) {
490+
if (Predicate)
491+
*Predicate = cast<VPRecipeWithIRFlags>(V)->getPredicate();
492+
return true;
493+
}
494+
return false;
495+
}
496+
};
497+
498+
/// SpecificCmp_match is a variant of Cmp_match that matches the comparison
499+
/// predicate, instead of binding it.
500+
template <typename Op0_t, typename Op1_t, unsigned... Opcodes>
501+
struct SpecificCmp_match {
502+
const CmpPredicate Predicate;
503+
Op0_t Op0;
504+
Op1_t Op1;
505+
506+
SpecificCmp_match(CmpPredicate Pred, const Op0_t &LHS, const Op1_t &RHS)
507+
: Predicate(Pred), Op0(LHS), Op1(RHS) {}
508+
509+
bool match(const VPValue *V) const {
510+
CmpPredicate CurrentPred;
511+
return Cmp_match<Op0_t, Op1_t, Opcodes...>(CurrentPred, Op0, Op1)
512+
.match(V) &&
513+
CmpPredicate::getMatching(CurrentPred, Predicate);
514+
}
515+
};
516+
517+
template <typename Op0_t, typename Op1_t>
518+
inline Cmp_match<Op0_t, Op1_t, Instruction::ICmp> m_ICmp(const Op0_t &Op0,
519+
const Op1_t &Op1) {
520+
return Cmp_match<Op0_t, Op1_t, Instruction::ICmp>(Op0, Op1);
521+
}
522+
523+
template <typename Op0_t, typename Op1_t>
524+
inline Cmp_match<Op0_t, Op1_t, Instruction::ICmp>
525+
m_ICmp(CmpPredicate &Pred, const Op0_t &Op0, const Op1_t &Op1) {
526+
return Cmp_match<Op0_t, Op1_t, Instruction::ICmp>(Pred, Op0, Op1);
527+
}
528+
529+
template <typename Op0_t, typename Op1_t>
530+
inline SpecificCmp_match<Op0_t, Op1_t, Instruction::ICmp>
531+
m_SpecificICmp(CmpPredicate MatchPred, const Op0_t &Op0, const Op1_t &Op1) {
532+
return SpecificCmp_match<Op0_t, Op1_t, Instruction::ICmp>(MatchPred, Op0,
533+
Op1);
534+
}
535+
536+
template <typename Op0_t, typename Op1_t>
537+
inline Cmp_match<Op0_t, Op1_t, Instruction::ICmp, Instruction::FCmp>
538+
m_Cmp(const Op0_t &Op0, const Op1_t &Op1) {
539+
return Cmp_match<Op0_t, Op1_t, Instruction::ICmp, Instruction::FCmp>(Op0,
540+
Op1);
541+
}
542+
543+
template <typename Op0_t, typename Op1_t>
544+
inline Cmp_match<Op0_t, Op1_t, Instruction::ICmp, Instruction::FCmp>
545+
m_Cmp(CmpPredicate &Pred, const Op0_t &Op0, const Op1_t &Op1) {
546+
return Cmp_match<Op0_t, Op1_t, Instruction::ICmp, Instruction::FCmp>(
547+
Pred, Op0, Op1);
548+
}
549+
550+
template <typename Op0_t, typename Op1_t>
551+
inline SpecificCmp_match<Op0_t, Op1_t, Instruction::ICmp, Instruction::FCmp>
552+
m_SpecificCmp(CmpPredicate MatchPred, const Op0_t &Op0, const Op1_t &Op1) {
553+
return SpecificCmp_match<Op0_t, Op1_t, Instruction::ICmp, Instruction::FCmp>(
554+
MatchPred, Op0, Op1);
555+
}
556+
464557
template <typename Op0_t, typename Op1_t>
465558
using GEPLikeRecipe_match =
466559
BinaryRecipe_match<Op0_t, Op1_t, Instruction::GetElementPtr, false,

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2915,6 +2915,9 @@ static void scalarizeInstruction(const Instruction *Instr,
29152915
RepRecipe->applyFlags(*Cloned);
29162916
RepRecipe->applyMetadata(*Cloned);
29172917

2918+
if (RepRecipe->hasPredicate())
2919+
cast<CmpInst>(Cloned)->setPredicate(RepRecipe->getPredicate());
2920+
29182921
if (auto DL = RepRecipe->getDebugLoc())
29192922
State.setDebugLocFrom(DL);
29202923

llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -996,18 +996,16 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
996996
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = icmp ult i64 1, [[TMP0]]
997997
; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]]
998998
; TFA_INTERLEAVE: [[VECTOR_BODY]]:
999-
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[TMP19:.*]] ]
1000-
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[TMP19]] ]
1001-
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[TMP19]] ]
999+
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[TMP18:.*]] ]
1000+
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[TMP18]] ]
1001+
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[TMP18]] ]
10021002
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2]], align 8
10031003
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7:[0-9]+]]
10041004
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7]]
1005-
; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = fcmp ogt double [[TMP5]], 0.000000e+00
1006-
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = fcmp ogt double [[TMP6]], 0.000000e+00
1007-
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = xor i1 [[TMP7]], true
1008-
; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = xor i1 [[TMP8]], true
1009-
; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = select i1 [[ACTIVE_LANE_MASK]], i1 [[TMP9]], i1 false
1010-
; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], i1 [[TMP10]], i1 false
1005+
; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = fcmp ule double [[TMP5]], 0.000000e+00
1006+
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = fcmp ule double [[TMP6]], 0.000000e+00
1007+
; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = select i1 [[ACTIVE_LANE_MASK]], i1 [[TMP7]], i1 false
1008+
; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], i1 [[TMP8]], i1 false
10111009
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP11]], double 1.000000e+00, double 0.000000e+00
10121010
; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select i1 [[TMP12]], double 1.000000e+00, double 0.000000e+00
10131011
; TFA_INTERLEAVE-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], double [[PREDPHI3]], double [[PREDPHI]]
@@ -1016,11 +1014,11 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
10161014
; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = xor i1 [[TMP13]], true
10171015
; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = xor i1 [[TMP14]], true
10181016
; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
1019-
; TFA_INTERLEAVE-NEXT: br i1 [[TMP17]], label %[[BB18:.*]], label %[[TMP19]]
1020-
; TFA_INTERLEAVE: [[BB18]]:
1017+
; TFA_INTERLEAVE-NEXT: br i1 [[TMP17]], label %[[BB16:.*]], label %[[TMP18]]
1018+
; TFA_INTERLEAVE: [[BB16]]:
10211019
; TFA_INTERLEAVE-NEXT: store double [[SPEC_SELECT]], ptr [[P]], align 8
1022-
; TFA_INTERLEAVE-NEXT: br label %[[TMP19]]
1023-
; TFA_INTERLEAVE: [[TMP19]]:
1020+
; TFA_INTERLEAVE-NEXT: br label %[[TMP18]]
1021+
; TFA_INTERLEAVE: [[TMP18]]:
10241022
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
10251023
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 1
10261024
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = icmp ult i64 [[INDEX]], [[TMP3]]

llvm/test/Transforms/LoopVectorize/select-cmp.ll

Lines changed: 37 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -136,18 +136,14 @@ define i32 @select_const_i32_from_icmp(ptr %v, i64 %n) {
136136
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
137137
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4
138138
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4
139-
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 3
140-
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP9]], 3
141-
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP10]], 3
142-
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP11]], 3
143-
; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true
144-
; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true
145-
; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true
146-
; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true
147-
; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]]
148-
; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]]
149-
; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]]
150-
; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]]
139+
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP8]], 3
140+
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP9]], 3
141+
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP10]], 3
142+
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP11]], 3
143+
; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP12]]
144+
; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP13]]
145+
; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP14]]
146+
; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP15]]
151147
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
152148
; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
153149
; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -516,18 +512,14 @@ define i32 @select_i32_from_icmp(ptr %v, i32 %a, i32 %b, i64 %n) {
516512
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
517513
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4
518514
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4
519-
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 3
520-
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP9]], 3
521-
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP10]], 3
522-
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP11]], 3
523-
; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true
524-
; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true
525-
; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true
526-
; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true
527-
; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]]
528-
; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]]
529-
; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]]
530-
; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]]
515+
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP8]], 3
516+
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP9]], 3
517+
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP10]], 3
518+
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP11]], 3
519+
; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP12]]
520+
; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP13]]
521+
; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP14]]
522+
; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP15]]
531523
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
532524
; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
533525
; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -708,18 +700,14 @@ define i32 @select_const_i32_from_fcmp_fast(ptr %v, i64 %n) {
708700
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4
709701
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4
710702
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP7]], align 4
711-
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = fcmp fast ueq float [[TMP8]], 3.000000e+00
712-
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = fcmp fast ueq float [[TMP9]], 3.000000e+00
713-
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = fcmp fast ueq float [[TMP10]], 3.000000e+00
714-
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = fcmp fast ueq float [[TMP11]], 3.000000e+00
715-
; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true
716-
; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true
717-
; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true
718-
; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true
719-
; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]]
720-
; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]]
721-
; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]]
722-
; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]]
703+
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = fcmp fast one float [[TMP8]], 3.000000e+00
704+
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = fcmp fast one float [[TMP9]], 3.000000e+00
705+
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = fcmp fast one float [[TMP10]], 3.000000e+00
706+
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = fcmp fast one float [[TMP11]], 3.000000e+00
707+
; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP12]]
708+
; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP13]]
709+
; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP14]]
710+
; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP15]]
723711
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
724712
; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
725713
; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -900,18 +888,14 @@ define i32 @select_const_i32_from_fcmp(ptr %v, i64 %n) {
900888
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4
901889
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4
902890
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP7]], align 4
903-
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = fcmp ueq float [[TMP8]], 3.000000e+00
904-
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = fcmp ueq float [[TMP9]], 3.000000e+00
905-
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = fcmp ueq float [[TMP10]], 3.000000e+00
906-
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = fcmp ueq float [[TMP11]], 3.000000e+00
907-
; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true
908-
; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true
909-
; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true
910-
; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true
911-
; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]]
912-
; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]]
913-
; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]]
914-
; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]]
891+
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = fcmp one float [[TMP8]], 3.000000e+00
892+
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = fcmp one float [[TMP9]], 3.000000e+00
893+
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = fcmp one float [[TMP10]], 3.000000e+00
894+
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = fcmp one float [[TMP11]], 3.000000e+00
895+
; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP12]]
896+
; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP13]]
897+
; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP14]]
898+
; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP15]]
915899
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
916900
; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
917901
; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -1059,19 +1043,18 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) {
10591043
; CHECK-VF1IC4: [[VECTOR_PH]]:
10601044
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
10611045
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
1062-
; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = icmp eq i32 [[A]], 3
1063-
; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = xor i1 [[TMP0]], true
1046+
; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = icmp ne i32 [[A]], 3
10641047
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
10651048
; CHECK-VF1IC4: [[VECTOR_BODY]]:
10661049
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
10671050
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
10681051
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
10691052
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
10701053
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
1071-
; CHECK-VF1IC4-NEXT: [[TMP5]] = or i1 [[VEC_PHI]], [[TMP4]]
1072-
; CHECK-VF1IC4-NEXT: [[TMP6]] = or i1 [[VEC_PHI1]], [[TMP4]]
1073-
; CHECK-VF1IC4-NEXT: [[TMP7]] = or i1 [[VEC_PHI2]], [[TMP4]]
1074-
; CHECK-VF1IC4-NEXT: [[TMP8]] = or i1 [[VEC_PHI3]], [[TMP4]]
1054+
; CHECK-VF1IC4-NEXT: [[TMP5]] = or i1 [[VEC_PHI]], [[TMP0]]
1055+
; CHECK-VF1IC4-NEXT: [[TMP6]] = or i1 [[VEC_PHI1]], [[TMP0]]
1056+
; CHECK-VF1IC4-NEXT: [[TMP7]] = or i1 [[VEC_PHI2]], [[TMP0]]
1057+
; CHECK-VF1IC4-NEXT: [[TMP8]] = or i1 [[VEC_PHI3]], [[TMP0]]
10751058
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
10761059
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
10771060
; CHECK-VF1IC4-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]

0 commit comments

Comments
 (0)