From 8f3d16905d75b07a933d01dc29677fe5867c1b3e Mon Sep 17 00:00:00 2001
From: Eli Friedman
Date: Mon, 21 Jun 2021 16:24:16 -0700
Subject: [PATCH] [ScalarEvolution] Ensure backedge-taken counts are not pointers.

A backedge-taken count doesn't refer to memory; returning a pointer type is nonsense. So make sure we always return an integer.

The obvious way to do this would be to just convert the operands of the icmp to integers, but that doesn't quite work out at the moment: isLoopEntryGuardedByCond currently gets confused by ptrtoint operations. So we perform the ptrtoint conversion late for lt/gt operations.

The test changes are mostly innocuous. The most interesting changes are more complex SCEV expressions of the form "(-1 * (ptrtoint i8* %ptr to i64)) + %ptr". This is expected: we can't fold this to zero because we need to preserve the pointer base.

The call to isLoopEntryGuardedByCond in howFarToZero is less precise because of ptrtoint operations; this shows up in the function pr46786_c26_char in ptrtoint.ll. Fixing it here would require more complex refactoring. It should eventually be fixed by future improvements to isImpliedCond.

See https://bugs.llvm.org/show_bug.cgi?id=46786 for context.

Differential Revision: https://reviews.llvm.org/D103656
---
 llvm/lib/Analysis/ScalarEvolution.cpp | 54 ++++++++-
 .../max-backedge-taken-count-guard-info.ll | 8 +-
 .../no-wrap-symbolic-becount.ll | 31 ++++--
 llvm/test/Analysis/ScalarEvolution/nsw.ll | 4 +-
 .../test/Analysis/ScalarEvolution/ptrtoint.ll | 56 +++++-----
 .../IndVarSimplify/2011-11-01-lftrptr.ll | 38 ++++---
 .../IndVarSimplify/eliminate-exit-no-dl.ll | 6 +-
 .../LoopIdiom/memset-debugify-remarks.ll | 11 +-
 llvm/test/Transforms/LoopReroll/ptrindvar.ll | 16 +--
 .../LoopVectorize/X86/cost-model-assert.ll | 103 ++++++++++--------
 .../LoopVectorize/pointer-induction.ll | 10 +-
 llvm/test/Transforms/LoopVectorize/pr45259.ll | 57 +++++-----
 .../PhaseOrdering/scev-custom-dl.ll | 2 +-
 13 files changed, 232 insertions(+), 164 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index cf57041c275b..f011020266c7 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1071,10 +1071,17 @@ const SCEV *ScalarEvolution::getLosslessPtrToIntExpr(const SCEV *Op,
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
     return S;
 
+  // It isn't legal for optimizations to construct new ptrtoint expressions
+  // for non-integral pointers.
+  if (getDataLayout().isNonIntegralPointerType(Op->getType()))
+    return getCouldNotCompute();
+
   Type *IntPtrTy = getDataLayout().getIntPtrType(Op->getType());
 
-  // We can only model ptrtoint if SCEV's effective (integer) type
+  // We can only trivially model ptrtoint if SCEV's effective (integer) type
   // is sufficiently wide to represent all possible pointer values.
+  // We could theoretically teach SCEV to truncate wider pointers, but
+  // that isn't implemented for now.
   if (getDataLayout().getTypeSizeInBits(getEffectiveSCEVType(Op->getType())) !=
       getDataLayout().getTypeSizeInBits(IntPtrTy))
     return getCouldNotCompute();
@@ -7527,6 +7534,10 @@ ScalarEvolution::ExitLimit::ExitLimit(
   for (auto *PredSet : PredSetList)
     for (auto *P : *PredSet)
       addPredicate(P);
+  assert((isa<SCEVCouldNotCompute>(E) || !E->getType()->isPointerTy()) &&
+         "Backedge count should be int");
+  assert((isa<SCEVCouldNotCompute>(M) || !M->getType()->isPointerTy()) &&
+         "Max backedge count should be int");
 }
 
 ScalarEvolution::ExitLimit::ExitLimit(
@@ -7952,6 +7963,16 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
   switch (Pred) {
   case ICmpInst::ICMP_NE: { // while (X != Y)
     // Convert to: while (X-Y != 0)
+    if (LHS->getType()->isPointerTy()) {
+      LHS = getLosslessPtrToIntExpr(LHS);
+      if (isa<SCEVCouldNotCompute>(LHS))
+        return LHS;
+    }
+    if (RHS->getType()->isPointerTy()) {
+      RHS = getLosslessPtrToIntExpr(RHS);
+      if (isa<SCEVCouldNotCompute>(RHS))
+        return RHS;
+    }
     ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit,
                                 AllowPredicates);
     if (EL.hasAnyInfo()) return EL;
@@ -11515,6 +11536,22 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
   }
 
   const SCEV *Start = IV->getStart();
+
+  // Preserve pointer-typed Start/RHS to pass to isLoopEntryGuardedByCond.
+  // Use integer-typed versions for actual computation.
+  const SCEV *OrigStart = Start;
+  const SCEV *OrigRHS = RHS;
+  if (Start->getType()->isPointerTy()) {
+    Start = getLosslessPtrToIntExpr(Start);
+    if (isa<SCEVCouldNotCompute>(Start))
+      return Start;
+  }
+  if (RHS->getType()->isPointerTy()) {
+    RHS = getLosslessPtrToIntExpr(RHS);
+    if (isa<SCEVCouldNotCompute>(RHS))
+      return RHS;
+  }
+
   const SCEV *End = RHS;
   // When the RHS is not invariant, we do not know the end bound of the loop and
   // cannot calculate the ExactBECount needed by ExitLimit. However, we can
@@ -11541,13 +11578,13 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
   // result is as above, and if not max(End,Start) is Start so we get a backedge
   // count of zero.
   const SCEV *BECount;
-  if (isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
+  if (isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(OrigStart, Stride), OrigRHS))
     BECount = BECountIfBackedgeTaken;
   else {
     // If we know that RHS >= Start in the context of loop, then we know that
     // max(RHS, Start) = RHS at this point.
     if (isLoopEntryGuardedByCond(
-            L, IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, RHS, Start))
+            L, IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, OrigRHS, OrigStart))
       End = RHS;
     else
       End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start);
@@ -11626,6 +11663,17 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
     End = IsSigned ? getSMinExpr(RHS, Start) : getUMinExpr(RHS, Start);
   }
 
+  if (Start->getType()->isPointerTy()) {
+    Start = getLosslessPtrToIntExpr(Start);
+    if (isa<SCEVCouldNotCompute>(Start))
+      return Start;
+  }
+  if (End->getType()->isPointerTy()) {
+    End = getLosslessPtrToIntExpr(End);
+    if (isa<SCEVCouldNotCompute>(End))
+      return End;
+  }
+
   const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride);
   APInt MaxStart = IsSigned ? getSignedRangeMax(Start)
diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
index 26b39d813c76..fa2f6bcbc9ab 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
@@ -910,13 +910,13 @@ define void @crash(i8* %ptr) {
 ; CHECK-NEXT: %incdec.ptr112 = getelementptr inbounds i8, i8* %text.addr.5, i64 -1
 ; CHECK-NEXT: --> {(-1 + null),+,-1}<%while.cond111> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %while.cond111: Computable, %while.body: Variant }
 ; CHECK-NEXT: %lastout.2271 = phi i8* [ %incdec.ptr126, %while.body125 ], [ %ptr, %while.end117 ]
-; CHECK-NEXT: --> {%ptr,+,1}<%while.body125> U: full-set S: full-set Exits: {(-2 + null),+,-1}<%while.cond111> LoopDispositions: { %while.body125: Computable }
+; CHECK-NEXT: --> {%ptr,+,1}<%while.body125> U: full-set S: full-set Exits: {(-2 + (-1 * (ptrtoint i8* %ptr to i64)) + %ptr),+,-1}<%while.cond111> LoopDispositions: { %while.body125: Computable }
 ; CHECK-NEXT: %incdec.ptr126 = getelementptr inbounds i8, i8* %lastout.2271, i64 1
-; CHECK-NEXT: --> {(1 + %ptr),+,1}<%while.body125> U: [1,0) S: [1,0) Exits: {(-1 + null),+,-1}<%while.cond111> LoopDispositions: { %while.body125: Computable }
+; CHECK-NEXT: --> {(1 + %ptr),+,1}<%while.body125> U: [1,0) S: [1,0) Exits: {(-1 + (-1 * (ptrtoint i8* %ptr to i64)) + %ptr),+,-1}<%while.cond111> LoopDispositions: { %while.body125: Computable }
 ; CHECK-NEXT: Determining loop execution counts for: @crash
-; CHECK-NEXT: Loop %while.body125: backedge-taken count is {(-2 + (-1 * %ptr) + null),+,-1}<%while.cond111>
+; CHECK-NEXT: Loop %while.body125: backedge-taken count is {(-2 + (-1 * (ptrtoint i8* %ptr to i64))),+,-1}<%while.cond111>
 ; CHECK-NEXT: Loop %while.body125: max backedge-taken count is -2
-; CHECK-NEXT: Loop %while.body125: Predicated backedge-taken count is {(-2 + (-1 * %ptr) + null),+,-1}<%while.cond111>
+; CHECK-NEXT: Loop %while.body125: Predicated backedge-taken count is {(-2 + (-1 * (ptrtoint i8* %ptr to i64))),+,-1}<%while.cond111>
 ; CHECK-NEXT: Predicates:
 ; CHECK: Loop %while.body125: Trip multiple is 1
 ; CHECK-NEXT: Loop %while.cond111: Unpredictable backedge-taken count.
diff --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll index 92f12a47a37d..0f5b28385366 100644 --- a/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll +++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll @@ -93,18 +93,20 @@ exit: ; preds = %loop } define void @pointer_iv_nowrap(i8* %startptr, i8* %endptr) local_unnamed_addr { -; CHECK-LABEL: Classifying expressions for: @pointer_iv_nowrap -; CHECK-NEXT: %init = getelementptr inbounds i8, i8* %startptr, i64 2000 -; CHECK-NEXT: --> (2000 + %startptr) U: [2000,0) S: [2000,0) -; CHECK-NEXT: %iv = phi i8* [ %init, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {(2000 + %startptr),+,1}<%loop> U: [2000,0) S: [2000,0) -; CHECK-NEXT: %iv.next = getelementptr inbounds i8, i8* %iv, i64 1 -; CHECK-NEXT: --> {(2001 + %startptr),+,1}<%loop> U: [2001,0) S: [2001,0) - -; CHECK-NEXT:Determining loop execution counts for: @pointer_iv_nowrap -; CHECK-NEXT:Loop %loop: backedge-taken count is (-2000 + (-1 * %startptr) + ((2000 + %startptr) umax %endptr)) -; CHECK-NEXT:Loop %loop: max backedge-taken count is -2001 -; CHECK-NEXT:Loop %loop: Predicated backedge-taken count is (-2000 + (-1 * %startptr) + ((2000 + %startptr) umax %endptr)) +; CHECK-LABEL: 'pointer_iv_nowrap' +; CHECK-NEXT: Classifying expressions for: @pointer_iv_nowrap +; CHECK-NEXT: %init = getelementptr inbounds i8, i8* %startptr, i64 2000 +; CHECK-NEXT: --> (2000 + %startptr) U: [2000,0) S: [2000,0) +; CHECK-NEXT: %iv = phi i8* [ %init, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {(2000 + %startptr),+,1}<%loop> U: [2000,0) S: [2000,0) Exits: ((-1 * (ptrtoint i8* %startptr to i64)) + ((2000 + (ptrtoint i8* %startptr to i64)) umax (ptrtoint i8* %endptr to i64)) + %startptr) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = getelementptr inbounds i8, i8* %iv, i64 1 +; CHECK-NEXT: --> {(2001 + %startptr),+,1}<%loop> U: [2001,0) S: [2001,0) Exits: (1 + (-1 * (ptrtoint i8* %startptr to i64)) + ((2000 + (ptrtoint i8* %startptr to i64)) umax (ptrtoint i8* %endptr to i64)) + %startptr) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @pointer_iv_nowrap +; CHECK-NEXT: Loop %loop: backedge-taken count is (-2000 + (-1 * (ptrtoint i8* %startptr to i64)) + ((2000 + (ptrtoint i8* %startptr to i64)) umax (ptrtoint i8* %endptr to i64))) +; CHECK-NEXT: Loop %loop: max backedge-taken count is -2001 +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (-2000 + (-1 * (ptrtoint i8* %startptr to i64)) + ((2000 + (ptrtoint i8* %startptr to i64)) umax (ptrtoint i8* %endptr to i64))) +; CHECK-NEXT: Predicates: +; CHECK: Loop %loop: Trip multiple is 1 ; entry: %init = getelementptr inbounds i8, i8* %startptr, i64 2000 @@ -119,3 +121,8 @@ loop: end: ret void } + +define i32 @dummy(i32 %start, i32* %p, i32* %q) { +entry: + ret i32 0 +} diff --git a/llvm/test/Analysis/ScalarEvolution/nsw.ll b/llvm/test/Analysis/ScalarEvolution/nsw.ll index b80b3ada315e..627d5c6c63bd 100644 --- a/llvm/test/Analysis/ScalarEvolution/nsw.ll +++ b/llvm/test/Analysis/ScalarEvolution/nsw.ll @@ -102,7 +102,7 @@ for.body.i.i: ; preds = %entry, %for.body.i. 
store i32 0, i32* %__first.addr.08.i.i, align 4 %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end br i1 %cmp.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.i.i -; CHECK: Loop %for.body.i.i: backedge-taken count is ((-4 + (-1 * %begin) + %end) /u 4) +; CHECK: Loop %for.body.i.i: backedge-taken count is ((-4 + (-1 * (ptrtoint i32* %begin to i64)) + (ptrtoint i32* %end to i64)) /u 4) ; CHECK: Loop %for.body.i.i: max backedge-taken count is 4611686018427387903 _ZSt4fillIPiiEvT_S1_RKT0_.exit: ; preds = %for.body.i.i, %entry ret void @@ -147,7 +147,7 @@ bb7: ; preds = %bb1 } ; CHECK-LABEL: PR12376 -; CHECK: --> {(4 + %arg),+,4}<%bb2>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: (4 + (4 * ((-1 + (-1 * %arg) + ((4 + %arg) umax %arg1)) /u 4)) + %arg) +; CHECK: --> {(4 + %arg),+,4}<%bb2>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: (4 + (4 * ((-1 + (-1 * (ptrtoint i32* %arg to i64)) + ((4 + (ptrtoint i32* %arg to i64)) umax (ptrtoint i32* %arg1 to i64))) /u 4)) + %arg) define void @PR12376(i32* nocapture %arg, i32* nocapture %arg1) { bb: br label %bb2 diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll index 6f5785f42b92..f4e716409ea8 100644 --- a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll +++ b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll @@ -380,25 +380,25 @@ define void @pr46786_c26_char(i8* %arg, i8* %arg1, i8* %arg2) { ; X64-NEXT: %i4 = ptrtoint i8* %arg to i64 ; X64-NEXT: --> (ptrtoint i8* %arg to i64) U: full-set S: full-set ; X64-NEXT: %i7 = phi i8* [ %arg, %bb3 ], [ %i14, %bb6 ] -; X64-NEXT: --> {%arg,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + %arg1) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {%arg,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint i8* %arg to i64)) + (ptrtoint i8* %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i8 = load i8, i8* %i7, align 1 ; X64-NEXT: --> %i8 U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i9 = ptrtoint i8* %i7 to i64 -; X64-NEXT: --> {(ptrtoint i8* %arg to i64),+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * %arg) + (ptrtoint i8* %arg to i64) + %arg1) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {(ptrtoint i8* %arg to i64),+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (ptrtoint i8* %arg1 to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i10 = sub i64 %i9, %i4 -; X64-NEXT: --> {0,+,1}<%bb6> U: [0,-1) S: [0,-1) Exits: (-1 + (-1 * %arg) + %arg1) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {0,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint i8* %arg to i64)) + (ptrtoint i8* %arg1 to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i11 = getelementptr inbounds i8, i8* %arg2, i64 %i10 -; X64-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * %arg) + %arg1 + %arg2) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint i8* %arg to i64)) + (ptrtoint i8* %arg1 to i64) + %arg2) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i12 = load i8, i8* %i11, align 1 ; X64-NEXT: --> %i12 U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i13 = add i8 %i12, %i8 ; X64-NEXT: --> (%i12 + %i8) U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i14 = getelementptr inbounds i8, i8* %i7, i64 1 -; X64-NEXT: --> {(1 + %arg),+,1}<%bb6> U: [1,0) S: [1,0) Exits: %arg1 LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> 
{(1 + %arg),+,1}<%bb6> U: [1,0) S: [1,0) Exits: ((-1 * (ptrtoint i8* %arg to i64)) + (ptrtoint i8* %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable } ; X64-NEXT: Determining loop execution counts for: @pr46786_c26_char -; X64-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * %arg) + %arg1) -; X64-NEXT: Loop %bb6: max backedge-taken count is -2 -; X64-NEXT: Loop %bb6: Predicated backedge-taken count is (-1 + (-1 * %arg) + %arg1) +; X64-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint i8* %arg to i64)) + (ptrtoint i8* %arg1 to i64)) +; X64-NEXT: Loop %bb6: max backedge-taken count is -1 +; X64-NEXT: Loop %bb6: Predicated backedge-taken count is (-1 + (-1 * (ptrtoint i8* %arg to i64)) + (ptrtoint i8* %arg1 to i64)) ; X64-NEXT: Predicates: ; X64: Loop %bb6: Trip multiple is 1 ; @@ -407,25 +407,25 @@ define void @pr46786_c26_char(i8* %arg, i8* %arg1, i8* %arg2) { ; X32-NEXT: %i4 = ptrtoint i8* %arg to i64 ; X32-NEXT: --> (zext i32 (ptrtoint i8* %arg to i32) to i64) U: [0,4294967296) S: [0,4294967296) ; X32-NEXT: %i7 = phi i8* [ %arg, %bb3 ], [ %i14, %bb6 ] -; X32-NEXT: --> {%arg,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + %arg1) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {%arg,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint i8* %arg to i32)) + (ptrtoint i8* %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i8 = load i8, i8* %i7, align 1 ; X32-NEXT: --> %i8 U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i9 = ptrtoint i8* %i7 to i64 -; X32-NEXT: --> {(zext i32 (ptrtoint i8* %arg to i32) to i64),+,1}<%bb6> U: [0,8589934590) S: [0,8589934590) Exits: ((zext i8* (-1 + (-1 * %arg) + %arg1) to i64) + (zext i32 (ptrtoint i8* %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(zext i32 (ptrtoint i8* %arg to i32) to i64),+,1}<%bb6> U: [0,8589934591) S: [0,8589934591) Exits: ((zext i32 (-1 + (-1 * (ptrtoint i8* %arg to i32)) + (ptrtoint i8* %arg1 to i32)) to i64) + (zext i32 (ptrtoint i8* %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i10 = sub i64 %i9, %i4 -; X32-NEXT: --> {0,+,1}<%bb6> U: [0,4294967295) S: [0,4294967295) Exits: (zext i8* (-1 + (-1 * %arg) + %arg1) to i64) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {0,+,1}<%bb6> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 (-1 + (-1 * (ptrtoint i8* %arg to i32)) + (ptrtoint i8* %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i11 = getelementptr inbounds i8, i8* %arg2, i64 %i10 -; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * %arg) + %arg1 + %arg2) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint i8* %arg to i32)) + (ptrtoint i8* %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i12 = load i8, i8* %i11, align 1 ; X32-NEXT: --> %i12 U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i13 = add i8 %i12, %i8 ; X32-NEXT: --> (%i12 + %i8) U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i14 = getelementptr inbounds i8, i8* %i7, i64 1 -; X32-NEXT: --> {(1 + %arg),+,1}<%bb6> U: [1,0) S: [1,0) Exits: %arg1 LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(1 + %arg),+,1}<%bb6> U: [1,0) S: [1,0) Exits: ((-1 * (ptrtoint i8* %arg to i32)) + (ptrtoint i8* %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable } ; X32-NEXT: Determining loop 
execution counts for: @pr46786_c26_char -; X32-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * %arg) + %arg1) -; X32-NEXT: Loop %bb6: max backedge-taken count is -2 -; X32-NEXT: Loop %bb6: Predicated backedge-taken count is (-1 + (-1 * %arg) + %arg1) +; X32-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint i8* %arg to i32)) + (ptrtoint i8* %arg1 to i32)) +; X32-NEXT: Loop %bb6: max backedge-taken count is -1 +; X32-NEXT: Loop %bb6: Predicated backedge-taken count is (-1 + (-1 * (ptrtoint i8* %arg to i32)) + (ptrtoint i8* %arg1 to i32)) ; X32-NEXT: Predicates: ; X32: Loop %bb6: Trip multiple is 1 ; @@ -465,13 +465,13 @@ define void @pr46786_c26_int(i32* %arg, i32* %arg1, i32* %arg2) { ; X64-NEXT: %i4 = ptrtoint i32* %arg to i64 ; X64-NEXT: --> (ptrtoint i32* %arg to i64) U: full-set S: full-set ; X64-NEXT: %i7 = phi i32* [ %arg, %bb3 ], [ %i15, %bb6 ] -; X64-NEXT: --> {%arg,+,4}<%bb6> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {%arg,+,4}<%bb6> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint i32* %arg to i64)) + (ptrtoint i32* %arg1 to i64)) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i8 = load i32, i32* %i7, align 4 ; X64-NEXT: --> %i8 U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i9 = ptrtoint i32* %i7 to i64 -; X64-NEXT: --> {(ptrtoint i32* %arg to i64),+,4}<%bb6> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) + (ptrtoint i32* %arg to i64)) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {(ptrtoint i32* %arg to i64),+,4}<%bb6> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint i32* %arg to i64)) + (ptrtoint i32* %arg1 to i64)) /u 4)) + (ptrtoint i32* %arg to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i10 = sub i64 %i9, %i4 -; X64-NEXT: --> {0,+,4}<%bb6> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {0,+,4}<%bb6> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (4 * ((-4 + (-1 * (ptrtoint i32* %arg to i64)) + (ptrtoint i32* %arg1 to i64)) /u 4)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i11 = ashr exact i64 %i10, 2 ; X64-NEXT: --> %i11 U: [-2305843009213693952,2305843009213693952) S: [-2305843009213693952,2305843009213693952) Exits: <> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i12 = getelementptr inbounds i32, i32* %arg2, i64 %i11 @@ -481,11 +481,11 @@ define void @pr46786_c26_int(i32* %arg, i32* %arg1, i32* %arg2) { ; X64-NEXT: %i14 = add nsw i32 %i13, %i8 ; X64-NEXT: --> (%i13 + %i8) U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i15 = getelementptr inbounds i32, i32* %i7, i64 1 -; X64-NEXT: --> {(4 + %arg),+,4}<%bb6> U: [4,0) S: [4,0) Exits: (4 + (4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {(4 + %arg),+,4}<%bb6> U: [4,0) S: [4,0) Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint i32* %arg to i64)) + (ptrtoint i32* %arg1 to i64)) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } ; X64-NEXT: Determining loop execution counts for: @pr46786_c26_int -; X64-NEXT: Loop %bb6: backedge-taken count is ((-4 + (-1 * %arg) + %arg1) /u 4) +; X64-NEXT: Loop %bb6: backedge-taken count is ((-4 + (-1 * (ptrtoint i32* %arg to i64)) + (ptrtoint i32* %arg1 to i64)) /u 4) ; X64-NEXT: Loop %bb6: max backedge-taken count is 4611686018427387903 
-; X64-NEXT: Loop %bb6: Predicated backedge-taken count is ((-4 + (-1 * %arg) + %arg1) /u 4) +; X64-NEXT: Loop %bb6: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint i32* %arg to i64)) + (ptrtoint i32* %arg1 to i64)) /u 4) ; X64-NEXT: Predicates: ; X64: Loop %bb6: Trip multiple is 1 ; @@ -494,13 +494,13 @@ define void @pr46786_c26_int(i32* %arg, i32* %arg1, i32* %arg2) { ; X32-NEXT: %i4 = ptrtoint i32* %arg to i64 ; X32-NEXT: --> (zext i32 (ptrtoint i32* %arg to i32) to i64) U: [0,4294967296) S: [0,4294967296) ; X32-NEXT: %i7 = phi i32* [ %arg, %bb3 ], [ %i15, %bb6 ] -; X32-NEXT: --> {%arg,+,4}<%bb6> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {%arg,+,4}<%bb6> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint i32* %arg to i32)) + (ptrtoint i32* %arg1 to i32)) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i8 = load i32, i32* %i7, align 4 ; X32-NEXT: --> %i8 U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i9 = ptrtoint i32* %i7 to i64 -; X32-NEXT: --> {(zext i32 (ptrtoint i32* %arg to i32) to i64),+,4}<%bb6> U: [0,8589934588) S: [0,8589934588) Exits: ((zext i32 (ptrtoint i32* %arg to i32) to i64) + (4 * ((zext i32* (-4 + (-1 * %arg) + %arg1) to i64) /u 4))) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(zext i32 (ptrtoint i32* %arg to i32) to i64),+,4}<%bb6> U: [0,8589934588) S: [0,8589934588) Exits: ((zext i32 (ptrtoint i32* %arg to i32) to i64) + (4 * ((zext i32 (-4 + (-1 * (ptrtoint i32* %arg to i32)) + (ptrtoint i32* %arg1 to i32)) to i64) /u 4))) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i10 = sub i64 %i9, %i4 -; X32-NEXT: --> {0,+,4}<%bb6> U: [0,4294967293) S: [0,4294967293) Exits: (4 * ((zext i32* (-4 + (-1 * %arg) + %arg1) to i64) /u 4)) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {0,+,4}<%bb6> U: [0,4294967293) S: [0,4294967293) Exits: (4 * ((zext i32 (-4 + (-1 * (ptrtoint i32* %arg to i32)) + (ptrtoint i32* %arg1 to i32)) to i64) /u 4)) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i11 = ashr exact i64 %i10, 2 ; X32-NEXT: --> %i11 U: [-2147483648,2147483648) S: [-2147483648,2147483648) Exits: <> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i12 = getelementptr inbounds i32, i32* %arg2, i64 %i11 @@ -510,11 +510,11 @@ define void @pr46786_c26_int(i32* %arg, i32* %arg1, i32* %arg2) { ; X32-NEXT: %i14 = add nsw i32 %i13, %i8 ; X32-NEXT: --> (%i13 + %i8) U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i15 = getelementptr inbounds i32, i32* %i7, i64 1 -; X32-NEXT: --> {(4 + %arg),+,4}<%bb6> U: [4,0) S: [4,0) Exits: (4 + (4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(4 + %arg),+,4}<%bb6> U: [4,0) S: [4,0) Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint i32* %arg to i32)) + (ptrtoint i32* %arg1 to i32)) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } ; X32-NEXT: Determining loop execution counts for: @pr46786_c26_int -; X32-NEXT: Loop %bb6: backedge-taken count is ((-4 + (-1 * %arg) + %arg1) /u 4) +; X32-NEXT: Loop %bb6: backedge-taken count is ((-4 + (-1 * (ptrtoint i32* %arg to i32)) + (ptrtoint i32* %arg1 to i32)) /u 4) ; X32-NEXT: Loop %bb6: max backedge-taken count is 1073741823 -; X32-NEXT: Loop %bb6: Predicated backedge-taken count is ((-4 + (-1 * %arg) + %arg1) /u 4) +; X32-NEXT: Loop %bb6: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint i32* %arg to i32)) + (ptrtoint i32* 
%arg1 to i32)) /u 4) ; X32-NEXT: Predicates: ; X32: Loop %bb6: Trip multiple is 1 ; diff --git a/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll b/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll index 5c0daa0784f7..1780a69f1e2b 100644 --- a/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll +++ b/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll @@ -12,17 +12,19 @@ define i8 @testnullptrptr(i8* %buf, i8* %end) nounwind { ; PTR64-LABEL: @testnullptrptr( +; PTR64-NEXT: [[END1:%.*]] = ptrtoint i8* [[END:%.*]] to i64 ; PTR64-NEXT: br label [[LOOPGUARD:%.*]] ; PTR64: loopguard: -; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i8* null, [[END:%.*]] +; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i8* null, [[END]] ; PTR64-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR64: preheader: +; PTR64-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i64 [[END1]] ; PTR64-NEXT: br label [[LOOP:%.*]] ; PTR64: loop: ; PTR64-NEXT: [[P_01_US_US:%.*]] = phi i8* [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] ; PTR64-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR64-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]], align 1 -; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[END]] +; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[SCEVGEP]] ; PTR64-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR64: exit.loopexit: ; PTR64-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -32,17 +34,19 @@ define i8 @testnullptrptr(i8* %buf, i8* %end) nounwind { ; PTR64-NEXT: ret i8 [[RET]] ; ; PTR32-LABEL: @testnullptrptr( +; PTR32-NEXT: [[END1:%.*]] = ptrtoint i8* [[END:%.*]] to i32 ; PTR32-NEXT: br label [[LOOPGUARD:%.*]] ; PTR32: loopguard: -; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i8* null, [[END:%.*]] +; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i8* null, [[END]] ; PTR32-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR32: preheader: +; PTR32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i32 [[END1]] ; PTR32-NEXT: br label [[LOOP:%.*]] ; PTR32: loop: ; PTR32-NEXT: [[P_01_US_US:%.*]] = phi i8* [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] ; PTR32-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR32-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]], align 1 -; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[END]] +; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[SCEVGEP]] ; PTR32-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR32: exit.loopexit: ; PTR32-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -75,17 +79,21 @@ exit: define i8 @testptrptr(i8* %buf, i8* %end) nounwind { ; PTR64-LABEL: @testptrptr( +; PTR64-NEXT: [[BUF2:%.*]] = ptrtoint i8* [[BUF:%.*]] to i64 +; PTR64-NEXT: [[END1:%.*]] = ptrtoint i8* [[END:%.*]] to i64 ; PTR64-NEXT: br label [[LOOPGUARD:%.*]] ; PTR64: loopguard: -; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i8* [[BUF:%.*]], [[END:%.*]] +; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i8* [[BUF]], [[END]] ; PTR64-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR64: preheader: +; PTR64-NEXT: [[TMP1:%.*]] = sub i64 [[END1]], [[BUF2]] +; PTR64-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[BUF]], i64 [[TMP1]] ; PTR64-NEXT: br label [[LOOP:%.*]] ; PTR64: loop: ; PTR64-NEXT: [[P_01_US_US:%.*]] = phi i8* [ [[BUF]], [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] ; PTR64-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR64-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]], align 1 
-; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[END]] +; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[SCEVGEP]] ; PTR64-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR64: exit.loopexit: ; PTR64-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -95,17 +103,21 @@ define i8 @testptrptr(i8* %buf, i8* %end) nounwind { ; PTR64-NEXT: ret i8 [[RET]] ; ; PTR32-LABEL: @testptrptr( +; PTR32-NEXT: [[BUF2:%.*]] = ptrtoint i8* [[BUF:%.*]] to i32 +; PTR32-NEXT: [[END1:%.*]] = ptrtoint i8* [[END:%.*]] to i32 ; PTR32-NEXT: br label [[LOOPGUARD:%.*]] ; PTR32: loopguard: -; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i8* [[BUF:%.*]], [[END:%.*]] +; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i8* [[BUF]], [[END]] ; PTR32-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR32: preheader: +; PTR32-NEXT: [[TMP1:%.*]] = sub i32 [[END1]], [[BUF2]] +; PTR32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[BUF]], i32 [[TMP1]] ; PTR32-NEXT: br label [[LOOP:%.*]] ; PTR32: loop: ; PTR32-NEXT: [[P_01_US_US:%.*]] = phi i8* [ [[BUF]], [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] ; PTR32-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR32-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]], align 1 -; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[END]] +; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[SCEVGEP]] ; PTR32-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR32: exit.loopexit: ; PTR32-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -306,10 +318,7 @@ define void @testnullptr([512 x i8]* %base) nounwind { ; PTR64: for.body.preheader: ; PTR64-NEXT: br label [[FOR_BODY:%.*]] ; PTR64: for.body: -; PTR64-NEXT: [[R_17193:%.*]] = phi i8* [ [[INCDEC_PTR1608:%.*]], [[FOR_BODY]] ], [ null, [[FOR_BODY_PREHEADER]] ] -; PTR64-NEXT: [[INCDEC_PTR1608]] = getelementptr i8, i8* [[R_17193]], i64 1 -; PTR64-NEXT: [[CMP1604:%.*]] = icmp ult i8* [[INCDEC_PTR1608]], [[ADD_PTR1603]] -; PTR64-NEXT: br i1 [[CMP1604]], label [[FOR_BODY]], label [[FOR_END1609_LOOPEXIT:%.*]] +; PTR64-NEXT: br i1 false, label [[FOR_BODY]], label [[FOR_END1609_LOOPEXIT:%.*]] ; PTR64: for.end1609.loopexit: ; PTR64-NEXT: br label [[FOR_END1609]] ; PTR64: for.end1609: @@ -325,10 +334,7 @@ define void @testnullptr([512 x i8]* %base) nounwind { ; PTR32: for.body.preheader: ; PTR32-NEXT: br label [[FOR_BODY:%.*]] ; PTR32: for.body: -; PTR32-NEXT: [[R_17193:%.*]] = phi i8* [ [[INCDEC_PTR1608:%.*]], [[FOR_BODY]] ], [ null, [[FOR_BODY_PREHEADER]] ] -; PTR32-NEXT: [[INCDEC_PTR1608]] = getelementptr i8, i8* [[R_17193]], i64 1 -; PTR32-NEXT: [[CMP1604:%.*]] = icmp ult i8* [[INCDEC_PTR1608]], [[ADD_PTR1603]] -; PTR32-NEXT: br i1 [[CMP1604]], label [[FOR_BODY]], label [[FOR_END1609_LOOPEXIT:%.*]] +; PTR32-NEXT: br i1 false, label [[FOR_BODY]], label [[FOR_END1609_LOOPEXIT:%.*]] ; PTR32: for.end1609.loopexit: ; PTR32-NEXT: br label [[FOR_END1609]] ; PTR32: for.end1609: diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll index 0b1609b3096c..11c960e103f5 100644 --- a/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll @@ -12,13 +12,15 @@ define void @foo() { ; CHECK-LABEL: @foo( ; CHECK-NEXT: bb: +; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 add (i64 ptrtoint ([0 x i8]* @global to i64), i64 500), i64 add (i64 ptrtoint ([0 x i8]* @global to i64), i64 1)) +; CHECK-NEXT: [[TMP0:%.*]] = 
sub i64 add (i64 ptrtoint ([0 x i8]* @global to i64), i64 1), [[UMIN]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0 ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP:%.*]] = phi i8* [ [[TMP4:%.*]], [[BB7:%.*]] ], [ getelementptr inbounds ([0 x i8], [0 x i8]* @global, i64 0, i64 2), [[BB:%.*]] ] ; CHECK-NEXT: [[TMP4]] = getelementptr inbounds i8, i8* [[TMP]], i64 -1 ; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP4]], align 1 -; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i8* [[TMP4]], getelementptr inbounds ([0 x i8], [0 x i8]* @global, i64 0, i64 500) -; CHECK-NEXT: br i1 [[TMP5]], label [[BB7]], label [[BB11:%.*]] +; CHECK-NEXT: br i1 [[TMP1]], label [[BB7]], label [[BB11:%.*]] ; CHECK: bb7: ; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 ; CHECK-NEXT: br i1 true, label [[BB11]], label [[BB3]] diff --git a/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll b/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll index e1ccdf86594c..b7915e839a42 100644 --- a/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll +++ b/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll @@ -16,15 +16,14 @@ target triple = "x86_64-unknown-linux-gnu" define void @_Z15my_basic_memsetPcS_c(i8* %ptr, i8* %end, i8 %value) { ; CHECK-LABEL: @_Z15my_basic_memsetPcS_c( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTR1:%.*]] = ptrtoint i8* [[PTR:%.*]] to i64, !dbg [[DBG15:![0-9]+]] -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8* [[PTR]], [[END:%.*]], !dbg [[DBG15]] +; CHECK-NEXT: [[PTR2:%.*]] = ptrtoint i8* [[PTR:%.*]] to i64, !dbg [[DBG15:![0-9]+]] +; CHECK-NEXT: [[END1:%.*]] = ptrtoint i8* [[END:%.*]] to i64, !dbg [[DBG15]] +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8* [[PTR]], [[END]], !dbg [[DBG15]] ; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP3]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG15]] ; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]], !dbg [[DBG16:![0-9]+]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[PTR1]], !dbg [[DBG17:![0-9]+]] -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[END]], i64 [[TMP0]], !dbg [[DBG17]] -; CHECK-NEXT: [[SCEVGEP2:%.*]] = ptrtoint i8* [[SCEVGEP]] to i64, !dbg [[DBG17]] -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[PTR]], i8 [[VALUE:%.*]], i64 [[SCEVGEP2]], i1 false), !dbg [[DBG18:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[END1]], [[PTR2]], !dbg [[DBG17:![0-9]+]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[PTR]], i8 [[VALUE:%.*]], i64 [[TMP0]], i1 false), !dbg [[DBG18:![0-9]+]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]], !dbg [[DBG17]] ; CHECK: for.body: ; CHECK-NEXT: [[PTR_ADDR_04:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[PTR]], [[FOR_BODY_PREHEADER]] ], !dbg [[DBG19:![0-9]+]] diff --git a/llvm/test/Transforms/LoopReroll/ptrindvar.ll b/llvm/test/Transforms/LoopReroll/ptrindvar.ll index ce60ea7dec65..8c876b495f9d 100644 --- a/llvm/test/Transforms/LoopReroll/ptrindvar.ll +++ b/llvm/test/Transforms/LoopReroll/ptrindvar.ll @@ -14,10 +14,10 @@ while.body: ;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ] ;CHECK-NEXT: %S.011 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ] ;CHECK-NEXT: %scevgep = getelementptr i32, i32* %buf, i64 %indvar -;CHECK-NEXT: %4 = load i32, i32* %scevgep, align 4 -;CHECK-NEXT: %add = add nsw i32 %4, %S.011 +;CHECK-NEXT: %5 = load i32, i32* %scevgep, align 4 +;CHECK-NEXT: %add = add nsw i32 %5, %S.011 ;CHECK-NEXT: 
%indvar.next = add i64 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %3 +;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %4 ;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body %S.011 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ] @@ -52,12 +52,12 @@ while.body: ;CHECK-LABEL: while.body: ;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ] ;CHECK-NEXT: %S.011 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ] -;CHECK-NEXT: %4 = mul nsw i64 %indvar, -1 -;CHECK-NEXT: %scevgep = getelementptr i32, i32* %buf, i64 %4 -;CHECK-NEXT: %5 = load i32, i32* %scevgep, align 4 -;CHECK-NEXT: %add = add nsw i32 %5, %S.011 +;CHECK-NEXT: %5 = mul nsw i64 %indvar, -1 +;CHECK-NEXT: %scevgep = getelementptr i32, i32* %buf, i64 %5 +;CHECK-NEXT: %6 = load i32, i32* %scevgep, align 4 +;CHECK-NEXT: %add = add nsw i32 %6, %S.011 ;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %3 +;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %4 ;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body %S.011 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll index 29f7c6a27656..20980c520a3c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll @@ -16,8 +16,17 @@ define void @cff_index_load_offsets(i1 %cond, i8 %x, i8* %p) #0 { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[EXIT:%.*]] ; CHECK: if.then: -; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 undef, i64 4) +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* null, i64 [[TMP3]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[X:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i8> poison, i8 [[X]], i32 0 @@ -25,68 +34,68 @@ define void @cff_index_load_offsets(i1 %cond, i8 %x, i8* %p) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 -; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, i8* null, i64 [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT3]] to <4 x i32> -; CHECK-NEXT: 
[[TMP6:%.*]] = shl nuw <4 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw <4 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[P:%.*]], align 1, !tbaa [[TBAA1:![0-9]+]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i8> poison, i8 [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 +; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, i8* null, i64 [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT3]] to <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw <4 x i32> [[TMP8]], +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw <4 x i32> [[TMP9]], +; CHECK-NEXT: [[TMP12:%.*]] = load i8, i8* [[P:%.*]], align 1, !tbaa [[TBAA1:![0-9]+]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i8> poison, i8 [[TMP12]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT4]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = load i8, i8* [[P]], align 1, !tbaa [[TBAA1]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[P]], align 1, !tbaa [[TBAA1]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP13]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT5]] to <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw <4 x i32> [[TMP10]], -; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw <4 x i32> [[TMP11]], -; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP12]], [[TMP6]] -; CHECK-NEXT: [[TMP15:%.*]] = or <4 x i32> [[TMP13]], [[TMP7]] -; CHECK-NEXT: [[TMP16:%.*]] = load i8, i8* undef, align 1, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP17:%.*]] = load i8, i8* undef, align 1, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i32> [[TMP14]], zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = or <4 x i32> [[TMP15]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i32> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = or <4 x i32> [[TMP19]], zeroinitializer -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP20]], i32 0 -; CHECK-NEXT: store i32 [[TMP22]], i32* undef, align 4, !tbaa [[TBAA4:![0-9]+]] -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP20]], i32 1 -; CHECK-NEXT: store i32 [[TMP23]], i32* undef, align 4, !tbaa [[TBAA4]] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP20]], i32 2 -; CHECK-NEXT: store i32 [[TMP24]], i32* undef, align 4, !tbaa [[TBAA4]] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP20]], i32 3 -; CHECK-NEXT: store i32 [[TMP25]], i32* undef, align 4, !tbaa [[TBAA4]] -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP21]], i32 0 -; CHECK-NEXT: store i32 [[TMP26]], i32* undef, align 4, !tbaa [[TBAA4]] -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP21]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT5]] to <4 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32> +; CHECK-NEXT: 
[[TMP16:%.*]] = shl nuw nsw <4 x i32> [[TMP14]], +; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw <4 x i32> [[TMP15]], +; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i32> [[TMP16]], [[TMP10]] +; CHECK-NEXT: [[TMP19:%.*]] = or <4 x i32> [[TMP17]], [[TMP11]] +; CHECK-NEXT: [[TMP20:%.*]] = load i8, i8* undef, align 1, !tbaa [[TBAA1]] +; CHECK-NEXT: [[TMP21:%.*]] = load i8, i8* undef, align 1, !tbaa [[TBAA1]] +; CHECK-NEXT: [[TMP22:%.*]] = or <4 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = or <4 x i32> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = or <4 x i32> [[TMP22]], zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = or <4 x i32> [[TMP23]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP24]], i32 0 +; CHECK-NEXT: store i32 [[TMP26]], i32* undef, align 4, !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP24]], i32 1 ; CHECK-NEXT: store i32 [[TMP27]], i32* undef, align 4, !tbaa [[TBAA4]] -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP21]], i32 2 +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP24]], i32 2 ; CHECK-NEXT: store i32 [[TMP28]], i32* undef, align 4, !tbaa [[TBAA4]] -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP21]], i32 3 +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP24]], i32 3 ; CHECK-NEXT: store i32 [[TMP29]], i32* undef, align 4, !tbaa [[TBAA4]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP25]], i32 0 +; CHECK-NEXT: store i32 [[TMP30]], i32* undef, align 4, !tbaa [[TBAA4]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[TMP25]], i32 1 +; CHECK-NEXT: store i32 [[TMP31]], i32* undef, align 4, !tbaa [[TBAA4]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[TMP25]], i32 2 +; CHECK-NEXT: store i32 [[TMP32]], i32* undef, align 4, !tbaa [[TBAA4]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i32> [[TMP25]], i32 3 +; CHECK-NEXT: store i32 [[TMP33]], i32* undef, align 4, !tbaa [[TBAA4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1, 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[SW_EPILOG:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ null, [[MIDDLE_BLOCK]] ], [ null, [[IF_THEN]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[IF_THEN]] ] ; CHECK-NEXT: br label [[FOR_BODY68:%.*]] ; CHECK: for.body68: ; CHECK-NEXT: [[P_359:%.*]] = phi i8* [ [[ADD_PTR86:%.*]], [[FOR_BODY68]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[CONV70:%.*]] = zext i8 [[X]] to i32 ; CHECK-NEXT: [[SHL71:%.*]] = shl nuw i32 [[CONV70]], 24 -; CHECK-NEXT: [[TMP31:%.*]] = load i8, i8* [[P]], align 1, !tbaa [[TBAA1]] -; CHECK-NEXT: [[CONV73:%.*]] = zext i8 [[TMP31]] to i32 +; CHECK-NEXT: [[TMP35:%.*]] = load i8, i8* [[P]], align 1, !tbaa [[TBAA1]] +; CHECK-NEXT: [[CONV73:%.*]] = zext i8 [[TMP35]] to i32 ; CHECK-NEXT: [[SHL74:%.*]] = shl nuw nsw i32 [[CONV73]], 16 ; CHECK-NEXT: [[OR75:%.*]] = or i32 [[SHL74]], [[SHL71]] -; CHECK-NEXT: [[TMP32:%.*]] = load i8, i8* 
undef, align 1, !tbaa [[TBAA1]] +; CHECK-NEXT: [[TMP36:%.*]] = load i8, i8* undef, align 1, !tbaa [[TBAA1]] ; CHECK-NEXT: [[SHL78:%.*]] = shl nuw nsw i32 undef, 8 ; CHECK-NEXT: [[OR79:%.*]] = or i32 [[OR75]], [[SHL78]] ; CHECK-NEXT: [[CONV81:%.*]] = zext i8 undef to i32 diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index 65bc1ab6c92f..c1ef506b8b97 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -12,13 +12,11 @@ define void @a(i8* readnone %b) { ; CHECK-NEXT: br i1 [[CMP_NOT4]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; CHECK: for.body.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[B1]] -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP0]] -; CHECK-NEXT: [[EXITCOUNT_PTRCNT_TO_INT:%.*]] = ptrtoint i8* [[SCEVGEP]] to i64 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[EXITCOUNT_PTRCNT_TO_INT]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[EXITCOUNT_PTRCNT_TO_INT]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[EXITCOUNT_PTRCNT_TO_INT]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[N_VEC]], -1 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* null, i64 [[TMP1]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -68,7 +66,7 @@ define void @a(i8* readnone %b) { ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 -4 ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[EXITCOUNT_PTRCNT_TO_INT]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[FOR_BODY_PREHEADER]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr45259.ll b/llvm/test/Transforms/LoopVectorize/pr45259.ll index 225f30ed84c4..229274b2888e 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45259.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45259.ll @@ -6,7 +6,7 @@ define i8 @widget(i8* %arr, i8 %t9) { ; CHECK-LABEL: @widget( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[ARR1:%.*]] = ptrtoint i8* [[ARR:%.*]] to i64 +; CHECK-NEXT: [[ARR2:%.*]] = ptrtoint i8* [[ARR:%.*]] to i64 ; CHECK-NEXT: br label [[BB6:%.*]] ; CHECK: bb6: ; CHECK-NEXT: [[T1_0:%.*]] = phi i8* [ [[ARR]], [[BB:%.*]] ], [ null, [[BB6]] ] @@ -14,30 +14,29 @@ define i8 @widget(i8* %arr, i8 %t9) { ; CHECK-NEXT: br i1 [[C]], label [[FOR_PREHEADER:%.*]], label [[BB6]] ; CHECK: for.preheader: ; CHECK-NEXT: [[T1_0_LCSSA:%.*]] = phi i8* [ [[T1_0]], [[BB6]] ] -; CHECK-NEXT: [[T1_0_LCSSA3:%.*]] = ptrtoint i8* [[T1_0_LCSSA]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[T1_0_LCSSA3]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ARR1]] to i32 +; CHECK-NEXT: [[T1_0_LCSSA1:%.*]] = ptrtoint i8* [[T1_0_LCSSA]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[T1_0_LCSSA1]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ARR2]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP0]], [[TMP1]] ; CHECK-NEXT: 
[[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP3:%.*]] = sub i64 -1, [[ARR1]] -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[T1_0_LCSSA]], i64 [[TMP3]] -; CHECK-NEXT: [[SCEVGEP2:%.*]] = ptrtoint i8* [[SCEVGEP]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[SCEVGEP2]] to i8 -; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP4]]) +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[T1_0_LCSSA1]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], [[ARR2]] +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i8 +; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP5]]) ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = add i8 1, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP6:%.*]] = sub i8 1, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i8 [[TMP6]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP5]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = select i1 false, i1 [[TMP7]], i1 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[SCEVGEP2]], 255 -; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP13:%.*]] = or i1 false, [[TMP12]] -; CHECK-NEXT: br i1 [[TMP13]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = add i8 1, [[MUL_RESULT]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i8 1, [[MUL_RESULT]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 false, i1 [[TMP8]], i1 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP4]], 255 +; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP12]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP14:%.*]] = or i1 false, [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]] @@ -48,18 +47,18 @@ define i8 @widget(i8* %arr, i8 %t9) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i8> [[VEC_IND]], -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i8> [[TMP14]], i32 0 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[ARR]], i8 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp slt <4 x i8> [[TMP14]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP18:%.*]] = zext <4 x i1> [[TMP17]] to <4 x i8> -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, i8* [[TMP16]], i32 0 -; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to <4 x i8>* -; CHECK-NEXT: store <4 x i8> [[TMP18]], <4 x i8>* [[TMP20]], align 1 +; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i8> [[VEC_IND]], +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i8> [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8* [[ARR]], i8 [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp slt <4 x i8> [[TMP15]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP19:%.*]] = zext <4 x i1> [[TMP18]] to <4 x i8> +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, 
i8* [[TMP17]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to <4 x i8>* +; CHECK-NEXT: store <4 x i8> [[TMP19]], <4 x i8>* [[TMP21]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll b/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll index 78343128882e..07aca83eecdf 100644 --- a/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll +++ b/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll @@ -115,7 +115,7 @@ declare void @use(i1) declare void @llvm.experimental.guard(i1, ...) ; This tests getRangeRef acts as intended with different idx size. -; CHECK: max backedge-taken count is 41 +; CHECK: Loop %loop: Unpredictable max backedge-taken count. define void @test_range_ref1(i8 %t) { entry: %t.ptr = inttoptr i8 %t to i8*