[DAG] visitINSERT_VECTOR_ELT - attempt to reconstruct BUILD_VECTOR before other folds interfere
Another issue unearthed by D127115: we take a long time to canonicalize an insert_vector_elt chain before being able to convert it into a build_vector. Even if the inserts are already in ascending insertion order, we fold the nodes one at a time into the build_vector 'seed', leaving plenty of time for other folds to alter it (in particular, recognising when the inserted values come from extract_vector_elt, resulting in a shuffle_vector that is much harder to fold with). D127115 makes this particularly difficult, as we're almost guaranteed to have lost the sequence before all possible insertions have been folded.

This patch proposes to begin at the last insertion and attempt to collect all the (one-use) insertions right away, creating the build_vector before it's too late.

Differential Revision: https://reviews.llvm.org/D127595
This commit is contained in:
parent f97e15ef45
commit 7d8fd4f5db
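For illustration, a minimal hypothetical IR pattern of the kind the combine targets (a sketch, not taken from this patch's tests): an ascending insertelement chain that fills every lane. Starting from the final lane-3 insert, the new code walks the one-use chain back down to lane 0 and emits a single BUILD_VECTOR in one step:

; Hypothetical example: a full ascending insertion chain.
; The combine fires on the last insert (lane NumElts - 1 = 3) and
; collects %a..%d directly into a BUILD_VECTOR.
define <4 x i32> @chain(i32 %a, i32 %b, i32 %c, i32 %d) {
  %v0 = insertelement <4 x i32> undef, i32 %a, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %b, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %c, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %d, i32 3
  ret <4 x i32> %v3
}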
@@ -19426,6 +19426,41 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
      Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
      return UpdateBuildVector(Ops);
    }

    // If we're inserting into the end of a vector as part of a sequence, see
    // if we can create a BUILD_VECTOR by following the sequence back up the
    // chain.
    if (Elt == (NumElts - 1)) {
      SmallVector<SDValue> ReverseInsertions;
      ReverseInsertions.push_back(InVal);

      EVT MaxEltVT = InVal.getValueType();
      SDValue CurVec = InVec;
      for (unsigned I = 1; I != NumElts; ++I) {
        if (CurVec.getOpcode() != ISD::INSERT_VECTOR_ELT || !CurVec.hasOneUse())
          break;

        auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2));
        if (!CurIdx || CurIdx->getAPIntValue() != ((NumElts - 1) - I))
          break;
        SDValue CurVal = CurVec.getOperand(1);
        ReverseInsertions.push_back(CurVal);
        if (VT.isInteger()) {
          EVT CurValVT = CurVal.getValueType();
          MaxEltVT = MaxEltVT.bitsGE(CurValVT) ? MaxEltVT : CurValVT;
        }
        CurVec = CurVec.getOperand(0);
      }

      if (ReverseInsertions.size() == NumElts) {
        for (unsigned I = 0; I != NumElts; ++I) {
          SDValue Val = ReverseInsertions[(NumElts - 1) - I];
          Val = VT.isInteger() ? DAG.getAnyExtOrTrunc(Val, DL, MaxEltVT) : Val;
          Ops.push_back(Val);
        }
        return DAG.getBuildVector(VT, DL, Ops);
      }
    }
  }

  return SDValue();
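And a hypothetical sketch of the interference described in the commit message: when the inserted values are extractelement results, folding the inserts into the build_vector one at a time leaves time for the insert/extract pairs to be recognised as a shuffle_vector first, which is much harder to fold with. Collecting the whole chain from the last insert avoids that window:

; Hypothetical example: lanes 1-3 come from extracts of %s. Folded one
; element at a time, these insert/extract pairs could become a shuffle
; before the whole chain is recognised as a BUILD_VECTOR.
define <4 x i32> @interfere(<4 x i32> %s, i32 %a) {
  %e1 = extractelement <4 x i32> %s, i32 1
  %e2 = extractelement <4 x i32> %s, i32 2
  %e3 = extractelement <4 x i32> %s, i32 3
  %v0 = insertelement <4 x i32> undef, i32 %a, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}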
@@ -258,9 +258,7 @@ define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins2d1:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov v1.d[0], v0.d[0]
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
@@ -282,7 +280,7 @@ define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins2f1:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v0.2d, v0.d[1]
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %tmp3 = extractelement <2 x double> %tmp1, i32 1
@@ -228,7 +228,8 @@ define <64 x float> @insertelement_v64f32(<64 x float>* %a) #0 {
define <1 x double> @insertelement_v1f64(<1 x double> %op1) #0 {
; VBITS_GE_256-LABEL: insertelement_v1f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: fmov d0, #5.00000000
; VBITS_GE_256-NEXT: mov x8, #4617315517961601024
; VBITS_GE_256-NEXT: fmov d0, x8
; VBITS_GE_256-NEXT: ret
  %r = insertelement <1 x double> %op1, double 5.0, i64 0
  ret <1 x double> %r
@@ -13,7 +13,7 @@
; CHECK: Legally typed node: [[VTWOA]]: v2f64 = BUILD_VECTOR
; CHECK: Legalizing node: [[VTWOB:t.*]]: v2f64 = BUILD_VECTOR
; CHECK: Legally typed node: [[VTWOB]]: v2f64 = BUILD_VECTOR
; CHECK: Legalizing node: t34: v2f64 = fmaxnum nnan reassoc [[VTWOB]], [[VTWOA]]
; CHECK: Legalizing node: t30: v2f64 = fmaxnum nnan reassoc [[VTWOB]], [[VTWOA]]

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
@@ -257,10 +257,7 @@ define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins2d1:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov r0, r1, d0
; CHECK-NEXT: vmov.32 d2[0], r0
; CHECK-NEXT: vmov.32 d2[1], r1
; CHECK-NEXT: vorr d0, d2, d2
; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: bx lr
  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
@@ -7,13 +7,9 @@
define <1 x i1> @insertelt_v1i1(<1 x i1> %x, i1 %elt) nounwind {
; CHECK-LABEL: insertelt_v1i1:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, mu
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
  %y = insertelement <1 x i1> %x, i1 %elt, i64 0
@@ -41,9 +41,8 @@ define <1 x i8> @mgather_v1i8(<1 x i8*> %ptrs, <1 x i1> %m, <1 x i8> %passthru)
; RV64ZVE32F-NEXT: andi a1, a1, 1
; RV64ZVE32F-NEXT: beqz a1, .LBB0_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lb a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, tu, mu
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vlse8.v v8, (a0), zero
; RV64ZVE32F-NEXT: .LBB0_2: # %else
; RV64ZVE32F-NEXT: ret
  %v = call <1 x i8> @llvm.masked.gather.v1i8.v1p0i8(<1 x i8*> %ptrs, i32 1, <1 x i1> %m, <1 x i8> %passthru)
@@ -1012,9 +1011,8 @@ define <1 x i16> @mgather_v1i16(<1 x i16*> %ptrs, <1 x i1> %m, <1 x i16> %passth
; RV64ZVE32F-NEXT: andi a1, a1, 1
; RV64ZVE32F-NEXT: beqz a1, .LBB13_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, tu, mu
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
; RV64ZVE32F-NEXT: .LBB13_2: # %else
; RV64ZVE32F-NEXT: ret
  %v = call <1 x i16> @llvm.masked.gather.v1i16.v1p0i16(<1 x i16*> %ptrs, i32 2, <1 x i1> %m, <1 x i16> %passthru)
@@ -2325,9 +2323,8 @@ define <1 x i32> @mgather_v1i32(<1 x i32*> %ptrs, <1 x i1> %m, <1 x i32> %passth
; RV64ZVE32F-NEXT: andi a1, a1, 1
; RV64ZVE32F-NEXT: beqz a1, .LBB27_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, tu, mu
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
; RV64ZVE32F-NEXT: .LBB27_2: # %else
; RV64ZVE32F-NEXT: ret
  %v = call <1 x i32> @llvm.masked.gather.v1i32.v1p0i32(<1 x i32*> %ptrs, i32 4, <1 x i1> %m, <1 x i32> %passthru)
@@ -7574,9 +7571,8 @@ define <1 x half> @mgather_v1f16(<1 x half*> %ptrs, <1 x i1> %m, <1 x half> %pas
; RV64ZVE32F-NEXT: andi a1, a1, 1
; RV64ZVE32F-NEXT: beqz a1, .LBB58_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: flh ft0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, tu, mu
; RV64ZVE32F-NEXT: vfmv.s.f v8, ft0
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
; RV64ZVE32F-NEXT: .LBB58_2: # %else
; RV64ZVE32F-NEXT: ret
  %v = call <1 x half> @llvm.masked.gather.v1f16.v1p0f16(<1 x half*> %ptrs, i32 2, <1 x i1> %m, <1 x half> %passthru)
@@ -8594,9 +8590,8 @@ define <1 x float> @mgather_v1f32(<1 x float*> %ptrs, <1 x i1> %m, <1 x float> %
; RV64ZVE32F-NEXT: andi a1, a1, 1
; RV64ZVE32F-NEXT: beqz a1, .LBB68_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: flw ft0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, tu, mu
; RV64ZVE32F-NEXT: vfmv.s.f v8, ft0
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
; RV64ZVE32F-NEXT: .LBB68_2: # %else
; RV64ZVE32F-NEXT: ret
  %v = call <1 x float> @llvm.masked.gather.v1f32.v1p0f32(<1 x float*> %ptrs, i32 4, <1 x i1> %m, <1 x float> %passthru)
@@ -593,8 +593,9 @@ define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y , i64* %ptr) {
define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
; CHECK-LABEL: insert_v2i64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm0
; CHECK-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm0
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovq %rdi, %xmm1
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: retq
  %val = load i64, i64* %ptr
  %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
@@ -2284,38 +2284,40 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
; KNL_64: # %bb.0:
; KNL_64-NEXT: vpmovsxdq %xmm0, %xmm0
; KNL_64-NEXT: vpsllq $3, %xmm0, %xmm0
; KNL_64-NEXT: vmovq %rdi, %xmm2
; KNL_64-NEXT: vpbroadcastq %xmm2, %xmm2
; KNL_64-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; KNL_64-NEXT: vmovq %rdi, %xmm1
; KNL_64-NEXT: vpbroadcastq %xmm1, %xmm1
; KNL_64-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; KNL_64-NEXT: vmovq %xmm0, %rax
; KNL_64-NEXT: vpinsrq $0, (%rax), %xmm1, %xmm1
; KNL_64-NEXT: vpextrq $1, %xmm0, %rax
; KNL_64-NEXT: vpinsrq $1, (%rax), %xmm1, %xmm0
; KNL_64-NEXT: vpextrq $1, %xmm0, %rcx
; KNL_64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; KNL_64-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; KNL_64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test26:
; KNL_32: # %bb.0:
; KNL_32-NEXT: vpslld $3, %xmm0, %xmm0
; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm2
; KNL_32-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1
; KNL_32-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; KNL_32-NEXT: vmovd %xmm0, %eax
; KNL_32-NEXT: vpinsrd $0, (%eax), %xmm1, %xmm1
; KNL_32-NEXT: vpinsrd $1, 4(%eax), %xmm1, %xmm1
; KNL_32-NEXT: vpextrd $1, %xmm0, %eax
; KNL_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm0
; KNL_32-NEXT: vpinsrd $3, 4(%eax), %xmm0, %xmm0
; KNL_32-NEXT: vpextrd $1, %xmm0, %ecx
; KNL_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL_32-NEXT: vpinsrd $1, 4(%eax), %xmm0, %xmm0
; KNL_32-NEXT: vpinsrd $2, (%ecx), %xmm0, %xmm0
; KNL_32-NEXT: vpinsrd $3, 4(%ecx), %xmm0, %xmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test26:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxdq %xmm0, %xmm0
; SKX-NEXT: vpbroadcastq %rdi, %xmm2
; SKX-NEXT: vpbroadcastq %rdi, %xmm1
; SKX-NEXT: vpsllq $3, %xmm0, %xmm0
; SKX-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; SKX-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; SKX-NEXT: vmovq %xmm0, %rax
; SKX-NEXT: vpinsrq $0, (%rax), %xmm1, %xmm1
; SKX-NEXT: vpextrq $1, %xmm0, %rax
; SKX-NEXT: vpinsrq $1, (%rax), %xmm1, %xmm0
; SKX-NEXT: vpextrq $1, %xmm0, %rcx
; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; SKX-NEXT: retq
;
; SKX_32-LABEL: test26:
@@ -2323,11 +2325,11 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
; SKX_32-NEXT: vpslld $3, %xmm0, %xmm0
; SKX_32-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
; SKX_32-NEXT: vmovd %xmm0, %eax
; SKX_32-NEXT: vpinsrd $0, (%eax), %xmm1, %xmm1
; SKX_32-NEXT: vpinsrd $1, 4(%eax), %xmm1, %xmm1
; SKX_32-NEXT: vpextrd $1, %xmm0, %eax
; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm0
; SKX_32-NEXT: vpinsrd $3, 4(%eax), %xmm0, %xmm0
; SKX_32-NEXT: vpextrd $1, %xmm0, %ecx
; SKX_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SKX_32-NEXT: vpinsrd $1, 4(%eax), %xmm0, %xmm0
; SKX_32-NEXT: vpinsrd $2, (%ecx), %xmm0, %xmm0
; SKX_32-NEXT: vpinsrd $3, 4(%ecx), %xmm0, %xmm0
; SKX_32-NEXT: retl
  %sext_ind = sext <2 x i32> %ind to <2 x i64>
  %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind
@@ -4,28 +4,13 @@
define void @f(<4 x half>* %a, <4 x half>* %b, <8 x half>* %c) {
; CHECK-LABEL: f:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: movzwl 2(%rdi), %ecx
; CHECK-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movzwl 6(%rdi), %r8d
; CHECK-NEXT: movzwl 4(%rdi), %r11d
; CHECK-NEXT: movq (%rsi), %rsi
; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: pextrw $1, %xmm0, %r9d
; CHECK-NEXT: movd %xmm0, %r10d
; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %esi
; CHECK-NEXT: pextrw $3, %xmm0, %eax
; CHECK-NEXT: pextrw $2, %xmm0, %edi
; CHECK-NEXT: movw %r11w, 8(%rdx)
; CHECK-NEXT: movw %cx, 4(%rdx)
; CHECK-NEXT: movw %r8w, 12(%rdx)
; CHECK-NEXT: movw %si, (%rdx)
; CHECK-NEXT: movw %di, 10(%rdx)
; CHECK-NEXT: movw %ax, 14(%rdx)
; CHECK-NEXT: movw %r10w, 2(%rdx)
; CHECK-NEXT: movw %r9w, 6(%rdx)
; CHECK-NEXT: movq (%rsi), %rax
; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT: movdqa %xmm0, (%rdx)
; CHECK-NEXT: retq
  %tmp4 = load <4 x half>, <4 x half>* %a
  %tmp5 = load <4 x half>, <4 x half>* %b
@@ -379,26 +379,15 @@ define <4 x i32> @insert_i32_two_elts(<4 x i32> %x, i32* %s.addr) {
}

define <2 x i64> @insert_i64_two_elts(<2 x i64> %x, i64* %s.addr) {
; SSE2-LABEL: insert_i64_two_elts:
; SSE2: # %bb.0:
; SSE2-NEXT: movq (%rdi), %rax
; SSE2-NEXT: movq %rax, %xmm0
; SSE2-NEXT: movq %rax, %xmm1
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: insert_i64_two_elts:
; SSE41: # %bb.0:
; SSE41-NEXT: movq (%rdi), %rax
; SSE41-NEXT: pinsrq $0, %rax, %xmm0
; SSE41-NEXT: pinsrq $1, %rax, %xmm0
; SSE41-NEXT: retq
; SSE-LABEL: insert_i64_two_elts:
; SSE: # %bb.0:
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: insert_i64_two_elts:
; AVX: # %bb.0:
; AVX-NEXT: movq (%rdi), %rax
; AVX-NEXT: vpinsrq $0, %rax, %xmm0, %xmm0
; AVX-NEXT: vpinsrq $1, %rax, %xmm0, %xmm0
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT: retq
  %s = load i64, i64* %s.addr
  %i0 = insertelement <2 x i64> %x, i64 %s, i32 0
@@ -352,24 +352,11 @@ define <4 x i32> @insert_i32_two_elts(<4 x i32> %x, i32 %s) {
}

define <2 x i64> @insert_i64_two_elts(<2 x i64> %x, i64 %s) {
; SSE2-LABEL: insert_i64_two_elts:
; SSE2: # %bb.0:
; SSE2-NEXT: movq %rdi, %xmm0
; SSE2-NEXT: movq %rdi, %xmm1
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: insert_i64_two_elts:
; SSE41: # %bb.0:
; SSE41-NEXT: pinsrq $0, %rdi, %xmm0
; SSE41-NEXT: pinsrq $1, %rdi, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_i64_two_elts:
; AVX: # %bb.0:
; AVX-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm0
; AVX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
; AVX-NEXT: retq
; SSE-LABEL: insert_i64_two_elts:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
  %i0 = insertelement <2 x i64> %x, i64 %s, i32 0
  %i1 = insertelement <2 x i64> %i0, i64 %s, i32 1
  ret <2 x i64> %i1
@@ -8,7 +8,8 @@
define x86_mmx @mmx_movzl(x86_mmx %x) nounwind {
; X86-LABEL: mmx_movzl:
; X86: ## %bb.0:
; X86-NEXT: movq {{\.?LCPI[0-9]+_[0-9]+}}, %mm0
; X86-NEXT: movl $32, %eax
; X86-NEXT: movd %eax, %mm0
; X86-NEXT: retl
;
; X64-LABEL: mmx_movzl: