[DAG] visitINSERT_VECTOR_ELT - attempt to reconstruct BUILD_VECTOR before other folds interfere

Another issue unearthed by D127115

We take a long time to canonicalize an insert_vector_elt chain before being able to convert it into a build_vector - even if the insertions are already in ascending index order, we fold the nodes one at a time into the build_vector 'seed', leaving plenty of time for other folds to alter it (in particular, recognising when the inserted values come from extract_vector_elt, which results in a shuffle_vector that is much harder to fold with).
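
For illustration, a minimal sketch of such a chain (hypothetical IR, not taken from this patch's tests) - ideally all four insertions below become a single BUILD_VECTOR node:

define <4 x i32> @chain(i32 %a, i32 %b, i32 %c, i32 %d) {
  ; Ascending insertion order: element 0 first, element 3 last.
  %v0 = insertelement <4 x i32> undef, i32 %a, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %b, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %c, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %d, i32 3
  ret <4 x i32> %v3
}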

D127115 makes this particularly difficult, as we're almost guaranteed to have lost the sequence before all possible insertions have been folded.

This patch proposes to begin at the last insertion and attempt to collect all the (one-use) insertions right away, creating the build_vector before it's too late.
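
To sketch the one-use restriction (again hypothetical IR): the walk back from the last insertion stops as soon as an intermediate insertion has another user, and the fold only fires if an insertion was collected for every element:

define <2 x i64> @no_fold(i64 %a, i64 %b, <2 x i64>* %p) {
  %v0 = insertelement <2 x i64> undef, i64 %a, i32 0
  ; This store is a second use of %v0, so the new combine bails out.
  store <2 x i64> %v0, <2 x i64>* %p
  %v1 = insertelement <2 x i64> %v0, i64 %b, i32 1
  ret <2 x i64> %v1
}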

Differential Revision: https://reviews.llvm.org/D127595
Simon Pilgrim 2022-06-13 11:47:14 +01:00
parent f97e15ef45
commit 7d8fd4f5db
13 changed files with 100 additions and 113 deletions

@@ -19426,6 +19426,41 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
      Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
      return UpdateBuildVector(Ops);
    }

    // If we're inserting into the end of a vector as part of a sequence, see
    // if we can create a BUILD_VECTOR by following the sequence back up the
    // chain.
    if (Elt == (NumElts - 1)) {
      SmallVector<SDValue> ReverseInsertions;
      ReverseInsertions.push_back(InVal);

      EVT MaxEltVT = InVal.getValueType();
      SDValue CurVec = InVec;
      for (unsigned I = 1; I != NumElts; ++I) {
        // Bail if this isn't a one-use insertion - we can't remove a node
        // that has other users.
        if (CurVec.getOpcode() != ISD::INSERT_VECTOR_ELT || !CurVec.hasOneUse())
          break;
        // Each step back must be a constant insertion at the next index down.
        auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2));
        if (!CurIdx || CurIdx->getAPIntValue() != ((NumElts - 1) - I))
          break;
        SDValue CurVal = CurVec.getOperand(1);
        ReverseInsertions.push_back(CurVal);
        // Track the widest integer element type seen; narrower values are
        // any-extended to match below.
        if (VT.isInteger()) {
          EVT CurValVT = CurVal.getValueType();
          MaxEltVT = MaxEltVT.bitsGE(CurValVT) ? MaxEltVT : CurValVT;
        }
        CurVec = CurVec.getOperand(0);
      }

      // Only fold if the chain covered every element of the vector.
      if (ReverseInsertions.size() == NumElts) {
        for (unsigned I = 0; I != NumElts; ++I) {
          SDValue Val = ReverseInsertions[(NumElts - 1) - I];
          Val = VT.isInteger() ? DAG.getAnyExtOrTrunc(Val, DL, MaxEltVT) : Val;
          Ops.push_back(Val);
        }
        return DAG.getBuildVector(VT, DL, Ops);
      }
    }
  }

  return SDValue();

@@ -258,9 +258,7 @@ define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins2d1:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov v1.d[0], v0.d[0]
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%tmp3 = extractelement <2 x i64> %tmp1, i32 0
%tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
@@ -282,7 +280,7 @@ define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins2f1:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v0.2d, v0.d[1]
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%tmp3 = extractelement <2 x double> %tmp1, i32 1

@@ -228,7 +228,8 @@ define <64 x float> @insertelement_v64f32(<64 x float>* %a) #0 {
define <1 x double> @insertelement_v1f64(<1 x double> %op1) #0 {
; VBITS_GE_256-LABEL: insertelement_v1f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: fmov d0, #5.00000000
; VBITS_GE_256-NEXT: mov x8, #4617315517961601024
; VBITS_GE_256-NEXT: fmov d0, x8
; VBITS_GE_256-NEXT: ret
%r = insertelement <1 x double> %op1, double 5.0, i64 0
ret <1 x double> %r

@@ -13,7 +13,7 @@
; CHECK: Legally typed node: [[VTWOA]]: v2f64 = BUILD_VECTOR
; CHECK: Legalizing node: [[VTWOB:t.*]]: v2f64 = BUILD_VECTOR
; CHECK: Legally typed node: [[VTWOB]]: v2f64 = BUILD_VECTOR
; CHECK: Legalizing node: t34: v2f64 = fmaxnum nnan reassoc [[VTWOB]], [[VTWOA]]
; CHECK: Legalizing node: t30: v2f64 = fmaxnum nnan reassoc [[VTWOB]], [[VTWOA]]
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

@@ -257,10 +257,7 @@ define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins2d1:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov r0, r1, d0
; CHECK-NEXT: vmov.32 d2[0], r0
; CHECK-NEXT: vmov.32 d2[1], r1
; CHECK-NEXT: vorr d0, d2, d2
; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: bx lr
%tmp3 = extractelement <2 x i64> %tmp1, i32 0
%tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0

@@ -7,13 +7,9 @@
define <1 x i1> @insertelt_v1i1(<1 x i1> %x, i1 %elt) nounwind {
; CHECK-LABEL: insertelt_v1i1:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, mu
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%y = insertelement <1 x i1> %x, i1 %elt, i64 0

@@ -41,9 +41,8 @@ define <1 x i8> @mgather_v1i8(<1 x i8*> %ptrs, <1 x i1> %m, <1 x i8> %passthru)
; RV64ZVE32F-NEXT: andi a1, a1, 1
; RV64ZVE32F-NEXT: beqz a1, .LBB0_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lb a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, tu, mu
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vlse8.v v8, (a0), zero
; RV64ZVE32F-NEXT: .LBB0_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x i8> @llvm.masked.gather.v1i8.v1p0i8(<1 x i8*> %ptrs, i32 1, <1 x i1> %m, <1 x i8> %passthru)
@@ -1012,9 +1011,8 @@ define <1 x i16> @mgather_v1i16(<1 x i16*> %ptrs, <1 x i1> %m, <1 x i16> %passth
; RV64ZVE32F-NEXT: andi a1, a1, 1
; RV64ZVE32F-NEXT: beqz a1, .LBB13_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, tu, mu
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
; RV64ZVE32F-NEXT: .LBB13_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x i16> @llvm.masked.gather.v1i16.v1p0i16(<1 x i16*> %ptrs, i32 2, <1 x i1> %m, <1 x i16> %passthru)
@@ -2325,9 +2323,8 @@ define <1 x i32> @mgather_v1i32(<1 x i32*> %ptrs, <1 x i1> %m, <1 x i32> %passth
; RV64ZVE32F-NEXT: andi a1, a1, 1
; RV64ZVE32F-NEXT: beqz a1, .LBB27_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, tu, mu
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
; RV64ZVE32F-NEXT: .LBB27_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x i32> @llvm.masked.gather.v1i32.v1p0i32(<1 x i32*> %ptrs, i32 4, <1 x i1> %m, <1 x i32> %passthru)
@@ -7574,9 +7571,8 @@ define <1 x half> @mgather_v1f16(<1 x half*> %ptrs, <1 x i1> %m, <1 x half> %pas
; RV64ZVE32F-NEXT: andi a1, a1, 1
; RV64ZVE32F-NEXT: beqz a1, .LBB58_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: flh ft0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, tu, mu
; RV64ZVE32F-NEXT: vfmv.s.f v8, ft0
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
; RV64ZVE32F-NEXT: .LBB58_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x half> @llvm.masked.gather.v1f16.v1p0f16(<1 x half*> %ptrs, i32 2, <1 x i1> %m, <1 x half> %passthru)
@@ -8594,9 +8590,8 @@ define <1 x float> @mgather_v1f32(<1 x float*> %ptrs, <1 x i1> %m, <1 x float> %
; RV64ZVE32F-NEXT: andi a1, a1, 1
; RV64ZVE32F-NEXT: beqz a1, .LBB68_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: flw ft0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, tu, mu
; RV64ZVE32F-NEXT: vfmv.s.f v8, ft0
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
; RV64ZVE32F-NEXT: .LBB68_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x float> @llvm.masked.gather.v1f32.v1p0f32(<1 x float*> %ptrs, i32 4, <1 x i1> %m, <1 x float> %passthru)

@@ -593,8 +593,9 @@ define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y , i64* %ptr) {
define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
; CHECK-LABEL: insert_v2i64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm0
; CHECK-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm0
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovq %rdi, %xmm1
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: retq
%val = load i64, i64* %ptr
%r1 = insertelement <2 x i64> %x, i64 %val, i32 1

@@ -2284,38 +2284,40 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
; KNL_64: # %bb.0:
; KNL_64-NEXT: vpmovsxdq %xmm0, %xmm0
; KNL_64-NEXT: vpsllq $3, %xmm0, %xmm0
; KNL_64-NEXT: vmovq %rdi, %xmm2
; KNL_64-NEXT: vpbroadcastq %xmm2, %xmm2
; KNL_64-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; KNL_64-NEXT: vmovq %rdi, %xmm1
; KNL_64-NEXT: vpbroadcastq %xmm1, %xmm1
; KNL_64-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; KNL_64-NEXT: vmovq %xmm0, %rax
; KNL_64-NEXT: vpinsrq $0, (%rax), %xmm1, %xmm1
; KNL_64-NEXT: vpextrq $1, %xmm0, %rax
; KNL_64-NEXT: vpinsrq $1, (%rax), %xmm1, %xmm0
; KNL_64-NEXT: vpextrq $1, %xmm0, %rcx
; KNL_64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; KNL_64-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; KNL_64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test26:
; KNL_32: # %bb.0:
; KNL_32-NEXT: vpslld $3, %xmm0, %xmm0
; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm2
; KNL_32-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1
; KNL_32-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; KNL_32-NEXT: vmovd %xmm0, %eax
; KNL_32-NEXT: vpinsrd $0, (%eax), %xmm1, %xmm1
; KNL_32-NEXT: vpinsrd $1, 4(%eax), %xmm1, %xmm1
; KNL_32-NEXT: vpextrd $1, %xmm0, %eax
; KNL_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm0
; KNL_32-NEXT: vpinsrd $3, 4(%eax), %xmm0, %xmm0
; KNL_32-NEXT: vpextrd $1, %xmm0, %ecx
; KNL_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL_32-NEXT: vpinsrd $1, 4(%eax), %xmm0, %xmm0
; KNL_32-NEXT: vpinsrd $2, (%ecx), %xmm0, %xmm0
; KNL_32-NEXT: vpinsrd $3, 4(%ecx), %xmm0, %xmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test26:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxdq %xmm0, %xmm0
; SKX-NEXT: vpbroadcastq %rdi, %xmm2
; SKX-NEXT: vpbroadcastq %rdi, %xmm1
; SKX-NEXT: vpsllq $3, %xmm0, %xmm0
; SKX-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; SKX-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; SKX-NEXT: vmovq %xmm0, %rax
; SKX-NEXT: vpinsrq $0, (%rax), %xmm1, %xmm1
; SKX-NEXT: vpextrq $1, %xmm0, %rax
; SKX-NEXT: vpinsrq $1, (%rax), %xmm1, %xmm0
; SKX-NEXT: vpextrq $1, %xmm0, %rcx
; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; SKX-NEXT: retq
;
; SKX_32-LABEL: test26:
@@ -2323,11 +2325,11 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
; SKX_32-NEXT: vpslld $3, %xmm0, %xmm0
; SKX_32-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
; SKX_32-NEXT: vmovd %xmm0, %eax
; SKX_32-NEXT: vpinsrd $0, (%eax), %xmm1, %xmm1
; SKX_32-NEXT: vpinsrd $1, 4(%eax), %xmm1, %xmm1
; SKX_32-NEXT: vpextrd $1, %xmm0, %eax
; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm0
; SKX_32-NEXT: vpinsrd $3, 4(%eax), %xmm0, %xmm0
; SKX_32-NEXT: vpextrd $1, %xmm0, %ecx
; SKX_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SKX_32-NEXT: vpinsrd $1, 4(%eax), %xmm0, %xmm0
; SKX_32-NEXT: vpinsrd $2, (%ecx), %xmm0, %xmm0
; SKX_32-NEXT: vpinsrd $3, 4(%ecx), %xmm0, %xmm0
; SKX_32-NEXT: retl
%sext_ind = sext <2 x i32> %ind to <2 x i64>
%gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind

@@ -4,28 +4,13 @@
define void @f(<4 x half>* %a, <4 x half>* %b, <8 x half>* %c) {
; CHECK-LABEL: f:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: movzwl 2(%rdi), %ecx
; CHECK-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movzwl 6(%rdi), %r8d
; CHECK-NEXT: movzwl 4(%rdi), %r11d
; CHECK-NEXT: movq (%rsi), %rsi
; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: pextrw $1, %xmm0, %r9d
; CHECK-NEXT: movd %xmm0, %r10d
; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %esi
; CHECK-NEXT: pextrw $3, %xmm0, %eax
; CHECK-NEXT: pextrw $2, %xmm0, %edi
; CHECK-NEXT: movw %r11w, 8(%rdx)
; CHECK-NEXT: movw %cx, 4(%rdx)
; CHECK-NEXT: movw %r8w, 12(%rdx)
; CHECK-NEXT: movw %si, (%rdx)
; CHECK-NEXT: movw %di, 10(%rdx)
; CHECK-NEXT: movw %ax, 14(%rdx)
; CHECK-NEXT: movw %r10w, 2(%rdx)
; CHECK-NEXT: movw %r9w, 6(%rdx)
; CHECK-NEXT: movq (%rsi), %rax
; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT: movdqa %xmm0, (%rdx)
; CHECK-NEXT: retq
%tmp4 = load <4 x half>, <4 x half>* %a
%tmp5 = load <4 x half>, <4 x half>* %b

@@ -379,26 +379,15 @@ define <4 x i32> @insert_i32_two_elts(<4 x i32> %x, i32* %s.addr) {
}
define <2 x i64> @insert_i64_two_elts(<2 x i64> %x, i64* %s.addr) {
; SSE2-LABEL: insert_i64_two_elts:
; SSE2: # %bb.0:
; SSE2-NEXT: movq (%rdi), %rax
; SSE2-NEXT: movq %rax, %xmm0
; SSE2-NEXT: movq %rax, %xmm1
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: insert_i64_two_elts:
; SSE41: # %bb.0:
; SSE41-NEXT: movq (%rdi), %rax
; SSE41-NEXT: pinsrq $0, %rax, %xmm0
; SSE41-NEXT: pinsrq $1, %rax, %xmm0
; SSE41-NEXT: retq
; SSE-LABEL: insert_i64_two_elts:
; SSE: # %bb.0:
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: insert_i64_two_elts:
; AVX: # %bb.0:
; AVX-NEXT: movq (%rdi), %rax
; AVX-NEXT: vpinsrq $0, %rax, %xmm0, %xmm0
; AVX-NEXT: vpinsrq $1, %rax, %xmm0, %xmm0
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT: retq
%s = load i64, i64* %s.addr
%i0 = insertelement <2 x i64> %x, i64 %s, i32 0

@@ -352,24 +352,11 @@ define <4 x i32> @insert_i32_two_elts(<4 x i32> %x, i32 %s) {
}
define <2 x i64> @insert_i64_two_elts(<2 x i64> %x, i64 %s) {
; SSE2-LABEL: insert_i64_two_elts:
; SSE2: # %bb.0:
; SSE2-NEXT: movq %rdi, %xmm0
; SSE2-NEXT: movq %rdi, %xmm1
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: insert_i64_two_elts:
; SSE41: # %bb.0:
; SSE41-NEXT: pinsrq $0, %rdi, %xmm0
; SSE41-NEXT: pinsrq $1, %rdi, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_i64_two_elts:
; AVX: # %bb.0:
; AVX-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm0
; AVX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
; AVX-NEXT: retq
; SSE-LABEL: insert_i64_two_elts:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
%i0 = insertelement <2 x i64> %x, i64 %s, i32 0
%i1 = insertelement <2 x i64> %i0, i64 %s, i32 1
ret <2 x i64> %i1

@@ -8,7 +8,8 @@
define x86_mmx @mmx_movzl(x86_mmx %x) nounwind {
; X86-LABEL: mmx_movzl:
; X86: ## %bb.0:
; X86-NEXT: movq {{\.?LCPI[0-9]+_[0-9]+}}, %mm0
; X86-NEXT: movl $32, %eax
; X86-NEXT: movd %eax, %mm0
; X86-NEXT: retl
;
; X64-LABEL: mmx_movzl: