[RISCV][NFC] Share interleave mask checking logic
This adds two new methods to ShuffleVectorInst, isInterleave and isInterleaveMask, so that the logic to check if a shuffle mask is an interleave can be shared across the TTI, codegen and the interleaved access pass. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D145971
This commit is contained in:
parent
0e79106fc9
commit
a9d9616c0d
|
@ -2430,6 +2430,33 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
/// Return if this shuffle interleaves its two input vectors together.
|
||||
bool isInterleave(unsigned Factor);
|
||||
|
||||
/// Return true if the mask interleaves one or more input vectors together.
|
||||
///
|
||||
/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
|
||||
/// E.g. For a Factor of 2 (LaneLen=4):
|
||||
/// <0, 4, 1, 5, 2, 6, 3, 7>
|
||||
/// E.g. For a Factor of 3 (LaneLen=4):
|
||||
/// <4, 0, 9, 5, 1, 10, 6, 2, 11, 7, 3, 12>
|
||||
/// E.g. For a Factor of 4 (LaneLen=2):
|
||||
/// <0, 2, 6, 4, 1, 3, 7, 5>
|
||||
///
|
||||
/// NumInputElts is the total number of elements in the input vectors.
|
||||
///
|
||||
/// StartIndexes are the first indexes of each vector being interleaved,
|
||||
/// substituting any indexes that were undef
|
||||
/// E.g. <4, -1, 2, 5, 1, 3> (Factor=3): StartIndexes=<4, 0, 2>
|
||||
static bool isInterleaveMask(ArrayRef<int> Mask, unsigned Factor,
|
||||
unsigned NumInputElts,
|
||||
SmallVectorImpl<unsigned> &StartIndexes);
|
||||
static bool isInterleaveMask(ArrayRef<int> Mask, unsigned Factor,
|
||||
unsigned NumInputElts) {
|
||||
SmallVector<unsigned, 8> StartIndexes;
|
||||
return isInterleaveMask(Mask, Factor, NumInputElts, StartIndexes);
|
||||
}
|
||||
|
||||
// Methods for support type inquiry through isa, cast, and dyn_cast:
|
||||
static bool classof(const Instruction *I) {
|
||||
return I->getOpcode() == Instruction::ShuffleVector;
|
||||
|
|
|
@ -202,86 +202,15 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
|
|||
/// The particular case of an RE-interleave mask is:
|
||||
/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
|
||||
/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
|
||||
static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
|
||||
unsigned MaxFactor, unsigned OpNumElts) {
|
||||
unsigned NumElts = Mask.size();
|
||||
static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
|
||||
unsigned MaxFactor) {
|
||||
unsigned NumElts = SVI->getShuffleMask().size();
|
||||
if (NumElts < 4)
|
||||
return false;
|
||||
|
||||
// Check potential Factors.
|
||||
for (Factor = 2; Factor <= MaxFactor; Factor++) {
|
||||
if (NumElts % Factor)
|
||||
continue;
|
||||
|
||||
unsigned LaneLen = NumElts / Factor;
|
||||
if (!isPowerOf2_32(LaneLen))
|
||||
continue;
|
||||
|
||||
// Check whether each element matches the general interleaved rule.
|
||||
// Ignore undef elements, as long as the defined elements match the rule.
|
||||
// Outer loop processes all factors (x, y, z in the above example)
|
||||
unsigned I = 0, J;
|
||||
for (; I < Factor; I++) {
|
||||
unsigned SavedLaneValue;
|
||||
unsigned SavedNoUndefs = 0;
|
||||
|
||||
// Inner loop processes consecutive accesses (x, x+1... in the example)
|
||||
for (J = 0; J < LaneLen - 1; J++) {
|
||||
// Lane computes x's position in the Mask
|
||||
unsigned Lane = J * Factor + I;
|
||||
unsigned NextLane = Lane + Factor;
|
||||
int LaneValue = Mask[Lane];
|
||||
int NextLaneValue = Mask[NextLane];
|
||||
|
||||
// If both are defined, values must be sequential
|
||||
if (LaneValue >= 0 && NextLaneValue >= 0 &&
|
||||
LaneValue + 1 != NextLaneValue)
|
||||
break;
|
||||
|
||||
// If the next value is undef, save the current one as reference
|
||||
if (LaneValue >= 0 && NextLaneValue < 0) {
|
||||
SavedLaneValue = LaneValue;
|
||||
SavedNoUndefs = 1;
|
||||
}
|
||||
|
||||
// Undefs are allowed, but defined elements must still be consecutive:
|
||||
// i.e.: x,..., undef,..., x + 2,..., undef,..., undef,..., x + 5, ....
|
||||
// Verify this by storing the last non-undef followed by an undef
|
||||
// Check that following non-undef masks are incremented with the
|
||||
// corresponding distance.
|
||||
if (SavedNoUndefs > 0 && LaneValue < 0) {
|
||||
SavedNoUndefs++;
|
||||
if (NextLaneValue >= 0 &&
|
||||
SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (J < LaneLen - 1)
|
||||
break;
|
||||
|
||||
int StartMask = 0;
|
||||
if (Mask[I] >= 0) {
|
||||
// Check that the start of the I range (J=0) is greater than 0
|
||||
StartMask = Mask[I];
|
||||
} else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
|
||||
// StartMask defined by the last value in lane
|
||||
StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
|
||||
} else if (SavedNoUndefs > 0) {
|
||||
// StartMask defined by some non-zero value in the j loop
|
||||
StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
|
||||
}
|
||||
// else StartMask remains set to 0, i.e. all elements are undefs
|
||||
|
||||
if (StartMask < 0)
|
||||
break;
|
||||
// We must stay within the vectors; This case can happen with undefs.
|
||||
if (StartMask + LaneLen > OpNumElts*2)
|
||||
break;
|
||||
}
|
||||
|
||||
// Found an interleaved mask of current factor.
|
||||
if (I == Factor)
|
||||
if (SVI->isInterleave(Factor))
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -500,9 +429,7 @@ bool InterleavedAccess::lowerInterleavedStore(
|
|||
|
||||
// Check if the shufflevector is RE-interleave shuffle.
|
||||
unsigned Factor;
|
||||
unsigned OpNumElts =
|
||||
cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
|
||||
if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
|
||||
if (!isReInterleaveMask(SVI, Factor, MaxFactor))
|
||||
return false;
|
||||
|
||||
LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
|
||||
|
|
|
@ -2728,6 +2728,98 @@ bool ShuffleVectorInst::isOneUseSingleSourceMask(int VF) const {
|
|||
return isOneUseSingleSourceMask(ShuffleMask, VF);
|
||||
}
|
||||
|
||||
bool ShuffleVectorInst::isInterleave(unsigned Factor) {
|
||||
FixedVectorType *OpTy = dyn_cast<FixedVectorType>(getOperand(0)->getType());
|
||||
// shuffle_vector can only interleave fixed length vectors - for scalable
|
||||
// vectors, see the @llvm.experimental.vector.interleave2 intrinsic
|
||||
if (!OpTy)
|
||||
return false;
|
||||
unsigned OpNumElts = OpTy->getNumElements();
|
||||
|
||||
return isInterleaveMask(ShuffleMask, Factor, OpNumElts * 2);
|
||||
}
|
||||
|
||||
bool ShuffleVectorInst::isInterleaveMask(
|
||||
ArrayRef<int> Mask, unsigned Factor, unsigned NumInputElts,
|
||||
SmallVectorImpl<unsigned> &StartIndexes) {
|
||||
unsigned NumElts = Mask.size();
|
||||
if (NumElts % Factor)
|
||||
return false;
|
||||
|
||||
unsigned LaneLen = NumElts / Factor;
|
||||
if (!isPowerOf2_32(LaneLen))
|
||||
return false;
|
||||
|
||||
StartIndexes.resize(Factor);
|
||||
|
||||
// Check whether each element matches the general interleaved rule.
|
||||
// Ignore undef elements, as long as the defined elements match the rule.
|
||||
// Outer loop processes all factors (x, y, z in the above example)
|
||||
unsigned I = 0, J;
|
||||
for (; I < Factor; I++) {
|
||||
unsigned SavedLaneValue;
|
||||
unsigned SavedNoUndefs = 0;
|
||||
|
||||
// Inner loop processes consecutive accesses (x, x+1... in the example)
|
||||
for (J = 0; J < LaneLen - 1; J++) {
|
||||
// Lane computes x's position in the Mask
|
||||
unsigned Lane = J * Factor + I;
|
||||
unsigned NextLane = Lane + Factor;
|
||||
int LaneValue = Mask[Lane];
|
||||
int NextLaneValue = Mask[NextLane];
|
||||
|
||||
// If both are defined, values must be sequential
|
||||
if (LaneValue >= 0 && NextLaneValue >= 0 &&
|
||||
LaneValue + 1 != NextLaneValue)
|
||||
break;
|
||||
|
||||
// If the next value is undef, save the current one as reference
|
||||
if (LaneValue >= 0 && NextLaneValue < 0) {
|
||||
SavedLaneValue = LaneValue;
|
||||
SavedNoUndefs = 1;
|
||||
}
|
||||
|
||||
// Undefs are allowed, but defined elements must still be consecutive:
|
||||
// i.e.: x,..., undef,..., x + 2,..., undef,..., undef,..., x + 5, ....
|
||||
// Verify this by storing the last non-undef followed by an undef
|
||||
// Check that following non-undef masks are incremented with the
|
||||
// corresponding distance.
|
||||
if (SavedNoUndefs > 0 && LaneValue < 0) {
|
||||
SavedNoUndefs++;
|
||||
if (NextLaneValue >= 0 &&
|
||||
SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (J < LaneLen - 1)
|
||||
return false;
|
||||
|
||||
int StartMask = 0;
|
||||
if (Mask[I] >= 0) {
|
||||
// Check that the start of the I range (J=0) is greater than 0
|
||||
StartMask = Mask[I];
|
||||
} else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
|
||||
// StartMask defined by the last value in lane
|
||||
StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
|
||||
} else if (SavedNoUndefs > 0) {
|
||||
// StartMask defined by some non-zero value in the j loop
|
||||
StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
|
||||
}
|
||||
// else StartMask remains set to 0, i.e. all elements are undefs
|
||||
|
||||
if (StartMask < 0)
|
||||
return false;
|
||||
// We must stay within the vectors; This case can happen with undefs.
|
||||
if (StartMask + LaneLen > NumInputElts)
|
||||
return false;
|
||||
|
||||
StartIndexes[I] = StartMask;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// InsertValueInst Class
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "llvm/IR/DiagnosticInfo.h"
|
||||
#include "llvm/IR/DiagnosticPrinter.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicsRISCV.h"
|
||||
#include "llvm/IR/PatternMatch.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
@ -3062,46 +3063,19 @@ static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
|
|||
return false;
|
||||
|
||||
int Size = Mask.size();
|
||||
int HalfSize = Size / 2;
|
||||
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
|
||||
|
||||
int Srcs[] = {-1, -1};
|
||||
for (int i = 0; i != Size; ++i) {
|
||||
// Ignore undef elements.
|
||||
if (Mask[i] < 0)
|
||||
continue;
|
||||
SmallVector<unsigned, 2> StartIndexes;
|
||||
if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
|
||||
return false;
|
||||
|
||||
// Is this an even or odd element.
|
||||
int Pol = i % 2;
|
||||
|
||||
// Ensure we consistently use the same half source for this polarity.
|
||||
int Src = alignDown(Mask[i], HalfSize);
|
||||
if (Srcs[Pol] < 0)
|
||||
Srcs[Pol] = Src;
|
||||
if (Srcs[Pol] != Src)
|
||||
return false;
|
||||
|
||||
// Make sure the element within the source is appropriate for this element
|
||||
// in the destination.
|
||||
int Elt = Mask[i] % HalfSize;
|
||||
if (Elt != i / 2)
|
||||
return false;
|
||||
}
|
||||
EvenSrc = StartIndexes[0] % 2 ? StartIndexes[1] : StartIndexes[0];
|
||||
OddSrc = StartIndexes[0] % 2 ? StartIndexes[0] : StartIndexes[1];
|
||||
|
||||
// One source should be low half of first vector.
|
||||
if (Srcs[0] != 0 && Srcs[1] != 0)
|
||||
if (EvenSrc != 0 && OddSrc != 0)
|
||||
return false;
|
||||
|
||||
// Other source should be the upper half of the first source or the lower
|
||||
// half of the second source.
|
||||
// FIXME: This is only a heuristic to avoid regressions.
|
||||
if (Srcs[0] != HalfSize && Srcs[0] != Size && Srcs[1] != HalfSize &&
|
||||
Srcs[1] != Size)
|
||||
return false;
|
||||
|
||||
EvenSrc = Srcs[0];
|
||||
OddSrc = Srcs[1];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -10,10 +10,10 @@
|
|||
#include "MCTargetDesc/RISCVMatInt.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/Analysis/VectorUtils.h"
|
||||
#include "llvm/CodeGen/BasicTTIImpl.h"
|
||||
#include "llvm/CodeGen/CostTable.h"
|
||||
#include "llvm/CodeGen/TargetLowering.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include <cmath>
|
||||
#include <optional>
|
||||
using namespace llvm;
|
||||
|
@ -261,16 +261,17 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
|||
if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
|
||||
MVT EltTp = LT.second.getVectorElementType();
|
||||
// If the size of the element is < ELEN then shuffles of interleaves and
|
||||
// deinterleaves of 2 vectors can be lowered into the following sequences
|
||||
// deinterleaves of 2 vectors can be lowered into the following
|
||||
// sequences
|
||||
if (EltTp.getScalarSizeInBits() < ST->getELEN()) {
|
||||
auto InterleaveMask = createInterleaveMask(Mask.size() / 2, 2);
|
||||
// Example sequence:
|
||||
// vsetivli zero, 4, e8, mf4, ta, ma (ignored)
|
||||
// vsetivli zero, 4, e8, mf4, ta, ma (ignored)
|
||||
// vwaddu.vv v10, v8, v9
|
||||
// li a0, -1 (ignored)
|
||||
// vwmaccu.vx v10, a0, v9
|
||||
if (equal(InterleaveMask, Mask))
|
||||
return 2 * LT.first * getLMULCost(LT.second);
|
||||
if (ShuffleVectorInst::isInterleaveMask(Mask, 2, Mask.size() * 2))
|
||||
return 2 * LT.first * getLMULCost(LT.second);
|
||||
|
||||
if (Mask[0] == 0 || Mask[0] == 1) {
|
||||
auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());
|
||||
|
|
|
@ -1,6 +1,19 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
|
||||
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v | FileCheck %s -check-prefixes=CHECK,RV32
|
||||
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v | FileCheck %s -check-prefixes=CHECK,RV64
|
||||
|
||||
; The mask here interleaves (%v1, %v0), not (%v0, %v1): it should still be cheap.
|
||||
define <4 x i8> @interleave2_v2i8(<2 x i8> %v0, <2 x i8> %v1) {
|
||||
; CHECK-LABEL: 'interleave2_v2i8'
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %concat = shufflevector <2 x i8> %v0, <2 x i8> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = shufflevector <4 x i8> %concat, <4 x i8> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i8> %res
|
||||
;
|
||||
%concat = shufflevector <2 x i8> %v0, <2 x i8> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%res = shufflevector <4 x i8> %concat, <4 x i8> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
|
||||
ret <4 x i8> %res
|
||||
}
|
||||
|
||||
define <8 x i8> @interleave2_v8i8(<4 x i8> %v0, <4 x i8> %v1) {
|
||||
; CHECK-LABEL: 'interleave2_v8i8'
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %concat = shufflevector <4 x i8> %v0, <4 x i8> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
|
|
|
@ -116,4 +116,33 @@ TEST(ShuffleVectorInst, isOneUseSingleSourceMask) {
|
|||
ShuffleVectorInst::isOneUseSingleSourceMask({0, 1, 2, 3, 3, 3, 1, 0}, 4));
|
||||
}
|
||||
|
||||
TEST(ShuffleVectorInst, isInterleaveMask) {
|
||||
SmallVector<unsigned> StartIndexes;
|
||||
ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({0, 4, 1, 5, 2, 6, 3, 7}, 2,
|
||||
8, StartIndexes));
|
||||
ASSERT_EQ(StartIndexes, SmallVector<unsigned>({0, 4}));
|
||||
|
||||
ASSERT_FALSE(
|
||||
ShuffleVectorInst::isInterleaveMask({0, 4, 1, 6, 2, 6, 3, 7}, 2, 8));
|
||||
|
||||
ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({4, 0, 5, 1, 6, 2, 7, 3}, 2,
|
||||
8, StartIndexes));
|
||||
ASSERT_EQ(StartIndexes, SmallVector<unsigned>({4, 0}));
|
||||
|
||||
ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({4, 0, -1, 1, -1, 2, 7, 3}, 2,
|
||||
8, StartIndexes));
|
||||
ASSERT_EQ(StartIndexes, SmallVector<unsigned>({4, 0}));
|
||||
|
||||
ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({0, 2, 4, 1, 3, 5}, 3, 6,
|
||||
StartIndexes));
|
||||
ASSERT_EQ(StartIndexes, SmallVector<unsigned>({0, 2, 4}));
|
||||
|
||||
ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({4, -1, 0, 5, 3, 1}, 3, 6,
|
||||
StartIndexes));
|
||||
ASSERT_EQ(StartIndexes, SmallVector<unsigned>({4, 2, 0}));
|
||||
|
||||
ASSERT_FALSE(
|
||||
ShuffleVectorInst::isInterleaveMask({8, 2, 12, 4, 9, 3, 13, 5}, 4, 8));
|
||||
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
|
Loading…
Reference in New Issue
Block a user