[RISCV][NFC] Share interleave mask checking logic

This adds two new methods to ShuffleVectorInst, isInterleave and
isInterleaveMask, so that the logic to check if a shuffle mask is an
interleave can be shared across the TTI, codegen and the interleaved
access pass.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D145971
This commit is contained in:
Luke Lau 2023-03-13 18:12:12 +00:00
parent 0e79106fc9
commit a9d9616c0d
7 changed files with 179 additions and 116 deletions

View File

@ -2430,6 +2430,33 @@ public:
}
}
/// Return true if this shuffle interleaves its two input vectors together,
/// i.e. if its mask is an interleave mask with the given Factor over the
/// elements of both input operands. Always returns false when the first
/// operand is not a fixed length vector.
bool isInterleave(unsigned Factor);
/// Return true if the mask interleaves one or more input vectors together.
///
/// The mask is viewed as Factor lanes, lane I being the mask elements at
/// positions I, I + Factor, I + 2 * Factor, ...; every lane must select
/// consecutive input elements. Undef (negative) mask elements are ignored as
/// long as the defined elements of their lane still match that rule.
///
/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
/// E.g. For a Factor of 2 (LaneLen=4):
/// <0, 4, 1, 5, 2, 6, 3, 7>
/// E.g. For a Factor of 3 (LaneLen=4):
/// <4, 0, 9, 5, 1, 10, 6, 2, 11, 7, 3, 12>
/// E.g. For a Factor of 4 (LaneLen=2):
/// <0, 2, 6, 4, 1, 3, 7, 5>
///
/// The mask size must be a multiple of Factor, and LaneLen (the mask size
/// divided by Factor) must be a power of two; otherwise false is returned.
///
/// NumInputElts is the total number of elements in the input vectors. Each
/// lane must stay within it, i.e. its start index plus LaneLen must not
/// exceed NumInputElts.
///
/// StartIndexes is resized to Factor and receives the first index of each
/// vector being interleaved. A start index whose mask element was undef is
/// derived from the defined elements of the same lane, and is 0 if the whole
/// lane is undef. Entries are only guaranteed to be filled in when true is
/// returned.
/// E.g. <4, -1, 2, 5, 1, 3> (Factor=3): StartIndexes=<4, 0, 2>
static bool isInterleaveMask(ArrayRef<int> Mask, unsigned Factor,
unsigned NumInputElts,
SmallVectorImpl<unsigned> &StartIndexes);
/// Convenience overload for callers that only need to know whether \p Mask is
/// an interleave mask and have no use for the per-lane start indexes.
static bool isInterleaveMask(ArrayRef<int> Mask, unsigned Factor,
                             unsigned NumInputElts) {
  // The start indexes are computed by the full overload and then dropped.
  SmallVector<unsigned, 8> DiscardedStarts;
  const bool MaskIsInterleave =
      isInterleaveMask(Mask, Factor, NumInputElts, DiscardedStarts);
  return MaskIsInterleave;
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::ShuffleVector;

View File

@ -202,86 +202,15 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
/// The particular case of an RE-interleave mask is:
/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
unsigned MaxFactor, unsigned OpNumElts) {
unsigned NumElts = Mask.size();
static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
unsigned MaxFactor) {
unsigned NumElts = SVI->getShuffleMask().size();
if (NumElts < 4)
return false;
// Check potential Factors.
for (Factor = 2; Factor <= MaxFactor; Factor++) {
if (NumElts % Factor)
continue;
unsigned LaneLen = NumElts / Factor;
if (!isPowerOf2_32(LaneLen))
continue;
// Check whether each element matches the general interleaved rule.
// Ignore undef elements, as long as the defined elements match the rule.
// Outer loop processes all factors (x, y, z in the above example)
unsigned I = 0, J;
for (; I < Factor; I++) {
unsigned SavedLaneValue;
unsigned SavedNoUndefs = 0;
// Inner loop processes consecutive accesses (x, x+1... in the example)
for (J = 0; J < LaneLen - 1; J++) {
// Lane computes x's position in the Mask
unsigned Lane = J * Factor + I;
unsigned NextLane = Lane + Factor;
int LaneValue = Mask[Lane];
int NextLaneValue = Mask[NextLane];
// If both are defined, values must be sequential
if (LaneValue >= 0 && NextLaneValue >= 0 &&
LaneValue + 1 != NextLaneValue)
break;
// If the next value is undef, save the current one as reference
if (LaneValue >= 0 && NextLaneValue < 0) {
SavedLaneValue = LaneValue;
SavedNoUndefs = 1;
}
// Undefs are allowed, but defined elements must still be consecutive:
// i.e.: x,..., undef,..., x + 2,..., undef,..., undef,..., x + 5, ....
// Verify this by storing the last non-undef followed by an undef
// Check that following non-undef masks are incremented with the
// corresponding distance.
if (SavedNoUndefs > 0 && LaneValue < 0) {
SavedNoUndefs++;
if (NextLaneValue >= 0 &&
SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue)
break;
}
}
if (J < LaneLen - 1)
break;
int StartMask = 0;
if (Mask[I] >= 0) {
// Check that the start of the I range (J=0) is greater than 0
StartMask = Mask[I];
} else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
// StartMask defined by the last value in lane
StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
} else if (SavedNoUndefs > 0) {
// StartMask defined by some non-zero value in the j loop
StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
}
// else StartMask remains set to 0, i.e. all elements are undefs
if (StartMask < 0)
break;
// We must stay within the vectors; This case can happen with undefs.
if (StartMask + LaneLen > OpNumElts*2)
break;
}
// Found an interleaved mask of current factor.
if (I == Factor)
if (SVI->isInterleave(Factor))
return true;
}
@ -500,9 +429,7 @@ bool InterleavedAccess::lowerInterleavedStore(
// Check if the shufflevector is RE-interleave shuffle.
unsigned Factor;
unsigned OpNumElts =
cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
if (!isReInterleaveMask(SVI, Factor, MaxFactor))
return false;
LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");

View File

@ -2728,6 +2728,98 @@ bool ShuffleVectorInst::isOneUseSingleSourceMask(int VF) const {
return isOneUseSingleSourceMask(ShuffleMask, VF);
}
/// Return true if this shuffle's mask is an interleave mask of the given
/// \p Factor over the elements of its two input operands.
bool ShuffleVectorInst::isInterleave(unsigned Factor) {
  // shuffle_vector can only interleave fixed length vectors - for scalable
  // vectors, see the @llvm.experimental.vector.interleave2 intrinsic
  if (auto *FixedOpTy = dyn_cast<FixedVectorType>(getOperand(0)->getType())) {
    // The mask may draw from either of the two input vectors, so the number
    // of addressable input elements is twice the size of one operand.
    const unsigned TotalInputElts = FixedOpTy->getNumElements() * 2;
    return isInterleaveMask(ShuffleMask, Factor, TotalInputElts);
  }

  return false;
}
/// Return true if \p Mask interleaves \p Factor lanes of consecutive input
/// elements, where lane I consists of the mask elements at positions
/// I, I + Factor, I + 2 * Factor, ...
///
/// \param Mask          The shuffle mask; negative entries are undef.
/// \param Factor        The number of lanes being interleaved. A factor of 0
///                      is never an interleave.
/// \param NumInputElts  Total number of elements in the input vectors; every
///                      lane must fit below this bound.
/// \param StartIndexes  Resized to \p Factor; on success entry I holds the
///                      first input index of lane I (0 for an all-undef lane).
/// \returns true if every lane matches the interleave rule, false otherwise.
bool ShuffleVectorInst::isInterleaveMask(
    ArrayRef<int> Mask, unsigned Factor, unsigned NumInputElts,
    SmallVectorImpl<unsigned> &StartIndexes) {
  // A factor of zero cannot describe an interleave, and the modulo and
  // division below would be undefined for it.
  if (Factor == 0)
    return false;

  unsigned NumElts = Mask.size();
  if (NumElts % Factor)
    return false;

  unsigned LaneLen = NumElts / Factor;
  if (!isPowerOf2_32(LaneLen))
    return false;

  StartIndexes.resize(Factor);

  // Check whether each element matches the general interleaved rule.
  // Ignore undef elements, as long as the defined elements match the rule.
  // Outer loop processes each of the Factor lanes in turn.
  for (unsigned I = 0; I < Factor; I++) {
    // Last defined value that was followed by an undef, and the distance
    // (in lane steps) walked since then. SavedLaneValue is only read while
    // SavedNoUndefs > 0; it is initialized to keep that invariant explicit.
    unsigned SavedLaneValue = 0;
    unsigned SavedNoUndefs = 0;

    // Inner loop processes consecutive accesses within lane I
    // (x, x + 1, x + 2, ...).
    unsigned J = 0;
    for (; J < LaneLen - 1; J++) {
      // Lane computes the current element's position in the Mask
      unsigned Lane = J * Factor + I;
      unsigned NextLane = Lane + Factor;
      int LaneValue = Mask[Lane];
      int NextLaneValue = Mask[NextLane];

      // If both are defined, values must be sequential
      if (LaneValue >= 0 && NextLaneValue >= 0 &&
          LaneValue + 1 != NextLaneValue)
        break;

      // If the next value is undef, save the current one as reference
      if (LaneValue >= 0 && NextLaneValue < 0) {
        SavedLaneValue = LaneValue;
        SavedNoUndefs = 1;
      }

      // Undefs are allowed, but defined elements must still be consecutive:
      // i.e.: x,..., undef,..., x + 2,..., undef,..., undef,..., x + 5, ....
      // Verify this by storing the last non-undef followed by an undef
      // Check that following non-undef masks are incremented with the
      // corresponding distance.
      if (SavedNoUndefs > 0 && LaneValue < 0) {
        SavedNoUndefs++;
        if (NextLaneValue >= 0 &&
            SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue)
          break;
      }
    }

    // The inner loop bailed out early: this lane is not consecutive.
    if (J < LaneLen - 1)
      return false;

    int StartMask = 0;
    if (Mask[I] >= 0) {
      // The first element of the lane is defined: it is the start index.
      StartMask = Mask[I];
    } else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
      // StartMask defined by the last value in lane
      // (J == LaneLen - 1 here, the distance back to the lane's start).
      StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
    } else if (SavedNoUndefs > 0) {
      // StartMask defined by some defined value seen in the J loop;
      // LaneLen - 1 - SavedNoUndefs is that value's position in the lane.
      StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
    }
    // else StartMask remains set to 0, i.e. all elements are undefs

    // A start derived from a later element may fall before index 0.
    if (StartMask < 0)
      return false;
    // We must stay within the vectors; This case can happen with undefs.
    if (StartMask + LaneLen > NumInputElts)
      return false;

    StartIndexes[I] = StartMask;
  }

  return true;
}
//===----------------------------------------------------------------------===//
// InsertValueInst Class
//===----------------------------------------------------------------------===//

View File

@ -31,6 +31,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
@ -3062,46 +3063,19 @@ static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
return false;
int Size = Mask.size();
int HalfSize = Size / 2;
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
int Srcs[] = {-1, -1};
for (int i = 0; i != Size; ++i) {
// Ignore undef elements.
if (Mask[i] < 0)
continue;
SmallVector<unsigned, 2> StartIndexes;
if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
return false;
// Is this an even or odd element.
int Pol = i % 2;
// Ensure we consistently use the same half source for this polarity.
int Src = alignDown(Mask[i], HalfSize);
if (Srcs[Pol] < 0)
Srcs[Pol] = Src;
if (Srcs[Pol] != Src)
return false;
// Make sure the element within the source is appropriate for this element
// in the destination.
int Elt = Mask[i] % HalfSize;
if (Elt != i / 2)
return false;
}
EvenSrc = StartIndexes[0] % 2 ? StartIndexes[1] : StartIndexes[0];
OddSrc = StartIndexes[0] % 2 ? StartIndexes[0] : StartIndexes[1];
// One source should be low half of first vector.
if (Srcs[0] != 0 && Srcs[1] != 0)
if (EvenSrc != 0 && OddSrc != 0)
return false;
// Other source should be the upper half of the first source or the lower
// half of the second source.
// FIXME: This is only a heuristic to avoid regressions.
if (Srcs[0] != HalfSize && Srcs[0] != Size && Srcs[1] != HalfSize &&
Srcs[1] != Size)
return false;
EvenSrc = Srcs[0];
OddSrc = Srcs[1];
return true;
}

View File

@ -10,10 +10,10 @@
#include "MCTargetDesc/RISCVMatInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Instructions.h"
#include <cmath>
#include <optional>
using namespace llvm;
@ -261,16 +261,17 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
MVT EltTp = LT.second.getVectorElementType();
// If the size of the element is < ELEN then shuffles of interleaves and
// deinterleaves of 2 vectors can be lowered into the following sequences
// deinterleaves of 2 vectors can be lowered into the following
// sequences
if (EltTp.getScalarSizeInBits() < ST->getELEN()) {
auto InterleaveMask = createInterleaveMask(Mask.size() / 2, 2);
// Example sequence:
// vsetivli zero, 4, e8, mf4, ta, ma (ignored)
// vsetivli zero, 4, e8, mf4, ta, ma (ignored)
// vwaddu.vv v10, v8, v9
// li a0, -1 (ignored)
// vwmaccu.vx v10, a0, v9
if (equal(InterleaveMask, Mask))
return 2 * LT.first * getLMULCost(LT.second);
if (ShuffleVectorInst::isInterleaveMask(Mask, 2, Mask.size() * 2))
return 2 * LT.first * getLMULCost(LT.second);
if (Mask[0] == 0 || Mask[0] == 1) {
auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());

View File

@ -1,6 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v | FileCheck %s -check-prefixes=CHECK,RV32
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v | FileCheck %s -check-prefixes=CHECK,RV64
; %concat places %v0 in elements 0-1 and %v1 in elements 2-3, so the mask
; <2, 0, 3, 1> takes the even result elements from %v1 and the odd ones from
; %v0. The mask here therefore interleaves (%v1, %v0), not (%v0, %v1): it
; should still be cheap.
define <4 x i8> @interleave2_v2i8(<2 x i8> %v0, <2 x i8> %v1) {
; CHECK-LABEL: 'interleave2_v2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %concat = shufflevector <2 x i8> %v0, <2 x i8> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = shufflevector <4 x i8> %concat, <4 x i8> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i8> %res
;
%concat = shufflevector <2 x i8> %v0, <2 x i8> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%res = shufflevector <4 x i8> %concat, <4 x i8> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
ret <4 x i8> %res
}
define <8 x i8> @interleave2_v8i8(<4 x i8> %v0, <4 x i8> %v1) {
; CHECK-LABEL: 'interleave2_v8i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %concat = shufflevector <4 x i8> %v0, <4 x i8> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>

View File

@ -116,4 +116,33 @@ TEST(ShuffleVectorInst, isOneUseSingleSourceMask) {
ShuffleVectorInst::isOneUseSingleSourceMask({0, 1, 2, 3, 3, 3, 1, 0}, 4));
}
// Checks ShuffleVectorInst::isInterleaveMask against factor 2, 3 and 4 masks,
// covering swapped lane order, undef (-1) elements, a non-consecutive lane
// and a lane that runs past the input elements.
TEST(ShuffleVectorInst, isInterleaveMask) {
SmallVector<unsigned> StartIndexes;
// Factor 2, lanes in operand order: <0..3> interleaved with <4..7>.
ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({0, 4, 1, 5, 2, 6, 3, 7}, 2,
8, StartIndexes));
ASSERT_EQ(StartIndexes, SmallVector<unsigned>({0, 4}));
// The second lane reads 4, 6, 6, 7, which is not consecutive.
ASSERT_FALSE(
ShuffleVectorInst::isInterleaveMask({0, 4, 1, 6, 2, 6, 3, 7}, 2, 8));
// Same lanes as the first case but swapped: <4..7> comes first.
ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({4, 0, 5, 1, 6, 2, 7, 3}, 2,
8, StartIndexes));
ASSERT_EQ(StartIndexes, SmallVector<unsigned>({4, 0}));
// Undef elements in the middle of the first lane (4, -1, -1, 7) are accepted.
ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({4, 0, -1, 1, -1, 2, 7, 3}, 2,
8, StartIndexes));
ASSERT_EQ(StartIndexes, SmallVector<unsigned>({4, 0}));
// Factor 3 with a lane length of 2.
ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({0, 2, 4, 1, 3, 5}, 3, 6,
StartIndexes));
ASSERT_EQ(StartIndexes, SmallVector<unsigned>({0, 2, 4}));
// The second lane is (-1, 3): its start index 2 is derived from the last
// element of the lane.
ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({4, -1, 0, 5, 3, 1}, 3, 6,
StartIndexes));
ASSERT_EQ(StartIndexes, SmallVector<unsigned>({4, 2, 0}));
// The first lane starts at index 8, which is outside the 8 input elements.
ASSERT_FALSE(
ShuffleVectorInst::isInterleaveMask({8, 2, 12, 4, 9, 3, 13, 5}, 4, 8));
}
} // end anonymous namespace