@@ -808,14 +808,16 @@ void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
808
808
809
809
// Returns true when the G_BUILD_VECTOR \p MI is a splat (as reported by
// getAArch64VectorSplat) that is not an all-zeros or all-ones constant vector.
//
// NOTE: this body is shown as a diff hunk. Lines prefixed with '-' are the
// previous implementation, lines prefixed with '+' are its replacement, and
// the bare numbers are old/new line numbers from the diff view.
//
// \param MI  the instruction to inspect; asserted to be a G_BUILD_VECTOR.
// \param MRI register info handed to the splat / build-vector helpers.
bool matchBuildVectorToDup (MachineInstr &MI, MachineRegisterInfo &MRI) {
810
810
assert (MI.getOpcode () == TargetOpcode::G_BUILD_VECTOR);
811
-
811
+ auto Splat = getAArch64VectorSplat (MI, MRI);
812
// No splat found: nothing to match.
+ if (!Splat)
813
+ return false ;
814
// A splat of a register value is accepted unconditionally; only constant
// splats go through the 0 / -1 check further down.
+ if (Splat->isReg ())
815
+ return true ;
812
816
// Later, during selection, we'll try to match imported patterns using
813
817
// immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
814
818
// G_BUILD_VECTORs which could match those patterns.
815
- if (isBuildVectorAllZeros (MI, MRI) || isBuildVectorAllOnes (MI, MRI))
816
- return false ;
817
-
818
- return getAArch64VectorSplat (MI, MRI).has_value ();
819
// The new version reads the splat constant once and rejects 0 (all zeros) and
// -1 (all ones) directly instead of calling the isBuildVectorAll* helpers.
+ int64_t Cst = Splat->getCst ();
820
+ return (Cst != 0 && Cst != -1 );
819
821
}
820
822
821
823
void applyBuildVectorToDup (MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -931,40 +933,58 @@ void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
931
933
932
934
/// \returns a function which builds a vector floating point compare instruction
933
935
/// for a condition code \p CC.
936
+ /// \param [in] IsZero - True if the comparison is against 0.
/// When \p IsZero is set, the returned builder emits the single-operand
/// "compare against zero" opcode (G_FCM*Z) on \p LHS only; \p RHS is not used
/// as an operand, although its type is still asserted to match \p LHS.
934
937
/// \param [in] NoNans - True if the target has NoNansFPMath.
/// NOTE(review): \p NoNans is not referenced in the body shown in this hunk.
///
/// NOTE: this body is shown as a diff hunk. Lines prefixed with '-' are the
/// previous implementation, lines prefixed with '+' are its replacement, and
/// the bare numbers are old/new line numbers from the diff view.
935
938
std::function<Register(MachineIRBuilder &)>
936
- getVectorFCMP (AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans ,
937
- MachineRegisterInfo &MRI) {
939
+ getVectorFCMP (AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero ,
940
+ bool NoNans, MachineRegisterInfo &MRI) {
938
941
// The result type of every compare built below is the type of LHS.
LLT DstTy = MRI.getType (LHS);
939
942
assert (DstTy.isVector () && " Expected vector types only?" );
940
943
assert (DstTy == MRI.getType (RHS) && " Src and Dst types must match!" );
941
944
switch (CC) {
942
945
// Only the six condition codes handled below are expected here.
default :
943
946
llvm_unreachable (" Unexpected condition code!" );
944
947
// NE: build the equality compare (against zero or against RHS) and invert
// the result with a NOT.
case AArch64CC::NE:
945
- return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
946
- auto FCmp = MIB.buildInstr (AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
948
+ return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
949
+ auto FCmp = IsZero
950
+ ? MIB.buildInstr (AArch64::G_FCMEQZ, {DstTy}, {LHS})
951
+ : MIB.buildInstr (AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
947
952
return MIB.buildNot (DstTy, FCmp).getReg (0 );
948
953
};
949
954
// EQ: G_FCMEQZ on LHS when comparing against zero, else G_FCMEQ LHS, RHS.
case AArch64CC::EQ:
950
- return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
951
- return MIB.buildInstr (AArch64::G_FCMEQ, {DstTy}, {LHS, RHS}).getReg (0 );
955
+ return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
956
+ return IsZero
957
+ ? MIB.buildInstr (AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg (0 )
958
+ : MIB.buildInstr (AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
959
+ .getReg (0 );
952
960
};
953
961
// GE: G_FCMGEZ on LHS when comparing against zero, else G_FCMGE LHS, RHS.
case AArch64CC::GE:
954
- return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
955
- return MIB.buildInstr (AArch64::G_FCMGE, {DstTy}, {LHS, RHS}).getReg (0 );
962
+ return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
963
+ return IsZero
964
+ ? MIB.buildInstr (AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg (0 )
965
+ : MIB.buildInstr (AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
966
+ .getReg (0 );
956
967
};
957
968
// GT: G_FCMGTZ on LHS when comparing against zero, else G_FCMGT LHS, RHS.
case AArch64CC::GT:
958
- return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
959
- return MIB.buildInstr (AArch64::G_FCMGT, {DstTy}, {LHS, RHS}).getReg (0 );
969
+ return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
970
+ return IsZero
971
+ ? MIB.buildInstr (AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg (0 )
972
+ : MIB.buildInstr (AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
973
+ .getReg (0 );
960
974
};
961
975
// LS: the two-register form is G_FCMGE with the operands swapped (RHS, LHS);
// the zero form is G_FCMLEZ applied to LHS.
case AArch64CC::LS:
962
- return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
963
- return MIB.buildInstr (AArch64::G_FCMGE, {DstTy}, {RHS, LHS}).getReg (0 );
976
+ return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
977
+ return IsZero
978
+ ? MIB.buildInstr (AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg (0 )
979
+ : MIB.buildInstr (AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
980
+ .getReg (0 );
964
981
};
965
982
// MI: the two-register form is G_FCMGT with the operands swapped (RHS, LHS);
// the zero form is G_FCMLTZ applied to LHS.
case AArch64CC::MI:
966
- return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
967
- return MIB.buildInstr (AArch64::G_FCMGT, {DstTy}, {RHS, LHS}).getReg (0 );
983
+ return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
984
+ return IsZero
985
+ ? MIB.buildInstr (AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg (0 )
986
+ : MIB.buildInstr (AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
987
+ .getReg (0 );
968
988
};
969
989
}
970
990
}
@@ -1004,17 +1024,23 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
1004
1024
1005
1025
LLT DstTy = MRI.getType (Dst);
1006
1026
1027
+ auto Splat = getAArch64VectorSplat (*MRI.getVRegDef (RHS), MRI);
1028
+
1029
+ // Compares against 0 have special target-specific pseudos.
1030
+ bool IsZero = Splat && Splat->isCst () && Splat->getCst () == 0 ;
1031
+
1007
1032
bool Invert = false ;
1008
1033
AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
1009
1034
if ((Pred == CmpInst::Predicate::FCMP_ORD ||
1010
1035
Pred == CmpInst::Predicate::FCMP_UNO) &&
1011
- isBuildVectorAllZeros (*MRI. getVRegDef (RHS), MRI) ) {
1036
+ IsZero ) {
1012
1037
// The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
1013
1038
// NaN, so equivalent to a == a and doesn't need the two comparisons an
1014
1039
// "ord" normally would.
1015
1040
// Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
1016
1041
// thus equivalent to a != a.
1017
1042
RHS = LHS;
1043
+ IsZero = false ;
1018
1044
CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
1019
1045
} else
1020
1046
changeVectorFCMPPredToAArch64CC (Pred, CC, CC2, Invert);
@@ -1025,12 +1051,12 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
1025
1051
const bool NoNans =
1026
1052
ST.getTargetLowering ()->getTargetMachine ().Options .NoNaNsFPMath ;
1027
1053
1028
- auto Cmp = getVectorFCMP (CC, LHS, RHS, NoNans, MRI);
1054
+ auto Cmp = getVectorFCMP (CC, LHS, RHS, IsZero, NoNans, MRI);
1029
1055
Register CmpRes;
1030
1056
if (CC2 == AArch64CC::AL)
1031
1057
CmpRes = Cmp (MIB);
1032
1058
else {
1033
- auto Cmp2 = getVectorFCMP (CC2, LHS, RHS, NoNans, MRI);
1059
+ auto Cmp2 = getVectorFCMP (CC2, LHS, RHS, IsZero, NoNans, MRI);
1034
1060
auto Cmp2Dst = Cmp2 (MIB);
1035
1061
auto Cmp1Dst = Cmp (MIB);
1036
1062
CmpRes = MIB.buildOr (DstTy, Cmp1Dst, Cmp2Dst).getReg (0 );
0 commit comments