intel
diff --git a/‎llvm/lib/CodeGen/GlobalISel/Utils.cpp
+1-2 b/‎llvm/lib/CodeGen/GlobalISel/Utils.cpp
+1-2
diff --git a/‎llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+36-1 b/‎llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+36-1
diff --git a/‎llvm/lib/Target/AArch64/AArch64ISelLowering.h
+7 b/‎llvm/lib/Target/AArch64/AArch64ISelLowering.h
+7
diff --git a/‎llvm/lib/Target/AArch64/AArch64InstrFormats.td
+1-1 b/‎llvm/lib/Target/AArch64/AArch64InstrFormats.td
+1-1
diff --git a/‎llvm/lib/Target/AArch64/AArch64InstrGISel.td
+36 b/‎llvm/lib/Target/AArch64/AArch64InstrGISel.td
+36
diff --git a/‎llvm/lib/Target/AArch64/AArch64InstrInfo.td
+5-14 b/‎llvm/lib/Target/AArch64/AArch64InstrInfo.td
+5-14
diff --git a/‎llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+48-22 b/‎llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+48-22
@@ -1385,8 +1385,7 @@ bool llvm::isBuildVectorConstantSplat(const Register Reg,
                                       const MachineRegisterInfo &MRI,
                                       int64_t SplatValue, bool AllowUndef) {
   if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef))
-    return SplatValAndReg->Value.getSExtValue() == SplatValue;
-
+    return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue));
   return false;
 }
 
 
@@ -2602,6 +2602,11 @@ unsigned AArch64TargetLowering::ComputeNumSignBitsForTargetNode(
   case AArch64ISD::FCMEQ:
   case AArch64ISD::FCMGE:
   case AArch64ISD::FCMGT:
+  case AArch64ISD::FCMEQz:
+  case AArch64ISD::FCMGEz:
+  case AArch64ISD::FCMGTz:
+  case AArch64ISD::FCMLEz:
+  case AArch64ISD::FCMLTz:
     // Compares return either 0 or all-ones
     return VTBits;
   case AArch64ISD::VASHR: {
@@ -2818,6 +2823,11 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::FCMEQ)
     MAKE_CASE(AArch64ISD::FCMGE)
     MAKE_CASE(AArch64ISD::FCMGT)
+    MAKE_CASE(AArch64ISD::FCMEQz)
+    MAKE_CASE(AArch64ISD::FCMGEz)
+    MAKE_CASE(AArch64ISD::FCMGTz)
+    MAKE_CASE(AArch64ISD::FCMLEz)
+    MAKE_CASE(AArch64ISD::FCMLTz)
     MAKE_CASE(AArch64ISD::SADDV)
     MAKE_CASE(AArch64ISD::UADDV)
     MAKE_CASE(AArch64ISD::UADDLV)
@@ -15830,33 +15840,58 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
   assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
          "function only supposed to emit natural comparisons");
 
+  APInt SplatValue;
+  APInt SplatUndef;
+  unsigned SplatBitSize = 0;
+  bool HasAnyUndefs;
+
+  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
+  bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef,
+                                            SplatBitSize, HasAnyUndefs);
+
+  bool IsZero = IsCnst && SplatValue == 0;
+
   if (SrcVT.getVectorElementType().isFloatingPoint()) {
     switch (CC) {
     default:
       return SDValue();
     case AArch64CC::NE: {
-      SDValue Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
+      SDValue Fcmeq;
+      if (IsZero)
+        Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
+      else
+        Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
       return DAG.getNOT(dl, Fcmeq, VT);
     }
     case AArch64CC::EQ:
+      if (IsZero)
+        return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
       return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
     case AArch64CC::GE:
+      if (IsZero)
+        return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
       return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
     case AArch64CC::GT:
+      if (IsZero)
+        return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
       return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
     case AArch64CC::LE:
       if (!NoNans)
         return SDValue();
       // If we ignore NaNs then we can use to the LS implementation.
       [[fallthrough]];
     case AArch64CC::LS:
+      if (IsZero)
+        return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
       return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
     case AArch64CC::LT:
       if (!NoNans)
         return SDValue();
       // If we ignore NaNs then we can use to the MI implementation.
       [[fallthrough]];
     case AArch64CC::MI:
+      if (IsZero)
+        return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
       return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
     }
   }
 
@@ -245,6 +245,13 @@ enum NodeType : unsigned {
   FCMGE,
   FCMGT,
 
+  // Vector zero comparisons
+  FCMEQz,
+  FCMGEz,
+  FCMGTz,
+  FCMLEz,
+  FCMLTz,
+
   // Round wide FP to narrow FP with inexact results to odd.
   FCVTXN,
 
 
@@ -7136,7 +7136,7 @@ multiclass SIMDCmpTwoVector<bit U, bits<5> opc, string asm,
 
 // FP Comparisons support only S and D element sizes (and H for v8.2a).
 multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc,
-                              string asm, SDPatternOperator OpNode> {
+                              string asm, SDNode OpNode> {
 
   let mayRaiseFPException = 1, Uses = [FPCR] in {
   let Predicates = [HasNEON, HasFullFP16] in {
 
@@ -179,6 +179,36 @@ def G_FCMGT : AArch64GenericInstruction {
   let hasSideEffects = 0;
 }
 
+def G_FCMEQZ : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+  let hasSideEffects = 0;
+}
+
+def G_FCMGEZ : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+  let hasSideEffects = 0;
+}
+
+def G_FCMGTZ : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+  let hasSideEffects = 0;
+}
+
+def G_FCMLEZ : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+  let hasSideEffects = 0;
+}
+
+def G_FCMLTZ : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+  let hasSideEffects = 0;
+}
+
 def G_AARCH64_PREFETCH : AArch64GenericInstruction {
   let OutOperandList = (outs);
   let InOperandList = (ins type0:$imm, ptype0:$src1);
@@ -265,6 +295,12 @@ def : GINodeEquiv<G_FCMEQ, AArch64fcmeq>;
 def : GINodeEquiv<G_FCMGE, AArch64fcmge>;
 def : GINodeEquiv<G_FCMGT, AArch64fcmgt>;
 
+def : GINodeEquiv<G_FCMEQZ, AArch64fcmeqz>;
+def : GINodeEquiv<G_FCMGEZ, AArch64fcmgez>;
+def : GINodeEquiv<G_FCMGTZ, AArch64fcmgtz>;
+def : GINodeEquiv<G_FCMLEZ, AArch64fcmlez>;
+def : GINodeEquiv<G_FCMLTZ, AArch64fcmltz>;
+
 def : GINodeEquiv<G_BSP, AArch64bsp>;
 
 def : GINodeEquiv<G_UMULL, AArch64umull>;
 
@@ -882,20 +882,11 @@ def AArch64cmltz : PatFrag<(ops node:$lhs),
 def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
                            (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
 
-def AArch64fcmeqz : PatFrag<(ops node:$lhs),
-                            (AArch64fcmeq node:$lhs, immAllZerosV)>;
-
-def AArch64fcmgez : PatFrag<(ops node:$lhs),
-                            (AArch64fcmge node:$lhs, immAllZerosV)>;
-
-def AArch64fcmgtz : PatFrag<(ops node:$lhs),
-                            (AArch64fcmgt node:$lhs, immAllZerosV)>;
-
-def AArch64fcmlez : PatFrag<(ops node:$lhs),
-                            (AArch64fcmge immAllZerosV, node:$lhs)>;
-
-def AArch64fcmltz : PatFrag<(ops node:$lhs),
-                            (AArch64fcmgt immAllZerosV, node:$lhs)>;
+def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
+def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
+def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
+def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
+def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;
 
 def AArch64fcvtxn_n: SDNode<"AArch64ISD::FCVTXN", SDTFPRoundOp>;
 def AArch64fcvtxnsdr: PatFrags<(ops node:$Rn),
 
@@ -808,14 +808,16 @@ void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
 
 bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
   assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
-
+  auto Splat = getAArch64VectorSplat(MI, MRI);
+  if (!Splat)
+    return false;
+  if (Splat->isReg())
+    return true;
   // Later, during selection, we'll try to match imported patterns using
   // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
   // G_BUILD_VECTORs which could match those patterns.
-  if (isBuildVectorAllZeros(MI, MRI) || isBuildVectorAllOnes(MI, MRI))
-    return false;
-
-  return getAArch64VectorSplat(MI, MRI).has_value();
+  int64_t Cst = Splat->getCst();
+  return (Cst != 0 && Cst != -1);
 }
 
 void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -931,40 +933,58 @@ void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
 
 /// \returns a function which builds a vector floating point compare instruction
 /// for a condition code \p CC.
+/// \param [in] IsZero - True if the comparison is against 0.
 /// \param [in] NoNans - True if the target has NoNansFPMath.
 std::function<Register(MachineIRBuilder &)>
-getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans,
-              MachineRegisterInfo &MRI) {
+getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
+              bool NoNans, MachineRegisterInfo &MRI) {
   LLT DstTy = MRI.getType(LHS);
   assert(DstTy.isVector() && "Expected vector types only?");
   assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
   switch (CC) {
   default:
     llvm_unreachable("Unexpected condition code!");
   case AArch64CC::NE:
-    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
-      auto FCmp = MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
+    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
+      auto FCmp = IsZero
+                      ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS})
+                      : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
       return MIB.buildNot(DstTy, FCmp).getReg(0);
     };
   case AArch64CC::EQ:
-    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
-      return MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS}).getReg(0);
+    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
+      return IsZero
+                 ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg(0)
+                 : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
+                       .getReg(0);
     };
   case AArch64CC::GE:
-    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
-      return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS}).getReg(0);
+    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
+      return IsZero
+                 ? MIB.buildInstr(AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg(0)
+                 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
+                       .getReg(0);
     };
   case AArch64CC::GT:
-    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
-      return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS}).getReg(0);
+    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
+      return IsZero
+                 ? MIB.buildInstr(AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg(0)
+                 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
+                       .getReg(0);
     };
   case AArch64CC::LS:
-    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
-      return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS}).getReg(0);
+    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
+      return IsZero
+                 ? MIB.buildInstr(AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg(0)
+                 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
+                       .getReg(0);
     };
   case AArch64CC::MI:
-    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
-      return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS}).getReg(0);
+    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
+      return IsZero
+                 ? MIB.buildInstr(AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg(0)
+                 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
+                       .getReg(0);
     };
   }
 }
@@ -1004,17 +1024,23 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
 
   LLT DstTy = MRI.getType(Dst);
 
+  auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
+
+  // Compares against 0 have special target-specific pseudos.
+  bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;
+
   bool Invert = false;
   AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
   if ((Pred == CmpInst::Predicate::FCMP_ORD ||
        Pred == CmpInst::Predicate::FCMP_UNO) &&
-      isBuildVectorAllZeros(*MRI.getVRegDef(RHS), MRI)) {
+      IsZero) {
     // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
     // NaN, so equivalent to a == a and doesn't need the two comparisons an
     // "ord" normally would.
     // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
     // thus equivalent to a != a.
     RHS = LHS;
+    IsZero = false;
     CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
   } else
     changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
@@ -1025,12 +1051,12 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
   const bool NoNans =
       ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
 
-  auto Cmp = getVectorFCMP(CC, LHS, RHS, NoNans, MRI);
+  auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
   Register CmpRes;
   if (CC2 == AArch64CC::AL)
     CmpRes = Cmp(MIB);
   else {
-    auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, NoNans, MRI);
+    auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
     auto Cmp2Dst = Cmp2(MIB);
     auto Cmp1Dst = Cmp(MIB);
     CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
Original file line number	Diff line number	Diff line change
`@@ -1385,8 +1385,7 @@ bool llvm::isBuildVectorConstantSplat(const Register Reg,`
`1385`	`1385`	`const MachineRegisterInfo &MRI,`
`1386`	`1386`	`int64_t SplatValue, bool AllowUndef) {`
`1387`	`1387`	`if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef))`
`1388`		`- return SplatValAndReg->Value.getSExtValue() == SplatValue;`
`1389`		`-`
	`1388`	`+ return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue));`
`1390`	`1389`	`return false;`
`1391`	`1390`	`}`
`1392`	`1391`