[v4,12/12] aarch64: Implement TImode comparisons

Message ID 20200410034853.4322-13-richard.henderson@linaro.org

Commit Message

Richard Henderson via Gcc-patches, April 10, 2020, 3:48 a.m.
	* config/aarch64/aarch64-modes.def (CC_NV): New.
	* config/aarch64/aarch64.c (aarch64_gen_compare_reg): Expand
	all of the comparisons for TImode, not just NE.
	(aarch64_select_cc_mode): Recognize <su>cmp<GPI>_carryin.
	(aarch64_get_condition_code_1): Handle CC_NVmode.
	* config/aarch64/aarch64.md (cbranchti4, cstoreti4): New.
	(ccmp_iorne<GPI>): New.
	(<su_optab>cmp<GPI>_carryin): New.
	(*<su_optab>cmp<GPI>_carryin): New.
	(*<su_optab>cmp<GPI>_carryin_z1): New.
	(*<su_optab>cmp<GPI>_carryin_z2): New.
	(*cmp<GPI>_carryin_m2, *ucmp<GPI>_carryin_m2): New.
	* config/aarch64/iterators.md (CC_EXTEND): New.
	* config/aarch64/predicates.md (const_dword_umax): New.
---
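For reference, a hand-written sketch of the code generation this aims for
on simple __int128 comparisons.  This is not captured compiler output, so
treat register allocation and scheduling as illustrative only; per the
AAPCS64, a arrives in x1:x0 and b in x3:x2, low word in the lower-numbered
register.  The LT/GE cases use the classic double-word sequence: compare
the low words to establish the borrow, then SBCS on the high words, with
the full 128-bit result judged from N/V (signed, CC_NV) or the carry
(unsigned, CC_NOTC):

    int eq0 (__int128 a) { return a == 0; }
        /* orr  x0, x1, x0
           cmp  x0, 0
           cset w0, eq  */

    int lt (__int128 a, __int128 b) { return a < b; }
        /* cmp  x0, x2
           sbcs xzr, x1, x3     ; CC_NV
           cset w0, lt  */

    int ltu (unsigned __int128 a, unsigned __int128 b) { return a < b; }
        /* cmp  x0, x2
           sbcs xzr, x1, x3     ; CC_NOTC
           cset w0, lo  */

    int ne (__int128 a, __int128 b) { return a != b; }
        /* cmp  x0, x2
           ccmp x1, x3, 0, eq
           cset w0, ne  */
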
 gcc/config/aarch64/aarch64.c         | 164 ++++++++++++++++++++++++---
 gcc/config/aarch64/aarch64-modes.def |   1 +
 gcc/config/aarch64/aarch64.md        | 113 ++++++++++++++++++
 gcc/config/aarch64/iterators.md      |   3 +
 gcc/config/aarch64/predicates.md     |   9 ++
 5 files changed, 277 insertions(+), 13 deletions(-)
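
A note on the LE/GT (LEU/GTU) constant adjustment in
aarch64_gen_compare_reg below: it relies on the identity
x <= c <==> x < c + 1, which holds exactly when c + 1 does not wrap,
and that is what the wi::cmps/wi::cmpu checks against the
signed/unsigned TImode maximum guard.  A stand-alone demonstration of
the identity, written with int rather than __int128 purely for
illustration:

    #include <assert.h>

    int main (void)
    {
      /* x <= c is the same test as x < c + 1 whenever c + 1 does not
         overflow; the expander rewrites LE/GT only when the constant is
         strictly below the TImode maximum, so the increment is safe.  */
      for (int x = -4; x <= 4; x++)
        for (int c = -3; c <= 3; c++)
          assert ((x <= c) == (x < c + 1));
      return 0;
    }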

-- 
2.20.1

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 837ee6a5e37..6c825b341a0 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2731,32 +2731,143 @@  rtx
 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 {
   machine_mode cmp_mode = GET_MODE (x);
-  machine_mode cc_mode;
   rtx cc_reg;
 
   if (cmp_mode == TImode)
     {
-      gcc_assert (code == NE);
+      rtx x_lo, x_hi, y_lo, y_hi, tmp;
+      struct expand_operand ops[2];
 
-      cc_mode = CCmode;
-      cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
+      x_lo = operand_subword (x, 0, 0, TImode);
+      x_hi = operand_subword (x, 1, 0, TImode);
 
-      rtx x_lo = operand_subword (x, 0, 0, TImode);
-      rtx y_lo = operand_subword (y, 0, 0, TImode);
-      emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo));
+      if (CONST_SCALAR_INT_P (y))
+	{
+	  wide_int y_wide = rtx_mode_t (y, TImode);
 
-      rtx x_hi = operand_subword (x, 1, 0, TImode);
-      rtx y_hi = operand_subword (y, 1, 0, TImode);
-      emit_insn (gen_ccmpccdi (cc_reg, cc_reg, x_hi, y_hi,
-			       gen_rtx_EQ (cc_mode, cc_reg, const0_rtx),
-			       GEN_INT (AARCH64_EQ)));
+	  switch (code)
+	    {
+	    case EQ:
+	    case NE:
+	      /* For equality, IOR the two halves together.  If this gets
+		 used for a branch, we expect this to fold to cbz/cbnz;
+		 otherwise it's no larger than the cmp+ccmp below.  Beware
+		 of the compare-and-swap post-reload split: use ccmp there.  */
+	      if (y_wide == 0 && can_create_pseudo_p ())
+		{
+		  tmp = gen_reg_rtx (DImode);
+		  emit_insn (gen_iordi3 (tmp, x_hi, x_lo));
+		  emit_insn (gen_cmpdi (tmp, const0_rtx));
+		  cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+		  goto done;
+		}
+	      break;
+
+	    case LE:
+	    case GT:
+	      /* Add 1 to Y to convert to LT/GE, which avoids the swap and
+		 keeps the constant operand.  */
+	      if (wi::cmps (y_wide, wi::max_value (TImode, SIGNED)) < 0)
+		{
+		  y = immed_wide_int_const (wi::add (y_wide, 1), TImode);
+		  code = (code == LE ? LT : GE);
+		}
+	      break;
+
+	    case LEU:
+	    case GTU:
+	      /* Add 1 to Y to convert to LTU/GEU, which avoids the swap and
+		 keeps the constant operand.  */
+	      if (wi::cmpu (y_wide, wi::max_value (TImode, UNSIGNED)) < 0)
+		{
+		  y = immed_wide_int_const (wi::add (y_wide, 1), TImode);
+		  code = (code == LEU ? LTU : GEU);
+		}
+	      break;
+
+	    default:
+	      break;
+	    }
+	}
+
+      y_lo = simplify_gen_subreg (DImode, y, TImode,
+				  subreg_lowpart_offset (DImode, TImode));
+      y_hi = simplify_gen_subreg (DImode, y, TImode,
+				  subreg_highpart_offset (DImode, TImode));
+
+      switch (code)
+	{
+	case LEU:
+	case GTU:
+	case LE:
+	case GT:
+	  std::swap (x_lo, y_lo);
+	  std::swap (x_hi, y_hi);
+	  code = swap_condition (code);
+	  break;
+
+	case LTU:
+	case GEU:
+	case LT:
+	case GE:
+	  /* If the low word of y is 0, then this is simply a normal
+	     compare of the upper words.  */
+	  if (y_lo == const0_rtx)
+	    {
+	      if (!aarch64_plus_operand (y_hi, DImode))
+		y_hi = force_reg (DImode, y_hi);
+	      return aarch64_gen_compare_reg (code, x_hi, y_hi);
+	    }
+	  break;
+
+	default:
+	  break;
+	}
+
+      /* Emit cmpdi, forcing operands into registers as required.  */
+      create_input_operand (&ops[0], x_lo, DImode);
+      create_input_operand (&ops[1], y_lo, DImode);
+      expand_insn (CODE_FOR_cmpdi, 2, ops);
+
+      switch (code)
+	{
+	case EQ:
+	case NE:
+	  /* For EQ/NE, (x_lo != y_lo) || (x_hi != y_hi).  */
+	  create_input_operand (&ops[0], x_hi, DImode);
+	  create_input_operand (&ops[1], y_hi, DImode);
+	  expand_insn (CODE_FOR_ccmp_iornedi, 2, ops);
+	  cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+	  break;
+
+	case LTU:
+	case GEU:
+	  create_input_operand (&ops[0], x_hi, DImode);
+	  create_input_operand (&ops[1], y_hi, DImode);
+	  expand_insn (CODE_FOR_ucmpdi_carryin, 2, ops);
+	  cc_reg = gen_rtx_REG (CC_NOTCmode, CC_REGNUM);
+	  break;
+
+	case LT:
+	case GE:
+	  create_input_operand (&ops[0], x_hi, DImode);
+	  create_input_operand (&ops[1], y_hi, DImode);
+	  expand_insn (CODE_FOR_cmpdi_carryin, 2, ops);
+	  cc_reg = gen_rtx_REG (CC_NVmode, CC_REGNUM);
+	  break;
+
+	default:
+	  gcc_unreachable ();
+	}
     }
   else
     {
-      cc_mode = SELECT_CC_MODE (code, x, y);
+      machine_mode cc_mode = SELECT_CC_MODE (code, x, y);
       cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
       emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y));
     }
+
+ done:
   return gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx);
 }
 
@@ -9551,6 +9662,24 @@  aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
       && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
     return CC_Vmode;
 
+  /* A test for signed GE/LT comparison with borrow.  */
+  if ((mode_x == DImode || mode_x == TImode)
+      && (code == GE || code == LT)
+      && (code_x == SIGN_EXTEND || x == const0_rtx)
+      && ((GET_CODE (y) == PLUS
+	   && aarch64_borrow_operation (XEXP (y, 0), mode_x))
+	  || aarch64_borrow_operation (y, mode_x)))
+    return CC_NVmode;
+
+  /* A test for unsigned GEU/LTU comparison with borrow.  */
+  if ((mode_x == DImode || mode_x == TImode)
+      && (code == GEU || code == LTU)
+      && (code_x == ZERO_EXTEND || x == const0_rtx)
+      && ((GET_CODE (y) == PLUS
+	   && aarch64_borrow_operation (XEXP (y, 0), mode_x))
+	  || aarch64_borrow_operation (y, mode_x)))
+    return CC_NOTCmode;
+
   /* For everything else, return CCmode.  */
   return CCmode;
 }
@@ -9690,6 +9819,15 @@  aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
 	}
       break;
 
+    case E_CC_NVmode:
+      switch (comp_code)
+	{
+	case GE: return AARCH64_GE;
+	case LT: return AARCH64_LT;
+	default: return -1;
+	}
+      break;
+
     default:
       return -1;
     }
diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 181b7b30dcd..beb5919ab01 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -41,6 +41,7 @@  CC_MODE (CC_C);     /* C represents unsigned overflow of a simple addition.  */
 CC_MODE (CC_NOTC);  /* !C represents unsigned overflow of subtraction,
                        as well as our representation of add-with-carry.  */
 CC_MODE (CC_V);     /* Only V bit of condition flags is valid.  */
+CC_MODE (CC_NV);    /* N and V bits set for signed GE/LT comparison.  */
 
 /* Half-precision floating point for __fp16.  */
 FLOAT_MODE (HF, 2, 0);
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 2b5a6eb510d..e62f79ed6f1 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -471,6 +471,20 @@ 
   operands[2] = const0_rtx;
 })
 
+(define_expand "cbranchti4"
+  [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
+			    [(match_operand:TI 1 "register_operand")
+			     (match_operand:TI 2 "aarch64_reg_or_imm")])
+			   (label_ref (match_operand 3 "" ""))
+			   (pc)))]
+  ""
+{
+  operands[0] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
+					 operands[2]);
+  operands[1] = XEXP (operands[0], 0);
+  operands[2] = const0_rtx;
+})
+
 (define_expand "cbranch<mode>4"
   [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
 			    [(match_operand:GPF 1 "register_operand")
@@ -569,6 +583,25 @@ 
   [(set_attr "type" "fccmp<s>")]
 )
 
+;; This specialization has the advantage of being able to swap operands.
+;; Use CC_NZ because SELECT_CC_MODE uses that for comparisons against 0.
+(define_insn "ccmp_iorne<mode>"
+  [(set (reg:CC_NZ CC_REGNUM)
+	(compare:CC_NZ
+	  (ior:SI
+	    (ne:SI (reg:CC CC_REGNUM)
+		   (const_int 0))
+	    (ne:SI (match_operand:GPI 0 "register_operand" "%r,r,r")
+		   (match_operand:GPI 1 "aarch64_ccmp_operand" "r,Uss,Usn")))
+	  (const_int 0)))]
+  ""
+  "@
+   ccmp\\t%<w>0, %<w>1, 0, eq
+   ccmp\\t%<w>0, %1, 0, eq
+   ccmn\\t%<w>0, #%n1, 0, eq"
+  [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
+)
+
 ;; Expansion of signed mod by a power of 2 using CSNEG.
 ;; For x0 % n where n is a power of 2 produce:
 ;; negs   x1, x0
@@ -3364,6 +3397,72 @@ 
   [(set_attr "type" "adc_reg")]
 )
 
+(define_expand "<su_optab>cmp<mode>_carryin"
+  [(set (reg:<CC_EXTEND> CC_REGNUM)
+	(compare:<CC_EXTEND>
+	  (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand"))
+	  (plus:<DWI>
+	    (geu:<DWI> (reg:CC_C CC_REGNUM) (const_int 0))
+	    (ANY_EXTEND:<DWI> (match_operand:GPI 1 "register_operand")))))]
+  ""
+)
+
+(define_insn "*<su_optab>cmp<mode>_carryin"
+  [(set (reg:<CC_EXTEND> CC_REGNUM)
+	(compare:<CC_EXTEND>
+	  (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+	  (plus:<DWI>
+	    (match_operand:<DWI> 2 "aarch64_borrow_operation" "")
+	    (ANY_EXTEND:<DWI> (match_operand:GPI 1 "register_operand" "r")))))]
+  ""
+  "sbcs\\t<w>zr, %<w>0, %<w>1"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*<su_optab>cmp<mode>_carryin_z1"
+  [(set (reg:<CC_EXTEND> CC_REGNUM)
+	(compare:<CC_EXTEND>
+	  (const_int 0)
+	  (plus:<DWI>
+	    (match_operand:<DWI> 1 "aarch64_borrow_operation" "")
+	    (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand" "r")))))]
+  ""
+  "sbcs\\t<w>zr, <w>zr, %<w>0"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*<su_optab>cmp<mode>_carryin_z2"
+  [(set (reg:<CC_EXTEND> CC_REGNUM)
+	(compare:<CC_EXTEND>
+	  (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+	  (match_operand:<DWI> 1 "aarch64_borrow_operation" "")))]
+  ""
+  "sbcs\\t<w>zr, %<w>0, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*cmp<mode>_carryin_m2"
+  [(set (reg:<CC_EXTEND> CC_REGNUM)
+	(compare:<CC_EXTEND>
+	  (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+	  (neg:<DWI> (match_operand:<DWI> 1 "aarch64_carry_operation" ""))))]
+  ""
+  "adcs\\t<w>zr, %<w>0, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*ucmp<mode>_carryin_m2"
+  [(set (reg:<CC_EXTEND> CC_REGNUM)
+	(compare:<CC_EXTEND>
+	  (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+	  (plus:<DWI>
+	    (match_operand:<DWI> 1 "aarch64_borrow_operation" "")
+	    (match_operand:<DWI> 2 "const_dword_umax" ""))))]
+  ""
+  "adcs\\t<w>zr, %<w>0, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
 (define_expand "usub<GPI:mode>3_carryinC"
   [(parallel
      [(set (reg:CC_NOTC CC_REGNUM)
@@ -3985,6 +4084,20 @@ 
   operands[3] = const0_rtx;
 })
 
+(define_expand "cstoreti4"
+  [(set (match_operand:SI 0 "register_operand")
+	(match_operator:SI 1 "aarch64_comparison_operator"
+	 [(match_operand:TI 2 "register_operand")
+	  (match_operand:TI 3 "aarch64_reg_or_imm")]))]
+  ""
+{
+  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
+				         operands[3]);
+  PUT_MODE (operands[1], SImode);
+  operands[2] = XEXP (operands[1], 0);
+  operands[3] = const0_rtx;
+})
+
 (define_expand "cstorecc4"
   [(set (match_operand:SI 0 "register_operand")
        (match_operator 1 "aarch64_comparison_operator_mode"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 8e434389e59..f6f2e9cefd5 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1907,6 +1907,9 @@ 
 (define_code_attr fix_trunc_optab [(fix "fix_trunc")
 				   (unsigned_fix "fixuns_trunc")])
 
+;; For double-word comparisons
+(define_code_attr CC_EXTEND [(sign_extend "CC_NV") (zero_extend "CC_NOTC")])
+
 ;; Optab prefix for sign/zero-extending operations
 (define_code_attr su_optab [(sign_extend "") (zero_extend "u")
 			    (div "") (udiv "u")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index e3572d2f60d..93d068cc69c 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -55,6 +55,15 @@ 
   return rtx_mode_t (op, mode) == (wi::shwi (1, mode) << bits);
 })
 
+;; True for (1 << (GET_MODE_BITSIZE (mode) / 2)) - 1
+;; i.e. the unsigned maximum of the half-width mode, as a double-word value.
+(define_predicate "const_dword_umax"
+  (match_code "const_int,const_wide_int")
+{
+  unsigned bits = GET_MODE_BITSIZE (mode).to_constant () / 2;
+  return rtx_mode_t (op, mode) == wi::sub (wi::shwi (1, mode) << bits, 1);
+})
+
 (define_predicate "subreg_lowpart_operator"
   (ior (match_code "truncate")
        (and (match_code "subreg")