[2/2,PR88836,aarch64] Fix CSE to process parallel rtx dest one by one

Message ID 1557972484-24599-3-git-send-email-kugan.vivekanandarajah@linaro.org
State New
Headers show
Series
  • Fix redundant ptest instruction
Related show

Commit Message

Kugan Vivekanandarajah May 16, 2019, 2:08 a.m.
From: Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>


This patch changes cse_insn to process parallel rtx one by one such that
any destination rtx in cse list is invalidated before processing the
next.

gcc/ChangeLog:

2019-05-16  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

	PR target/88834
	* cse.c (safe_hash): Handle VEC_DUPLICATE.
	(exp_equiv_p): Likewise.
	(hash_rtx_cb): Change to accept const_rtx.
	(struct set): Add field to record if uses of dest is invalidated.
	(cse_insn): For parallel rtx, invalidate register set by first rtx
	before processing the next.

gcc/testsuite/ChangeLog:

2019-05-16  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

	PR target/88834
	* gcc.target/aarch64/pr88834.c: New test.

Change-Id: I7c3a61f034128f38abe0c2b7dab5d81dec28146c
---
 gcc/cse.c                                  | 67 ++++++++++++++++++++++++++----
 gcc/testsuite/gcc.target/aarch64/pr88836.c | 14 +++++++
 2 files changed, 73 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr88836.c

-- 
2.7.4

Comments

Jeff Law May 16, 2019, 6:08 p.m. | #1
On 5/15/19 8:08 PM, kugan.vivekanandarajah@linaro.org wrote:
> From: Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>

> 

> This patch changes cse_insn to process parallel rtx one by one such that

> any destination rtx in cse list is invalidated before processing the

> next.

> 

> gcc/ChangeLog:

> 

> 2019-05-16  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

> 

> 	PR target/88834

> 	* cse.c (safe_hash): Handle VEC_DUPLICATE.

> 	(exp_equiv_p): Likewise.

> 	(hash_rtx_cb): Change to accept const_rtx.

> 	(struct set): Add field to record if uses of dest is invalidated.

> 	(cse_insn): For parallel rtx, invalidate register set by first rtx

> 	before processing the next.

> 

> gcc/testsuite/ChangeLog:

> 

> 2019-05-16  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

> 

> 	PR target/88834

> 	* gcc.target/aarch64/pr88834.c: New test.

I haven't dug into the code, so if my concerns are off base, just say so.


> (insn 19 18 20 3 (parallel [

>             (set (reg:VNx4BI 93 [ next_mask_18 ])

>                 (unspec:VNx4BI [

>                         (const_int 0 [0])

>                         (reg:DI 95 [ _33 ])

>                     ] UNSPEC_WHILE_LO))

>             (set (reg:CC 66 cc)

>                 (compare:CC (unspec:SI [

>                             (vec_duplicate:VNx4BI (const_int 1 [0x1]))

>                             (reg:VNx4BI 93 [ next_mask_18 ])

>                         ] UNSPEC_PTEST_PTRUE)

>                     (const_int 0 [0])))

>         ]) 4244 {while_ultdivnx4bi}

RTL semantics in case of a PARALLEL are that all the inputs are
consumed, then all outputs are generated.  So for the example insn reg93
is read in the second set before it's set in the output of the first set.


So the ordering you're using for processing/invaliding seems unexpected.

jeff

Patch

diff --git a/gcc/cse.c b/gcc/cse.c
index 6c9cda1..9dc31f5 100644
--- a/gcc/cse.c
+++ b/gcc/cse.c
@@ -570,7 +570,7 @@  static void invalidate_for_call (void);
 static rtx use_related_value (rtx, struct table_elt *);
 
 static inline unsigned canon_hash (rtx, machine_mode);
-static inline unsigned safe_hash (rtx, machine_mode);
+static inline unsigned safe_hash (const_rtx, machine_mode);
 static inline unsigned hash_rtx_string (const char *);
 
 static rtx canon_reg (rtx, rtx_insn *);
@@ -2369,6 +2369,11 @@  hash_rtx_cb (const_rtx x, machine_mode mode,
       hash += fixed_hash (CONST_FIXED_VALUE (x));
       return hash;
 
+    case VEC_DUPLICATE:
+      return hash_rtx_cb (XEXP (x, 0), VOIDmode,
+			  do_not_record_p, hash_arg_in_memory_p,
+			  have_reg_qty, cb);
+
     case CONST_VECTOR:
       {
 	int units;
@@ -2599,7 +2604,7 @@  canon_hash (rtx x, machine_mode mode)
    and hash_arg_in_memory are not changed.  */
 
 static inline unsigned
-safe_hash (rtx x, machine_mode mode)
+safe_hash (const_rtx x, machine_mode mode)
 {
   int dummy_do_not_record;
   return hash_rtx (x, mode, &dummy_do_not_record, NULL, true);
@@ -2630,6 +2635,16 @@  exp_equiv_p (const_rtx x, const_rtx y, int validate, bool for_gcse)
     return x == y;
 
   code = GET_CODE (x);
+  if ((code == CONST_VECTOR && GET_CODE (y) == VEC_DUPLICATE)
+       || (code == VEC_DUPLICATE && GET_CODE (y) == CONST_VECTOR))
+    {
+      if (code == VEC_DUPLICATE)
+	std::swap (x, y);
+      if (const_vector_encoded_nelts (x) != 1)
+	return 0;
+      return exp_equiv_p (CONST_VECTOR_ENCODED_ELT (x, 0), XEXP (y, 0),
+			  validate, for_gcse);
+    }
   if (code != GET_CODE (y))
     return 0;
 
@@ -4192,7 +4207,8 @@  struct set
   char src_in_memory;
   /* Nonzero if the SET_SRC contains something
      whose value cannot be predicted and understood.  */
-  char src_volatile;
+  char src_volatile : 1;
+  char invalidate_dest_p : 1;
   /* Original machine mode, in case it becomes a CONST_INT.
      The size of this field should match the size of the mode
      field of struct rtx_def (see rtl.h).  */
@@ -4639,7 +4655,7 @@  cse_insn (rtx_insn *insn)
   for (i = 0; i < n_sets; i++)
     {
       bool repeat = false;
-      bool mem_noop_insn = false;
+      bool noop_insn = false;
       rtx src, dest;
       rtx src_folded;
       struct table_elt *elt = 0, *p;
@@ -4736,6 +4752,7 @@  cse_insn (rtx_insn *insn)
       sets[i].src = src;
       sets[i].src_hash = HASH (src, mode);
       sets[i].src_volatile = do_not_record;
+      sets[i].invalidate_dest_p = 1;
       sets[i].src_in_memory = hash_arg_in_memory;
 
       /* If SRC is a MEM, there is a REG_EQUIV note for SRC, and DEST is
@@ -5365,7 +5382,7 @@  cse_insn (rtx_insn *insn)
 		       || insn_nothrow_p (insn)))
 	    {
 	      SET_SRC (sets[i].rtl) = trial;
-	      mem_noop_insn = true;
+	      noop_insn = true;
 	      break;
 	    }
 
@@ -5418,6 +5435,19 @@  cse_insn (rtx_insn *insn)
 	      src_folded_cost = constant_pool_entries_cost;
 	      src_folded_regcost = constant_pool_entries_regcost;
 	    }
+	  else if (n_sets == 1
+		   && REG_P (trial)
+		   && REG_P (SET_DEST (sets[i].rtl))
+		   && GET_MODE_CLASS (mode) == MODE_CC
+		   && REGNO (trial) == REGNO (SET_DEST (sets[i].rtl))
+		   && !side_effects_p (dest)
+		   && (cfun->can_delete_dead_exceptions
+		       || insn_nothrow_p (insn)))
+	    {
+	      SET_SRC (sets[i].rtl) = trial;
+	      noop_insn = true;
+	      break;
+	    }
 	}
 
       /* If we changed the insn too much, handle this set from scratch.  */
@@ -5588,7 +5618,7 @@  cse_insn (rtx_insn *insn)
 	}
 
       /* Similarly for no-op MEM moves.  */
-      else if (mem_noop_insn)
+      else if (noop_insn)
 	{
 	  if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
 	    cse_cfg_altered = true;
@@ -5760,6 +5790,26 @@  cse_insn (rtx_insn *insn)
 		  }
 		elt = insert (src, classp, sets[i].src_hash, mode);
 		elt->in_memory = sets[i].src_in_memory;
+
+		if (REG_P (dest)
+		    && ! reg_mentioned_p (dest, src))
+		    {
+		      sets[i].invalidate_dest_p = 0;
+		      unsigned int regno = REGNO (dest);
+		      unsigned int endregno = END_REGNO (dest);
+		      unsigned int j;
+
+		      for (j = regno; j < endregno; j++)
+			{
+			  if (REG_IN_TABLE (j) >= 0)
+			    {
+			      remove_invalid_refs (j);
+			      REG_IN_TABLE (j) = -1;
+			    }
+			}
+		      invalidate (dest, VOIDmode);
+		    }
+
 		/* If inline asm has any clobbers, ensure we only reuse
 		   existing inline asms and never try to put the ASM_OPERANDS
 		   into an insn that isn't inline asm.  */
@@ -5853,7 +5903,8 @@  cse_insn (rtx_insn *insn)
 	   previous quantity's chain.
 	   Needed for memory if this is a nonvarying address, unless
 	   we have just done an invalidate_memory that covers even those.  */
-	if (REG_P (dest) || GET_CODE (dest) == SUBREG)
+	if ((REG_P (dest) || GET_CODE (dest) == SUBREG)
+	    && sets[i].invalidate_dest_p)
 	  invalidate (dest, VOIDmode);
 	else if (MEM_P (dest))
 	  invalidate (dest, VOIDmode);
@@ -5887,7 +5938,7 @@  cse_insn (rtx_insn *insn)
 
 	  if (!REG_P (x))
 	    mention_regs (x);
-	  else
+	  else if (sets[i].invalidate_dest_p)
 	    {
 	      /* We used to rely on all references to a register becoming
 		 inaccessible when a register changes to a new quantity,
diff --git a/gcc/testsuite/gcc.target/aarch64/pr88836.c b/gcc/testsuite/gcc.target/aarch64/pr88836.c
new file mode 100644
index 0000000..442e8a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr88836.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-S -O3 -march=armv8.2-a+sve" } */
+
+void
+f (int *restrict x, int *restrict y, int *restrict z, int n)
+{
+  for (int i = 0; i < n; i += 2)
+    {
+      x[i] = y[i] + z[i];
+      x[i + 1] = y[i + 1] - z[i + 1];
+    }
+}
+
+/* { dg-final { scan-assembler-times {ptest} 0 } } */