[BACKPORT] Backport revision 254777 and 254778 to GCC 7 branch

Message ID DB5PR0801MB2742D498CD2498BBDE9CDF91E70F0@DB5PR0801MB2742.eurprd08.prod.outlook.com
State New
Headers show
Series
  • [BACKPORT] Backport revision 254777 and 254778 to GCC 7 branch
Related show

Commit Message

Bin Cheng Dec. 19, 2017, 3:15 p.m.
Hi,
This patch backports revision 254777 and 254778 to GCC 7 branch.
Bootstrap and test on x86_64.  Is it OK?

Thanks,
bin

2017-12-18  Bin Cheng  <bin.cheng@arm.com>

	Backport from mainline
	2017-11-15  Bin Cheng  <bin.cheng@arm.com>

	PR tree-optimization/82726
	PR tree-optimization/70754
	* tree-predcom.c (order_drefs_by_pos): New function.
	(combine_chains): Move code setting has_max_use_after to...
	(try_combine_chains): ...here.  New parameter.  Sort combined chains
	according to position information.
	(tree_predictive_commoning_loop): Update call to above function.
	(update_pos_for_combined_chains, pcom_stmt_dominates_stmt_p): New.

	2017-11-15  Bin Cheng  <bin.cheng@arm.com>


	PR tree-optimization/82726
	Revert
	2017-01-23  Bin Cheng  <bin.cheng@arm.com>

	PR tree-optimization/70754
	* tree-predcom.c (stmt_combining_refs): New parameter INSERT_BEFORE.
	(reassociate_to_the_same_stmt): New parameter INSERT_BEFORE.  Insert
	combined stmt before it if not NULL.
	(combine_chains): Process refs reversely and compute dominance point
	for root ref.

	Revert
	2017-02-23  Bin Cheng  <bin.cheng@arm.com>

	PR tree-optimization/79663
	* tree-predcom.c (combine_chains): Process refs in reverse order
	only for ZERO length chains, and add explaining comment.

gcc/testsuite
2017-12-18  Bin Cheng  <bin.cheng@arm.com>

	Backport from mainline
	2017-11-15  Bin Cheng  <bin.cheng@arm.com>

	PR tree-optimization/82726
	* gcc.dg/tree-ssa/pr82726.c: New test.
From 3f9a8b53738aded4ead32fb97c251527cbf31ea7 Mon Sep 17 00:00:00 2001
From: Bin Cheng <binche01@e108451-lin.cambridge.arm.com>
Date: Mon, 18 Dec 2017 11:23:21 +0000
Subject: [PATCH] backport-pr70754.txt

---
 gcc/testsuite/gcc.dg/tree-ssa/pr82726.c |  26 +++++
 gcc/tree-predcom.c                      | 200 +++++++++++++++++++++-----------
 2 files changed, 160 insertions(+), 66 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr82726.c

Comments

Richard Biener Dec. 19, 2017, 3:16 p.m. | #1
On Tue, Dec 19, 2017 at 4:15 PM, Bin Cheng <Bin.Cheng@arm.com> wrote:
> Hi,

> This patch backports revision 254777 and 254778 to GCC 7 branch.

> Bootstrap and test on x86_64.  Is it OK?


Ok.

Richard.

> Thanks,

> bin

>

> 2017-12-18  Bin Cheng  <bin.cheng@arm.com>

>

>         Backport from mainline

>         2017-11-15  Bin Cheng  <bin.cheng@arm.com>

>

>         PR tree-optimization/82726

>         PR tree-optimization/70754

>         * tree-predcom.c (order_drefs_by_pos): New function.

>         (combine_chains): Move code setting has_max_use_after to...

>         (try_combine_chains): ...here.  New parameter.  Sort combined chains

>         according to position information.

>         (tree_predictive_commoning_loop): Update call to above function.

>         (update_pos_for_combined_chains, pcom_stmt_dominates_stmt_p): New.

>

>         2017-11-15  Bin Cheng  <bin.cheng@arm.com>

>

>

>         PR tree-optimization/82726

>         Revert

>         2017-01-23  Bin Cheng  <bin.cheng@arm.com>

>

>         PR tree-optimization/70754

>         * tree-predcom.c (stmt_combining_refs): New parameter INSERT_BEFORE.

>         (reassociate_to_the_same_stmt): New parameter INSERT_BEFORE.  Insert

>         combined stmt before it if not NULL.

>         (combine_chains): Process refs reversely and compute dominance point

>         for root ref.

>

>         Revert

>         2017-02-23  Bin Cheng  <bin.cheng@arm.com>

>

>         PR tree-optimization/79663

>         * tree-predcom.c (combine_chains): Process refs in reverse order

>         only for ZERO length chains, and add explaining comment.

>

> gcc/testsuite

> 2017-12-18  Bin Cheng  <bin.cheng@arm.com>

>

>         Backport from mainline

>         2017-11-15  Bin Cheng  <bin.cheng@arm.com>

>

>         PR tree-optimization/82726

>         * gcc.dg/tree-ssa/pr82726.c: New test.

Patch

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr82726.c b/gcc/testsuite/gcc.dg/tree-ssa/pr82726.c
new file mode 100644
index 0000000..22bc59d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82726.c
@@ -0,0 +1,26 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 --param tree-reassoc-width=4" } */
+/* { dg-additional-options "-mavx2" { target { x86_64-*-* i?86-*-* } } } */
+
+#define N 40
+#define M 128
+unsigned int in[N+M];
+unsigned short out[N];
+
+/* Outer-loop vectorization. */
+
+void
+foo (){
+  int i,j;
+  unsigned int diff;
+
+  for (i = 0; i < N; i++) {
+    diff = 0;
+    for (j = 0; j < M; j+=8) {
+      diff += in[j+i];
+    }
+    out[i]=(unsigned short)diff;
+  }
+
+  return;
+}
diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
index 57d8f7d..a2bb676 100644
--- a/gcc/tree-predcom.c
+++ b/gcc/tree-predcom.c
@@ -943,6 +943,17 @@  order_drefs (const void *a, const void *b)
   return (*da)->pos - (*db)->pos;
 }
 
+/* Compares two drefs A and B by their position.  Callback for qsort.  */
+
+static int
+order_drefs_by_pos (const void *a, const void *b)
+{
+  const dref *const da = (const dref *) a;
+  const dref *const db = (const dref *) b;
+
+  return (*da)->pos - (*db)->pos;
+}
+
 /* Returns root of the CHAIN.  */
 
 static inline dref
@@ -2164,11 +2175,10 @@  remove_name_from_operation (gimple *stmt, tree op)
 }
 
 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
-   are combined in a single statement, and returns this statement.  Note the
-   statement is inserted before INSERT_BEFORE if it's not NULL.  */
+   are combined in a single statement, and returns this statement.  */
 
 static gimple *
-reassociate_to_the_same_stmt (tree name1, tree name2, gimple *insert_before)
+reassociate_to_the_same_stmt (tree name1, tree name2)
 {
   gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
   gassign *new_stmt, *tmp_stmt;
@@ -2225,12 +2235,6 @@  reassociate_to_the_same_stmt (tree name1, tree name2, gimple *insert_before)
   var = create_tmp_reg (type, "predreastmp");
   new_name = make_ssa_name (var);
   new_stmt = gimple_build_assign (new_name, code, name1, name2);
-  if (insert_before && stmt_dominates_stmt_p (insert_before, s1))
-    bsi = gsi_for_stmt (insert_before);
-  else
-    bsi = gsi_for_stmt (s1);
-
-  gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
 
   var = create_tmp_reg (type, "predreastmp");
   tmp_name = make_ssa_name (var);
@@ -2247,6 +2251,7 @@  reassociate_to_the_same_stmt (tree name1, tree name2, gimple *insert_before)
   s1 = gsi_stmt (bsi);
   update_stmt (s1);
 
+  gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
   gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
 
   return new_stmt;
@@ -2255,11 +2260,10 @@  reassociate_to_the_same_stmt (tree name1, tree name2, gimple *insert_before)
 /* Returns the statement that combines references R1 and R2.  In case R1
    and R2 are not used in the same statement, but they are used with an
    associative and commutative operation in the same expression, reassociate
-   the expression so that they are used in the same statement.  The combined
-   statement is inserted before INSERT_BEFORE if it's not NULL.  */
+   the expression so that they are used in the same statement.  */
 
 static gimple *
-stmt_combining_refs (dref r1, dref r2, gimple *insert_before)
+stmt_combining_refs (dref r1, dref r2)
 {
   gimple *stmt1, *stmt2;
   tree name1 = name_for_ref (r1);
@@ -2270,7 +2274,7 @@  stmt_combining_refs (dref r1, dref r2, gimple *insert_before)
   if (stmt1 == stmt2)
     return stmt1;
 
-  return reassociate_to_the_same_stmt (name1, name2, insert_before);
+  return reassociate_to_the_same_stmt (name1, name2);
 }
 
 /* Tries to combine chains CH1 and CH2 together.  If this succeeds, the
@@ -2283,8 +2287,7 @@  combine_chains (chain_p ch1, chain_p ch2)
   enum tree_code op = ERROR_MARK;
   bool swap = false;
   chain_p new_chain;
-  int i, j, num;
-  gimple *root_stmt;
+  unsigned i;
   tree rslt_type = NULL_TREE;
 
   if (ch1 == ch2)
@@ -2305,9 +2308,6 @@  combine_chains (chain_p ch1, chain_p ch2)
 	return NULL;
     }
 
-  ch1->combined = true;
-  ch2->combined = true;
-
   if (swap)
     std::swap (ch1, ch2);
 
@@ -2319,69 +2319,65 @@  combine_chains (chain_p ch1, chain_p ch2)
   new_chain->rslt_type = rslt_type;
   new_chain->length = ch1->length;
 
-  gimple *insert = NULL;
-  num = ch1->refs.length ();
-  i = (new_chain->length == 0) ? num - 1 : 0;
-  j = (new_chain->length == 0) ? -1 : 1;
-  /* For ZERO length chain, process refs in reverse order so that dominant
-     position is ready when it comes to the root ref.
-     For non-ZERO length chain, process refs in order.  See PR79663.  */
-  for (; num > 0; num--, i += j)
-    {
-      r1 = ch1->refs[i];
-      r2 = ch2->refs[i];
+  for (i = 0; (ch1->refs.iterate (i, &r1)
+	       && ch2->refs.iterate (i, &r2)); i++)
+    {
       nw = XCNEW (struct dref_d);
+      nw->stmt = stmt_combining_refs (r1, r2);
       nw->distance = r1->distance;
 
-      /* For ZERO length chain, insert combined stmt of root ref at dominant
-	 position.  */
-      nw->stmt = stmt_combining_refs (r1, r2, i == 0 ? insert : NULL);
-      /* For ZERO length chain, record dominant position where combined stmt
-	 of root ref should be inserted.  In this case, though all root refs
-	 dominate following ones, it's possible that combined stmt doesn't.
-	 See PR70754.  */
-      if (new_chain->length == 0
-	  && (insert == NULL || stmt_dominates_stmt_p (nw->stmt, insert)))
-	insert = nw->stmt;
-
       new_chain->refs.safe_push (nw);
     }
-  if (new_chain->length == 0)
-    {
-      /* Restore the order for ZERO length chain's refs.  */
-      num = new_chain->refs.length () >> 1;
-      for (i = 0, j = new_chain->refs.length () - 1; i < num; i++, j--)
-	std::swap (new_chain->refs[i], new_chain->refs[j]);
 
-      /* For ZERO length chain, has_max_use_after must be true since root
-	 combined stmt must dominates others.  */
-      new_chain->has_max_use_after = true;
-      return new_chain;
-    }
+  ch1->combined = true;
+  ch2->combined = true;
+  return new_chain;
+}
 
-  new_chain->has_max_use_after = false;
-  root_stmt = get_chain_root (new_chain)->stmt;
-  for (i = 1; new_chain->refs.iterate (i, &nw); i++)
-    {
-      if (nw->distance == new_chain->length
-	  && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
-	{
-	  new_chain->has_max_use_after = true;
-	  break;
-	}
-    }
+/* Recursively update position information of all offspring chains to ROOT
+   chain's position information.  */
 
-  return new_chain;
+static void
+update_pos_for_combined_chains (chain_p root)
+{
+  chain_p ch1 = root->ch1, ch2 = root->ch2;
+  dref ref, ref1, ref2;
+  for (unsigned j = 0; (root->refs.iterate (j, &ref)
+			&& ch1->refs.iterate (j, &ref1)
+			&& ch2->refs.iterate (j, &ref2)); ++j)
+    ref1->pos = ref2->pos = ref->pos;
+
+  if (ch1->type == CT_COMBINATION)
+    update_pos_for_combined_chains (ch1);
+  if (ch2->type == CT_COMBINATION)
+    update_pos_for_combined_chains (ch2);
 }
 
-/* Try to combine the CHAINS.  */
+/* Returns true if statement S1 dominates statement S2.  */
+
+static bool
+pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
+{
+  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
+
+  if (!bb1 || s1 == s2)
+    return true;
+
+  if (bb1 == bb2)
+    return gimple_uid (s1) < gimple_uid (s2);
+
+  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
+}
+
+/* Try to combine the CHAINS in LOOP.  */
 
 static void
-try_combine_chains (vec<chain_p> *chains)
+try_combine_chains (struct loop *loop, vec<chain_p> *chains)
 {
   unsigned i, j;
   chain_p ch1, ch2, cch;
   auto_vec<chain_p> worklist;
+  bool combined_p = false;
 
   FOR_EACH_VEC_ELT (*chains, i, ch1)
     if (chain_can_be_combined_p (ch1))
@@ -2403,6 +2399,78 @@  try_combine_chains (vec<chain_p> *chains)
 	    {
 	      worklist.safe_push (cch);
 	      chains->safe_push (cch);
+	      combined_p = true;
+	      break;
+	    }
+	}
+    }
+  if (!combined_p)
+    return;
+
+  /* Setup UID for all statements in dominance order.  */
+  basic_block *bbs = get_loop_body (loop);
+  renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes);
+  free (bbs);
+
+  /* Re-association in combined chains may generate statements different to
+     order of references of the original chain.  We need to keep references
+     of combined chain in dominance order so that all uses will be inserted
+     after definitions.  Note:
+       A) This is necessary for all combined chains.
+       B) This is only necessary for ZERO distance references because other
+	  references inherit value from loop carried PHIs.
+
+     We first update position information for all combined chains.  */
+  dref ref;
+  for (i = 0; chains->iterate (i, &ch1); ++i)
+    {
+      if (ch1->type != CT_COMBINATION || ch1->combined)
+	continue;
+
+      for (j = 0; ch1->refs.iterate (j, &ref); ++j)
+	ref->pos = gimple_uid (ref->stmt);
+
+      update_pos_for_combined_chains (ch1);
+    }
+  /* Then sort references according to newly updated position information.  */
+  for (i = 0; chains->iterate (i, &ch1); ++i)
+    {
+      if (ch1->type != CT_COMBINATION && !ch1->combined)
+	continue;
+
+      /* Find the first reference with non-ZERO distance.  */
+      if (ch1->length == 0)
+	j = ch1->refs.length();
+      else
+	{
+	  for (j = 0; ch1->refs.iterate (j, &ref); ++j)
+	    if (ref->distance != 0)
+	      break;
+	}
+
+      /* Sort all ZERO distance references by position.  */
+      qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);
+
+      if (ch1->combined)
+	continue;
+
+      /* For ZERO length chain, has_max_use_after must be true since root
+	 combined stmt must dominates others.  */
+      if (ch1->length == 0)
+	{
+	  ch1->has_max_use_after = true;
+	  continue;
+	}
+      /* Check if there is use at max distance after root for combined chains
+	 and set flag accordingly.  */
+      ch1->has_max_use_after = false;
+      gimple *root_stmt = get_chain_root (ch1)->stmt;
+      for (j = 1; ch1->refs.iterate (j, &ref); ++j)
+	{
+	  if (ref->distance == ch1->length
+	      && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
+	    {
+	      ch1->has_max_use_after = true;
 	      break;
 	    }
 	}
@@ -2564,7 +2632,7 @@  tree_predictive_commoning_loop (struct loop *loop)
   prepare_initializers (loop, chains);
 
   /* Try to combine the chains that are always worked with together.  */
-  try_combine_chains (&chains);
+  try_combine_chains (loop, &chains);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {