tree-optimization/93868 copy SLP tree before re-arranging stmts

Message ID nycvar.YFH.7.76.2002251030240.5137@zhemvz.fhfr.qr
State New
Headers show
Series
  • tree-optimization/93868 copy SLP tree before re-arranging stmts
Related show

Commit Message

Richard Biener Feb. 25, 2020, 9:31 a.m.
This avoids altering possibly shared SLP subtrees when attempting
to get rid of permutations in SLP reductions by copying the SLP
subtree before re-arranging stmts in it.

It's a little bit awkward and some refactoring I've done for GCC11
would make this less so but it's not the point to push this thus
the following.  Copying is also easier than verifying there's
no sharing involved.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

Richard.

2020-02-24  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/93868
	* tree-vect-slp.c (slp_copy_subtree): New function.
	(vect_attempt_slp_rearrange_stmts): Copy the SLP tree before
	re-arranging stmts in it.

	* gcc.dg/torture/pr93868.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr93868.c | 31 +++++++++++++++++++++
 gcc/tree-vect-slp.c                    | 50 ++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr93868.c

-- 
2.16.4

Patch

diff --git a/gcc/testsuite/gcc.dg/torture/pr93868.c b/gcc/testsuite/gcc.dg/torture/pr93868.c
new file mode 100644
index 00000000000..850eba1dd49
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr93868.c
@@ -0,0 +1,31 @@ 
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-vectorize" } */
+
+unsigned a[1024];
+unsigned b[1024];
+
+void __attribute__((noipa))
+foo (unsigned *q, unsigned *r)
+{
+  unsigned sum1 = 0, sum2 = 0;
+  for (int i = 0; i < 512; ++i)
+    {
+      sum1 += a[2*i];
+      sum2 += a[2*i+1];
+      b[2*i] = a[2*i+1];
+      b[2*i+1] = a[2*i];
+    }
+  *q = sum1;
+  *r = sum2;
+}
+
+int main()
+{
+  unsigned sum1, sum2;
+  a[0] = 0;
+  a[1] = 1;
+  foo (&sum1, &sum2);
+  if (b[0] != 1 || b[1] != 0)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index c61c7e197d8..c1fa0e6b6a4 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1749,6 +1749,44 @@  vect_mark_slp_stmts_relevant (slp_tree node)
   vect_mark_slp_stmts_relevant (node, visited);
 }
 
+/* Copy the SLP subtree rooted at NODE.  */
+
+static slp_tree
+slp_copy_subtree (slp_tree node, hash_map<slp_tree, slp_tree> &map)
+{
+  unsigned i;
+
+  bool existed_p;
+  slp_tree &copy = map.get_or_insert (node, &existed_p);
+  if (existed_p)
+    return copy;
+
+  copy = XNEW (_slp_tree);
+  memcpy (copy, node, sizeof (_slp_tree));
+  if (SLP_TREE_SCALAR_STMTS (node).exists ())
+    {
+      SLP_TREE_SCALAR_STMTS (copy) = SLP_TREE_SCALAR_STMTS (node).copy ();
+      stmt_vec_info stmt_info;
+      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
+	STMT_VINFO_NUM_SLP_USES (stmt_info)++;
+    }
+  if (SLP_TREE_SCALAR_OPS (node).exists ())
+    SLP_TREE_SCALAR_OPS (copy) = SLP_TREE_SCALAR_OPS (node).copy ();
+  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
+    SLP_TREE_LOAD_PERMUTATION (copy) = SLP_TREE_LOAD_PERMUTATION (node).copy ();
+  if (SLP_TREE_CHILDREN (node).exists ())
+    SLP_TREE_CHILDREN (copy) = SLP_TREE_CHILDREN (node).copy ();
+  gcc_assert (!SLP_TREE_VEC_STMTS (node).exists ());
+  copy->refcnt = 0;
+
+  slp_tree child;
+  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy), i, child)
+    {
+      SLP_TREE_CHILDREN (copy)[i] = slp_copy_subtree (child, map);
+      SLP_TREE_CHILDREN (copy)[i]->refcnt++;
+    }
+  return copy;
+}
 
 /* Rearrange the statements of NODE according to PERMUTATION.  */
 
@@ -1841,6 +1879,18 @@  vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
      statements in the nodes is not important unless they are memory
      accesses, we can rearrange the statements in all the nodes
      according to the order of the loads.  */
+
+  /* We have to unshare the SLP tree we modify.  */
+  hash_map<slp_tree, slp_tree> map;
+  slp_tree unshared = slp_copy_subtree (SLP_INSTANCE_TREE (slp_instn), map);
+  vect_free_slp_tree (SLP_INSTANCE_TREE (slp_instn), false);
+  unshared->refcnt++;
+  SLP_INSTANCE_TREE (slp_instn) = unshared;
+  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
+    SLP_INSTANCE_LOADS (slp_instn)[i] = *map.get (node);
+  node = SLP_INSTANCE_LOADS (slp_instn)[0];
+
+  /* Do the actual re-arrangement.  */
   hash_set<slp_tree> visited;
   vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
 			    node->load_permutation, visited);