Fix PR91207, revert vectorizer change for PR91178

Message ID alpine.LSU.2.20.1907191044140.30921@zhemvz.fhfr.qr
State New
Headers show
Series
  • Fix PR91207, revert vectorizer change for PR91178
Related show

Commit Message

Richard Biener July 19, 2019, 8:45 a.m.
I need to think more about the PR91178 vectorizer change, so I am
reverting it for now.  I'm leaving the testcase in; it compiles somewhat
slowly (6s for -O0 optimized cc1 with checking), but that is still
reasonable due to the other fix for said PR.

Applied.

Richard.

2019-07-19  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/91207
	Revert
	2019-07-17  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/91178
	* tree-vect-stmts.c (get_group_load_store_type): For SLP
	loads with a gap larger than the vector size always use
	VMAT_STRIDED_SLP.
	(vectorizable_load): For VMAT_STRIDED_SLP with a permutation
	avoid loading vectors that are only contained in the gap
	and thus are not needed.

	* gcc.dg/torture/pr91207.c: New testcase.

Patch

Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	(revision 273590)
+++ gcc/tree-vect-stmts.c	(working copy)
@@ -2267,14 +2267,6 @@  get_group_load_store_type (stmt_vec_info
 			/ vect_get_scalar_dr_size (first_dr_info)))
 	    overrun_p = false;
 
-	  /* If the gap at the end of the group exceeds a whole vector
-	     in size use the strided SLP code which can skip code-generation
-	     for the gap.  */
-	  if (vls_type == VLS_LOAD && known_gt (gap, nunits))
-	    *memory_access_type = VMAT_STRIDED_SLP;
-	  else
-	    *memory_access_type = VMAT_CONTIGUOUS;
-
 	  /* If the gap splits the vector in half and the target
 	     can do half-vector operations avoid the epilogue peeling
 	     by simply loading half of the vector only.  Usually
@@ -2282,8 +2274,7 @@  get_group_load_store_type (stmt_vec_info
 	  dr_alignment_support alignment_support_scheme;
 	  scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
 	  machine_mode vmode;
-	  if (*memory_access_type == VMAT_CONTIGUOUS
-	      && overrun_p
+	  if (overrun_p
 	      && !masked_p
 	      && (((alignment_support_scheme
 		      = vect_supportable_dr_alignment (first_dr_info, false)))
@@ -2306,6 +2297,7 @@  get_group_load_store_type (stmt_vec_info
 				 "Peeling for outer loop is not supported\n");
 	      return false;
 	    }
+	  *memory_access_type = VMAT_CONTIGUOUS;
 	}
     }
   else
@@ -8740,7 +8732,6 @@  vectorizable_load (stmt_vec_info stmt_in
       /* Checked by get_load_store_type.  */
       unsigned int const_nunits = nunits.to_constant ();
       unsigned HOST_WIDE_INT cst_offset = 0;
-      unsigned int group_gap = 0;
 
       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
       gcc_assert (!nested_in_vect_loop);
@@ -8758,7 +8749,6 @@  vectorizable_load (stmt_vec_info stmt_in
       if (slp && grouped_load)
 	{
 	  group_size = DR_GROUP_SIZE (first_stmt_info);
-	  group_gap = DR_GROUP_GAP (first_stmt_info);
 	  ref_type = get_group_alias_ptr_type (first_stmt_info);
 	}
       else
@@ -8902,14 +8892,6 @@  vectorizable_load (stmt_vec_info stmt_in
 	  if (nloads > 1)
 	    vec_alloc (v, nloads);
 	  stmt_vec_info new_stmt_info = NULL;
-	  if (slp && slp_perm
-	      && (group_el % group_size) > group_size - group_gap
-	      && (group_el % group_size) + nloads * lnel < group_size)
-	    {
-	      dr_chain.quick_push (NULL_TREE);
-	      group_el += nloads * lnel;
-	      continue;
-	    }
 	  for (i = 0; i < nloads; i++)
 	    {
 	      tree this_off = build_int_cst (TREE_TYPE (alias_off),
Index: gcc/testsuite/gcc.dg/torture/pr91207.c
===================================================================
--- gcc/testsuite/gcc.dg/torture/pr91207.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/torture/pr91207.c	(working copy)
@@ -0,0 +1,25 @@ 
+/* { dg-do run } */
+
+long long a;
+int b[92][32];
+unsigned int c, d;
+
+void e(long long *f, int p2) { *f = p2; }
+
+int main()
+{
+  for (int i = 6; i <= 20; d = i++)
+    for (int j = 6; j <= 91; j++) {
+	for (int k = 16; k <= 31;k++)
+	  b[j][k] ^= 7;
+	c *= d;
+    }
+
+  for (int i = 0; i < 21; ++i)
+    for (int j = 0; j < 32; ++j)
+      e(&a, b[i][j]);
+
+  if (a != 7)
+    __builtin_abort ();
+  return 0;
+}