Fix PR83326

Message ID alpine.LSU.2.20.1712141125540.12252@zhemvz.fhfr.qr
State New
Headers show
Series
  • Fix PR83326
Related show

Commit Message

Richard Biener Dec. 14, 2017, 10:29 a.m.
After the previous fix of mine to no longer peel loops during cunrolli
to avoid littering an outer loop with tons of loop exit tests and thus
disabling vectorization there's a reported slowdown in the exchange
benchmark.  This is because we no longer fully unroll a loop with
appearant constant iterations early.  This is a long-standing issue
of cunrolli running before loop header copying and thus receiving
niters like (a < b) ? 2 : 0.  The fix is to treat zero or constant
iteration counts like if they were constant, doing loop-header copying
on-the fly during unrolling by simply not removing the first peeled
copies exit test.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2017-12-14  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/83326
	* tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Add
	may_be_zero parameter and handle it by not marking the first
	peeled copy as not exiting the loop.
	(try_peel_loop): Likewise.
	(canonicalize_loop_induction_variables): Use number_of_iterations_exit
	to handle the case of constant or zero iterations and perform
	loop header copying on-the-fly.

	* gcc.dg/tree-ssa/pr81388-2.c: Adjust.

Patch

Index: gcc/tree-ssa-loop-ivcanon.c
===================================================================
--- gcc/tree-ssa-loop-ivcanon.c	(revision 255622)
+++ gcc/tree-ssa-loop-ivcanon.c	(working copy)
@@ -681,7 +681,7 @@  unloop_loops (bitmap loop_closed_ssa_inv
 
 static bool
 try_unroll_loop_completely (struct loop *loop,
-			    edge exit, tree niter,
+			    edge exit, tree niter, bool may_be_zero,
 			    enum unroll_level ul,
 			    HOST_WIDE_INT maxiter,
 			    location_t locus, bool allow_peel)
@@ -893,6 +893,8 @@  try_unroll_loop_completely (struct loop
 	  exit = NULL;
 	  bitmap_clear (wont_exit);
 	}
+      if (may_be_zero)
+	bitmap_clear_bit (wont_exit, 1);
 
       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 						 n_unroll, wont_exit,
@@ -977,7 +979,7 @@  estimated_peeled_sequence_size (struct l
 
 static bool
 try_peel_loop (struct loop *loop,
-	       edge exit, tree niter,
+	       edge exit, tree niter, bool may_be_zero,
 	       HOST_WIDE_INT maxiter)
 {
   HOST_WIDE_INT npeel;
@@ -1080,6 +1082,8 @@  try_peel_loop (struct loop *loop,
       exit = NULL;
       bitmap_clear (wont_exit);
     }
+  if (may_be_zero)
+    bitmap_clear_bit (wont_exit, 1);
   if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 					     npeel, wont_exit,
 					     exit, &edges_to_remove,
@@ -1152,13 +1156,35 @@  canonicalize_loop_induction_variables (s
   HOST_WIDE_INT maxiter;
   bool modified = false;
   location_t locus = UNKNOWN_LOCATION;
+  struct tree_niter_desc niter_desc;
+  bool may_be_zero = false;
 
-  niter = number_of_latch_executions (loop);
+  /* For unrolling allow conditional constant or zero iterations, thus
+     perform loop-header copying on-the-fly.  */
   exit = single_exit (loop);
+  niter = chrec_dont_know;
+  if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
+    {
+      niter = niter_desc.niter;
+      may_be_zero
+	= niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
+    }
   if (TREE_CODE (niter) == INTEGER_CST)
     locus = gimple_location (last_stmt (exit->src));
   else
     {
+      /* For non-constant niter fold may_be_zero into niter again.  */
+      if (may_be_zero)
+	{
+	  if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
+	    niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
+				 niter_desc.may_be_zero,
+				 build_int_cst (TREE_TYPE (niter), 0), niter);
+	  else
+	    niter = chrec_dont_know;
+	  may_be_zero = false;
+	}
+
       /* If the loop has more than one exit, try checking all of them
 	 for # of iterations determinable through scev.  */
       if (!exit)
@@ -1213,17 +1239,31 @@  canonicalize_loop_induction_variables (s
      populates the loop bounds.  */
   modified |= remove_redundant_iv_tests (loop);
 
-  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus,
-				  allow_peel))
+  if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
+				  maxiter, locus, allow_peel))
     return true;
 
   if (create_iv
       && niter && !chrec_contains_undetermined (niter)
       && exit && just_once_each_iteration_p (loop, exit->src))
-    create_canonical_iv (loop, exit, niter);
+    {
+      tree iv_niter = niter;
+      if (may_be_zero)
+	{
+	  if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
+	    iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
+				    niter_desc.may_be_zero,
+				    build_int_cst (TREE_TYPE (iv_niter), 0),
+				    iv_niter);
+	  else
+	    iv_niter = NULL_TREE;
+	}
+      if (iv_niter)
+	create_canonical_iv (loop, exit, iv_niter);
+    }
 
   if (ul == UL_ALL)
-    modified |= try_peel_loop (loop, exit, niter, maxiter);
+    modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
 
   return modified;
 }
Index: gcc/testsuite/gcc.dg/tree-ssa/pr81388-2.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/pr81388-2.c	(revision 255622)
+++ gcc/testsuite/gcc.dg/tree-ssa/pr81388-2.c	(working copy)
@@ -11,4 +11,4 @@  void foo(unsigned dst)
   } while (dst < end);
 }
 
-/* { dg-final { scan-tree-dump-times " zero if " 1 "ivcanon" } } */
+/* { dg-final { scan-tree-dump " zero if " "ivcanon" } } */