Make vectorizer versioning re-use if-conversion versioned loops

Message ID alpine.LSU.2.20.1906121500230.10704@zhemvz.fhfr.qr
State New
Headers show
Series
  • Make vectorizer versioning re-use if-conversion versioned loops
Related show

Commit Message

Richard Biener June 12, 2019, 1:07 p.m.
This avoids loop_version () calls when if-conversion has already versioned
the loops, and simplifies vect_loop_versioning because we need not
do as much fixup.  There's follow-up work to do for the profile scaling.
Honza - any suggestion on how to apply a different true/false
profile to an existing condition and its branches?

I've built SPEC 2006 with this and see 132 loop versions re-used
out of 3254 versionings done by the vectorizer (most loops do not
need if-conversion).

Bootstrap / regtest running on x86_64-unknown-linux-gnu.

Richard.

	* tree-vectorizer.h (vect_loop_vectorized_call): Declare.
	* tree-vectorizer.c (vect_loop_vectorized_call): Export and
	also return the condition stmt.
	* tree-vect-loop-manip.c (vect_loop_versioning): Reuse the
	loop version created by if-conversion instead of versioning
	again.

Patch

diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index b3fae5ba4da..be4b95a14a1 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -3032,7 +3032,8 @@  vect_loop_versioning (loop_vec_info loop_vinfo,
     vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
 
   if (cond_expr)
-    cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
+    cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
+					&cond_expr_stmt_list,
 					is_gimple_condexpr, NULL_TREE);
 
   if (version_align)
@@ -3076,45 +3077,77 @@  vect_loop_versioning (loop_vec_info loop_vinfo,
 				      is_gimple_condexpr, NULL_TREE);
   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
 
-  initialize_original_copy_tables ();
   if (scalar_loop)
     {
-      edge scalar_e;
-      basic_block preheader, scalar_preheader;
+      gcond *cond;
+      gimple *call;
+      if (!(call = vect_loop_vectorized_call (scalar_loop, &cond)))
+	gcc_unreachable ();
+      condition_bb = gimple_bb (cond);
+      gimple_cond_set_condition_from_tree (cond, cond_expr);
+      update_stmt (cond);
 
-      /* We don't want to scale SCALAR_LOOP's frequencies, we need to
-	 scale LOOP's frequencies instead.  */
-      nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
-			    prob, prob.invert (), prob, prob.invert (), true);
-      scale_loop_frequencies (loop, prob);
-      /* CONDITION_BB was created above SCALAR_LOOP's preheader,
-	 while we need to move it above LOOP's preheader.  */
-      e = loop_preheader_edge (loop);
-      scalar_e = loop_preheader_edge (scalar_loop);
-      /* The vector loop preheader might not be empty, since new
-	 invariants could have been created while analyzing the loop.  */
-      gcc_assert (single_pred_p (e->src));
-      gcc_assert (empty_block_p (scalar_e->src)
-		  && single_pred_p (scalar_e->src));
-      gcc_assert (single_pred_p (condition_bb));
-      preheader = e->src;
-      scalar_preheader = scalar_e->src;
-      scalar_e = find_edge (condition_bb, scalar_preheader);
-      e = single_pred_edge (preheader);
-      redirect_edge_and_branch_force (single_pred_edge (condition_bb),
-				      scalar_preheader);
-      redirect_edge_and_branch_force (scalar_e, preheader);
-      redirect_edge_and_branch_force (e, condition_bb);
-      set_immediate_dominator (CDI_DOMINATORS, condition_bb,
-			       single_pred (condition_bb));
-      set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
-			       single_pred (scalar_preheader));
-      set_immediate_dominator (CDI_DOMINATORS, preheader,
-			       condition_bb);
+      if (cond_expr_stmt_list)
+	{
+	  cond_exp_gsi = gsi_for_stmt (call);
+	  gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
+				 GSI_SAME_STMT);
+	}
+
+      /* ???  if-conversion uses profile_probability::always () but
+         prob below is profile_probability::likely ().  */
+      nloop = scalar_loop;
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "reusing loop version created by if conversion\n");
     }
   else
-    nloop = loop_version (loop, cond_expr, &condition_bb,
-			  prob, prob.invert (), prob, prob.invert (), true);
+    {
+      initialize_original_copy_tables ();
+      nloop = loop_version (loop, cond_expr, &condition_bb,
+			    prob, prob.invert (), prob, prob.invert (), true);
+      free_original_copy_tables ();
+
+      if (cond_expr_stmt_list)
+	{
+	  cond_exp_gsi = gsi_last_bb (condition_bb);
+	  gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
+				 GSI_SAME_STMT);
+	}
+
+      /* Loop versioning violates an assumption we try to maintain during
+	 vectorization - that the loop exit block has a single predecessor.
+	 After versioning, the exit block of both loop versions is the same
+	 basic block (i.e. it has two predecessors). Just in order to simplify
+	 following transformations in the vectorizer, we fix this situation
+	 here by adding a new (empty) block on the exit-edge of the loop,
+	 with the proper loop-exit phis to maintain loop-closed-form.
+	 If loop versioning wasn't done from loop, but scalar_loop instead,
+	 merge_bb will have already just a single successor.  */
+
+      merge_bb = single_exit (loop)->dest;
+      if (EDGE_COUNT (merge_bb->preds) >= 2)
+	{
+	  gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
+	  new_exit_bb = split_edge (single_exit (loop));
+	  new_exit_e = single_exit (loop);
+	  e = EDGE_SUCC (new_exit_bb, 0);
+
+	  for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi);
+	       gsi_next (&gsi))
+	    {
+	      tree new_res;
+	      orig_phi = gsi.phi ();
+	      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
+	      new_phi = create_phi_node (new_res, new_exit_bb);
+	      arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
+	      add_phi_arg (new_phi, arg, new_exit_e,
+			   gimple_phi_arg_location_from_edge (orig_phi, e));
+	      adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
+	    }
+	}
+
+      update_ssa (TODO_update_ssa);
+    }
 
   if (version_niter)
     {
@@ -3141,48 +3174,6 @@  vect_loop_versioning (loop_vec_info loop_vinfo,
 			 "alignment\n");
 
     }
-  free_original_copy_tables ();
-
-  /* Loop versioning violates an assumption we try to maintain during
-     vectorization - that the loop exit block has a single predecessor.
-     After versioning, the exit block of both loop versions is the same
-     basic block (i.e. it has two predecessors). Just in order to simplify
-     following transformations in the vectorizer, we fix this situation
-     here by adding a new (empty) block on the exit-edge of the loop,
-     with the proper loop-exit phis to maintain loop-closed-form.
-     If loop versioning wasn't done from loop, but scalar_loop instead,
-     merge_bb will have already just a single successor.  */
-
-  merge_bb = single_exit (loop)->dest;
-  if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
-    {
-      gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
-      new_exit_bb = split_edge (single_exit (loop));
-      new_exit_e = single_exit (loop);
-      e = EDGE_SUCC (new_exit_bb, 0);
-
-      for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
-	{
-	  tree new_res;
-	  orig_phi = gsi.phi ();
-	  new_res = copy_ssa_name (PHI_RESULT (orig_phi));
-	  new_phi = create_phi_node (new_res, new_exit_bb);
-	  arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
-	  add_phi_arg (new_phi, arg, new_exit_e,
-		       gimple_phi_arg_location_from_edge (orig_phi, e));
-	  adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
-	}
-    }
-
-  /* End loop-exit-fixes after versioning.  */
-
-  if (cond_expr_stmt_list)
-    {
-      cond_exp_gsi = gsi_last_bb (condition_bb);
-      gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
-			     GSI_SAME_STMT);
-    }
-  update_ssa (TODO_update_ssa);
 
   return nloop;
 }
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 4f6c65faf64..325ef58722d 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -727,8 +727,8 @@  vect_free_loop_info_assumptions (struct loop *loop)
 /* If LOOP has been versioned during ifcvt, return the internal call
    guarding it.  */
 
-static gimple *
-vect_loop_vectorized_call (struct loop *loop)
+gimple *
+vect_loop_vectorized_call (struct loop *loop, gcond **cond)
 {
   basic_block bb = loop_preheader_edge (loop)->src;
   gimple *g;
@@ -744,6 +744,8 @@  vect_loop_vectorized_call (struct loop *loop)
   while (1);
   if (g && gimple_code (g) == GIMPLE_COND)
     {
+      if (cond)
+	*cond = as_a <gcond *> (g);
       gimple_stmt_iterator gsi = gsi_for_stmt (g);
       gsi_prev (&gsi);
       if (!gsi_end_p (gsi))
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 4db30ccc22b..6713b895091 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1650,5 +1650,7 @@  void vect_pattern_recog (vec_info *);
 /* In tree-vectorizer.c.  */
 unsigned vectorize_loops (void);
 void vect_free_loop_info_assumptions (struct loop *);
+gimple *vect_loop_vectorized_call (struct loop *, gcond **cond = NULL);
+
 
 #endif  /* GCC_TREE_VECTORIZER_H  */