Handle LOOP_DIST_ALIAS ifns in move_sese_region_to_fn (PR tree-optimization/83359)

Message ID 20171211211051.GR2353@tucnak
State New
Headers show
Series
  • Handle LOOP_DIST_ALIAS ifns in move_sese_region_to_fn (PR tree-optimization/83359)
Related show

Commit Message

Jakub Jelinek Dec. 11, 2017, 9:10 p.m.
Hi!

Unlike LOOP_VECTORIZED ifns, LOOP_DIST_ALIAS is added by the ldist pass
and needs to be maintained until the vectorizer; parloops runs in between
the two.  Earlier I added code to update or drop orig_loop_num during
move_sese_region_to_fn, but that is not sufficient.  If we move
the whole pair of loops with the associated LOOP_DIST_ALIAS call into
the outlined loopfn, we need to update the first argument, as orig_loop_num
is likely changing.  If the whole triplet (two loops with orig_loop_num
and LOOP_DIST_ALIAS with the same first argument) stays in the parent
function, we don't need to adjust it.  In all other cases, this patch folds
the LOOP_DIST_ALIAS ifn to its second argument, like the vectorizer does if
it fails to vectorize the loop.

Bootstrapped/regtested on x86_64-linux, i686-linux, powerpc64le-linux,
bootstrapped on powerpc64-linux, regtest there pending.  Ok for trunk?

2017-12-11  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/83359
	* tree-cfg.h (fold_loop_internal_call): Declare.
	* tree-vectorizer.c (fold_loop_internal_call): Moved to ...
	* tree-cfg.c (fold_loop_internal_call): ... here.  No longer static.
	(find_loop_dist_alias): New function.
	(move_sese_region_to_fn): If any dloop->orig_loop_num value is
	updated, also adjust any corresponding LOOP_DIST_ALIAS internal
	calls.

	* gcc.dg/graphite/pr83359.c: New test.


	Jakub

Comments

Richard Biener Dec. 12, 2017, 9:15 a.m. | #1
On Mon, 11 Dec 2017, Jakub Jelinek wrote:

> Hi!

> 

> Unlike LOOP_VECTORIZED ifns, LOOP_DIST_ALIAS is added by the ldist pass

> and needs to be maintained until the vectorizer, and parloops in between

> that.  Earlier I've added code to update or drop orig_loop_num during

> move_sese_region_to_fn, but that is not sufficient.  If we move

> the whole pair of loops with the associated LOOP_DIST_ALIAS call into

> the outlined loopfn, we need to update the first argument, as orig_loop_num

> is likely changing.  If the whole triplet (two loops with orig_loop_num

> and LOOP_DIST_ALIAS with the same first argument) stays in parent function,

> we don't need to adjust it.  In all other cases, this patch folds the

> LOOP_DIST_ALIAS ifn to the second argument, like the vectorizer does if

> it fails to vectorize it.

> 

> Bootstrapped/regtested on x86_64-linux, i686-linux, powerpc64le-linux,

> bootstrapped on powerpc64-linux, regtest there pending.  Ok for trunk?


Ok.

Thanks,
Richard.

> 2017-12-11  Jakub Jelinek  <jakub@redhat.com>

> 

> 	PR tree-optimization/83359

> 	* tree-cfg.h (fold_loop_internal_call): Declare.

> 	* tree-vectorizer.c (fold_loop_internal_call): Moved to ...

> 	* tree-cfg.c (fold_loop_internal_call): ... here.  No longer static.

> 	(find_loop_dist_alias): New function.

> 	(move_sese_region_to_fn): If any dloop->orig_loop_num value is

> 	updated, also adjust any corresponding LOOP_DIST_ALIAS internal

> 	calls.

> 

> 	* gcc.dg/graphite/pr83359.c: New test.

> 

> --- gcc/tree-cfg.h.jj	2017-09-05 23:28:14.000000000 +0200

> +++ gcc/tree-cfg.h	2017-12-11 12:35:24.284777550 +0100

> @@ -77,6 +77,7 @@ extern void gather_blocks_in_sese_region

>  					  vec<basic_block> *bbs_p);

>  extern void verify_sese (basic_block, basic_block, vec<basic_block> *);

>  extern bool gather_ssa_name_hash_map_from (tree const &, tree const &, void *);

> +extern void fold_loop_internal_call (gimple *, tree);

>  extern basic_block move_sese_region_to_fn (struct function *, basic_block,

>  				           basic_block, tree);

>  extern void dump_function_to_file (tree, FILE *, dump_flags_t);

> --- gcc/tree-vectorizer.c.jj	2017-09-01 09:26:37.000000000 +0200

> +++ gcc/tree-vectorizer.c	2017-12-11 12:33:41.436055580 +0100

> @@ -464,27 +464,6 @@ vect_loop_vectorized_call (struct loop *

>    return NULL;

>  }

>  

> -/* Fold loop internal call G like IFN_LOOP_VECTORIZED/IFN_LOOP_DIST_ALIAS

> -   to VALUE and update any immediate uses of it's LHS.  */

> -

> -static void

> -fold_loop_internal_call (gimple *g, tree value)

> -{

> -  tree lhs = gimple_call_lhs (g);

> -  use_operand_p use_p;

> -  imm_use_iterator iter;

> -  gimple *use_stmt;

> -  gimple_stmt_iterator gsi = gsi_for_stmt (g);

> -

> -  update_call_from_tree (&gsi, value);

> -  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)

> -    {

> -      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)

> -	SET_USE (use_p, value);

> -      update_stmt (use_stmt);

> -    }

> -}

> -

>  /* If LOOP has been versioned during loop distribution, return the gurading

>     internal call.  */

>  

> --- gcc/tree-cfg.c.jj	2017-12-07 18:05:30.000000000 +0100

> +++ gcc/tree-cfg.c	2017-12-11 12:34:55.054140750 +0100

> @@ -7337,6 +7337,47 @@ gather_ssa_name_hash_map_from (tree cons

>    return true;

>  }

>  

> +/* Return LOOP_DIST_ALIAS call if present in BB.  */

> +

> +static gimple *

> +find_loop_dist_alias (basic_block bb)

> +{

> +  gimple *g = last_stmt (bb);

> +  if (g == NULL || gimple_code (g) != GIMPLE_COND)

> +    return NULL;

> +

> +  gimple_stmt_iterator gsi = gsi_for_stmt (g);

> +  gsi_prev (&gsi);

> +  if (gsi_end_p (gsi))

> +    return NULL;

> +

> +  g = gsi_stmt (gsi);

> +  if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS))

> +    return g;

> +  return NULL;

> +}

> +

> +/* Fold loop internal call G like IFN_LOOP_VECTORIZED/IFN_LOOP_DIST_ALIAS

> +   to VALUE and update any immediate uses of it's LHS.  */

> +

> +void

> +fold_loop_internal_call (gimple *g, tree value)

> +{

> +  tree lhs = gimple_call_lhs (g);

> +  use_operand_p use_p;

> +  imm_use_iterator iter;

> +  gimple *use_stmt;

> +  gimple_stmt_iterator gsi = gsi_for_stmt (g);

> +

> +  update_call_from_tree (&gsi, value);

> +  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)

> +    {

> +      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)

> +	SET_USE (use_p, value);

> +      update_stmt (use_stmt);

> +    }

> +}

> +

>  /* Move a single-entry, single-exit region delimited by ENTRY_BB and

>     EXIT_BB to function DEST_CFUN.  The whole region is replaced by a

>     single basic block in the original CFG and the new basic block is

> @@ -7510,7 +7551,6 @@ move_sese_region_to_fn (struct function

>  	  }

>      }

>  

> -

>    /* Adjust the number of blocks in the tree root of the outlined part.  */

>    get_loop (dest_cfun, 0)->num_nodes = bbs.length () + 2;

>  

> @@ -7521,19 +7561,77 @@ move_sese_region_to_fn (struct function

>    /* Fix up orig_loop_num.  If the block referenced in it has been moved

>       to dest_cfun, update orig_loop_num field, otherwise clear it.  */

>    struct loop *dloop;

> +  signed char *moved_orig_loop_num = NULL;

>    FOR_EACH_LOOP_FN (dest_cfun, dloop, 0)

>      if (dloop->orig_loop_num)

>        {

> +	if (moved_orig_loop_num == NULL)

> +	  moved_orig_loop_num

> +	    = XCNEWVEC (signed char, vec_safe_length (larray));

>  	if ((*larray)[dloop->orig_loop_num] != NULL

>  	    && get_loop (saved_cfun, dloop->orig_loop_num) == NULL)

> -	  dloop->orig_loop_num = (*larray)[dloop->orig_loop_num]->num;

> +	  {

> +	    if (moved_orig_loop_num[dloop->orig_loop_num] >= 0

> +		&& moved_orig_loop_num[dloop->orig_loop_num] < 2)

> +	      moved_orig_loop_num[dloop->orig_loop_num]++;

> +	    dloop->orig_loop_num = (*larray)[dloop->orig_loop_num]->num;

> +	  }

>  	else

> -	  dloop->orig_loop_num = 0;

> +	  {

> +	    moved_orig_loop_num[dloop->orig_loop_num] = -1;

> +	    dloop->orig_loop_num = 0;

> +	  }

>        }

> -  ggc_free (larray);

> -

>    pop_cfun ();

>  

> +  if (moved_orig_loop_num)

> +    {

> +      FOR_EACH_VEC_ELT (bbs, i, bb)

> +	{

> +	  gimple *g = find_loop_dist_alias (bb);

> +	  if (g == NULL)

> +	    continue;

> +

> +	  int orig_loop_num = tree_to_shwi (gimple_call_arg (g, 0));

> +	  gcc_assert (orig_loop_num

> +		      && (unsigned) orig_loop_num < vec_safe_length (larray));

> +	  if (moved_orig_loop_num[orig_loop_num] == 2)

> +	    {

> +	      /* If we have moved both loops with this orig_loop_num into

> +		 dest_cfun and the LOOP_DIST_ALIAS call is being moved there

> +		 too, update the first argument.  */

> +	      gcc_assert ((*larray)[dloop->orig_loop_num] != NULL

> +			  && (get_loop (saved_cfun, dloop->orig_loop_num)

> +			      == NULL));

> +	      tree t = build_int_cst (integer_type_node,

> +				      (*larray)[dloop->orig_loop_num]->num);

> +	      gimple_call_set_arg (g, 0, t);

> +	      update_stmt (g);

> +	      /* Make sure the following loop will not update it.  */

> +	      moved_orig_loop_num[orig_loop_num] = 0;

> +	    }

> +	  else

> +	    /* Otherwise at least one of the loops stayed in saved_cfun.

> +	       Remove the LOOP_DIST_ALIAS call.  */

> +	    fold_loop_internal_call (g, gimple_call_arg (g, 1));

> +	}

> +      FOR_EACH_BB_FN (bb, saved_cfun)

> +	{

> +	  gimple *g = find_loop_dist_alias (bb);

> +	  if (g == NULL)

> +	    continue;

> +	  int orig_loop_num = tree_to_shwi (gimple_call_arg (g, 0));

> +	  gcc_assert (orig_loop_num

> +		      && (unsigned) orig_loop_num < vec_safe_length (larray));

> +	  if (moved_orig_loop_num[orig_loop_num])

> +	    /* LOOP_DIST_ALIAS call remained in saved_cfun, if at least one

> +	       of the corresponding loops was moved, remove it.  */

> +	    fold_loop_internal_call (g, gimple_call_arg (g, 1));

> +	}

> +      XDELETEVEC (moved_orig_loop_num);

> +    }

> +  ggc_free (larray);

> +

>    /* Move blocks from BBS into DEST_CFUN.  */

>    gcc_assert (bbs.length () >= 2);

>    after = dest_cfun->cfg->x_entry_block_ptr;

> --- gcc/testsuite/gcc.dg/graphite/pr83359.c.jj	2017-12-11 11:43:10.433737382 +0100

> +++ gcc/testsuite/gcc.dg/graphite/pr83359.c	2017-12-11 11:43:01.000000000 +0100

> @@ -0,0 +1,40 @@

> +/* PR tree-optimization/83359 */

> +/* { dg-do compile { target pthread } } */

> +/* { dg-options "-O3 -floop-parallelize-all -ftree-parallelize-loops=2" } */

> +

> +int a, b, c;

> +

> +void

> +foo (int x, int y)

> +{

> +  int *d = &a;

> +  int *e = &x;

> +

> +  for (a = 0; a < 1; ++a)

> +    d = &x;

> +

> +  while (b < 10)

> +    {

> +      for (b = 0; b < 1; ++b)

> +        if (x == 0)

> +          while (x < 1)

> +            ++x;

> +        else

> +          while (x < 1)

> +            {

> +              d = &y;

> +              ++x;

> +            }

> +      ++b;

> +    }

> +

> +  for (;;)

> +    for (c = 0; c < 2; ++c)

> +      {

> +        if (*d != 0)

> +          a = *e;

> +

> +        e = &b;

> +        y = 0;

> +      }

> +}

> 

> 	Jakub

> 

> 


-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

Patch

--- gcc/tree-cfg.h.jj	2017-09-05 23:28:14.000000000 +0200
+++ gcc/tree-cfg.h	2017-12-11 12:35:24.284777550 +0100
@@ -77,6 +77,7 @@  extern void gather_blocks_in_sese_region
 					  vec<basic_block> *bbs_p);
 extern void verify_sese (basic_block, basic_block, vec<basic_block> *);
 extern bool gather_ssa_name_hash_map_from (tree const &, tree const &, void *);
+extern void fold_loop_internal_call (gimple *, tree);
 extern basic_block move_sese_region_to_fn (struct function *, basic_block,
 				           basic_block, tree);
 extern void dump_function_to_file (tree, FILE *, dump_flags_t);
--- gcc/tree-vectorizer.c.jj	2017-09-01 09:26:37.000000000 +0200
+++ gcc/tree-vectorizer.c	2017-12-11 12:33:41.436055580 +0100
@@ -464,27 +464,6 @@  vect_loop_vectorized_call (struct loop *
   return NULL;
 }
 
-/* Fold loop internal call G like IFN_LOOP_VECTORIZED/IFN_LOOP_DIST_ALIAS
-   to VALUE and update any immediate uses of it's LHS.  */
-
-static void
-fold_loop_internal_call (gimple *g, tree value)
-{
-  tree lhs = gimple_call_lhs (g);
-  use_operand_p use_p;
-  imm_use_iterator iter;
-  gimple *use_stmt;
-  gimple_stmt_iterator gsi = gsi_for_stmt (g);
-
-  update_call_from_tree (&gsi, value);
-  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
-    {
-      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
-	SET_USE (use_p, value);
-      update_stmt (use_stmt);
-    }
-}
-
 /* If LOOP has been versioned during loop distribution, return the gurading
    internal call.  */
 
--- gcc/tree-cfg.c.jj	2017-12-07 18:05:30.000000000 +0100
+++ gcc/tree-cfg.c	2017-12-11 12:34:55.054140750 +0100
@@ -7337,6 +7337,47 @@  gather_ssa_name_hash_map_from (tree cons
   return true;
 }
 
+/* Return the LOOP_DIST_ALIAS call right before BB's GIMPLE_COND, or NULL.  */
+
+static gimple *
+find_loop_dist_alias (basic_block bb)
+{
+  gimple *g = last_stmt (bb);
+  if (g == NULL || gimple_code (g) != GIMPLE_COND)
+    return NULL;
+
+  gimple_stmt_iterator gsi = gsi_for_stmt (g);
+  gsi_prev (&gsi);
+  if (gsi_end_p (gsi))
+    return NULL;
+
+  g = gsi_stmt (gsi);
+  if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS))
+    return g;
+  return NULL;
+}
+
+/* Fold loop internal call G like IFN_LOOP_VECTORIZED/IFN_LOOP_DIST_ALIAS
+   to VALUE and replace every immediate use of its LHS with VALUE.  */
+
+void
+fold_loop_internal_call (gimple *g, tree value)
+{
+  tree lhs = gimple_call_lhs (g);
+  use_operand_p use_p;
+  imm_use_iterator iter;
+  gimple *use_stmt;
+  gimple_stmt_iterator gsi = gsi_for_stmt (g);
+
+  update_call_from_tree (&gsi, value);
+  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
+    {
+      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+	SET_USE (use_p, value);
+      update_stmt (use_stmt);
+    }
+}
+
 /* Move a single-entry, single-exit region delimited by ENTRY_BB and
    EXIT_BB to function DEST_CFUN.  The whole region is replaced by a
    single basic block in the original CFG and the new basic block is
@@ -7510,7 +7551,6 @@  move_sese_region_to_fn (struct function
 	  }
     }
 
-
   /* Adjust the number of blocks in the tree root of the outlined part.  */
   get_loop (dest_cfun, 0)->num_nodes = bbs.length () + 2;
 
@@ -7521,19 +7561,77 @@  move_sese_region_to_fn (struct function
   /* Fix up orig_loop_num.  If the block referenced in it has been moved
      to dest_cfun, update orig_loop_num field, otherwise clear it.  */
   struct loop *dloop;
+  signed char *moved_orig_loop_num = NULL;
   FOR_EACH_LOOP_FN (dest_cfun, dloop, 0)
     if (dloop->orig_loop_num)
       {
+	if (moved_orig_loop_num == NULL)
+	  moved_orig_loop_num
+	    = XCNEWVEC (signed char, vec_safe_length (larray));
 	if ((*larray)[dloop->orig_loop_num] != NULL
 	    && get_loop (saved_cfun, dloop->orig_loop_num) == NULL)
-	  dloop->orig_loop_num = (*larray)[dloop->orig_loop_num]->num;
+	  {
+	    if (moved_orig_loop_num[dloop->orig_loop_num] >= 0
+		&& moved_orig_loop_num[dloop->orig_loop_num] < 2)
+	      moved_orig_loop_num[dloop->orig_loop_num]++;
+	    dloop->orig_loop_num = (*larray)[dloop->orig_loop_num]->num;
+	  }
 	else
-	  dloop->orig_loop_num = 0;
+	  {
+	    moved_orig_loop_num[dloop->orig_loop_num] = -1;
+	    dloop->orig_loop_num = 0;
+	  }
       }
-  ggc_free (larray);
-
   pop_cfun ();
 
+  if (moved_orig_loop_num)
+    {
+      FOR_EACH_VEC_ELT (bbs, i, bb)
+	{
+	  gimple *g = find_loop_dist_alias (bb);
+	  if (g == NULL)
+	    continue;
+
+	  int orig_loop_num = tree_to_shwi (gimple_call_arg (g, 0));
+	  gcc_assert (orig_loop_num
+		      && (unsigned) orig_loop_num < vec_safe_length (larray));
+	  if (moved_orig_loop_num[orig_loop_num] == 2)
+	    {
+	      /* Both loops with this orig_loop_num and the LOOP_DIST_ALIAS
+		 call move into dest_cfun: update the first argument.  DLOOP
+		 is stale after the walk above, so use orig_loop_num.  */
+	      gcc_assert ((*larray)[orig_loop_num] != NULL
+			  && (get_loop (saved_cfun, orig_loop_num)
+			      == NULL));
+	      tree t = build_int_cst (integer_type_node,
+				      (*larray)[orig_loop_num]->num);
+	      gimple_call_set_arg (g, 0, t);
+	      update_stmt (g);
+	      /* Make sure the following loop will not update it.  */
+	      moved_orig_loop_num[orig_loop_num] = 0;
+	    }
+	  else
+	    /* Otherwise at least one of the loops stayed in saved_cfun.
+	       Remove the LOOP_DIST_ALIAS call.  */
+	    fold_loop_internal_call (g, gimple_call_arg (g, 1));
+	}
+      FOR_EACH_BB_FN (bb, saved_cfun)
+	{
+	  gimple *g = find_loop_dist_alias (bb);
+	  if (g == NULL)
+	    continue;
+	  int orig_loop_num = tree_to_shwi (gimple_call_arg (g, 0));
+	  gcc_assert (orig_loop_num
+		      && (unsigned) orig_loop_num < vec_safe_length (larray));
+	  if (moved_orig_loop_num[orig_loop_num])
+	    /* LOOP_DIST_ALIAS call remained in saved_cfun, if at least one
+	       of the corresponding loops was moved, remove it.  */
+	    fold_loop_internal_call (g, gimple_call_arg (g, 1));
+	}
+      XDELETEVEC (moved_orig_loop_num);
+    }
+  ggc_free (larray);
+
   /* Move blocks from BBS into DEST_CFUN.  */
   gcc_assert (bbs.length () >= 2);
   after = dest_cfun->cfg->x_entry_block_ptr;
--- gcc/testsuite/gcc.dg/graphite/pr83359.c.jj	2017-12-11 11:43:10.433737382 +0100
+++ gcc/testsuite/gcc.dg/graphite/pr83359.c	2017-12-11 11:43:01.000000000 +0100
@@ -0,0 +1,40 @@ 
+/* PR tree-optimization/83359 */
+/* { dg-do compile { target pthread } } */
+/* { dg-options "-O3 -floop-parallelize-all -ftree-parallelize-loops=2" } */
+
+int a, b, c;
+
+void
+foo (int x, int y)
+{
+  int *d = &a;
+  int *e = &x;
+
+  for (a = 0; a < 1; ++a)
+    d = &x;
+
+  while (b < 10)
+    {
+      for (b = 0; b < 1; ++b)
+        if (x == 0)
+          while (x < 1)
+            ++x;
+        else
+          while (x < 1)
+            {
+              d = &y;
+              ++x;
+            }
+      ++b;
+    }
+
+  for (;;)
+    for (c = 0; c < 2; ++c)
+      {
+        if (*d != 0)
+          a = *e;
+
+        e = &b;
+        y = 0;
+      }
+}