[RFC] Sort region RPO according to SCC membership

Message ID nycvar.YFH.7.76.2007201601270.9963@zhemvz.fhfr.qr
State New
Headers show
Series
  • [RFC] Sort region RPO according to SCC membership
Related show

Commit Message

Richard Biener July 20, 2020, 2:02 p.m.
This produces a more optimal RPO order for iteration processing
by making sure that SCC members are processed before blocks reachable
from SCC exits.  This avoids iterating blocks unrelated to the current
iteration for RPO VN.

Overall reduction in the number of visited blocks isn't spectacular
for bootstrap (~1%) but single cases see up to a 10% reduction.

There's cleanup on the plate for followups which is merging the SCC
DFS walk with the RPO computing one as well as always requesting
iteration for the single caller.  The same function can be used
to optimize var-tracking iteration order (but that would already
benefit from "simpler" SCC finding and only toplevel sorting).
The sort comparator is a bit ugly and it is O(scc depth) which
makes the sorting itself approach quadratic behavior.  As said
var-tracking would benefit from simpler separation of the
function into independent regions which this also provides.

The DFS based SCC discovery could also replace our dominator
based loop finding with appropriate marking (or representing)
of irreducible regions.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Richard.

2020-07-20  Richard Biener  <rguenther@suse.de>

	* cfganal.c (cmp_rpo_for_iteration): New callback for qsort.
	(tag_header): New helper.
	(rev_post_order_and_mark_dfs_back_seme): Compute SCC
	membership and sort RPO when requesting an iteration
	optimized order.
---
 gcc/cfganal.c | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 191 insertions(+)

-- 
2.26.2

Patch

diff --git a/gcc/cfganal.c b/gcc/cfganal.c
index 5c85ebe3c1a..1c91c1e6bf5 100644
--- a/gcc/cfganal.c
+++ b/gcc/cfganal.c
@@ -1060,6 +1060,108 @@  pre_and_rev_post_order_compute (int *pre_order, int *rev_post_order,
   return pre_order_num;
 }
 
+
+/* Data for cmp_rpo_for_iteration.  SCC maps a block index to the index
+   of the header block of its innermost containing SCC, or -1 when the
+   block is in no SCC.  BB_TO_RPO maps a block index to its position in
+   the original RPO order; that position is the final tie-breaker.
+   IS_HEADER is the temporarily allocated basic-block flag marking SCC
+   header blocks.  */
+
+struct crfi_data { int *scc; int *bb_to_rpo; unsigned is_header; };
+
+/* qsort callback sorting a RPO array of block indices in a way
+   bringing SCC members next to each other.  */
+
+static int
+cmp_rpo_for_iteration (const void *a_, const void *b_, void *data_)
+{
+  int a = *(const int *)a_;
+  int b = *(const int *)b_;
+  const crfi_data *data = (const crfi_data *)data_;
+
+  /* Fast case.  A and B share the same innermost SCC, or one of them
+     is the header of the SCC the other belongs to — in either case the
+     original RPO order already keeps them correctly grouped.  */
+  if (data->scc[a] == data->scc[b]
+      || data->scc[a] == b
+      || data->scc[b] == a)
+    return data->bb_to_rpo[a] - data->bb_to_rpo[b];
+
+  /* Need to find a common loop containing A and B.  Start counting
+     nesting depth at the innermost header of each block: a header block
+     counts itself, a non-header starts from its containing SCC.  */
+  unsigned depth_a
+    = (BASIC_BLOCK_FOR_FN (cfun, a)->flags & data->is_header) ? 1 : 0;
+  unsigned depth_b
+    = (BASIC_BLOCK_FOR_FN (cfun, b)->flags & data->is_header) ? 1 : 0;
+  int header_of_a = depth_a ? a : data->scc[a];
+  int header_of_b = depth_b ? b : data->scc[b];
+  int tem = a;
+  /* Walk the header chain of A outwards, counting depth as we go and
+     checking whether we reach B's SCC on the way.  */
+  while (1)
+    {
+      /* A nested in B.  Order A's header at B's level against B.  */
+      if (data->scc[tem] == header_of_b)
+	return data->bb_to_rpo[tem] - data->bb_to_rpo[b];
+      tem = data->scc[tem];
+      if (tem == -1)
+	break;
+      depth_a++;
+    }
+  tem = b;
+  /* Likewise for B's header chain.  */
+  while (1)
+    {
+      /* B nested in A.  Order A against B's header at A's level.  */
+      if (data->scc[tem] == header_of_a)
+	return data->bb_to_rpo[a] - data->bb_to_rpo[tem];
+      tem = data->scc[tem];
+      if (tem == -1)
+	break;
+      depth_b++;
+    }
+
+  /* The loops of A and B are siblings, find the parents at the same
+     loop depth.  */
+  a = header_of_a;
+  b = header_of_b;
+  while (depth_a > depth_b)
+    {
+      depth_a--;
+      a = data->scc[a];
+    }
+  while (depth_b > depth_a)
+    {
+      depth_b--;
+      b = data->scc[b];
+    }
+  /* If A and B were equal here they would have been in the same SCC
+     which the fast case above already handled.  */
+  gcc_assert (a != b);
+  /* All of the above could be O(1) with something like loop->superloops[]. */
+
+  /* Walk both chains outward in lock-step (they are now at equal depth)
+     until the headers become siblings in a common parent SCC, then
+     compare those siblings by their original RPO position.  */
+  while (data->scc[a] != data->scc[b])
+    {
+      a = data->scc[a];
+      b = data->scc[b];
+      gcc_assert (a != -1 && b != -1);
+    }
+
+  return data->bb_to_rpo[a] - data->bb_to_rpo[b];
+}
+
+/* Helper for the SCC finding in rev_post_order_and_mark_dfs_back_seme.
+   Record H as an (indirect) SCC header of block B by inserting it into
+   B's chain of headers in SCC[], keeping the chain ordered by DFS
+   preorder number (a smaller DFS number appears to mean an outer,
+   earlier-discovered header — NOTE(review): this looks like the
+   tag_lhead step of the Wei et al. loop-identification algorithm,
+   confirm against that reference).  DFS holds the DFS preorder
+   numbers.  No-op when H is -1 or B is its own header.  */
+
+static void
+tag_header (int b, int h, int *scc, int *dfs)
+{
+  if (h == -1 || b == h)
+    return;
+  int cur1 = b;
+  int cur2 = h;
+  /* Walk B's existing header chain looking for the insertion point
+     for CUR2.  */
+  while (scc[cur1] != -1)
+    {
+      int ih = scc[cur1];
+      /* CUR2 already present in the chain — nothing to do.  */
+      if (ih == cur2)
+	return;
+      if (dfs[ih] < dfs[cur2])
+	{
+	  /* IH was discovered before CUR2, so CUR2 belongs between
+	     CUR1 and IH; splice it in and continue inserting IH
+	     further out.  */
+	  scc[cur1] = cur2;
+	  cur1 = cur2;
+	  cur2 = ih;
+	}
+      else
+	cur1 = ih;
+    }
+  /* Reached the chain end — append the remaining header.  */
+  scc[cur1] = cur2;
+}
+
 /* Unlike pre_and_rev_post_order_compute we fill rev_post_order backwards
    so iterating in RPO order needs to start with rev_post_order[n - 1]
    going to rev_post_order[0].  If FOR_ITERATION is true then try to
@@ -1165,6 +1267,95 @@  rev_post_order_and_mark_dfs_back_seme (struct function *fn, edge entry,
     BASIC_BLOCK_FOR_FN (fn, rev_post_order[i])->flags
       &= ~(post_assigned|visited);
 
+  if (for_iteration)
+    {
+      auto_vec<edge_iterator, 20> stack (n_basic_blocks_for_fn (fn) + 1);
+      auto_bb_flag is_header (fn);
+      int dfsnum = 1;
+      int *dfs = XNEWVEC (int, 2 * last_basic_block_for_fn (fn));
+      int *scc = dfs + last_basic_block_for_fn (fn);
+
+      basic_block dest = entry->dest;
+      edge_iterator ei;
+
+      /* DFS process DEST.  */
+find_loops:
+      dfs[dest->index] = dfsnum;
+      scc[dest->index] = -1;
+      dfsnum++;
+      gcc_assert ((dest->flags & (is_header|visited)) == 0);
+      dest->flags |= visited;
+      ei = ei_start (dest->succs);
+      while (!ei_end_p (ei))
+	{
+	  if (bitmap_bit_p (exit_bbs, ei_edge (ei)->dest->index))
+	    ;
+	  else if (!(ei_edge (ei)->dest->flags & visited))
+	    {
+	      stack.quick_push (ei);
+	      dest = ei_edge (ei)->dest;
+	      /* DFS recurse on DEST.  */
+	      goto find_loops;
+
+ret_from_find_loops:
+	      /* Return point of DFS recursion.  */
+	      ei = stack.pop ();
+	      dest = ei_edge (ei)->src;
+	      int header = scc[ei_edge (ei)->dest->index];
+	      tag_header (dest->index, header, scc, dfs);
+	      dfsnum = dfs[dest->index] + 1;
+	    }
+	  else
+	    {
+	      if (dfs[ei_edge (ei)->dest->index] > 0)
+		{
+		  ei_edge (ei)->dest->flags |= is_header;
+		  tag_header (dest->index, ei_edge (ei)->dest->index, scc, dfs);
+		}
+	      else if (scc[ei_edge (ei)->dest->index] == -1)
+		;
+	      else
+		{
+		  int header = scc[ei_edge (ei)->dest->index];
+		  if (dfs[header] > 0)
+		    tag_header (dest->index, header, scc, dfs);
+		  else
+		    {
+		      /* A re-entry into an existing loop.  */
+		      while (scc[header] != -1)
+			{
+			  header = scc[header];
+			  if (dfs[header] > 0)
+			    {
+			      tag_header (dest->index, header, scc, dfs);
+			      break;
+			    }
+			}
+		    }
+		}
+	    }
+	  ei_next (&ei);
+	}
+      dfs[dest->index] = 0;
+      if (!stack.is_empty ())
+	/* Return from DFS recursion.  */
+	goto ret_from_find_loops;
+
+      /* BB to RPO order mapping.  */
+      for (int i = 0; i < rev_post_order_num; ++i)
+	dfs[rev_post_order[i]] = i;
+      crfi_data data = { scc, dfs, is_header };
+      gcc_sort_r (rev_post_order, rev_post_order_num, sizeof (int),
+		  cmp_rpo_for_iteration, &data);
+
+      XDELETEVEC (dfs);
+
+      /* Clear the temporarily allocated flags.  */
+      for (int i = 0; i < rev_post_order_num; ++i)
+	BASIC_BLOCK_FOR_FN (fn, rev_post_order[i])->flags
+	  &= ~(visited|is_header);
+    }
+
   return rev_post_order_num;
 }