[6/6] Restrict predcom using register pressure information

Message ID DB6PR0802MB25044C0B907B393B7BE608BEE7860@DB6PR0802MB2504.eurprd08.prod.outlook.com
State New
Headers show
Series
  • [1/6] Compute type mode and register class mapping
Related show

Commit Message

Bin Cheng May 4, 2018, 4:24 p.m.
Hi,
This patch restricts the predcom pass using register pressure information.
In case of high register pressure, we now prune additional chains as well
as disable unrolling in predcom.  In general, I think this patch set is
useful.

Bootstrap and test on x86_64 ongoing.  Any comments?

Thanks,
bin
2018-04-27  Bin Cheng  <bin.cheng@arm.com>

	* tree-predcom.c (stor-layout.h, tree-ssa-live.h): Include.
	(REG_RELAX_RATIO, prune_chains): New.
	(tree_predictive_commoning_loop): Compute reg pressure using class
	region.  Prune chains based on reg pressure.  Force to not unroll
	if reg pressure is high.

Comments

Bin.Cheng May 29, 2018, 4:04 p.m. | #1
On Fri, May 4, 2018 at 5:24 PM, Bin Cheng <Bin.Cheng@arm.com> wrote:
> Hi,

> This patch restricts predcom pass using register pressure information.

> In case of high register pressure, we now prune additional chains as well

> as disable unrolling in predcom.  In generally, I think this patch set is

> useful.

>

> Bootstrap and test on x86_64 ongoing.  Any comments?

Simple update in line with changes in previous patch.

Thanks,
bin
>

> Thanks,

> bin

> 2018-04-27  Bin Cheng  <bin.cheng@arm.com>

>

>         * tree-predcom.c (stor-layout.h, tree-ssa-live.h): Include.

>         (REG_RELAX_RATIO, prune_chains): New.

>         (tree_predictive_commoning_loop): Compute reg pressure using class

>         region.  Prune chains based on reg pressure.  Force to not unroll

>         if reg pressure is high.
From b78c779907b98930fc4b36e5558d6f315bb4475b Mon Sep 17 00:00:00 2001
From: Bin Cheng <binche01@e108451-lin.cambridge.arm.com>
Date: Wed, 25 Apr 2018 16:30:41 +0100
Subject: [PATCH 6/6] pcom-reg-pressure-20180428

---
 gcc/tree-predcom.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
index aeadbf7..60316e9 100644
--- a/gcc/tree-predcom.c
+++ b/gcc/tree-predcom.c
@@ -217,6 +217,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-pass.h"
 #include "ssa.h"
 #include "gimple-pretty-print.h"
+#include "stor-layout.h"
 #include "alias.h"
 #include "fold-const.h"
 #include "cfgloop.h"
@@ -227,6 +228,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-loop-ivopts.h"
 #include "tree-ssa-loop-manip.h"
 #include "tree-ssa-loop-niter.h"
+#include "tree-ssa-live.h"
 #include "tree-ssa-loop.h"
 #include "tree-into-ssa.h"
 #include "tree-dfa.h"
@@ -242,6 +244,10 @@ along with GCC; see the file COPYING3.  If not see
 
 #define MAX_DISTANCE (target_avail_regs[GENERAL_REGS] < 16 ? 4 : 8)
 
+/* The ratio by which register pressure check is relaxed.  */
+
+#define REG_RELAX_RATIO (2)
+
 /* Data references (or phi nodes that carry data reference values across
    loop iterations).  */
 
@@ -3156,6 +3162,59 @@ insert_init_seqs (struct loop *loop, vec<chain_p> chains)
       }
 }
 
+/* Prune chains causing high register pressure.  */
+
+static void
+prune_chains (vec<chain_p> *chains, unsigned *max_pressure)
+{
+  bool pruned_p = false;
+  machine_mode mode;
+  enum reg_class cl;
+  unsigned i, new_pressure;
+
+  for (i = 0; i < chains->length ();)
+    {
+      chain_p chain = (*chains)[i];
+      /* Always allow combined chain and zero-length chain.  */
+      if (chain->combined || chain->type == CT_COMBINATION
+	  || chain->length == 0 || chain->type == CT_STORE_STORE)
+	{
+	  i++;
+	  continue;
+	}
+
+      gcc_assert (chain->refs.length () > 0);
+      mode = TYPE_MODE (TREE_TYPE (chain->refs[0]->ref->ref));
+      /* Bypass chain that doesn't contribute to any reg_class, although
+	 something could be wrong when mapping type mode to reg_class.  */
+      if (ira_mode_classes[mode] == NO_REGS)
+	{
+	  i++;
+	  continue;
+	}
+
+      cl = ira_pressure_class_translate[ira_mode_classes[mode]];
+      /* Prune chain if it causes higher register pressure than available
+	 registers; otherwise keep the chain and update register pressure
+	 information.  */
+      new_pressure = max_pressure[cl] + chain->length - 1;
+      if (new_pressure <= target_avail_regs[cl] * REG_RELAX_RATIO)
+	{
+	  i++;
+	  max_pressure[cl] = new_pressure;
+	}
+      else
+	{
+	  release_chain (chain);
+	  chains->unordered_remove (i);
+	  pruned_p = true;
+	}
+    }
+
+  if (pruned_p && dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "Prune chain because of high reg pressure\n");
+}
+
 /* Performs predictive commoning for LOOP.  Sets bit 1<<0 of return value
    if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa
    form was corrupted.  */
@@ -3171,6 +3230,9 @@ tree_predictive_commoning_loop (struct loop *loop)
   struct tree_niter_desc desc;
   bool unroll = false, loop_closed_ssa = false;
   edge exit;
+  lr_region *region;
+  unsigned max_pressure[N_REG_CLASSES];
+  bool high_pressure_p;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "Processing loop %d\n",  loop->num);
@@ -3239,6 +3301,11 @@ tree_predictive_commoning_loop (struct loop *loop)
   /* Try to combine the chains that are always worked with together.  */
   try_combine_chains (loop, &chains);
 
+  region = new lr_region (loop, max_pressure, NULL, NULL, NULL);
+  high_pressure_p = region->calculate_pressure ();
+  delete region;
+  prune_chains (&chains, max_pressure);
+
   insert_init_seqs (loop, chains);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
@@ -3250,6 +3317,13 @@ tree_predictive_commoning_loop (struct loop *loop)
   /* Determine the unroll factor, and if the loop should be unrolled, ensure
      that its number of iterations is divisible by the factor.  */
   unroll_factor = determine_unroll_factor (chains);
+  /* Force to not unroll if register pressure is high.  */
+  if (high_pressure_p && unroll_factor > 1)
+    {
+      unroll_factor = 1;
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "Force to not unroll because of high reg pressure\n");
+    }
   scev_reset ();
   unroll = (unroll_factor > 1
 	    && can_unroll_loop_p (loop, unroll_factor, &desc));
David Malcolm May 29, 2018, 5:18 p.m. | #2
On Tue, 2018-05-29 at 17:04 +0100, Bin.Cheng wrote:
> On Fri, May 4, 2018 at 5:24 PM, Bin Cheng <Bin.Cheng@arm.com> wrote:

> > Hi,

> > This patch restricts predcom pass using register pressure

> > information.

> > In case of high register pressure, we now prune additional chains

> > as well

> > as disable unrolling in predcom.  In generally, I think this patch

> > set is

> > useful.

> > 

> > Bootstrap and test on x86_64 ongoing.  Any comments?

> 

> Simple update in line with changes in previous patch.

> 

> Thanks,

> bin

> > 

> > Thanks,

> > bin

> > 2018-04-27  Bin Cheng  <bin.cheng@arm.com>

> > 

> >         * tree-predcom.c (stor-layout.h, tree-ssa-live.h): Include.

> >         (REG_RELAX_RATIO, prune_chains): New.

> >         (tree_predictive_commoning_loop): Compute reg pressure

> > using class

> >         region.  Prune chains based on reg pressure.  Force to not

> > unroll

> >         if reg pressure is high.


[...snip...]

> @@ -3239,6 +3301,11 @@ tree_predictive_commoning_loop (struct loop *loop)

>    /* Try to combine the chains that are always worked with together.  */

>    try_combine_chains (loop, &chains);

>  

> +  region = new lr_region (loop, max_pressure, NULL, NULL, NULL);

> +  high_pressure_p = region->calculate_pressure ();

> +  delete region;

> +  prune_chains (&chains, max_pressure);

> +


Possibly a silly question, but why the new/delete of "region" here?
Couldn't this just be an on-stack object, with something like:

  lr_region region (loop, max_pressure, NULL, NULL, NULL);
  high_pressure_p = region.calculate_pressure ();
  prune_chains (&chains, max_pressure);

or:

  {
    lr_region region (loop, max_pressure, NULL, NULL, NULL);
    high_pressure_p = region.calculate_pressure ();
  }
  prune_chains (&chains, max_pressure);

if it's important to do the cleanup before prune_chains?

Dave
Bin.Cheng May 29, 2018, 5:24 p.m. | #3
On Tue, May 29, 2018 at 6:18 PM, David Malcolm <dmalcolm@redhat.com> wrote:
> On Tue, 2018-05-29 at 17:04 +0100, Bin.Cheng wrote:

>> On Fri, May 4, 2018 at 5:24 PM, Bin Cheng <Bin.Cheng@arm.com> wrote:

>> > Hi,

>> > This patch restricts predcom pass using register pressure

>> > information.

>> > In case of high register pressure, we now prune additional chains

>> > as well

>> > as disable unrolling in predcom.  In generally, I think this patch

>> > set is

>> > useful.

>> >

>> > Bootstrap and test on x86_64 ongoing.  Any comments?

>>

>> Simple update in line with changes in previous patch.

>>

>> Thanks,

>> bin

>> >

>> > Thanks,

>> > bin

>> > 2018-04-27  Bin Cheng  <bin.cheng@arm.com>

>> >

>> >         * tree-predcom.c (stor-layout.h, tree-ssa-live.h): Include.

>> >         (REG_RELAX_RATIO, prune_chains): New.

>> >         (tree_predictive_commoning_loop): Compute reg pressure

>> > using class

>> >         region.  Prune chains based on reg pressure.  Force to not

>> > unroll

>> >         if reg pressure is high.

>

> [...snip...]

>

>> @@ -3239,6 +3301,11 @@ tree_predictive_commoning_loop (struct loop *loop)

>>    /* Try to combine the chains that are always worked with together.  */

>>    try_combine_chains (loop, &chains);

>>

>> +  region = new lr_region (loop, max_pressure, NULL, NULL, NULL);

>> +  high_pressure_p = region->calculate_pressure ();

>> +  delete region;

>> +  prune_chains (&chains, max_pressure);

>> +

>

> Possibly a silly question, but why the new/delete of "region" here?

> Couldn't this just be an on-stack object, with something like:

Yes, right.  It contained dynamically allocated memory before, so I deleted
it early.  Now a local object will do.

Thanks,
bin
>

>   lr_region region (loop, max_pressure, NULL, NULL, NULL);

>   high_pressure_p = region.calculate_pressure ();

>   prune_chains (&chains, max_pressure);

>

> or:

>

>   {

>     lr_region region (loop, max_pressure, NULL, NULL, NULL);

>     high_pressure_p = region.calculate_pressure ();

>   }

>   prune_chains (&chains, max_pressure);

>

> if it's important to do the cleanup before prune_chains?

>

> Dave

Patch

From 1b488665f8fea619c4ce35f71650c342df69de2f Mon Sep 17 00:00:00 2001
From: Bin Cheng <binche01@e108451-lin.cambridge.arm.com>
Date: Wed, 25 Apr 2018 16:30:41 +0100
Subject: [PATCH 6/6] pcom-reg-pressure-20180423

---
 gcc/tree-predcom.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
index aeadbf7..d0c18b3 100644
--- a/gcc/tree-predcom.c
+++ b/gcc/tree-predcom.c
@@ -217,6 +217,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree-pass.h"
 #include "ssa.h"
 #include "gimple-pretty-print.h"
+#include "stor-layout.h"
 #include "alias.h"
 #include "fold-const.h"
 #include "cfgloop.h"
@@ -227,6 +228,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-loop-ivopts.h"
 #include "tree-ssa-loop-manip.h"
 #include "tree-ssa-loop-niter.h"
+#include "tree-ssa-live.h"
 #include "tree-ssa-loop.h"
 #include "tree-into-ssa.h"
 #include "tree-dfa.h"
@@ -242,6 +244,10 @@  along with GCC; see the file COPYING3.  If not see
 
 #define MAX_DISTANCE (target_avail_regs[GENERAL_REGS] < 16 ? 4 : 8)
 
+/* The ratio by which register pressure check is relaxed.  */
+
+#define REG_RELAX_RATIO (2)
+
 /* Data references (or phi nodes that carry data reference values across
    loop iterations).  */
 
@@ -3156,6 +3162,59 @@  insert_init_seqs (struct loop *loop, vec<chain_p> chains)
       }
 }
 
+/* Prune chains causing high register pressure.  */
+
+static void
+prune_chains (vec<chain_p> *chains, unsigned *max_pressure)
+{
+  bool pruned_p = false;
+  machine_mode mode;
+  enum reg_class cl;
+  unsigned i, new_pressure;
+
+  for (i = 0; i < chains->length ();)
+    {
+      chain_p chain = (*chains)[i];
+      /* Always allow combined chain and zero-length chain.  */
+      if (chain->combined || chain->type == CT_COMBINATION
+	  || chain->length == 0 || chain->type == CT_STORE_STORE)
+	{
+	  i++;
+	  continue;
+	}
+
+      gcc_assert (chain->refs.length () > 0);
+      mode = TYPE_MODE (TREE_TYPE (chain->refs[0]->ref->ref));
+      /* Bypass chain that doesn't contribute to any reg_class, although
+	 something could be wrong when mapping type mode to reg_class.  */
+      if (ira_mode_classes[mode] == NO_REGS)
+	{
+	  i++;
+	  continue;
+	}
+
+      cl = ira_pressure_class_translate[ira_mode_classes[mode]];
+      /* Prune chain if it causes higher register pressure than available
+	 registers; otherwise keep the chain and update register pressure
+	 information.  */
+      new_pressure = max_pressure[cl] + chain->length - 1;
+      if (new_pressure <= target_avail_regs[cl] * REG_RELAX_RATIO)
+	{
+	  i++;
+	  max_pressure[cl] = new_pressure;
+	}
+      else
+	{
+	  release_chain (chain);
+	  chains->unordered_remove (i);
+	  pruned_p = true;
+	}
+    }
+
+  if (pruned_p && dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "Prune chain because of high reg pressure\n");
+}
+
 /* Performs predictive commoning for LOOP.  Sets bit 1<<0 of return value
    if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa
    form was corrupted.  */
@@ -3171,6 +3230,9 @@  tree_predictive_commoning_loop (struct loop *loop)
   struct tree_niter_desc desc;
   bool unroll = false, loop_closed_ssa = false;
   edge exit;
+  lr_region *region;
+  unsigned max_pressure[N_REG_CLASSES];
+  bool high_pressure_p;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "Processing loop %d\n",  loop->num);
@@ -3239,6 +3301,11 @@  tree_predictive_commoning_loop (struct loop *loop)
   /* Try to combine the chains that are always worked with together.  */
   try_combine_chains (loop, &chains);
 
+  region = new lr_region (loop);
+  high_pressure_p = region->calculate_pressure (max_pressure);
+  delete region;
+  prune_chains (&chains, max_pressure);
+
   insert_init_seqs (loop, chains);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
@@ -3250,6 +3317,13 @@  tree_predictive_commoning_loop (struct loop *loop)
   /* Determine the unroll factor, and if the loop should be unrolled, ensure
      that its number of iterations is divisible by the factor.  */
   unroll_factor = determine_unroll_factor (chains);
+  /* Force to not unroll if register pressure is high.  */
+  if (high_pressure_p && unroll_factor > 1)
+    {
+      unroll_factor = 1;
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "Force to not unroll because of high reg pressure\n");
+    }
   scev_reset ();
   unroll = (unroll_factor > 1
 	    && can_unroll_loop_p (loop, unroll_factor, &desc));
-- 
1.9.1