[3/9] ifcvt: Only created temporaries as needed.

Message ID 20190802151028.15590-4-rdapp@linux.ibm.com
State New
Headers show
Series
  • Improve icvt "convert multiple"
Related show

Commit Message

Robin Dapp Aug. 2, 2019, 3:10 p.m.
noce_convert_multiple_sets creates temporaries for the destination of
every emitted cmov and expects subsequent passes to get rid of them.  This
does not happen every time and even if the temporaries are removed, code
generation can be affected adversely.  In this patch, temporaries are
only created if the destination of a set is used in an emitted condition
check.

---
 gcc/ifcvt.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 3 deletions(-)

-- 
2.17.0

Comments

Richard Sandiford Aug. 6, 2019, 8:01 p.m. | #1
Robin Dapp <rdapp@linux.ibm.com> writes:
> noce_convert_multiple_sets creates temporaries for the destination of

> every emitted cmov and expects subsequent passes to get rid of them.  This

> does not happen every time and even if the temporaries are removed, code

> generation can be affected adversely.  In this patch, temporaries are

> only created if the destination of a set is used in an emitted condition

> check.


Still digesting the series, but a couple of implementation questions:

>

> ---

>  gcc/ifcvt.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++---

>  1 file changed, 51 insertions(+), 3 deletions(-)

>

> diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c

> index e95ff9ee9b0..253b8a96c1a 100644

> --- a/gcc/ifcvt.c

> +++ b/gcc/ifcvt.c

> @@ -99,6 +99,10 @@ static int dead_or_predicable (basic_block, basic_block, basic_block,

>  			       edge, int);

>  static void noce_emit_move_insn (rtx, rtx);

>  static rtx_insn *block_has_only_trap (basic_block);

> +static void check_need_temps (basic_block bb,

> +                              hash_map<rtx, bool> *need_temp,

> +                              rtx cond);

> +

>  

>  /* Count the number of non-jump active insns in BB.  */

>  

> @@ -3145,6 +3149,12 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)

>    auto_vec<rtx_insn *> unmodified_insns;

>    int count = 0;

>  

> +  hash_map<rtx, bool> need_temps;

> +

> +  check_need_temps (then_bb, &need_temps, cond);


Is the separate need_temps scan required for correctness?  It looked
like we could test:

      if (reg_overlap_mentioned_p (dest, cond))
	...

on-the-fly during the main noce_convert_multiple_sets loop.

> +

> +  hash_map<rtx, bool> temps_created;

> +

>    FOR_BB_INSNS (then_bb, insn)

>      {

>        /* Skip over non-insns.  */

> @@ -3155,10 +3165,20 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)

>        gcc_checking_assert (set);

>  

>        rtx target = SET_DEST (set);

> -      rtx temp = gen_reg_rtx (GET_MODE (target));

>        rtx new_val = SET_SRC (set);

>        rtx old_val = target;

>  

> +      rtx dest = SET_DEST (set);

> +

> +      rtx temp;

> +      if (need_temps.get (dest))

> +       {

> +         temp = gen_reg_rtx (GET_MODE (target));

> +         temps_created.put (target, true);

> +       }

> +      else

> +       temp = target;

> +

>        /* If we were supposed to read from an earlier write in this block,

>  	 we've changed the register allocation.  Rewire the read.  While

>  	 we are looking, also try to catch a swap idiom.  */

> @@ -3242,8 +3262,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)

>  

>    /* Now fixup the assignments.  */

>    for (int i = 0; i < count; i++)

> -    noce_emit_move_insn (targets[i], temporaries[i]);

> -

> +    if (temps_created.get(targets[i]) && targets[i] != temporaries[i])

> +      noce_emit_move_insn (targets[i], temporaries[i]);


Would it work to set temporaries[i] to targets[i] whenever a temporary
isn't needed, and avoid temps_created?

Thanks,
Richard

>  

>    /* Actually emit the sequence if it isn't too expensive.  */

>    rtx_insn *seq = get_insns ();

> @@ -3749,6 +3769,34 @@ check_cond_move_block (basic_block bb,

>    return TRUE;

>  }

>  

> +/* Check for which sets we need to emit temporaries to hold the destination of

> +   a conditional move.  */

> +static void

> +check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)

> +{

> +  rtx_insn *insn;

> +

> +  FOR_BB_INSNS (bb, insn)

> +    {

> +      rtx set, dest;

> +

> +      if (!active_insn_p (insn))

> +	continue;

> +

> +      set = single_set (insn);

> +      if (set == NULL_RTX)

> +	continue;

> +

> +      dest = SET_DEST (set);

> +

> +      /* noce_emit_cmove will emit the condition check every time it is called

> +         so we need a temp register if the destination is modified.  */

> +      if (reg_overlap_mentioned_p (dest, cond))

> +	need_temp->put (dest, true);

> +    }

> +}

> +

> +

>  /* Given a basic block BB suitable for conditional move conversion,

>     a condition COND, and pointer maps THEN_VALS and ELSE_VALS containing

>     the register values depending on COND, emit the insns in the block as
Robin Dapp Aug. 8, 2019, 9:01 a.m. | #2
Hi Richard,

> Is the separate need_temps scan required for correctness?  It looked

> like we could test:

> 

>       if (reg_overlap_mentioned_p (dest, cond))

> 	...

> 

> on-the-fly during the main noce_convert_multiple_sets loop.


right, I didn't re-check it but after changes during interal patch
rounds the check is now degenerated enought to allow this. Going to
refactor that.

> Would it work to set temporaries[i] to targets[i] whenever a temporary

> isn't needed, and avoid temps_created?


Yes, that should work. Already made that change locally.

Regards
 Robin

Patch

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index e95ff9ee9b0..253b8a96c1a 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -99,6 +99,10 @@  static int dead_or_predicable (basic_block, basic_block, basic_block,
 			       edge, int);
 static void noce_emit_move_insn (rtx, rtx);
 static rtx_insn *block_has_only_trap (basic_block);
+static void check_need_temps (basic_block bb,
+                              hash_map<rtx, bool> *need_temp,
+                              rtx cond);
+
 
 /* Count the number of non-jump active insns in BB.  */
 
@@ -3145,6 +3149,12 @@  noce_convert_multiple_sets (struct noce_if_info *if_info)
   auto_vec<rtx_insn *> unmodified_insns;
   int count = 0;
 
+  hash_map<rtx, bool> need_temps;
+
+  check_need_temps (then_bb, &need_temps, cond);
+
+  hash_map<rtx, bool> temps_created;
+
   FOR_BB_INSNS (then_bb, insn)
     {
       /* Skip over non-insns.  */
@@ -3155,10 +3165,20 @@  noce_convert_multiple_sets (struct noce_if_info *if_info)
       gcc_checking_assert (set);
 
       rtx target = SET_DEST (set);
-      rtx temp = gen_reg_rtx (GET_MODE (target));
       rtx new_val = SET_SRC (set);
       rtx old_val = target;
 
+      rtx dest = SET_DEST (set);
+
+      rtx temp;
+      if (need_temps.get (dest))
+       {
+         temp = gen_reg_rtx (GET_MODE (target));
+         temps_created.put (target, true);
+       }
+      else
+       temp = target;
+
       /* If we were supposed to read from an earlier write in this block,
 	 we've changed the register allocation.  Rewire the read.  While
 	 we are looking, also try to catch a swap idiom.  */
@@ -3242,8 +3262,8 @@  noce_convert_multiple_sets (struct noce_if_info *if_info)
 
   /* Now fixup the assignments.  */
   for (int i = 0; i < count; i++)
-    noce_emit_move_insn (targets[i], temporaries[i]);
-
+    if (temps_created.get(targets[i]) && targets[i] != temporaries[i])
+      noce_emit_move_insn (targets[i], temporaries[i]);
 
   /* Actually emit the sequence if it isn't too expensive.  */
   rtx_insn *seq = get_insns ();
@@ -3749,6 +3769,34 @@  check_cond_move_block (basic_block bb,
   return TRUE;
 }
 
+/* Check for which sets we need to emit temporaries to hold the destination of
+   a conditional move.  */
+static void
+check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)
+{
+  rtx_insn *insn;
+
+  FOR_BB_INSNS (bb, insn)
+    {
+      rtx set, dest;
+
+      if (!active_insn_p (insn))
+	continue;
+
+      set = single_set (insn);
+      if (set == NULL_RTX)
+	continue;
+
+      dest = SET_DEST (set);
+
+      /* noce_emit_cmove will emit the condition check every time it is called
+         so we need a temp register if the destination is modified.  */
+      if (reg_overlap_mentioned_p (dest, cond))
+	need_temp->put (dest, true);
+    }
+}
+
+
 /* Given a basic block BB suitable for conditional move conversion,
    a condition COND, and pointer maps THEN_VALS and ELSE_VALS containing
    the register values depending on COND, emit the insns in the block as