[8/9] ifcvt: Handle swap-style idioms differently.

Message ID 20190802151028.15590-9-rdapp@linux.ibm.com
State New
Headers show
Series
  • Improve icvt "convert multiple"
Related show

Commit Message

Robin Dapp Aug. 2, 2019, 3:10 p.m.
A swap-style idiom like
 tmp = a
   a = b
   b = tmp
would be transformed like
 tmp_tmp = cond ? a : tmp
 tmp_a   = cond ? b : a
 tmp_b   = cond ? tmp_tmp : b
 [...]
including rewiring the first source operand to previous writes (e.g. tmp ->
tmp_tmp).

The code would recognize this, though, and change the last line to
 tmp_b   = cond ? a : b.

Without additional temporaries we can now emit the following sequence:
 tmp = a	     // (no condition here)
 a = cond ? b : a
 b = cond ? tmp : b
avoiding any rewiring which would break things now.

check_need_cmovs () finds swap-style idioms and marks the first of the
three instructions as not needing a cmove.
---
 gcc/ifcvt.c | 97 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 78 insertions(+), 19 deletions(-)

-- 
2.17.0

Comments

Richard Sandiford Aug. 6, 2019, 8:36 p.m. | #1
Robin Dapp <rdapp@linux.ibm.com> writes:
> A swap-style idiom like

>  tmp = a

>    a = b

>    b = tmp

> would be transformed like

>  tmp_tmp = cond ? a : tmp

>  tmp_a   = cond ? b : a

>  tmp_b   = cond ? tmp_tmp : b

>  [...]

> including rewiring the first source operand to previous writes (e.g. tmp ->

> tmp_tmp).

>

> The code would recognize this, though, and change the last line to

>  tmp_b   = cond ? a : b.

>

> Without additional temporaries we can now emit the following sequence:

>  tmp = a	     // (no condition here)

>  a = cond ? b : a

>  b = cond ? tmp : b

> avoiding any rewiring which would break things now.

>

> check_need_cmovs () finds swap-style idioms and marks the first of the

> three instructions as not needing a cmove.


Looks like a nice optimisation, but could we just test whether the
destination of a set isn't live on exit from the then block?  I think
we could do that on the fly during the main noce_convert_multiple_sets
loop.

Thanks,
Richard

> ---

>  gcc/ifcvt.c | 97 ++++++++++++++++++++++++++++++++++++++++++-----------

>  1 file changed, 78 insertions(+), 19 deletions(-)

>

> diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c

> index 955f9541f60..09bf443656c 100644

> --- a/gcc/ifcvt.c

> +++ b/gcc/ifcvt.c

> @@ -103,6 +103,7 @@ static rtx_insn *block_has_only_trap (basic_block);

>  static void check_need_temps (basic_block bb,

>                                hash_map<rtx, bool> *need_temp,

>                                rtx cond);

> +static void check_need_cmovs (basic_block bb, hash_map<rtx, bool> *need_cmov);

>  

>  

>  /* Count the number of non-jump active insns in BB.  */

> @@ -3207,6 +3208,7 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)

>    int count = 0;

>  

>    hash_map<rtx, bool> need_temps;

> +  hash_map<rtx, bool> need_no_cmovs;

>  

>    /* If we newly set a CC before a cmov, we might need a temporary

>       even though the compare will be removed by a later pass.  Costing

> @@ -3214,6 +3216,9 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)

>  

>    check_need_temps (then_bb, &need_temps, cond);

>  

> +  /* Identify swap-style idioms.  */

> +  check_need_cmovs (then_bb, &need_no_cmovs);

> +

>    hash_map<rtx, bool> temps_created;

>  

>    FOR_BB_INSNS (then_bb, insn)

> @@ -3229,10 +3234,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)

>        rtx new_val = SET_SRC (set);

>        rtx old_val = target;

>  

> -      rtx dest = SET_DEST (set);

> -

>        rtx temp;

> -      if (need_temps.get (dest))

> +      if (need_temps.get (target))

>         {

>           temp = gen_reg_rtx (GET_MODE (target));

>           temps_created.put (target, true);

> @@ -3241,18 +3244,11 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)

>         temp = target;

>  

>        /* If we were supposed to read from an earlier write in this block,

> -	 we've changed the register allocation.  Rewire the read.  While

> -	 we are looking, also try to catch a swap idiom.  */

> +	 we've changed the register allocation.  Rewire the read.   */

>        for (int i = count - 1; i >= 0; --i)

>  	if (reg_overlap_mentioned_p (new_val, targets[i]))

>  	  {

> -	    /* Catch a "swap" style idiom.  */

> -	    if (find_reg_note (insn, REG_DEAD, new_val) != NULL_RTX)

> -	      /* The write to targets[i] is only live until the read

> -		 here.  As the condition codes match, we can propagate

> -		 the set to here.  */

> -	      new_val = SET_SRC (single_set (unmodified_insns[i]));

> -	    else

> +	    if (find_reg_note (insn, REG_DEAD, new_val) == NULL_RTX)

>  	      new_val = temporaries[i];

>  	    break;

>  	  }

> @@ -3324,8 +3320,11 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)

>  

>        {

>  	start_sequence ();

> -	temp_dest1 = noce_emit_cmove (if_info, temp, cond_code,

> -	    x, y, new_val, old_val, NULL_RTX);

> +	if (!need_no_cmovs.get (insn))

> +	  temp_dest1 = noce_emit_cmove (if_info, temp, cond_code,

> +	      x, y, new_val, old_val, NULL_RTX);

> +	else

> +	  noce_emit_move_insn (target, new_val);

>  

>  	/* If we failed to expand the conditional move, drop out and don't

>  	   try to continue.  */

> @@ -3346,13 +3345,16 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)

>  	end_sequence ();

>        }

>  

> -	/* Now try emitting one passing a non-canonicalized cc comparison.

> -	   This allows the backend to emit a cmov directly without additional

> +	/* Now try emitting a cmov passing a non-canonicalized cc comparison.

> +	   This allows backends to emit a cmov directly without additional

>  	   compare.  */

>        {

>  	start_sequence ();

> -	temp_dest2 = noce_emit_cmove (if_info, target, cond_code,

> -	    x, y, new_val, old_val, cc_cmp);

> +	if (!need_no_cmovs.get (insn))

> +	  temp_dest2 = noce_emit_cmove (if_info, target, cond_code,

> +	      x, y, new_val, old_val, cc_cmp);

> +	else

> +	  noce_emit_move_insn (target, new_val);

>  

>  	/* If we failed to expand the conditional move, drop out and don't

>  	   try to continue.  */

> @@ -3931,6 +3933,7 @@ check_cond_move_block (basic_block bb,

>  

>  /* Check for which sets we need to emit temporaries to hold the destination of

>     a conditional move.  */

> +

>  static void

>  check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)

>  {

> @@ -3938,7 +3941,7 @@ check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)

>  

>    FOR_BB_INSNS (bb, insn)

>      {

> -      rtx set, dest;

> +      rtx set, src, dest;

>  

>        if (!active_insn_p (insn))

>  	continue;

> @@ -3947,12 +3950,68 @@ check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)

>        if (set == NULL_RTX)

>  	continue;

>  

> +      src = SET_SRC (set);

>        dest = SET_DEST (set);

>  

>        /* noce_emit_cmove will emit the condition check every time it is called

>           so we need a temp register if the destination is modified.  */

>        if (reg_overlap_mentioned_p (dest, cond))

>  	need_temp->put (dest, true);

> +

> +    }

> +}

> +

> +/* Find local swap-style idioms in BB and mark the first insn (1)

> +   that is only a temporary as not needing a conditional move as

> +   it is going to be dead afterwards anyway.

> +

> +     (1) int tmp = a;

> +	 a = b;

> +	 b = tmp;

> +

> +

> +        ifcvt

> +	 -->

> +

> +	 load tmp,a

> +	 cmov a,b

> +	 cmov b,tmp */

> +

> +static void

> +check_need_cmovs (basic_block bb, hash_map<rtx, bool> *need_cmov)

> +{

> +  rtx_insn *insn;

> +  int count = 0;

> +  auto_vec<rtx> insns;

> +  auto_vec<rtx> dests;

> +

> +  FOR_BB_INSNS (bb, insn)

> +    {

> +      rtx set, src, dest;

> +

> +      if (!active_insn_p (insn))

> +	continue;

> +

> +      set = single_set (insn);

> +      if (set == NULL_RTX)

> +	continue;

> +

> +      src = SET_SRC (set);

> +      dest = SET_DEST (set);

> +

> +      for (int i = count - 1; i >= 0; --i)

> +	{

> +	  if (reg_overlap_mentioned_p (src, dests[i])

> +	      && find_reg_note (insn, REG_DEAD, src) != NULL_RTX)

> +	    {

> +	      need_cmov->put (insns[i], false);

> +	    }

> +	}

> +

> +      insns.safe_push (insn);

> +      dests.safe_push (dest);

> +

> +      count++;

>      }

>  }
Robin Dapp Aug. 16, 2019, 12:52 p.m. | #2
> Looks like a nice optimisation, but could we just test whether the

> destination of a set isn't live on exit from the then block?  I think

> we could do that on the fly during the main noce_convert_multiple_sets

> loop.


I included this locally along with the rest of the remarks. Any comments
on the rest/bulk of the patch (the part trying to compare costs of both
cmovs "types")?

Regards
 Robin
Richard Sandiford Aug. 16, 2019, 3:25 p.m. | #3
Robin Dapp <rdapp@linux.ibm.com> writes:
>> Looks like a nice optimisation, but could we just test whether the

>> destination of a set isn't live on exit from the then block?  I think

>> we could do that on the fly during the main noce_convert_multiple_sets

>> loop.

>

> I included this locally along with the rest of the remarks. Any comments

> on the rest/bulk of the patch (the part trying to compare costs of both

> cmovs "types")?


I'm still a bit worried about the overlap between the expanded
noce_convert_multiple_sets and cond_move_process_if_block (5/9).
It seems like we're making noce_convert_multiple_set handle most of
the conditional move cases that cond_move_process_if_block can handle.
But like you say, noce_convert_multiple_set is still restricted to
half-diamonds, whereas cond_move_process_if_block can handle full
diamonds.

3/9, 4/9 and 8/9 seems like good independent improvements.
But for 5/9, 6/9 and 7/9, it looks to me like it would be better
to improve cond_move_process_if_block so that it handles all the
conditional move cases you're targetting.

Definitely welcome other opinions though. :-)

Thanks,
Richard
Robin Dapp Aug. 17, 2019, 11:44 a.m. | #4
> I'm still a bit worried about the overlap between the expanded

> noce_convert_multiple_sets and cond_move_process_if_block (5/9).

> It seems like we're making noce_convert_multiple_set handle most of

> the conditional move cases that cond_move_process_if_block can handle.

> But like you say, noce_convert_multiple_set is still restricted to

> half-diamonds, whereas cond_move_process_if_block can handle full

> diamonds.


I see your concern and would also agree on that there should be a
clearer demarcation of which method can handle what.  I'm not really
sure I fully understand the distinction between "noce" and "cond_move"
anyway, when we emit conditional moves in the "noce" parts.  As said
before, in my opinion, both noce_convert_multiple and cond_move_process
should be unified rather sooner than later.

Yet, before and after my suggested patch, noce_convert_multiple_set can
handle blocks that cond_move_process cannot, e.g. cmovs that write to
one of the registers the condition checked (like min/max idioms) or even
cmovs that touch registers that are modified earlier in the block (like
in swap idioms).  As far as I understand, this is why the additional
temporaries have been introduced in the first place - my patch now tries
to limit these temporaries to situations where we really need them.

I would argue that there is still sufficient difference between them,
even though both can now handle constants.

Regards
 Robin

Patch

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 955f9541f60..09bf443656c 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -103,6 +103,7 @@  static rtx_insn *block_has_only_trap (basic_block);
 static void check_need_temps (basic_block bb,
                               hash_map<rtx, bool> *need_temp,
                               rtx cond);
+static void check_need_cmovs (basic_block bb, hash_map<rtx, bool> *need_cmov);
 
 
 /* Count the number of non-jump active insns in BB.  */
@@ -3207,6 +3208,7 @@  noce_convert_multiple_sets (struct noce_if_info *if_info)
   int count = 0;
 
   hash_map<rtx, bool> need_temps;
+  hash_map<rtx, bool> need_no_cmovs;
 
   /* If we newly set a CC before a cmov, we might need a temporary
      even though the compare will be removed by a later pass.  Costing
@@ -3214,6 +3216,9 @@  noce_convert_multiple_sets (struct noce_if_info *if_info)
 
   check_need_temps (then_bb, &need_temps, cond);
 
+  /* Identify swap-style idioms.  */
+  check_need_cmovs (then_bb, &need_no_cmovs);
+
   hash_map<rtx, bool> temps_created;
 
   FOR_BB_INSNS (then_bb, insn)
@@ -3229,10 +3234,8 @@  noce_convert_multiple_sets (struct noce_if_info *if_info)
       rtx new_val = SET_SRC (set);
       rtx old_val = target;
 
-      rtx dest = SET_DEST (set);
-
       rtx temp;
-      if (need_temps.get (dest))
+      if (need_temps.get (target))
        {
          temp = gen_reg_rtx (GET_MODE (target));
          temps_created.put (target, true);
@@ -3241,18 +3244,11 @@  noce_convert_multiple_sets (struct noce_if_info *if_info)
        temp = target;
 
       /* If we were supposed to read from an earlier write in this block,
-	 we've changed the register allocation.  Rewire the read.  While
-	 we are looking, also try to catch a swap idiom.  */
+	 we've changed the register allocation.  Rewire the read.   */
       for (int i = count - 1; i >= 0; --i)
 	if (reg_overlap_mentioned_p (new_val, targets[i]))
 	  {
-	    /* Catch a "swap" style idiom.  */
-	    if (find_reg_note (insn, REG_DEAD, new_val) != NULL_RTX)
-	      /* The write to targets[i] is only live until the read
-		 here.  As the condition codes match, we can propagate
-		 the set to here.  */
-	      new_val = SET_SRC (single_set (unmodified_insns[i]));
-	    else
+	    if (find_reg_note (insn, REG_DEAD, new_val) == NULL_RTX)
 	      new_val = temporaries[i];
 	    break;
 	  }
@@ -3324,8 +3320,11 @@  noce_convert_multiple_sets (struct noce_if_info *if_info)
 
       {
 	start_sequence ();
-	temp_dest1 = noce_emit_cmove (if_info, temp, cond_code,
-	    x, y, new_val, old_val, NULL_RTX);
+	if (!need_no_cmovs.get (insn))
+	  temp_dest1 = noce_emit_cmove (if_info, temp, cond_code,
+	      x, y, new_val, old_val, NULL_RTX);
+	else
+	  noce_emit_move_insn (target, new_val);
 
 	/* If we failed to expand the conditional move, drop out and don't
 	   try to continue.  */
@@ -3346,13 +3345,16 @@  noce_convert_multiple_sets (struct noce_if_info *if_info)
 	end_sequence ();
       }
 
-	/* Now try emitting one passing a non-canonicalized cc comparison.
-	   This allows the backend to emit a cmov directly without additional
+	/* Now try emitting a cmov passing a non-canonicalized cc comparison.
+	   This allows backends to emit a cmov directly without additional
 	   compare.  */
       {
 	start_sequence ();
-	temp_dest2 = noce_emit_cmove (if_info, target, cond_code,
-	    x, y, new_val, old_val, cc_cmp);
+	if (!need_no_cmovs.get (insn))
+	  temp_dest2 = noce_emit_cmove (if_info, target, cond_code,
+	      x, y, new_val, old_val, cc_cmp);
+	else
+	  noce_emit_move_insn (target, new_val);
 
 	/* If we failed to expand the conditional move, drop out and don't
 	   try to continue.  */
@@ -3931,6 +3933,7 @@  check_cond_move_block (basic_block bb,
 
 /* Check for which sets we need to emit temporaries to hold the destination of
    a conditional move.  */
+
 static void
 check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)
 {
@@ -3938,7 +3941,7 @@  check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)
 
   FOR_BB_INSNS (bb, insn)
     {
-      rtx set, dest;
+      rtx set, src, dest;
 
       if (!active_insn_p (insn))
 	continue;
@@ -3947,12 +3950,68 @@  check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)
       if (set == NULL_RTX)
 	continue;
 
+      src = SET_SRC (set);
       dest = SET_DEST (set);
 
       /* noce_emit_cmove will emit the condition check every time it is called
          so we need a temp register if the destination is modified.  */
       if (reg_overlap_mentioned_p (dest, cond))
 	need_temp->put (dest, true);
+
+    }
+}
+
+/* Find local swap-style idioms in BB and mark the first insn (1)
+   that is only a temporary as not needing a conditional move as
+   it is going to be dead afterwards anyway.
+
+     (1) int tmp = a;
+	 a = b;
+	 b = tmp;
+
+
+        ifcvt
+	 -->
+
+	 load tmp,a
+	 cmov a,b
+	 cmov b,tmp */
+
+static void
+check_need_cmovs (basic_block bb, hash_map<rtx, bool> *need_cmov)
+{
+  rtx_insn *insn;
+  int count = 0;
+  auto_vec<rtx> insns;
+  auto_vec<rtx> dests;
+
+  FOR_BB_INSNS (bb, insn)
+    {
+      rtx set, src, dest;
+
+      if (!active_insn_p (insn))
+	continue;
+
+      set = single_set (insn);
+      if (set == NULL_RTX)
+	continue;
+
+      src = SET_SRC (set);
+      dest = SET_DEST (set);
+
+      for (int i = count - 1; i >= 0; --i)
+	{
+	  if (reg_overlap_mentioned_p (src, dests[i])
+	      && find_reg_note (insn, REG_DEAD, src) != NULL_RTX)
+	    {
+	      need_cmov->put (insns[i], false);
+	    }
+	}
+
+      insns.safe_push (insn);
+      dests.safe_push (dest);
+
+      count++;
     }
 }