[11/25] Simplify vec_merge according to the mask.

Message ID 333c6a5ad2f1c56613c4734a830fc5d8214061d9.1536144068.git.ams@codesourcery.com
State New
Headers show
Series
  • AMD GCN Port
Related show

Commit Message

Andrew Stubbs Sept. 5, 2018, 11:50 a.m.
This patch was part of the original patch we acquired from Honza and Martin.

It simplifies vector elements that are inactive, according to the mask.

2018-09-05  Jan Hubicka  <jh@suse.cz>
	    Martin Jambor  <mjambor@suse.cz>

	* simplify-rtx.c (simplify_merge_mask): New function.
	(simplify_ternary_operation): Use it, also see if VEC_MERGEs with the
	same masks are used in op1 or op2.
---
 gcc/simplify-rtx.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

Comments

Richard Sandiford Sept. 17, 2018, 9:05 a.m. | #1
<ams@codesourcery.com> writes:
> This patch was part of the original patch we acquired from Honza and Martin.

>

> It simplifies vector elements that are inactive, according to the mask.

>

> 2018-09-05  Jan Hubicka  <jh@suse.cz>

> 	    Martin Jambor  <mjambor@suse.cz>

>

> 	* simplify-rtx.c (simplify_merge_mask): New function.

> 	(simplify_ternary_operation): Use it, also see if VEC_MERGEs with the

> 	same masks are used in op1 or op2.


Would be good to have self-tests for the new transforms.

> +/* X is an operand number OP of VEC_MERGE operation with MASK.


"of a".  Might also be worth mentioning that X can be a nested
operation of a VEC_MERGE with a different mode, although it always
has the same number of elements as MASK.

> +   Try to simplify using knowledge that values outside of MASK


"simplify X"

> +   will not be used.  */

> +

> +rtx

> +simplify_merge_mask (rtx x, rtx mask, int op)

> +{

> +  gcc_assert (VECTOR_MODE_P (GET_MODE (x)));

> +  poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x));

> +  if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask))

> +    {

> +      if (!side_effects_p (XEXP (x, 1 - op)))

> +	return XEXP (x, op);

> +    }

> +  if (side_effects_p (x))

> +    return NULL_RTX;

> +  if (UNARY_P (x)

> +      && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))

> +      && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits))


known_eq, since we require equality for correctness.  Same for the
other tests.

> +    {

> +      rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);

> +      if (top0)

> +	return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0,

> +				   GET_MODE (XEXP (x, 0)));

> +    }

> +  if (BINARY_P (x)

> +      && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))

> +      && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)

> +      && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))

> +      && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits))

> +    {

> +      rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);

> +      rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);

> +      if (top0 || top1)

> +	return simplify_gen_binary (GET_CODE (x), GET_MODE (x),

> +				    top0 ? top0 : XEXP (x, 0),

> +				    top1 ? top1 : XEXP (x, 1));

> +    }

> +  if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY

> +      && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))

> +      && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)

> +      && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))

> +      && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)

> +      && VECTOR_MODE_P (GET_MODE (XEXP (x, 2)))

> +      && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))), nunits))

> +    {

> +      rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);

> +      rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);

> +      rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op);

> +      if (top0 || top1)

> +	return simplify_gen_ternary (GET_CODE (x), GET_MODE (x),

> +				     GET_MODE (XEXP (x, 0)),

> +				     top0 ? top0 : XEXP (x, 0),

> +				     top1 ? top1 : XEXP (x, 1),

> +				     top2 ? top2 : XEXP (x, 2));

> +    }

> +  return NULL_RTX;

> +}

> +

>  

>  /* Simplify CODE, an operation with result mode MODE and three operands,

>     OP0, OP1, and OP2.  OP0_MODE was the mode of OP0 before it became

> @@ -5967,6 +6026,28 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,

>  	  && !side_effects_p (op2) && !side_effects_p (op1))

>  	return op0;

>  

> +      if (!side_effects_p (op2))

> +	{

> +	  rtx top0 = simplify_merge_mask (op0, op2, 0);

> +	  rtx top1 = simplify_merge_mask (op1, op2, 1);

> +	  if (top0 || top1)

> +	    return simplify_gen_ternary (code, mode, mode,

> +					 top0 ? top0 : op0,

> +					 top1 ? top1 : op1, op2);

> +	}

> +

> +      if (GET_CODE (op0) == VEC_MERGE

> +	  && rtx_equal_p (op2, XEXP (op0, 2))

> +	  && !side_effects_p (XEXP (op0, 1)) && !side_effects_p (op2))

> +	return simplify_gen_ternary (code, mode, mode,

> +				     XEXP (op0, 0), op1, op2);

> +

> +      if (GET_CODE (op1) == VEC_MERGE

> +	  && rtx_equal_p (op2, XEXP (op1, 2))

> +	  && !side_effects_p (XEXP (op0, 0)) && !side_effects_p (op2))

> +	return simplify_gen_ternary (code, mode, mode,

> +				     XEXP (op0, 1), op1, op2);


Doesn't simplify_merge_mask make the second two redundant?  I couldn't
see the difference between them and the first condition tested by
simplify_merge_mask.

Thanks,
Richard
Andrew Stubbs Sept. 20, 2018, 3:26 p.m. | #2
On 17/09/18 10:05, Richard Sandiford wrote:
> Would be good to have self-tests for the new transforms.

[...]
> known_eq, since we require equality for correctness.  Same for the

> other tests.


How about the attached? I've made the edits you requested and written 
some self-tests.

> Doesn't simplify_merge_mask make the second two redundant?  I couldn't

> see the difference between them and the first condition tested by

> simplify_merge_mask.


Yes, I think you're right. Removed, now.

Andrew
Simplify vec_merge according to the mask.

This patch was part of the original patch we acquired from Honza and Martin.

It simplifies nested vec_merge operations using the same mask.

Self-tests are included.

2018-09-20  Andrew Stubbs  <ams@codesourcery.com>
	    Jan Hubicka  <jh@suse.cz>
	    Martin Jambor  <mjambor@suse.cz>

	* simplify-rtx.c (simplify_merge_mask): New function.
	(simplify_ternary_operation): Use it, also see if VEC_MERGEs with the
	same masks are used in op1 or op2.
	(test_vec_merge): New function.
	(test_vector_ops): Call test_vec_merge.

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index f77e1aa..13b2882 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -5578,6 +5578,68 @@ simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val)
   return NULL_RTX;
 }
 
+/* Try to simplify nested VEC_MERGE operations by comparing the masks.  The
+   nested operations need not use the same vector mode, but must have the same
+   number of elements.
+
+   X is an operand number OP of a VEC_MERGE operation with MASK.
+   Returns NULL_RTX if no simplification is possible.  */
+
+rtx
+simplify_merge_mask (rtx x, rtx mask, int op)
+{
+  gcc_assert (VECTOR_MODE_P (GET_MODE (x)));
+  poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x));
+  if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask))
+    {
+      if (!side_effects_p (XEXP (x, 1 - op)))
+	return XEXP (x, op);
+    }
+  if (side_effects_p (x))
+    return NULL_RTX;
+  if (UNARY_P (x)
+      && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+      && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits))
+    {
+      rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+      if (top0)
+	return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0,
+				   GET_MODE (XEXP (x, 0)));
+    }
+  if (BINARY_P (x)
+      && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+      && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
+      && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
+      && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits))
+    {
+      rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+      rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
+      if (top0 || top1)
+	return simplify_gen_binary (GET_CODE (x), GET_MODE (x),
+				    top0 ? top0 : XEXP (x, 0),
+				    top1 ? top1 : XEXP (x, 1));
+    }
+  if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY
+      && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+      && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
+      && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
+      && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)
+      && VECTOR_MODE_P (GET_MODE (XEXP (x, 2)))
+      && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))), nunits))
+    {
+      rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+      rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
+      rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op);
+      if (top0 || top1)
+	return simplify_gen_ternary (GET_CODE (x), GET_MODE (x),
+				     GET_MODE (XEXP (x, 0)),
+				     top0 ? top0 : XEXP (x, 0),
+				     top1 ? top1 : XEXP (x, 1),
+				     top2 ? top2 : XEXP (x, 2));
+    }
+  return NULL_RTX;
+}
+
 
 /* Simplify CODE, an operation with result mode MODE and three operands,
    OP0, OP1, and OP2.  OP0_MODE was the mode of OP0 before it became
@@ -5967,6 +6029,16 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
 	  && !side_effects_p (op2) && !side_effects_p (op1))
 	return op0;
 
+      if (!side_effects_p (op2))
+	{
+	  rtx top0 = simplify_merge_mask (op0, op2, 0);
+	  rtx top1 = simplify_merge_mask (op1, op2, 1);
+	  if (top0 || top1)
+	    return simplify_gen_ternary (code, mode, mode,
+					 top0 ? top0 : op0,
+					 top1 ? top1 : op1, op2);
+	}
+
       break;
 
     default:
@@ -6932,6 +7004,71 @@ test_vector_ops_series (machine_mode mode, rtx scalar_reg)
 					    constm1_rtx));
 }
 
+/* Verify simplify_merge_mask works correctly.  */
+
+static void
+test_vec_merge (machine_mode mode)
+{
+  rtx op0 = make_test_reg (mode);
+  rtx op1 = make_test_reg (mode);
+  rtx op2 = make_test_reg (mode);
+  rtx op3 = make_test_reg (mode);
+  rtx op4 = make_test_reg (mode);
+  rtx op5 = make_test_reg (mode);
+  rtx mask1 = make_test_reg (SImode);
+  rtx mask2 = make_test_reg (SImode);
+  rtx vm1 = gen_rtx_VEC_MERGE (mode, op0, op1, mask1);
+  rtx vm2 = gen_rtx_VEC_MERGE (mode, op2, op3, mask1);
+  rtx vm3 = gen_rtx_VEC_MERGE (mode, op4, op5, mask1);
+
+  /* Simple vec_merge.  */
+  ASSERT_EQ (op0, simplify_merge_mask (vm1, mask1, 0));
+  ASSERT_EQ (op1, simplify_merge_mask (vm1, mask1, 1));
+  ASSERT_EQ (NULL_RTX, simplify_merge_mask (vm1, mask2, 0));
+  ASSERT_EQ (NULL_RTX, simplify_merge_mask (vm1, mask2, 1));
+
+  /* Nested vec_merge.  */
+  rtx nvm = gen_rtx_VEC_MERGE (mode, vm1, vm2, mask1);
+  ASSERT_EQ (vm1, simplify_merge_mask (nvm, mask1, 0));
+  ASSERT_EQ (vm2, simplify_merge_mask (nvm, mask1, 1));
+
+  /* Intermediate unary op. */
+  rtx unop = gen_rtx_NOT (mode, vm1);
+  ASSERT_EQ (op0, XEXP (simplify_merge_mask (unop, mask1, 0), 0));
+  ASSERT_EQ (op1, XEXP (simplify_merge_mask (unop, mask1, 1), 0));
+
+  /* Intermediate binary op. */
+  rtx binop = gen_rtx_PLUS (mode, vm1, vm2);
+  rtx res = simplify_merge_mask (binop, mask1, 0);
+  ASSERT_EQ (op0, XEXP (res, 0));
+  ASSERT_EQ (op2, XEXP (res, 1));
+  res = simplify_merge_mask (binop, mask1, 1);
+  ASSERT_EQ (op1, XEXP (res, 0));
+  ASSERT_EQ (op3, XEXP (res, 1));
+
+  /* Intermediate ternary op. */
+  rtx tenop = gen_rtx_FMA (mode, vm1, vm2, vm3);
+  res = simplify_merge_mask (tenop, mask1, 0);
+  ASSERT_EQ (op0, XEXP (res, 0));
+  ASSERT_EQ (op2, XEXP (res, 1));
+  ASSERT_EQ (op4, XEXP (res, 2));
+  res = simplify_merge_mask (tenop, mask1, 1);
+  ASSERT_EQ (op1, XEXP (res, 0));
+  ASSERT_EQ (op3, XEXP (res, 1));
+  ASSERT_EQ (op5, XEXP (res, 2));
+
+  /* Side effects.  */
+  rtx badop0 = gen_rtx_PRE_INC (mode, op0);
+  rtx badvm = gen_rtx_VEC_MERGE (mode, badop0, op1, mask1);
+  ASSERT_EQ (badop0, simplify_merge_mask (badvm, mask1, 0));
+  ASSERT_EQ (NULL_RTX, simplify_merge_mask (badvm, mask1, 1));
+
+  /* Called indirectly.  */
+  res = simplify_rtx (nvm);
+  ASSERT_EQ (op0, XEXP (res, 0));
+  ASSERT_EQ (op3, XEXP (res, 1));
+}
+
 /* Verify some simplifications involving vectors.  */
 
 static void
@@ -6947,6 +7084,7 @@ test_vector_ops ()
 	  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
 	      && maybe_gt (GET_MODE_NUNITS (mode), 2))
 	    test_vector_ops_series (mode, scalar_reg);
+	  test_vec_merge (mode);
 	}
     }
 }
Andrew Stubbs Sept. 26, 2018, 4:23 p.m. | #3
Ping.

On 20/09/18 16:26, Andrew Stubbs wrote:
> On 17/09/18 10:05, Richard Sandiford wrote:

>> Would be good to have self-tests for the new transforms.

> [...]

>> known_eq, since we require equality for correctness.  Same for the

>> other tests.

> 

> How about the attached? I've made the edits you requested and written 

> some self-tests.

> 

>> Doesn't simplify_merge_mask make the second two redundant?  I couldn't

>> see the difference between them and the first condition tested by

>> simplify_merge_mask.

> 

> Yes, I think you're right. Removed, now.

> 

> Andrew

>
Richard Sandiford Sept. 26, 2018, 4:48 p.m. | #4
Andrew Stubbs <ams@codesourcery.com> writes:
> On 17/09/18 10:05, Richard Sandiford wrote:

>> Would be good to have self-tests for the new transforms.

> [...]

>> known_eq, since we require equality for correctness.  Same for the

>> other tests.

>

> How about the attached? I've made the edits you requested and written 

> some self-tests.

>

>> Doesn't simplify_merge_mask make the second two redundant?  I couldn't

>> see the difference between them and the first condition tested by

>> simplify_merge_mask.

>

> Yes, I think you're right. Removed, now.

>

> Andrew

>

> Simplify vec_merge according to the mask.

>

> This patch was part of the original patch we acquired from Honza and Martin.

>

> It simplifies nested vec_merge operations using the same mask.

>

> Self-tests are included.

>

> 2018-09-20  Andrew Stubbs  <ams@codesourcery.com>

> 	    Jan Hubicka  <jh@suse.cz>

> 	    Martin Jambor  <mjambor@suse.cz>

>

> 	* simplify-rtx.c (simplify_merge_mask): New function.

> 	(simplify_ternary_operation): Use it, also see if VEC_MERGEs with the

> 	same masks are used in op1 or op2.

> 	(test_vec_merge): New function.

> 	(test_vector_ops): Call test_vec_merge.

>

> diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c

> index f77e1aa..13b2882 100644

> --- a/gcc/simplify-rtx.c

> +++ b/gcc/simplify-rtx.c

> @@ -5578,6 +5578,68 @@ simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val)

>    return NULL_RTX;

>  }

>  

> +/* Try to simplify nested VEC_MERGE operations by comparing the masks.  The

> +   nested operations need not use the same vector mode, but must have the same

> +   number of elements.

> +

> +   X is an operand number OP of a VEC_MERGE operation with MASK.

> +   Returns NULL_RTX if no simplification is possible.  */


X isn't always operand OP, it can be nested within it.  How about:

/* Try to simplify X given that it appears within operand OP of a
   VEC_MERGE operation whose mask is MASK.  X need not use the same
   vector mode as the VEC_MERGE, but it must have the same number of
   elements.

   Return the simplified X on success, otherwise return NULL_RTX.  */

> +

> +rtx

> +simplify_merge_mask (rtx x, rtx mask, int op)

> +{

> +  gcc_assert (VECTOR_MODE_P (GET_MODE (x)));

> +  poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x));

> +  if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask))

> +    {

> +      if (!side_effects_p (XEXP (x, 1 - op)))

> +	return XEXP (x, op);

> +    }

> +  if (side_effects_p (x))

> +    return NULL_RTX;

> +  if (UNARY_P (x)

> +      && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))

> +      && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits))

> +    {

> +      rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);

> +      if (top0)

> +	return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0,

> +				   GET_MODE (XEXP (x, 0)));

> +    }

> +  if (BINARY_P (x)

> +      && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))

> +      && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)

> +      && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))

> +      && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits))

> +    {

> +      rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);

> +      rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);

> +      if (top0 || top1)

> +	return simplify_gen_binary (GET_CODE (x), GET_MODE (x),

> +				    top0 ? top0 : XEXP (x, 0),

> +				    top1 ? top1 : XEXP (x, 1));

> +    }

> +  if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY

> +      && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))

> +      && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)

> +      && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))

> +      && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)

> +      && VECTOR_MODE_P (GET_MODE (XEXP (x, 2)))

> +      && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))), nunits))

> +    {

> +      rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);

> +      rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);

> +      rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op);

> +      if (top0 || top1)


|| top2?

> +	return simplify_gen_ternary (GET_CODE (x), GET_MODE (x),

> +				     GET_MODE (XEXP (x, 0)),

> +				     top0 ? top0 : XEXP (x, 0),

> +				     top1 ? top1 : XEXP (x, 1),

> +				     top2 ? top2 : XEXP (x, 2));

> +    }

> +  return NULL_RTX;

> +}

> +

>  

>  /* Simplify CODE, an operation with result mode MODE and three operands,

>     OP0, OP1, and OP2.  OP0_MODE was the mode of OP0 before it became

> @@ -5967,6 +6029,16 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,

>  	  && !side_effects_p (op2) && !side_effects_p (op1))

>  	return op0;

>  

> +      if (!side_effects_p (op2))

> +	{

> +	  rtx top0 = simplify_merge_mask (op0, op2, 0);

> +	  rtx top1 = simplify_merge_mask (op1, op2, 1);

> +	  if (top0 || top1)

> +	    return simplify_gen_ternary (code, mode, mode,

> +					 top0 ? top0 : op0,

> +					 top1 ? top1 : op1, op2);

> +	}

> +

>        break;

>  

>      default:

> @@ -6932,6 +7004,71 @@ test_vector_ops_series (machine_mode mode, rtx scalar_reg)

>  					    constm1_rtx));

>  }

>  

> +/* Verify simplify_merge_mask works correctly.  */

> +

> +static void

> +test_vec_merge (machine_mode mode)

> +{

> +  rtx op0 = make_test_reg (mode);

> +  rtx op1 = make_test_reg (mode);

> +  rtx op2 = make_test_reg (mode);

> +  rtx op3 = make_test_reg (mode);

> +  rtx op4 = make_test_reg (mode);

> +  rtx op5 = make_test_reg (mode);

> +  rtx mask1 = make_test_reg (SImode);

> +  rtx mask2 = make_test_reg (SImode);

> +  rtx vm1 = gen_rtx_VEC_MERGE (mode, op0, op1, mask1);

> +  rtx vm2 = gen_rtx_VEC_MERGE (mode, op2, op3, mask1);

> +  rtx vm3 = gen_rtx_VEC_MERGE (mode, op4, op5, mask1);

> +

> +  /* Simple vec_merge.  */

> +  ASSERT_EQ (op0, simplify_merge_mask (vm1, mask1, 0));

> +  ASSERT_EQ (op1, simplify_merge_mask (vm1, mask1, 1));

> +  ASSERT_EQ (NULL_RTX, simplify_merge_mask (vm1, mask2, 0));

> +  ASSERT_EQ (NULL_RTX, simplify_merge_mask (vm1, mask2, 1));

> +

> +  /* Nested vec_merge.  */

> +  rtx nvm = gen_rtx_VEC_MERGE (mode, vm1, vm2, mask1);

> +  ASSERT_EQ (vm1, simplify_merge_mask (nvm, mask1, 0));

> +  ASSERT_EQ (vm2, simplify_merge_mask (nvm, mask1, 1));


Think the last two should simplify to op0 and op3, which I guess
means recursing on the "return XEXP (x, op);"

> +  /* Intermediate unary op. */

> +  rtx unop = gen_rtx_NOT (mode, vm1);

> +  ASSERT_EQ (op0, XEXP (simplify_merge_mask (unop, mask1, 0), 0));

> +  ASSERT_EQ (op1, XEXP (simplify_merge_mask (unop, mask1, 1), 0));

> +

> +  /* Intermediate binary op. */

> +  rtx binop = gen_rtx_PLUS (mode, vm1, vm2);

> +  rtx res = simplify_merge_mask (binop, mask1, 0);

> +  ASSERT_EQ (op0, XEXP (res, 0));

> +  ASSERT_EQ (op2, XEXP (res, 1));

> +  res = simplify_merge_mask (binop, mask1, 1);

> +  ASSERT_EQ (op1, XEXP (res, 0));

> +  ASSERT_EQ (op3, XEXP (res, 1));

> +

> +  /* Intermediate ternary op. */

> +  rtx tenop = gen_rtx_FMA (mode, vm1, vm2, vm3);

> +  res = simplify_merge_mask (tenop, mask1, 0);

> +  ASSERT_EQ (op0, XEXP (res, 0));

> +  ASSERT_EQ (op2, XEXP (res, 1));

> +  ASSERT_EQ (op4, XEXP (res, 2));

> +  res = simplify_merge_mask (tenop, mask1, 1);

> +  ASSERT_EQ (op1, XEXP (res, 0));

> +  ASSERT_EQ (op3, XEXP (res, 1));

> +  ASSERT_EQ (op5, XEXP (res, 2));

> [...]

> +  /* Called indirectly.  */

> +  res = simplify_rtx (nvm);

> +  ASSERT_EQ (op0, XEXP (res, 0));

> +  ASSERT_EQ (op3, XEXP (res, 1));


Would probably be better to ASSERT_RTX_EQ against the full simplified rtx,
e.g. gen_rtx_NOT (mode, op0)

Thanks,
Richard
Andrew Stubbs Sept. 26, 2018, 4:57 p.m. | #5
On 26/09/18 17:48, Richard Sandiford wrote:
> Andrew Stubbs <ams@codesourcery.com> writes:

>> +  /* Nested vec_merge.  */

>> +  rtx nvm = gen_rtx_VEC_MERGE (mode, vm1, vm2, mask1);

>> +  ASSERT_EQ (vm1, simplify_merge_mask (nvm, mask1, 0));

>> +  ASSERT_EQ (vm2, simplify_merge_mask (nvm, mask1, 1));

> 

> Think the last two should simplify to op0 and op3, which I guess

> means recursing on the "return XEXP (x, op);"


I thought about doing that, but I noticed that, for example, 
simplify_gen_unary does not recurse into its operand. Is that an 
omission, or is it expected that those operands will already have been 
simplified?

Andrew
Richard Sandiford Sept. 27, 2018, 7:16 a.m. | #6
Andrew Stubbs <ams@codesourcery.com> writes:
> On 26/09/18 17:48, Richard Sandiford wrote:

>> Andrew Stubbs <ams@codesourcery.com> writes:

>>> +  /* Nested vec_merge.  */

>>> +  rtx nvm = gen_rtx_VEC_MERGE (mode, vm1, vm2, mask1);

>>> +  ASSERT_EQ (vm1, simplify_merge_mask (nvm, mask1, 0));

>>> +  ASSERT_EQ (vm2, simplify_merge_mask (nvm, mask1, 1));

>> 

>> Think the last two should simplify to op0 and op3, which I guess

>> means recursing on the "return XEXP (x, op);"

>

> I thought about doing that, but I noticed that, for example, 

> simplify_gen_unary does not recurse into its operand. Is that an 

> omission, or is it expected that those operands will already have been 

> simplified?


Ah, yeah, each operand should already fully be simplified.  But then the
only thing we testing here compared to:

  /* Simple vec_merge.  */
  ASSERT_EQ (op0, simplify_merge_mask (vm1, mask1, 0));
  ASSERT_EQ (op1, simplify_merge_mask (vm1, mask1, 1));

is that we *don't* recurse.  It would be worth adding a comment
to say that, since if we both thought about it, I guess whoever
comes next will too.

And the assumption that existing VEC_MERGEs are fully simplified means
we should return null:

  if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask))
    {
      if (!side_effects_p (XEXP (x, 1 - op)))
	return XEXP (x, op);
--->here
    }

On keeping the complexity down:

  if (side_effects_p (x))
    return NULL_RTX;

makes this quadratic for chains of unary operations.  Is it really
needed?  The code after it simply recurses on operands and doesn't
discard anything itself, so it looks like the VEC_MERGE call to
side_effects_p would be enough.

Richard
Andrew Stubbs Sept. 27, 2018, 2:11 p.m. | #7
On 27/09/18 08:16, Richard Sandiford wrote:
> On keeping the complexity down:

> 

>    if (side_effects_p (x))

>      return NULL_RTX;

> 

> makes this quadratic for chains of unary operations.  Is it really

> needed?  The code after it simply recurses on operands and doesn't

> discard anything itself, so it looks like the VEC_MERGE call to

> side_effects_p would be enough.


The two calls do not check the same thing. The other one checks the 
other operand of a vec_merge, and this checks the current operand.

I suppose it's safe to discard a VEC_MERGE when the chosen operand 
contains side effects, but I'm not so sure when the VEC_MERGE itself is 
an operand to an operator with side effects. I'm having a hard time 
inventing a scenario in which a PRE_INC could contain a VEC_MERGE, but 
maybe a volatile MEM or ASM_OPERANDS could do?

Conversely, I don't see that side-effects deep down in an expression 
should stop us transforming it at a high level.

Is there an equivalent to side_effects_p that doesn't recurse? Should 
there be?

Andrew
Richard Sandiford Sept. 27, 2018, 4:19 p.m. | #8
Andrew Stubbs <ams@codesourcery.com> writes:
> On 27/09/18 08:16, Richard Sandiford wrote:

>> On keeping the complexity down:

>> 

>>    if (side_effects_p (x))

>>      return NULL_RTX;

>> 

>> makes this quadratic for chains of unary operations.  Is it really

>> needed?  The code after it simply recurses on operands and doesn't

>> discard anything itself, so it looks like the VEC_MERGE call to

>> side_effects_p would be enough.

>

> The two calls do not check the same thing. The other one checks the 

> other operand of a vec_merge, and this checks the current operand.

>

> I suppose it's safe to discard a VEC_MERGE when the chosen operand 

> contains side effects, but I'm not so sure when the VEC_MERGE itself is 

> an operand to an operator with side effects. I'm having a hard time 

> inventing a scenario in which a PRE_INC could contain a VEC_MERGE, but 

> maybe a volatile MEM or ASM_OPERANDS could do?


But we wouldn't recurse for PRE_INC, MEM or ASM_OPERANDS, since they
have the wrong rtx class.  AFAICT no current unary, binary or ternary
operator has that level of side-effect (and that's a good thing).

We also don't guarantee to preserve FP exceptions as side-effects.

> Conversely, I don't see that side-effects deep down in an expression 

> should stop us transforming it as a high level.

>

> Is there an equivalent to side_effects_p that doesn't recurse? Should 

> there be?


Not aware of an existing function, and it might be useful to have
one at some point.  Just not sure we need it for this.

Richard
Andrew Stubbs Sept. 27, 2018, 9:11 p.m. | #9
On 27/09/18 17:19, Richard Sandiford wrote:
> But we wouldn't recurse for PRE_INC, MEM or ASM_OPERANDS, since they

> have the wrong rtx class.  AFAICT no current unary, binary or ternary

> operator has that level of side-effect (and that's a good thing).


OK, in that case I'll remove it and we can cross that bridge if we come 
to it.

This patch should also address your other concerns.

OK?

Andrew
Simplify vec_merge according to the mask.

This patch was part of the original patch we acquired from Honza and Martin.

It simplifies nested vec_merge operations using the same mask.

Self-tests are included.

2018-09-27  Andrew Stubbs  <ams@codesourcery.com>
	    Jan Hubicka  <jh@suse.cz>
	    Martin Jambor  <mjambor@suse.cz>

	* simplify-rtx.c (simplify_merge_mask): New function.
	(simplify_ternary_operation): Use it, also see if VEC_MERGEs with the
	same masks are used in op1 or op2.
	(test_vec_merge): New function.
	(test_vector_ops): Call test_vec_merge.

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index b4c6883..9bc5386 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -5578,6 +5578,68 @@ simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val)
   return NULL_RTX;
 }
 
+/* Try to simplify X given that it appears within operand OP of a
+   VEC_MERGE operation whose mask is MASK.  X need not use the same
+   vector mode as the VEC_MERGE, but it must have the same number of
+   elements.
+
+   Return the simplified X on success, otherwise return NULL_RTX.  */
+
+rtx
+simplify_merge_mask (rtx x, rtx mask, int op)
+{
+  gcc_assert (VECTOR_MODE_P (GET_MODE (x)));
+  poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x));
+  if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask))
+    {
+      if (side_effects_p (XEXP (x, 1 - op)))
+	return NULL_RTX;
+
+      return XEXP (x, op);
+    }
+  if (UNARY_P (x)
+      && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+      && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits))
+    {
+      rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+      if (top0)
+	return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0,
+				   GET_MODE (XEXP (x, 0)));
+    }
+  if (BINARY_P (x)
+      && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+      && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
+      && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
+      && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits))
+    {
+      rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+      rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
+      if (top0 || top1)
+	return simplify_gen_binary (GET_CODE (x), GET_MODE (x),
+				    top0 ? top0 : XEXP (x, 0),
+				    top1 ? top1 : XEXP (x, 1));
+    }
+  if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY
+      && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+      && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
+      && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
+      && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)
+      && VECTOR_MODE_P (GET_MODE (XEXP (x, 2)))
+      && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))), nunits))
+    {
+      rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+      rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
+      rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op);
+      if (top0 || top1 || top2)
+	return simplify_gen_ternary (GET_CODE (x), GET_MODE (x),
+				     GET_MODE (XEXP (x, 0)),
+				     top0 ? top0 : XEXP (x, 0),
+				     top1 ? top1 : XEXP (x, 1),
+				     top2 ? top2 : XEXP (x, 2));
+    }
+  return NULL_RTX;
+}
+
 
 /* Simplify CODE, an operation with result mode MODE and three operands,
    OP0, OP1, and OP2.  OP0_MODE was the mode of OP0 before it became
@@ -5967,6 +6029,16 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
 	  && !side_effects_p (op2) && !side_effects_p (op1))
 	return op0;
 
+      if (!side_effects_p (op2))
+	{
+	  rtx top0 = simplify_merge_mask (op0, op2, 0);
+	  rtx top1 = simplify_merge_mask (op1, op2, 1);
+	  if (top0 || top1)
+	    return simplify_gen_ternary (code, mode, mode,
+					 top0 ? top0 : op0,
+					 top1 ? top1 : op1, op2);
+	}
+
       break;
 
     default:
@@ -6856,6 +6928,69 @@ test_vector_ops_series (machine_mode mode, rtx scalar_reg)
 					    constm1_rtx));
 }
 
+/* Verify simplify_merge_mask works correctly.  */
+
+static void
+test_vec_merge (machine_mode mode)
+{
+  rtx op0 = make_test_reg (mode);
+  rtx op1 = make_test_reg (mode);
+  rtx op2 = make_test_reg (mode);
+  rtx op3 = make_test_reg (mode);
+  rtx op4 = make_test_reg (mode);
+  rtx op5 = make_test_reg (mode);
+  rtx mask1 = make_test_reg (SImode);
+  rtx mask2 = make_test_reg (SImode);
+  rtx vm1 = gen_rtx_VEC_MERGE (mode, op0, op1, mask1);
+  rtx vm2 = gen_rtx_VEC_MERGE (mode, op2, op3, mask1);
+  rtx vm3 = gen_rtx_VEC_MERGE (mode, op4, op5, mask1);
+
+  /* Simple vec_merge.  */
+  ASSERT_EQ (op0, simplify_merge_mask (vm1, mask1, 0));
+  ASSERT_EQ (op1, simplify_merge_mask (vm1, mask1, 1));
+  ASSERT_EQ (NULL_RTX, simplify_merge_mask (vm1, mask2, 0));
+  ASSERT_EQ (NULL_RTX, simplify_merge_mask (vm1, mask2, 1));
+
+  /* Nested vec_merge.
+     It's tempting to make this simplify right down to opN, but we don't
+     because all the simplify_* functions assume that the operands have
+     already been simplified.  */
+  rtx nvm = gen_rtx_VEC_MERGE (mode, vm1, vm2, mask1);
+  ASSERT_EQ (vm1, simplify_merge_mask (nvm, mask1, 0));
+  ASSERT_EQ (vm2, simplify_merge_mask (nvm, mask1, 1));
+
+  /* Intermediate unary op. */
+  rtx unop = gen_rtx_NOT (mode, vm1);
+  ASSERT_RTX_EQ (gen_rtx_NOT (mode, op0),
+		 simplify_merge_mask (unop, mask1, 0));
+  ASSERT_RTX_EQ (gen_rtx_NOT (mode, op1),
+		 simplify_merge_mask (unop, mask1, 1));
+
+  /* Intermediate binary op. */
+  rtx binop = gen_rtx_PLUS (mode, vm1, vm2);
+  ASSERT_RTX_EQ (gen_rtx_PLUS (mode, op0, op2), 
+		 simplify_merge_mask (binop, mask1, 0));
+  ASSERT_RTX_EQ (gen_rtx_PLUS (mode, op1, op3),
+		 simplify_merge_mask (binop, mask1, 1));
+
+  /* Intermediate ternary op. */
+  rtx tenop = gen_rtx_FMA (mode, vm1, vm2, vm3);
+  ASSERT_RTX_EQ (gen_rtx_FMA (mode, op0, op2, op4),
+		 simplify_merge_mask (tenop, mask1, 0));
+  ASSERT_RTX_EQ (gen_rtx_FMA (mode, op1, op3, op5),
+		 simplify_merge_mask (tenop, mask1, 1));
+
+  /* Side effects.  */
+  rtx badop0 = gen_rtx_PRE_INC (mode, op0);
+  rtx badvm = gen_rtx_VEC_MERGE (mode, badop0, op1, mask1);
+  ASSERT_EQ (badop0, simplify_merge_mask (badvm, mask1, 0));
+  ASSERT_EQ (NULL_RTX, simplify_merge_mask (badvm, mask1, 1));
+
+  /* Called indirectly.  */
+  ASSERT_RTX_EQ (gen_rtx_VEC_MERGE (mode, op0, op3, mask1),
+		 simplify_rtx (nvm));
+}
+
 /* Verify some simplifications involving vectors.  */
 
 static void
@@ -6871,6 +7006,7 @@ test_vector_ops ()
 	  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
 	      && maybe_gt (GET_MODE_NUNITS (mode), 2))
 	    test_vector_ops_series (mode, scalar_reg);
+	  test_vec_merge (mode);
 	}
     }
 }
Richard Sandiford Sept. 28, 2018, 8:11 a.m. | #10
Andrew Stubbs <ams@codesourcery.com> writes:
> On 27/09/18 17:19, Richard Sandiford wrote:

>> But we wouldn't recurse for PRE_INC, MEM or ASM_OPERANDS, since they

>> have the wrong rtx class.  AFAICT no current unary, binary or ternary

>> operator has that level of side-effect (and that's a good thing).

>

> OK, in that case I'll remove it and we can cross that bridge if we come 

> to it.

>

> This patch should also address your other concerns.

>

> OK?


Yes, thanks.

Richard
Andrew Stubbs Sept. 28, 2018, 1:33 p.m. | #11
On 28/09/18 09:11, Richard Sandiford wrote:
> Yes, thanks.


Committed.

Thanks for all the reviews. :-)

Andrew
H.J. Lu Feb. 22, 2019, 2:43 a.m. | #12
On Fri, Sep 28, 2018 at 6:33 AM Andrew Stubbs <ams@codesourcery.com> wrote:
>

> On 28/09/18 09:11, Richard Sandiford wrote:

> > Yes, thanks.

>

> Committed.

>

> Thanks for all the reviews. :-)

>


This caused:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89445


-- 
H.J.

Patch

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 89487f2..6f27bda 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -5578,6 +5578,65 @@  simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val)
   return NULL_RTX;
 }
 
+/* X is an operand number OP of VEC_MERGE operation with MASK.
+   Try to simplify using knowledge that values outside of MASK
+   will not be used.  */
+
+rtx
+simplify_merge_mask (rtx x, rtx mask, int op)
+{
+  gcc_assert (VECTOR_MODE_P (GET_MODE (x)));
+  poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x));
+  if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask))
+    {
+      if (!side_effects_p (XEXP (x, 1 - op)))
+	return XEXP (x, op);
+    }
+  if (side_effects_p (x))
+    return NULL_RTX;
+  if (UNARY_P (x)
+      && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+      && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits))
+    {
+      rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+      if (top0)
+	return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0,
+				   GET_MODE (XEXP (x, 0)));
+    }
+  if (BINARY_P (x)
+      && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+      && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
+      && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
+      && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits))
+    {
+      rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+      rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
+      if (top0 || top1)
+	return simplify_gen_binary (GET_CODE (x), GET_MODE (x),
+				    top0 ? top0 : XEXP (x, 0),
+				    top1 ? top1 : XEXP (x, 1));
+    }
+  if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY
+      && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+      && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
+      && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
+      && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)
+      && VECTOR_MODE_P (GET_MODE (XEXP (x, 2)))
+      && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))), nunits))
+    {
+      rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+      rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
+      rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op);
+      if (top0 || top1)
+	return simplify_gen_ternary (GET_CODE (x), GET_MODE (x),
+				     GET_MODE (XEXP (x, 0)),
+				     top0 ? top0 : XEXP (x, 0),
+				     top1 ? top1 : XEXP (x, 1),
+				     top2 ? top2 : XEXP (x, 2));
+    }
+  return NULL_RTX;
+}
+
 
 /* Simplify CODE, an operation with result mode MODE and three operands,
    OP0, OP1, and OP2.  OP0_MODE was the mode of OP0 before it became
@@ -5967,6 +6026,28 @@  simplify_ternary_operation (enum rtx_code code, machine_mode mode,
 	  && !side_effects_p (op2) && !side_effects_p (op1))
 	return op0;
 
+      if (!side_effects_p (op2))
+	{
+	  rtx top0 = simplify_merge_mask (op0, op2, 0);
+	  rtx top1 = simplify_merge_mask (op1, op2, 1);
+	  if (top0 || top1)
+	    return simplify_gen_ternary (code, mode, mode,
+					 top0 ? top0 : op0,
+					 top1 ? top1 : op1, op2);
+	}
+
+      if (GET_CODE (op0) == VEC_MERGE
+	  && rtx_equal_p (op2, XEXP (op0, 2))
+	  && !side_effects_p (XEXP (op0, 1)) && !side_effects_p (op2))
+	return simplify_gen_ternary (code, mode, mode,
+				     XEXP (op0, 0), op1, op2);
+
+      if (GET_CODE (op1) == VEC_MERGE
+	  && rtx_equal_p (op2, XEXP (op1, 2))
+	  && !side_effects_p (XEXP (op0, 0)) && !side_effects_p (op2))
+	return simplify_gen_ternary (code, mode, mode,
+				     XEXP (op0, 1), op1, op2);
+
       break;
 
     default: