[01/25] Handle vectors that don't fit in an integer.

Message ID c6dd9ca09af7c41d10feed487994c768694eef50.1536144068.git.ams@codesourcery.com
State New
Headers show
Series
  • AMD GCN Port
Related show

Commit Message

Andrew Stubbs Sept. 5, 2018, 11:48 a.m.
GCN vector sizes range between 64 and 512 bytes, none of which have
correspondingly sized integer modes.  This breaks a number of assumptions
throughout the compiler, but I don't really want to create modes just for this
purpose.

Instead, this patch fixes up the cases that I've found, so far, such that the
compiler tries something else, or fails to optimize, rather than just ICE.

2018-09-05  Andrew Stubbs  <ams@codesourcery.com>
            Kwok Cheung Yeung  <kcy@codesourcery.com>
	    Jan Hubicka  <jh@suse.cz>
	    Martin Jambor  <mjambor@suse.cz>

	gcc/
	* combine.c (gen_lowpart_or_truncate): Return clobber if there is
	not a integer mode if the same size as x.
	(gen_lowpart_for_combine): Fail if there is no integer mode of the
	same size.
	* expr.c (expand_expr_real_1): Force first operand to be in memory
	if it is a vector register and the result is in	BLKmode.
	* tree-vect-stmts.c (vectorizable_store): Don't ICE when
	int_mode_for_size fails.
	(vectorizable_load): Likewise.
---
 gcc/combine.c         | 13 ++++++++++++-
 gcc/expr.c            |  8 ++++++++
 gcc/tree-vect-stmts.c |  8 ++++----
 3 files changed, 24 insertions(+), 5 deletions(-)

Comments

Jakub Jelinek Sept. 5, 2018, 11:54 a.m. | #1
On Wed, Sep 05, 2018 at 12:48:49PM +0100, ams@codesourcery.com wrote:
> +++ b/gcc/combine.c

> @@ -8621,7 +8621,13 @@ gen_lowpart_or_truncate (machine_mode mode, rtx x)

>      {

>        /* Bit-cast X into an integer mode.  */

>        if (!SCALAR_INT_MODE_P (GET_MODE (x)))

> -	x = gen_lowpart (int_mode_for_mode (GET_MODE (x)).require (), x);

> +	{

> +	  enum machine_mode imode =

> +	    int_mode_for_mode (GET_MODE (x)).require ();


Just a formatting nit, not a review - = should be on the next line.

	Jakub
Richard Sandiford Sept. 14, 2018, 3:37 p.m. | #2
<ams@codesourcery.com> writes:
> GCN vector sizes range between 64 and 512 bytes, none of which have

> correspondingly sized integer modes.  This breaks a number of assumptions

> throughout the compiler, but I don't really want to create modes just for this

> purpose.

>

> Instead, this patch fixes up the cases that I've found, so far, such that the

> compiler tries something else, or fails to optimize, rather than just ICE.

>

> 2018-09-05  Andrew Stubbs  <ams@codesourcery.com>

>             Kwok Cheung Yeung  <kcy@codesourcery.com>

> 	    Jan Hubicka  <jh@suse.cz>

> 	    Martin Jambor  <mjambor@suse.cz>

>

> 	gcc/

> 	* combine.c (gen_lowpart_or_truncate): Return clobber if there is

> 	not a integer mode if the same size as x.

> 	(gen_lowpart_for_combine): Fail if there is no integer mode of the

> 	same size.

> 	* expr.c (expand_expr_real_1): Force first operand to be in memory

> 	if it is a vector register and the result is in	BLKmode.

> 	* tree-vect-stmts.c (vectorizable_store): Don't ICE when

> 	int_mode_for_size fails.

> 	(vectorizable_load): Likewise.

> ---

>  gcc/combine.c         | 13 ++++++++++++-

>  gcc/expr.c            |  8 ++++++++

>  gcc/tree-vect-stmts.c |  8 ++++----

>  3 files changed, 24 insertions(+), 5 deletions(-)

>

> diff --git a/gcc/combine.c b/gcc/combine.c

> index a2649b6..cbf9dae 100644

> --- a/gcc/combine.c

> +++ b/gcc/combine.c

> @@ -8621,7 +8621,13 @@ gen_lowpart_or_truncate (machine_mode mode, rtx x)

>      {

>        /* Bit-cast X into an integer mode.  */

>        if (!SCALAR_INT_MODE_P (GET_MODE (x)))

> -	x = gen_lowpart (int_mode_for_mode (GET_MODE (x)).require (), x);

> +	{

> +	  enum machine_mode imode =

> +	    int_mode_for_mode (GET_MODE (x)).require ();

> +	  if (imode == BLKmode)

> +	    return gen_rtx_CLOBBER (mode, const0_rtx);

> +	  x = gen_lowpart (imode, x);


require () will ICE if there isn't an integer mode and always returns
a scalar_int_mode, so this looks like a no-op.  I think you want
something like:

    scalar_int_mode imode;
    if (!int_mode_for_mode (GET_MODE (x)).exists (&imode))
      ...

> @@ -11698,6 +11704,11 @@ gen_lowpart_for_combine (machine_mode omode, rtx x)

>    if (omode == imode)

>      return x;

>  

> +  /* This can happen when there is no integer mode corresponding

> +     to a size of vector mode.  */

> +  if (omode == BLKmode)

> +    goto fail;

> +

>    /* We can only support MODE being wider than a word if X is a

>       constant integer or has a mode the same size.  */

>    if (maybe_gt (GET_MODE_SIZE (omode), UNITS_PER_WORD)


This seems like it's working around a bug in ther caller.

> diff --git a/gcc/expr.c b/gcc/expr.c

> index cd5cf12..776254a 100644

> --- a/gcc/expr.c

> +++ b/gcc/expr.c

> @@ -10569,6 +10569,14 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,

>  			  || maybe_gt (bitpos + bitsize,

>  				       GET_MODE_BITSIZE (mode2)));

>  

> +	/* If the result is in BLKmode and the underlying object is a

> +	   vector in a register, and the size of the vector is larger than

> +	   the largest integer mode, then we must force OP0 to be in memory

> +	   as this is assumed in later code.  */

> +	if (REG_P (op0) && VECTOR_MODE_P (mode2) && mode == BLKmode

> +	    && maybe_gt (bitsize, MAX_FIXED_MODE_SIZE))

> +	  must_force_mem = 1;

> +

>  	/* Handle CONCAT first.  */

>  	if (GET_CODE (op0) == CONCAT && !must_force_mem)

>  	  {


Are you sure this is still needed after:

2018-06-04  Richard Sandiford  <richard.sandiford@linaro.org>

	* expr.c (expand_expr_real_1): Force the operand into memory if
	its TYPE_MODE is BLKmode and if there is no integer mode for
	the number of bits being extracted.

If so, what case is it handling differently?

> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c

> index 8d94fca..607a2bd 100644

> --- a/gcc/tree-vect-stmts.c

> +++ b/gcc/tree-vect-stmts.c

> @@ -6702,12 +6702,12 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,

>  		     supported.  */

>  		  unsigned lsize

>  		    = group_size * GET_MODE_BITSIZE (elmode);

> -		  elmode = int_mode_for_size (lsize, 0).require ();

>  		  unsigned int lnunits = const_nunits / group_size;

>  		  /* If we can't construct such a vector fall back to

>  		     element extracts from the original vector type and

>  		     element size stores.  */

> -		  if (mode_for_vector (elmode, lnunits).exists (&vmode)

> +		  if (int_mode_for_size (lsize, 0).exists (&elmode)

> +		      && mode_for_vector (elmode, lnunits).exists (&vmode)

>  		      && VECTOR_MODE_P (vmode)

>  		      && targetm.vector_mode_supported_p (vmode)

>  		      && (convert_optab_handler (vec_extract_optab,

> @@ -7839,11 +7839,11 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,

>  		     to a larger load.  */

>  		  unsigned lsize

>  		    = group_size * TYPE_PRECISION (TREE_TYPE (vectype));

> -		  elmode = int_mode_for_size (lsize, 0).require ();

>  		  unsigned int lnunits = const_nunits / group_size;

>  		  /* If we can't construct such a vector fall back to

>  		     element loads of the original vector type.  */

> -		  if (mode_for_vector (elmode, lnunits).exists (&vmode)

> +		  if (int_mode_for_size (lsize, 0).exists (&elmode)

> +		      && mode_for_vector (elmode, lnunits).exists (&vmode)

>  		      && VECTOR_MODE_P (vmode)

>  		      && targetm.vector_mode_supported_p (vmode)

>  		      && (convert_optab_handler (vec_init_optab, vmode, elmode)


These two are OK independently of the rest (if that's convenient).

Thanks,
Richard
Andrew Stubbs Nov. 15, 2018, 5:20 p.m. | #3
On 14/09/2018 16:37, Richard Sandiford wrote:
>> diff --git a/gcc/combine.c b/gcc/combine.c

>> index a2649b6..cbf9dae 100644

>> --- a/gcc/combine.c

>> +++ b/gcc/combine.c

>> @@ -8621,7 +8621,13 @@ gen_lowpart_or_truncate (machine_mode mode, rtx x)

>>       {

>>         /* Bit-cast X into an integer mode.  */

>>         if (!SCALAR_INT_MODE_P (GET_MODE (x)))

>> -	x = gen_lowpart (int_mode_for_mode (GET_MODE (x)).require (), x);

>> +	{

>> +	  enum machine_mode imode =

>> +	    int_mode_for_mode (GET_MODE (x)).require ();

>> +	  if (imode == BLKmode)

>> +	    return gen_rtx_CLOBBER (mode, const0_rtx);

>> +	  x = gen_lowpart (imode, x);

> 

> require () will ICE if there isn't an integer mode and always returns

> a scalar_int_mode, so this looks like a no-op.  I think you want

> something like:


This is a patch that I inherited from Honza and Martin and didn't know 
what testcase it fixed.

I think that it being broken shows that it's no longer necessary, and 
reverting the patch and retesting confirms this suspicion.

I've removed it from the patch.

>> @@ -11698,6 +11704,11 @@ gen_lowpart_for_combine (machine_mode omode, rtx x)

>>     if (omode == imode)

>>       return x;

>>   

>> +  /* This can happen when there is no integer mode corresponding

>> +     to a size of vector mode.  */

>> +  if (omode == BLKmode)

>> +    goto fail;

>> +

>>     /* We can only support MODE being wider than a word if X is a

>>        constant integer or has a mode the same size.  */

>>     if (maybe_gt (GET_MODE_SIZE (omode), UNITS_PER_WORD)

> 

> This seems like it's working around a bug in ther caller.


Again, I inherited this hunk. Removing it and retesting shows no 
regressions, so I'm dropping it.

>> diff --git a/gcc/expr.c b/gcc/expr.c

>> index cd5cf12..776254a 100644

>> --- a/gcc/expr.c

>> +++ b/gcc/expr.c

>> @@ -10569,6 +10569,14 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,

>>   			  || maybe_gt (bitpos + bitsize,

>>   				       GET_MODE_BITSIZE (mode2)));

>>   

>> +	/* If the result is in BLKmode and the underlying object is a

>> +	   vector in a register, and the size of the vector is larger than

>> +	   the largest integer mode, then we must force OP0 to be in memory

>> +	   as this is assumed in later code.  */

>> +	if (REG_P (op0) && VECTOR_MODE_P (mode2) && mode == BLKmode

>> +	    && maybe_gt (bitsize, MAX_FIXED_MODE_SIZE))

>> +	  must_force_mem = 1;

>> +

>>   	/* Handle CONCAT first.  */

>>   	if (GET_CODE (op0) == CONCAT && !must_force_mem)

>>   	  {

> 

> Are you sure this is still needed after:

> 

> 2018-06-04  Richard Sandiford  <richard.sandiford@linaro.org>

> 

> 	* expr.c (expand_expr_real_1): Force the operand into memory if

> 	its TYPE_MODE is BLKmode and if there is no integer mode for

> 	the number of bits being extracted.


Apparently you're right about this. Hunk dropped.

>> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c

>> index 8d94fca..607a2bd 100644

>> --- a/gcc/tree-vect-stmts.c

>> +++ b/gcc/tree-vect-stmts.c

>> @@ -6702,12 +6702,12 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,

>>   		     supported.  */

>>   		  unsigned lsize

>>   		    = group_size * GET_MODE_BITSIZE (elmode);

>> -		  elmode = int_mode_for_size (lsize, 0).require ();

>>   		  unsigned int lnunits = const_nunits / group_size;

>>   		  /* If we can't construct such a vector fall back to

>>   		     element extracts from the original vector type and

>>   		     element size stores.  */

>> -		  if (mode_for_vector (elmode, lnunits).exists (&vmode)

>> +		  if (int_mode_for_size (lsize, 0).exists (&elmode)

>> +		      && mode_for_vector (elmode, lnunits).exists (&vmode)

>>   		      && VECTOR_MODE_P (vmode)

>>   		      && targetm.vector_mode_supported_p (vmode)

>>   		      && (convert_optab_handler (vec_extract_optab,

>> @@ -7839,11 +7839,11 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,

>>   		     to a larger load.  */

>>   		  unsigned lsize

>>   		    = group_size * TYPE_PRECISION (TREE_TYPE (vectype));

>> -		  elmode = int_mode_for_size (lsize, 0).require ();

>>   		  unsigned int lnunits = const_nunits / group_size;

>>   		  /* If we can't construct such a vector fall back to

>>   		     element loads of the original vector type.  */

>> -		  if (mode_for_vector (elmode, lnunits).exists (&vmode)

>> +		  if (int_mode_for_size (lsize, 0).exists (&elmode)

>> +		      && mode_for_vector (elmode, lnunits).exists (&vmode)

>>   		      && VECTOR_MODE_P (vmode)

>>   		      && targetm.vector_mode_supported_p (vmode)

>>   		      && (convert_optab_handler (vec_init_optab, vmode, elmode)

> 

> These two are OK independently of the rest (if that's convenient).


Thanks, I've committed the attached. These are the only parts of the 
patch that remain. I've confirmed that there are failures without these 
hunks.

Andrew
Handle vectors that don't fit in an integer.

GCN vector sizes range between 64 and 512 bytes, none of which have
correspondingly sized integer modes.  This breaks a number of assumptions
throughout the compiler, but I don't really want to create modes just for this
purpose.

Instead, this patch fixes up the cases that I've found, so far, such that the
compiler tries something else, or fails to optimize, rather than just ICE.

2018-11-15  Andrew Stubbs  <ams@codesourcery.com>
            Kwok Cheung Yeung  <kcy@codesourcery.com>

	gcc/
	* tree-vect-stmts.c (vectorizable_store): Don't ICE when
	int_mode_for_size fails.
	(vectorizable_load): Likewise.

diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 75d77d2..3509d29 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -6672,12 +6672,12 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 		     supported.  */
 		  unsigned lsize
 		    = group_size * GET_MODE_BITSIZE (elmode);
-		  elmode = int_mode_for_size (lsize, 0).require ();
 		  unsigned int lnunits = const_nunits / group_size;
 		  /* If we can't construct such a vector fall back to
 		     element extracts from the original vector type and
 		     element size stores.  */
-		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
+		  if (int_mode_for_size (lsize, 0).exists (&elmode)
+		      && mode_for_vector (elmode, lnunits).exists (&vmode)
 		      && VECTOR_MODE_P (vmode)
 		      && targetm.vector_mode_supported_p (vmode)
 		      && (convert_optab_handler (vec_extract_optab,
@@ -7806,11 +7806,11 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 		     to a larger load.  */
 		  unsigned lsize
 		    = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
-		  elmode = int_mode_for_size (lsize, 0).require ();
 		  unsigned int lnunits = const_nunits / group_size;
 		  /* If we can't construct such a vector fall back to
 		     element loads of the original vector type.  */
-		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
+		  if (int_mode_for_size (lsize, 0).exists (&elmode)
+		      && mode_for_vector (elmode, lnunits).exists (&vmode)
 		      && VECTOR_MODE_P (vmode)
 		      && targetm.vector_mode_supported_p (vmode)
 		      && (convert_optab_handler (vec_init_optab, vmode, elmode)

Patch

diff --git a/gcc/combine.c b/gcc/combine.c
index a2649b6..cbf9dae 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -8621,7 +8621,13 @@  gen_lowpart_or_truncate (machine_mode mode, rtx x)
     {
       /* Bit-cast X into an integer mode.  */
       if (!SCALAR_INT_MODE_P (GET_MODE (x)))
-	x = gen_lowpart (int_mode_for_mode (GET_MODE (x)).require (), x);
+	{
+	  enum machine_mode imode =
+	    int_mode_for_mode (GET_MODE (x)).require ();
+	  if (imode == BLKmode)
+	    return gen_rtx_CLOBBER (mode, const0_rtx);
+	  x = gen_lowpart (imode, x);
+	}
       x = simplify_gen_unary (TRUNCATE, int_mode_for_mode (mode).require (),
 			      x, GET_MODE (x));
     }
@@ -11698,6 +11704,11 @@  gen_lowpart_for_combine (machine_mode omode, rtx x)
   if (omode == imode)
     return x;
 
+  /* This can happen when there is no integer mode corresponding
+     to a size of vector mode.  */
+  if (omode == BLKmode)
+    goto fail;
+
   /* We can only support MODE being wider than a word if X is a
      constant integer or has a mode the same size.  */
   if (maybe_gt (GET_MODE_SIZE (omode), UNITS_PER_WORD)
diff --git a/gcc/expr.c b/gcc/expr.c
index cd5cf12..776254a 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -10569,6 +10569,14 @@  expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
 			  || maybe_gt (bitpos + bitsize,
 				       GET_MODE_BITSIZE (mode2)));
 
+	/* If the result is in BLKmode and the underlying object is a
+	   vector in a register, and the size of the vector is larger than
+	   the largest integer mode, then we must force OP0 to be in memory
+	   as this is assumed in later code.  */
+	if (REG_P (op0) && VECTOR_MODE_P (mode2) && mode == BLKmode
+	    && maybe_gt (bitsize, MAX_FIXED_MODE_SIZE))
+	  must_force_mem = 1;
+
 	/* Handle CONCAT first.  */
 	if (GET_CODE (op0) == CONCAT && !must_force_mem)
 	  {
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 8d94fca..607a2bd 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -6702,12 +6702,12 @@  vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 		     supported.  */
 		  unsigned lsize
 		    = group_size * GET_MODE_BITSIZE (elmode);
-		  elmode = int_mode_for_size (lsize, 0).require ();
 		  unsigned int lnunits = const_nunits / group_size;
 		  /* If we can't construct such a vector fall back to
 		     element extracts from the original vector type and
 		     element size stores.  */
-		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
+		  if (int_mode_for_size (lsize, 0).exists (&elmode)
+		      && mode_for_vector (elmode, lnunits).exists (&vmode)
 		      && VECTOR_MODE_P (vmode)
 		      && targetm.vector_mode_supported_p (vmode)
 		      && (convert_optab_handler (vec_extract_optab,
@@ -7839,11 +7839,11 @@  vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 		     to a larger load.  */
 		  unsigned lsize
 		    = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
-		  elmode = int_mode_for_size (lsize, 0).require ();
 		  unsigned int lnunits = const_nunits / group_size;
 		  /* If we can't construct such a vector fall back to
 		     element loads of the original vector type.  */
-		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
+		  if (int_mode_for_size (lsize, 0).exists (&elmode)
+		      && mode_for_vector (elmode, lnunits).exists (&vmode)
 		      && VECTOR_MODE_P (vmode)
 		      && targetm.vector_mode_supported_p (vmode)
 		      && (convert_optab_handler (vec_init_optab, vmode, elmode)