Extra subreg fold for variable-length CONST_VECTORs

Message ID 87fu7la5jb.fsf@linaro.org
State New
Headers show
Series
  • Extra subreg fold for variable-length CONST_VECTORs
Related show

Commit Message

Richard Sandiford Jan. 4, 2018, 5:38 p.m.
The SVE support for the new CONST_VECTOR encoding needs to be able
to extract the first N bits of the vector and duplicate it.  This patch
adds a simplify_subreg rule for that.

The code is covered by the gcc.target/aarch64/sve_slp_*.c tests.

Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.
Also tested by comparing the before and after assembly output for at
least one target per CPU directory.  OK to install?

Richard


2018-01-04  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* simplify-rtx.c (simplify_immed_subreg): Add an inner_bytes
	parameter and use it instead of GET_MODE_SIZE (innermode).  Use
	inner_bytes * BITS_PER_UNIT instead of GET_MODE_BITSIZE (innermode).
	Use CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode)) instead of
	GET_MODE_NUNITS (innermode).  Also add a first_elem parameter.
	Change innermode from fixed_mode_size to machine_mode.
	(simplify_subreg): Update call accordingly.  Handle a constant-sized
	subreg of a variable-length CONST_VECTOR.

Comments

Richard Sandiford Jan. 12, 2018, 10 a.m. | #1
Ping.

FWIW, the SLP test failures it fixes were ICEs rather than code-quality
tests, so this is a correctness fix rather than an optimisation.

Thanks,
Richard

Richard Sandiford <richard.sandiford@linaro.org> writes:
> The SVE support for the new CONST_VECTOR encoding needs to be able

> to extract the first N bits of the vector and duplicate it.  This patch

> adds a simplify_subreg rule for that.

>

> The code is covered by the gcc.target/aarch64/sve_slp_*.c tests.

>

> Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.

> Also tested by comparing the before and after assembly output for at

> least one target per CPU directory.  OK to install?

>

> Richard

>

>

> 2018-01-04  Richard Sandiford  <richard.sandiford@linaro.org>

>

> gcc/

> 	* simplify-rtx.c (simplify_immed_subreg): Add an inner_bytes

> 	parameter and use it instead of GET_MODE_SIZE (innermode).  Use

> 	inner_bytes * BITS_PER_UNIT instead of GET_MODE_BITSIZE (innermode).

> 	Use CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode)) instead of

> 	GET_MODE_NUNITS (innermode).  Also add a first_elem parameter.

> 	Change innermode from fixed_mode_size to machine_mode.

> 	(simplify_subreg): Update call accordingly.  Handle a constant-sized

> 	subreg of a variable-length CONST_VECTOR.

>

> Index: gcc/simplify-rtx.c

> ===================================================================

> --- gcc/simplify-rtx.c	2018-01-03 21:42:44.569646782 +0000

> +++ gcc/simplify-rtx.c	2018-01-04 17:35:25.747473457 +0000

> @@ -5971,13 +5971,16 @@ simplify_ternary_operation (enum rtx_cod

>     or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or

>     CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR.

>  

> -   Works by unpacking OP into a collection of 8-bit values

> +   Works by unpacking INNER_BYTES bytes of OP into a collection of 8-bit values

>     represented as a little-endian array of 'unsigned char', selecting by BYTE,

> -   and then repacking them again for OUTERMODE.  */

> +   and then repacking them again for OUTERMODE.  If OP is a CONST_VECTOR,

> +   FIRST_ELEM is the number of the first element to extract, otherwise

> +   FIRST_ELEM is ignored.  */

>  

>  static rtx

>  simplify_immed_subreg (fixed_size_mode outermode, rtx op,

> -		       fixed_size_mode innermode, unsigned int byte)

> +		       machine_mode innermode, unsigned int byte,

> +		       unsigned int first_elem, unsigned int inner_bytes)

>  {

>    enum {

>      value_bit = 8,

> @@ -6007,13 +6010,13 @@ simplify_immed_subreg (fixed_size_mode o

>  

>    /* We support any size mode.  */

>    max_bitsize = MAX (GET_MODE_BITSIZE (outermode),

> -		     GET_MODE_BITSIZE (innermode));

> +		     inner_bytes * BITS_PER_UNIT);

>  

>    /* Unpack the value.  */

>  

>    if (GET_CODE (op) == CONST_VECTOR)

>      {

> -      num_elem = GET_MODE_NUNITS (innermode);

> +      num_elem = CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode));

>        elem_bitsize = GET_MODE_UNIT_BITSIZE (innermode);

>      }

>    else

> @@ -6030,7 +6033,7 @@ simplify_immed_subreg (fixed_size_mode o

>      {

>        unsigned char * vp;

>        rtx el = (GET_CODE (op) == CONST_VECTOR

> -		? CONST_VECTOR_ELT (op, elem)

> +		? CONST_VECTOR_ELT (op, first_elem + elem)

>  		: op);

>  

>        /* Vectors are kept in target memory order.  (This is probably

> @@ -6157,10 +6160,9 @@ simplify_immed_subreg (fixed_size_mode o

>    /* Renumber BYTE so that the least-significant byte is byte 0.  A special

>       case is paradoxical SUBREGs, which shouldn't be adjusted since they

>       will already have offset 0.  */

> -  if (GET_MODE_SIZE (innermode) >= GET_MODE_SIZE (outermode))

> +  if (inner_bytes >= GET_MODE_SIZE (outermode))

>      {

> -      unsigned ibyte = (GET_MODE_SIZE (innermode) - GET_MODE_SIZE (outermode)

> -			- byte);

> +      unsigned ibyte = inner_bytes - GET_MODE_SIZE (outermode) - byte;

>        unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;

>        unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;

>        byte = (subword_byte % UNITS_PER_WORD

> @@ -6169,7 +6171,7 @@ simplify_immed_subreg (fixed_size_mode o

>  

>    /* BYTE should still be inside OP.  (Note that BYTE is unsigned,

>       so if it's become negative it will instead be very large.)  */

> -  gcc_assert (byte < GET_MODE_SIZE (innermode));

> +  gcc_assert (byte < inner_bytes);

>  

>    /* Convert from bytes to chunks of size value_bit.  */

>    value_start = byte * (BITS_PER_UNIT / value_bit);

> @@ -6358,7 +6360,18 @@ simplify_subreg (machine_mode outermode,

>        if (is_a <fixed_size_mode> (outermode, &fs_outermode)

>  	  && is_a <fixed_size_mode> (innermode, &fs_innermode)

>  	  && byte.is_constant (&cbyte))

> -	return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte);

> +	return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte,

> +				      0, GET_MODE_SIZE (fs_innermode));

> +

> +      /* Handle constant-sized outer modes and variable-sized inner modes.  */

> +      unsigned HOST_WIDE_INT first_elem;

> +      if (GET_CODE (op) == CONST_VECTOR

> +	  && is_a <fixed_size_mode> (outermode, &fs_outermode)

> +	  && constant_multiple_p (byte, GET_MODE_UNIT_SIZE (innermode),

> +				  &first_elem))

> +	return simplify_immed_subreg (fs_outermode, op, innermode, 0,

> +				      first_elem,

> +				      GET_MODE_SIZE (fs_outermode));

>  

>        return NULL_RTX;

>      }
Jeff Law Jan. 13, 2018, 3:42 p.m. | #2
On 01/12/2018 03:00 AM, Richard Sandiford wrote:
> Ping.

> 

> FWIW, the SLP test failures it fixes were ICEs rather than code-quality

> tests, so this is a correctness fix rather than an optimisation.

> 

> Thanks,

> Richard

> 

> Richard Sandiford <richard.sandiford@linaro.org> writes:

>> The SVE support for the new CONST_VECTOR encoding needs to be able

>> to extract the first N bits of the vector and duplicate it.  This patch

>> adds a simplify_subreg rule for that.

>>

>> The code is covered by the gcc.target/aarch64/sve_slp_*.c tests.

>>

>> Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.

>> Also tested by comparing the before and after assembly output for at

>> least one target per CPU directory.  OK to install?

>>

>> Richard

>>

>>

>> 2018-01-04  Richard Sandiford  <richard.sandiford@linaro.org>

>>

>> gcc/

>> 	* simplify-rtx.c (simplify_immed_subreg): Add an inner_bytes

>> 	parameter and use it instead of GET_MODE_SIZE (innermode).  Use

>> 	inner_bytes * BITS_PER_UNIT instead of GET_MODE_BITSIZE (innermode).

>> 	Use CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode)) instead of

>> 	GET_MODE_NUNITS (innermode).  Also add a first_elem parameter.

>> 	Change innermode from fixed_mode_size to machine_mode.

>> 	(simplify_subreg): Update call accordingly.  Handle a constant-sized

>> 	subreg of a variable-length CONST_VECTOR.

OK.
jeff

Patch

Index: gcc/simplify-rtx.c
===================================================================
--- gcc/simplify-rtx.c	2018-01-03 21:42:44.569646782 +0000
+++ gcc/simplify-rtx.c	2018-01-04 17:35:25.747473457 +0000
@@ -5971,13 +5971,16 @@  simplify_ternary_operation (enum rtx_cod
    or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or
    CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR.
 
-   Works by unpacking OP into a collection of 8-bit values
+   Works by unpacking INNER_BYTES bytes of OP into a collection of 8-bit values
    represented as a little-endian array of 'unsigned char', selecting by BYTE,
-   and then repacking them again for OUTERMODE.  */
+   and then repacking them again for OUTERMODE.  If OP is a CONST_VECTOR,
+   FIRST_ELEM is the number of the first element to extract, otherwise
+   FIRST_ELEM is ignored.  */
 
 static rtx
 simplify_immed_subreg (fixed_size_mode outermode, rtx op,
-		       fixed_size_mode innermode, unsigned int byte)
+		       machine_mode innermode, unsigned int byte,
+		       unsigned int first_elem, unsigned int inner_bytes)
 {
   enum {
     value_bit = 8,
@@ -6007,13 +6010,13 @@  simplify_immed_subreg (fixed_size_mode o
 
   /* We support any size mode.  */
   max_bitsize = MAX (GET_MODE_BITSIZE (outermode),
-		     GET_MODE_BITSIZE (innermode));
+		     inner_bytes * BITS_PER_UNIT);
 
   /* Unpack the value.  */
 
   if (GET_CODE (op) == CONST_VECTOR)
     {
-      num_elem = GET_MODE_NUNITS (innermode);
+      num_elem = CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode));
       elem_bitsize = GET_MODE_UNIT_BITSIZE (innermode);
     }
   else
@@ -6030,7 +6033,7 @@  simplify_immed_subreg (fixed_size_mode o
     {
       unsigned char * vp;
       rtx el = (GET_CODE (op) == CONST_VECTOR
-		? CONST_VECTOR_ELT (op, elem)
+		? CONST_VECTOR_ELT (op, first_elem + elem)
 		: op);
 
       /* Vectors are kept in target memory order.  (This is probably
@@ -6157,10 +6160,9 @@  simplify_immed_subreg (fixed_size_mode o
   /* Renumber BYTE so that the least-significant byte is byte 0.  A special
      case is paradoxical SUBREGs, which shouldn't be adjusted since they
      will already have offset 0.  */
-  if (GET_MODE_SIZE (innermode) >= GET_MODE_SIZE (outermode))
+  if (inner_bytes >= GET_MODE_SIZE (outermode))
     {
-      unsigned ibyte = (GET_MODE_SIZE (innermode) - GET_MODE_SIZE (outermode)
-			- byte);
+      unsigned ibyte = inner_bytes - GET_MODE_SIZE (outermode) - byte;
       unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;
       unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;
       byte = (subword_byte % UNITS_PER_WORD
@@ -6169,7 +6171,7 @@  simplify_immed_subreg (fixed_size_mode o
 
   /* BYTE should still be inside OP.  (Note that BYTE is unsigned,
      so if it's become negative it will instead be very large.)  */
-  gcc_assert (byte < GET_MODE_SIZE (innermode));
+  gcc_assert (byte < inner_bytes);
 
   /* Convert from bytes to chunks of size value_bit.  */
   value_start = byte * (BITS_PER_UNIT / value_bit);
@@ -6358,7 +6360,18 @@  simplify_subreg (machine_mode outermode,
       if (is_a <fixed_size_mode> (outermode, &fs_outermode)
 	  && is_a <fixed_size_mode> (innermode, &fs_innermode)
 	  && byte.is_constant (&cbyte))
-	return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte);
+	return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte,
+				      0, GET_MODE_SIZE (fs_innermode));
+
+      /* Handle constant-sized outer modes and variable-sized inner modes.  */
+      unsigned HOST_WIDE_INT first_elem;
+      if (GET_CODE (op) == CONST_VECTOR
+	  && is_a <fixed_size_mode> (outermode, &fs_outermode)
+	  && constant_multiple_p (byte, GET_MODE_UNIT_SIZE (innermode),
+				  &first_elem))
+	return simplify_immed_subreg (fs_outermode, op, innermode, 0,
+				      first_elem,
+				      GET_MODE_SIZE (fs_outermode));
 
       return NULL_RTX;
     }