PR84033, powerpc64le -moptimize-swaps bad code with vec_vbpermq

Message ID 20180125100921.GV20622@bubble.grove.modra.org
State New
Headers show
Series
  • PR84033, powerpc64le -moptimize-swaps bad code with vec_vbpermq
Related show

Commit Message

Alan Modra Jan. 25, 2018, 10:09 a.m.
vbpermq produces its output in bits 48..63 of the target vector reg,
so the output cannot be lane swapped.  Bootstrapped and regression
tested powerpc64le-linux.  OK to apply mainline, and backport to the
branches?

gcc/
	PR target/84033
	* config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Exclude
	UNSPEC_VBPERMQ.  Sort other unspecs.
gcc/testsuite/
	PR target/84033
	* gcc.target/powerpc/swaps-p8-46.c: New.


-- 
Alan Modra
Australia Development Lab, IBM

Comments

Bill Schmidt Jan. 25, 2018, 2:01 p.m. | #1
On Jan 25, 2018, at 4:09 AM, Alan Modra <amodra@gmail.com> wrote:
> 

> vbpermq produces its output in bits 48..63 of the target vector reg,

> so the output cannot be lane swapped.  Bootstrapped and regression

> tested powerpc64le-linux.  OK to apply mainline, and backport to the

> branches?


I can't approve, but FWIW, I agree with this patch.

Bill

> 

> gcc/

> 	PR target/84033

> 	* config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Exclude

> 	UNSPEC_VBPERMQ.  Sort other unspecs.

> gcc/testsuite/

> 	PR target/84033

> 	* gcc.target/powerpc/swaps-p8-46.c: New.

> 

> diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c

> index 1f95290..ffcbba9 100644

> --- a/gcc/config/rs6000/rs6000-p8swap.c

> +++ b/gcc/config/rs6000/rs6000-p8swap.c

> @@ -741,6 +741,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)

> 	  {

> 	  default:

> 	    break;

> +	  case UNSPEC_VBPERMQ:

> 	  case UNSPEC_VMRGH_DIRECT:

> 	  case UNSPEC_VMRGL_DIRECT:

> 	  case UNSPEC_VPACK_SIGN_SIGN_SAT:

> @@ -762,8 +763,14 @@ rtx_is_swappable_p (rtx op, unsigned int *special)

> 	  case UNSPEC_VSUMSWS:

> 	  case UNSPEC_VSUMSWS_DIRECT:

> 	  case UNSPEC_VSX_CONCAT:

> +	  case UNSPEC_VSX_CVDPSPN:

> +	  case UNSPEC_VSX_CVSPDP:

> +	  case UNSPEC_VSX_CVSPDPN:

> +	  case UNSPEC_VSX_EXTRACT:

> 	  case UNSPEC_VSX_SET:

> 	  case UNSPEC_VSX_SLDWI:

> +	  case UNSPEC_VSX_VEC_INIT:

> +	  case UNSPEC_VSX_VSLO:

> 	  case UNSPEC_VUNPACK_HI_SIGN:

> 	  case UNSPEC_VUNPACK_HI_SIGN_DIRECT:

> 	  case UNSPEC_VUNPACK_LO_SIGN:

> @@ -774,12 +781,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special)

> 	  case UNSPEC_VUPKLPX:

> 	  case UNSPEC_VUPKLS_V4SF:

> 	  case UNSPEC_VUPKLU_V4SF:

> -	  case UNSPEC_VSX_CVDPSPN:

> -	  case UNSPEC_VSX_CVSPDP:

> -	  case UNSPEC_VSX_CVSPDPN:

> -	  case UNSPEC_VSX_EXTRACT:

> -	  case UNSPEC_VSX_VSLO:

> -	  case UNSPEC_VSX_VEC_INIT:

> 	    return 0;

> 	  case UNSPEC_VSPLT_DIRECT:

> 	  case UNSPEC_VSX_XXSPLTD:

> diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c

> new file mode 100644

> index 0000000..23494b6

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c

> @@ -0,0 +1,34 @@

> +/* { dg-do run { target { powerpc64le-*-* } } } */

> +/* { dg-require-effective-target powerpc_p8vector_ok } */

> +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */

> +/* { dg-options "-mcpu=power8 -O2 " } */

> +

> +typedef __attribute__ ((__aligned__ (8))) unsigned long long __m64;

> +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));

> +

> +/* PR84033.  Extracted from xmmintrin.h but with a pointer param to

> +   allow swaps to happen when not inline.  */

> +int __attribute__ ((__noinline__))

> +_mm_movemask_ps (__m128 *__A)

> +{

> +  __vector __m64 result;

> +  static const __vector unsigned int perm_mask =

> +    {

> +      0x00204060, 0x80808080, 0x80808080, 0x80808080

> +    };

> +

> +  result = (__vector __m64)

> +    __builtin_vec_vbpermq ((__vector unsigned char) (*__A),

> +			   (__vector unsigned char) perm_mask);

> +  return result[1];

> +}

> +

> +int

> +main (void)

> +{

> +  union { unsigned int i[4]; __m128 m; } x

> +    = { 0x80000000, 0x80000000, 0x7fffffff, 0x7fffffff };

> +  if (_mm_movemask_ps (&x.m) != 3)

> +    __builtin_abort ();

> +  return 0;

> +}

> 

> -- 

> Alan Modra

> Australia Development Lab, IBM

>
Segher Boessenkool Jan. 25, 2018, 8:33 p.m. | #2
On Thu, Jan 25, 2018 at 08:39:21PM +1030, Alan Modra wrote:
> vbpermq produces its output in bits 48..63 of the target vector reg,

> so the output cannot be lane swapped.  Bootstrapped and regression

> tested powerpc64le-linux.  OK to apply mainline, and backport to the

> branches?

> 

> gcc/

> 	PR target/84033

> 	* config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Exclude

> 	UNSPEC_VBPERMQ.  Sort other unspecs.

> gcc/testsuite/

> 	PR target/84033

> 	* gcc.target/powerpc/swaps-p8-46.c: New.


> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c

> @@ -0,0 +1,34 @@

> +/* { dg-do run { target { powerpc64le-*-* } } } */


/* { dg-do run { target { lp64 } } } */

would be better, since you only care whether you are running 64-bit
(and the target triple does not tell you; although standard configs
for powerpc64le pretty much enforce it, of course).

Okay for trunk with or without that change; okay for the branches after
looking for fallout.  Thanks!


Segher

Patch

diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c
index 1f95290..ffcbba9 100644
--- a/gcc/config/rs6000/rs6000-p8swap.c
+++ b/gcc/config/rs6000/rs6000-p8swap.c
@@ -741,6 +741,7 @@  rtx_is_swappable_p (rtx op, unsigned int *special)
 	  {
 	  default:
 	    break;
+	  case UNSPEC_VBPERMQ:
 	  case UNSPEC_VMRGH_DIRECT:
 	  case UNSPEC_VMRGL_DIRECT:
 	  case UNSPEC_VPACK_SIGN_SIGN_SAT:
@@ -762,8 +763,14 @@  rtx_is_swappable_p (rtx op, unsigned int *special)
 	  case UNSPEC_VSUMSWS:
 	  case UNSPEC_VSUMSWS_DIRECT:
 	  case UNSPEC_VSX_CONCAT:
+	  case UNSPEC_VSX_CVDPSPN:
+	  case UNSPEC_VSX_CVSPDP:
+	  case UNSPEC_VSX_CVSPDPN:
+	  case UNSPEC_VSX_EXTRACT:
 	  case UNSPEC_VSX_SET:
 	  case UNSPEC_VSX_SLDWI:
+	  case UNSPEC_VSX_VEC_INIT:
+	  case UNSPEC_VSX_VSLO:
 	  case UNSPEC_VUNPACK_HI_SIGN:
 	  case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
 	  case UNSPEC_VUNPACK_LO_SIGN:
@@ -774,12 +781,6 @@  rtx_is_swappable_p (rtx op, unsigned int *special)
 	  case UNSPEC_VUPKLPX:
 	  case UNSPEC_VUPKLS_V4SF:
 	  case UNSPEC_VUPKLU_V4SF:
-	  case UNSPEC_VSX_CVDPSPN:
-	  case UNSPEC_VSX_CVSPDP:
-	  case UNSPEC_VSX_CVSPDPN:
-	  case UNSPEC_VSX_EXTRACT:
-	  case UNSPEC_VSX_VSLO:
-	  case UNSPEC_VSX_VEC_INIT:
 	    return 0;
 	  case UNSPEC_VSPLT_DIRECT:
 	  case UNSPEC_VSX_XXSPLTD:
diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c
new file mode 100644
index 0000000..23494b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c
@@ -0,0 +1,34 @@ 
+/* { dg-do run { target { powerpc64le-*-* } } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O2 " } */
+
+typedef __attribute__ ((__aligned__ (8))) unsigned long long __m64;
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+/* PR84033.  Extracted from xmmintrin.h but with a pointer param to
+   allow swaps to happen when not inline.  */
+int __attribute__ ((__noinline__))
+_mm_movemask_ps (__m128 *__A)
+{
+  __vector __m64 result;
+  static const __vector unsigned int perm_mask =
+    {
+      0x00204060, 0x80808080, 0x80808080, 0x80808080
+    };
+
+  result = (__vector __m64)
+    __builtin_vec_vbpermq ((__vector unsigned char) (*__A),
+			   (__vector unsigned char) perm_mask);
+  return result[1];
+}
+
+int
+main (void)
+{
+  union { unsigned int i[4]; __m128 m; } x
+    = { 0x80000000, 0x80000000, 0x7fffffff, 0x7fffffff };
+  if (_mm_movemask_ps (&x.m) != 3)
+    __builtin_abort ();
+  return 0;
+}