[committed] i386: Implement V2SF shuffles

Message ID CAFULd4ZPrk3TBepiHhy3N=pDNVRujJA3Ut14rJVaUQnkRSvdCg@mail.gmail.com
State New
Headers show
Series
  • [committed] i386: Implement V2SF shuffles
Related show

Commit Message

Kees Cook via Gcc-patches May 27, 2020, 3:16 p.m.
2020-05-27  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:
    * config/i386/mmx.md (mmx_pswapdv2sf2): Add SSE alternatives.
    Enable insn pattern for TARGET_MMX_WITH_SSE.
    (*mmx_movshdup): New insn pattern.
    (*mmx_movsldup): Ditto.
    (*mmx_movss): Ditto.
    * config/i386/i386-expand.c (ix86_vectorize_vec_perm_const):
    Handle E_V2SFmode.
    (expand_vec_perm_movs): Handle E_V2SFmode.
    (expand_vec_perm_even_odd_1): Ditto.
    (expand_vec_perm_broadcast_1): Assert that E_V2SFmode
    is already handled by standard shuffle patterns.

gcc/testsuite/ChangeLog:
    * gcc.target/i386/vperm-v2sf.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.

Patch

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 338b4f7cf4f..96f70ae5aaa 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -16319,6 +16319,7 @@  expand_vec_perm_movs (struct expand_vec_perm_d *d)
     return false;
 
   if (!(TARGET_SSE && vmode == V4SFmode)
+      && !(TARGET_MMX_WITH_SSE && vmode == V2SFmode)
       && !(TARGET_SSE2 && vmode == V2DFmode))
     return false;
 
@@ -18639,6 +18640,13 @@  expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
       /* These are always directly implementable by expand_vec_perm_1.  */
       gcc_unreachable ();
 
+    case E_V2SFmode:
+      gcc_assert (TARGET_MMX_WITH_SSE);
+      /* We have no suitable instructions.  */
+      if (d->testing_p)
+	return false;
+      break;
+
     case E_V4HImode:
       if (d->testing_p)
 	break;
@@ -18834,8 +18842,9 @@  expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
       gcc_unreachable ();
 
     case E_V2DFmode:
-    case E_V2DImode:
+    case E_V2SFmode:
     case E_V4SFmode:
+    case E_V2DImode:
     case E_V2SImode:
     case E_V4SImode:
       /* These are always implementable using standard shuffle patterns.  */
@@ -19329,6 +19338,7 @@  ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
       if (d.testing_p && TARGET_SSSE3)
 	return true;
       break;
+    case E_V2SFmode:
     case E_V2SImode:
     case E_V4HImode:
       if (!TARGET_MMX_WITH_SSE)
@@ -19367,7 +19377,7 @@  ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
 
       /* Implementable with shufps or pshufd.  */
       if (d.one_operand_p
-	  && (d.vmode == V4SFmode
+	  && (d.vmode == V4SFmode || d.vmode == V2SFmode
 	      || d.vmode == V4SImode || d.vmode == V2SImode))
 	return true;
 
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 215162dedb5..271c1c2e833 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -938,32 +938,85 @@ 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (define_insn "mmx_pswapdv2sf2"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-	(vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym")
-			 (parallel [(const_int 1) (const_int 0)])))]
-  "TARGET_3DNOW_A"
-  "pswapd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V2SF")])
+  [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
+	(vec_select:V2SF
+	  (match_operand:V2SF 1 "register_mmxmem_operand" "ym,0,Yv")
+	  (parallel [(const_int 1) (const_int 0)])))]
+  "TARGET_3DNOW_A || TARGET_MMX_WITH_SSE"
+  "@
+   pswapd\t{%1, %0|%0, %1}
+   shufps\t{$0xe1, %1, %0|%0, %1, 0xe1}
+   vshufps\t{$0xe1, %1, %1, %0|%0, %1, %1, 0xe1}"
+  [(set_attr "isa" "*,sse_noavx,avx")
+   (set_attr "mmx_isa" "native,*,*")
+   (set_attr "type" "mmxcvt,ssemov,ssemov")
+   (set_attr "prefix_extra" "1,*,*")
+   (set_attr "mode" "V2SF,V4SF,V4SF")])
+
+(define_insn "*mmx_movshdup"
+  [(set (match_operand:V2SF 0 "register_operand" "=v,x")
+	(vec_select:V2SF
+	  (match_operand:V2SF 1 "register_operand" "v,0")
+	  (parallel [(const_int 1) (const_int 1)])))]
+  "TARGET_MMX_WITH_SSE"
+  "@
+   %vmovshdup\t{%1, %0|%0, %1}
+   shufps\t{$0xe5, %0, %0|%0, %0, 0xe5}"
+  [(set_attr "isa" "sse3,*")
+   (set_attr "type" "sse,sseshuf1")
+   (set_attr "length_immediate" "*,1")
+   (set_attr "prefix_rep" "1,*")
+   (set_attr "prefix" "maybe_vex,orig")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "*mmx_movsldup"
+  [(set (match_operand:V2SF 0 "register_operand" "=v,x")
+	(vec_select:V2SF
+	  (match_operand:V2SF 1 "register_operand" "v,0")
+	  (parallel [(const_int 0) (const_int 0)])))]
+  "TARGET_MMX_WITH_SSE"
+  "@
+   %vmovsldup\t{%1, %0|%0, %1}
+   shufps\t{$0xe0, %0, %0|%0, %0, 0xe0}"
+  [(set_attr "isa" "sse3,*")
+   (set_attr "type" "sse,sseshuf1")
+   (set_attr "length_immediate" "*,1")
+   (set_attr "prefix_rep" "1,*")
+   (set_attr "prefix" "maybe_vex,orig")
+   (set_attr "mode" "V4SF")])
 
 (define_insn "*vec_dupv2sf"
-  [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
+  [(set (match_operand:V2SF 0 "register_operand" "=y,Yv,x")
 	(vec_duplicate:V2SF
-	  (match_operand:SF 1 "register_operand" "0,0,Yv")))]
+	  (match_operand:SF 1 "register_operand" "0,Yv,0")))]
   "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "@
    punpckldq\t%0, %0
-   shufps\t{$0xe0, %0, %0|%0, %0, 0xe0}
-   %vmovsldup\t{%1, %0|%0, %1}"
-  [(set_attr "isa" "*,sse_noavx,sse3")
+   %vmovsldup\t{%1, %0|%0, %1}
+   shufps\t{$0xe0, %0, %0|%0, %0, 0xe0}"
+  [(set_attr "isa" "*,sse3,sse_noavx")
    (set_attr "mmx_isa" "native,*,*")
-   (set_attr "type" "mmxcvt,sseshuf1,sse")
-   (set_attr "length_immediate" "*,1,*")
-   (set_attr "prefix_rep" "*,*,1")
-   (set_attr "prefix" "*,orig,maybe_vex")
+   (set_attr "type" "mmxcvt,sse,sseshuf1")
+   (set_attr "length_immediate" "*,*,1")
+   (set_attr "prefix_rep" "*,1,*")
+   (set_attr "prefix" "*,maybe_vex,orig")
    (set_attr "mode" "DI,V4SF,V4SF")])
 
+(define_insn "*mmx_movss"
+  [(set (match_operand:V2SF 0 "register_operand"   "=x,v")
+	(vec_merge:V2SF
+	  (match_operand:V2SF 2 "register_operand" " x,v")
+	  (match_operand:V2SF 1 "register_operand" " 0,v")
+	  (const_int 1)))]
+  "TARGET_MMX_WITH_SSE"
+  "@
+   movss\t{%2, %0|%0, %2}
+   vmovss\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "ssemov")
+   (set_attr "prefix" "orig,maybe_evex")
+   (set_attr "mode" "SF")])
+
 (define_insn "*mmx_concatv2sf"
   [(set (match_operand:V2SF 0 "register_operand"     "=y,y")
 	(vec_concat:V2SF
diff --git a/gcc/testsuite/gcc.target/i386/vperm-v2sf.c b/gcc/testsuite/gcc.target/i386/vperm-v2sf.c
new file mode 100644
index 00000000000..7bf6defb0f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vperm-v2sf.c
@@ -0,0 +1,41 @@ 
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "isa-check.h"
+#include "sse-os-support.h"
+
+typedef float S;
+typedef float V __attribute__((vector_size(8)));
+typedef int IV __attribute__((vector_size(8)));
+typedef union { S s[2]; V v; } U;
+
+static U i[2], b, c;
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+#define assert(T) ((T) || (__builtin_trap (), 0))
+
+#define TEST(E0, E1) \
+  b.v = __builtin_shuffle (i[0].v, i[1].v, (IV){E0, E1}); \
+  c.s[0] = i[0].s[E0]; \
+  c.s[1] = i[0].s[E1]; \
+  __asm__("" : : : "memory"); \
+  assert (memcmp (&b, &c, sizeof(c)) == 0);
+
+#include "vperm-2-2.inc"
+
+int main()
+{
+  check_isa ();
+
+  if (!sse_os_support ())
+    exit (0);
+
+  i[0].s[0] = 0;
+  i[0].s[1] = 1;
+  i[0].s[2] = 2;
+  i[0].s[3] = 3;
+
+  check();
+  return 0;
+}