[rs6000] GCC 7 backport for vec_insert4b, vec_extract4b support

Message ID 1519662403.5702.9.camel@us.ibm.com
State New
Headers show
Series
  • [rs6000] GCC 7 backport for vec_insert4b, vec_extract4b support
Related show

Commit Message

Carl Love Feb. 26, 2018, 4:26 p.m.
GCC maintainers:

The following patch is a back port for the GCC 7 branch of mainline
commit 257747. The mainline commit added support for builtins
vec_insert4b and vec_extract4b.  This is the first of two patches. 
This patch adds the ABI specified builtin instances.  A second patch to
be posted shortly will remove the non ABI instances of these builtins.

This patch has been tested on GCC 7:

  powerpc64le-unknown-linux-gnu (Power 8 LE)
  powerpc64-unknown-linux-gnu (Power 8 BE)

with no regressions.

Let me know if the patch looks OK or not. Thanks.

               Carl Love
-----------------------------------------------------------------

gcc/ChangeLog

2018-02-26  Carl Love  <cel@us.ibm.com>

	* config/rs6000/altivec.h: Add builtin names vec_extract4b
	vec_insert4b.
	* config/rs6000/rs6000-builtin.def: Add INSERT4B and EXTRACT4B
	definitions.
	* config/rs6000/rs6000-c.c: Add the definitions for
	P9V_BUILTIN_VEC_EXTRACT4B and P9V_BUILTIN_VEC_INSERT4B.
	* config/rs6000/rs6000.c (altivec_expand_builtin): Add
	P9V_BUILTIN_EXTRACT4B and P9V_BUILTIN_INSERT4B case statements.
	* config/rs6000/vsx.md: Add define_insn extract4b.  Add define_expand
	definition for insert4b and define insn *insert3b_internal.
	* doc/extend.texi: Add documentation for vec_extract4b.

gcc/testsuite/ChangeLog

2018-02-26  Carl Love  <cel@us.ibm.com>

	*gcc.target/powerpc/builtins-7-p9-runnable.c: New runnable test file
	for the ABI definitions for vec_extract4b and vec_insert4b.
---
 gcc/config/rs6000/altivec.h                        |   2 +
 gcc/config/rs6000/rs6000-builtin.def               |   4 +
 gcc/config/rs6000/rs6000-c.c                       |   8 +
 gcc/config/rs6000/rs6000.c                         |   2 +
 gcc/config/rs6000/vsx.md                           |  41 +++++
 gcc/doc/extend.texi                                |   7 +
 .../gcc.target/powerpc/builtins-7-p9-runnable.c    | 169 +++++++++++++++++++++
 7 files changed, 233 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-7-p9-runnable.c

-- 
2.7.4

Comments

Segher Boessenkool Feb. 26, 2018, 5:46 p.m. | #1
On Mon, Feb 26, 2018 at 08:26:43AM -0800, Carl Love wrote:
> 

> 2018-02-26  Carl Love  <cel@us.ibm.com>

> 

> 	*gcc.target/powerpc/builtins-7-p9-runnable.c: New runnable test file

> 	for the ABI definitions for vec_extract4b and vec_insert4b.


(Space after the asterisk).

This is just the same as the trunk version, right?  If so: okay for all
release branches.  Thanks!


Segher

Patch

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index b9de05a..3011a87 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -400,6 +400,8 @@ 
 #define vec_vctzw __builtin_vec_vctzw
 #define vec_vextract4b __builtin_vec_vextract4b
 #define vec_vinsert4b __builtin_vec_vinsert4b
+#define vec_extract4b __builtin_vec_extract4b
+#define vec_insert4b __builtin_vec_insert4b
 #define vec_vprtyb __builtin_vec_vprtyb
 #define vec_vprtybd __builtin_vec_vprtybd
 #define vec_vprtybw __builtin_vec_vprtybw
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index a39f936..46ae21a 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2036,6 +2036,8 @@  BU_P9V_AV_2 (VEXTUWRX, "vextuwrx",		CONST,	vextuwrx)
 BU_P9V_VSX_2 (VEXTRACT4B,   "vextract4b",	CONST,	vextract4b)
 BU_P9V_VSX_3 (VINSERT4B,    "vinsert4b",	CONST,	vinsert4b)
 BU_P9V_VSX_3 (VINSERT4B_DI, "vinsert4b_di",	CONST,	vinsert4b_di)
+BU_P9V_VSX_3 (INSERT4B,    "insert4b",		CONST,  insert4b)
+BU_P9V_VSX_2 (EXTRACT4B,   "extract4b", 	CONST,  extract4b)
 
 /* 3 argument vector functions returning void, treated as SPECIAL,
    added in ISA 3.0 (power9).  */
@@ -2084,10 +2086,12 @@  BU_P9V_OVERLOAD_2 (LXVL,	"lxvl")
 BU_P9V_OVERLOAD_2 (VEXTULX,	"vextulx")
 BU_P9V_OVERLOAD_2 (VEXTURX,	"vexturx")
 BU_P9V_OVERLOAD_2 (VEXTRACT4B,	"vextract4b")
+BU_P9V_OVERLOAD_2 (EXTRACT4B,  "extract4b")
 
 /* ISA 3.0 Vector scalar overloaded 3 argument functions */
 BU_P9V_OVERLOAD_3 (STXVL,	"stxvl")
 BU_P9V_OVERLOAD_3 (VINSERT4B,	"vinsert4b")
+BU_P9V_OVERLOAD_3 (INSERT4B,    "insert4b")
 
 /* Overloaded CMPNE support was implemented prior to Power 9,
    so is not mentioned here.  */
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index d9905f7..c3134fc 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -5047,6 +5047,8 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_INTDI, RS6000_BTI_V16QI, RS6000_BTI_UINTSI, 0 },
   { P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B,
     RS6000_BTI_INTDI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI, 0 },
+  { P9V_BUILTIN_VEC_EXTRACT4B, P9V_BUILTIN_EXTRACT4B,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, 0 },
 
   { P9V_BUILTIN_VEC_VEXTULX, P9V_BUILTIN_VEXTUBLX,
     RS6000_BTI_INTQI, RS6000_BTI_UINTSI,
@@ -5101,6 +5103,12 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
   { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
 
+  { P9V_BUILTIN_VEC_INSERT4B, P9V_BUILTIN_INSERT4B,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_V4SI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI },
+  { P9V_BUILTIN_VEC_INSERT4B, P9V_BUILTIN_INSERT4B,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI },
   { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B,
     RS6000_BTI_V16QI, RS6000_BTI_V4SI,
     RS6000_BTI_V16QI, RS6000_BTI_UINTSI },
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 6bd1d68..21ce297 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -16385,6 +16385,7 @@  altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
 
     case P9V_BUILTIN_VEXTRACT4B:
     case P9V_BUILTIN_VEC_VEXTRACT4B:
+    case P9V_BUILTIN_VEC_EXTRACT4B:
       arg1 = CALL_EXPR_ARG (exp, 1);
       STRIP_NOPS (arg1);
 
@@ -16402,6 +16403,7 @@  altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
     case P9V_BUILTIN_VINSERT4B:
     case P9V_BUILTIN_VINSERT4B_DI:
     case P9V_BUILTIN_VEC_VINSERT4B:
+    case P9V_BUILTIN_VEC_INSERT4B:
       arg2 = CALL_EXPR_ARG (exp, 2);
       STRIP_NOPS (arg2);
 
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 59ae0bc..c00238b 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4085,6 +4085,47 @@ 
 ;; Vector insert/extract word at arbitrary byte values.  Note, the little
 ;; endian version needs to adjust the byte number, and the V4SI element in
 ;; vinsert4b.
+(define_insn "extract4b"
+  [(set (match_operand:V2DI 0 "vsx_register_operand")
+       (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+                     (match_operand:QI 2 "const_0_to_12_operand" "n")]
+                    UNSPEC_XXEXTRACTUW))]
+  "TARGET_P9_VECTOR"
+{
+  if (!VECTOR_ELT_ORDER_BIG)
+    operands[2] = GEN_INT (12 - INTVAL (operands[2]));
+
+  return "xxextractuw %x0,%x1,%2";
+})
+
+(define_expand "insert4b"
+  [(set (match_operand:V16QI 0 "vsx_register_operand")
+	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
+		       (match_operand:V16QI 2 "vsx_register_operand")
+		       (match_operand:QI 3 "const_0_to_12_operand")]
+		   UNSPEC_XXINSERTW))]
+  "TARGET_P9_VECTOR"
+{
+  if (!VECTOR_ELT_ORDER_BIG)
+    {
+      rtx op1 = operands[1];
+      rtx v4si_tmp = gen_reg_rtx (V4SImode);
+      emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
+      operands[1] = v4si_tmp;
+      operands[3] = GEN_INT (12 - INTVAL (operands[3]));
+    }
+})
+
+(define_insn "*insert4b_internal"
+  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
+		       (match_operand:V16QI 2 "vsx_register_operand" "0")
+		       (match_operand:QI 3 "const_0_to_12_operand" "n")]
+		   UNSPEC_XXINSERTW))]
+  "TARGET_P9_VECTOR"
+  "xxinsertw %x0,%x1,%3"
+  [(set_attr "type" "vecperm")])
+
 (define_expand "vextract4b"
   [(set (match_operand:DI 0 "gpc_reg_operand")
 	(unspec:DI [(match_operand:V16QI 1 "vsx_register_operand")
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 9db9e0e..d1fa318 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -18125,8 +18125,15 @@  vector int vec_vctzw (vector int);
 vector unsigned int vec_vctzw (vector int);
 
 long long vec_vextract4b (const vector signed char, const int);
+vector unsigned long long vec_extract4b (vector unsigned char,
+                                         const int);
+long long vec_extract4b (const vector signed char, const int);
 long long vec_vextract4b (const vector unsigned char, const int);
 
+vector unsigned char vec_insert4b (vector signed int, vector unsigned char,
+                                   const int);
+vector unsigned char vec_insert4b (vector unsigned int, vector unsigned char,
+                                   const int);
 vector signed char vec_insert4b (vector int, vector signed char, const int);
 vector unsigned char vec_insert4b (vector unsigned int, vector unsigned char,
                                    const int);
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-7-p9-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-7-p9-runnable.c
new file mode 100644
index 0000000..137b46b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-7-p9-runnable.c
@@ -0,0 +1,169 @@ 
+/* { dg-do run { target { powerpc*-*-* && p9vector_hw } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+#include <altivec.h>
+#define TRUE 1
+#define FALSE 0
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#define EXTRACT 0
+
+void abort (void);
+
+int result_wrong_ull (vector unsigned long long vec_expected,
+		      vector unsigned long long vec_actual)
+{
+  int i;
+
+  for (i = 0; i < 2; i++)
+    if (vec_expected[i] != vec_actual[i])
+      return TRUE;
+
+  return FALSE;
+}
+
+int result_wrong_uc (vector unsigned char vec_expected,
+		     vector unsigned char vec_actual)
+{
+  int i;
+
+  for (i = 0; i < 16; i++)
+    if (vec_expected[i] != vec_actual[i])
+      return TRUE;
+
+  return FALSE;
+}
+
+#ifdef DEBUG
+void print_ull (vector unsigned long long vec_expected,
+		vector unsigned long long vec_actual)
+{
+  int i;
+
+  printf("expected unsigned long long data\n");
+  for (i = 0; i < 2; i++)
+    printf(" %lld,", vec_expected[i]);
+
+  printf("\nactual signed char data\n");
+  for (i = 0; i < 2; i++)
+    printf(" %lld,", vec_actual[i]);
+  printf("\n");
+}
+
+void print_uc (vector unsigned char vec_expected,
+	       vector unsigned char vec_actual)
+{
+  int i;
+
+  printf("expected unsigned char data\n");
+  for (i = 0; i < 16; i++)
+    printf(" %d,", vec_expected[i]);
+
+  printf("\nactual unsigned char data\n");
+  for (i = 0; i < 16; i++)
+    printf(" %d,", vec_actual[i]);
+  printf("\n");
+}
+#endif
+
+#if EXTRACT
+vector unsigned long long
+vext (vector unsigned char *vc)
+{
+  return vextract_si_vchar (*vc, 5);
+}
+#endif
+
+int main()
+{
+   vector signed int vsi_arg;
+   vector unsigned char vec_uc_arg, vec_uc_result, vec_uc_expected;
+   vector unsigned long long vec_ull_result, vec_ull_expected;
+   unsigned long long ull_result, ull_expected;
+
+   vec_uc_arg = (vector unsigned char){1, 2, 3, 4,
+				       5, 6, 7, 8,
+				       9, 10, 11, 12,
+				       13, 14, 15, 16};
+
+   vsi_arg = (vector signed int){0xA, 0xB, 0xC, 0xD};
+
+   vec_uc_expected = (vector unsigned char){0xC, 0, 0, 0,
+					    5, 6, 7, 8,
+					    9, 10, 11, 12,
+					    13, 14, 15, 16};
+   /* Test vec_insert4b() */
+   /* Insert into char 0 location */
+   vec_uc_result = vec_insert4b (vsi_arg, vec_uc_arg, 0);
+
+   if (result_wrong_uc(vec_uc_expected, vec_uc_result))
+     {
+#ifdef DEBUG
+        printf("Error: vec_insert4b pos 0, result does not match expected result\n");
+	print_uc (vec_uc_expected, vec_uc_result);
+#else
+        abort();
+#endif
+      }
+
+   /* insert into char 4 location */
+   vec_uc_expected = (vector unsigned char){1, 2, 3, 4,
+					    0xC, 0, 0, 0,
+					    9, 10, 11, 12,
+					    13, 14, 15, 16};
+   vec_uc_result = vec_insert4b (vsi_arg, vec_uc_arg, 4);
+
+   if (result_wrong_uc(vec_uc_expected, vec_uc_result))
+     {
+#ifdef DEBUG
+        printf("Error: vec_insert4b pos 4, result does not match expected result\n");
+	print_uc (vec_uc_expected, vec_uc_result);
+#else
+        abort();
+#endif
+      }
+
+   /* Test vec_extract4b() */
+   /* Extract 4b, from char 0 location */
+   vec_uc_arg = (vector unsigned char){10, 0, 0, 0,
+				       20, 0, 0, 0,
+				       30, 0, 0, 0,
+				       40, 0, 0, 0};
+
+   vec_ull_expected = (vector unsigned long long){0, 10};
+   vec_ull_result = vec_extract4b(vec_uc_arg, 0);
+
+   if (result_wrong_ull(vec_ull_expected, vec_ull_result))
+     {
+#ifdef DEBUG
+        printf("Error: vec_extract4b pos 0, result does not match expected result\n");
+	print_ull (vec_ull_expected, vec_ull_result);
+#else
+        abort();
+#endif
+      }
+
+   /* Extract 4b, from char 12 location */
+   vec_uc_arg = (vector unsigned char){10, 0, 0, 0,
+				       20, 0, 0, 0,
+				       30, 0, 0, 0,
+				       40, 0, 0, 0};
+
+   vec_ull_expected = (vector unsigned long long){0, 40};
+   vec_ull_result = vec_extract4b(vec_uc_arg, 12);
+
+   if (result_wrong_ull(vec_ull_expected, vec_ull_result))
+     {
+#ifdef DEBUG
+        printf("Error: vec_extract4b pos 12, result does not match expected result\n");
+	print_ull (vec_ull_expected, vec_ull_result);
+#else
+        abort();
+#endif
+      }
+}