[committed] amdgcn: sub-dword vector min/max/shift/bit operators

Message ID d16d0075-b279-1aa1-71d4-5586046feaa5@codesourcery.com
State New
Headers show
Series
  • [committed] amdgcn: sub-dword vector min/max/shift/bit operators
Related show

Commit Message

Andrew Stubbs Feb. 27, 2020, 5:18 p.m.
This patch adds V64QI and V64HI implementations of smin, umin, smax, 
umax, ashift, ashiftrt, lshiftrt, and, ior, xor, not, and popcount.

None of these operators has a specific machine instruction for these 
modes, so they need to use V64SI instructions.  For scalar code expr.c 
can do the right thing automatically, but not so for vector operations.

The min/max and shift operators emit explicit extends and truncates 
around the actual operator. I don't believe those are needed for the bit 
operators, but they can easily be added if needed.

There can be more optimal implementations in future, but right now I'm 
interested in correctness. For example, some of the instructions can 
have the extend and/or truncate combined into one "DPP" instruction, so 
I intend to add patterns for the combine pass to use. Similarly, there 
are load instructions with built-in extends, and I can change the 
representation of the stores to allow combining truncates.

Andrew

Patch

amdgcn: sub-dword vector min/max/shift/bit operators

2020-02-27  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* config/gcn/gcn-valu.md (VEC_SUBDWORD_MODE): New mode iterator.
	(<expander><mode>2<exec>): Change modes to VEC_ALL1REG_INT_MODE.
	(<expander><mode>3<exec>): Likewise.
	(<expander><mode>3): New.
	(v<expander><mode>3): New.
	(<expander><mode>3): New.
	(<expander><mode>3<exec>): Rename to ...
	(<expander>v64si3<exec>): ... this, and change modes to V64SI.
	* config/gcn/gcn.md (mnemonic): Use '%B' for not.

diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index a0cc9a2d8fc..40e864a8de7 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -16,6 +16,10 @@ 
 
 ;; {{{ Vector iterators
 
+; Vector modes for sub-dword modes
+(define_mode_iterator VEC_SUBDWORD_MODE
+		      [V64QI V64HI])
+
 ; Vector modes for one vector register
 (define_mode_iterator VEC_1REG_MODE
 		      [V64SI V64HF V64SF])
@@ -1881,20 +1885,20 @@ 
 (define_code_iterator minmaxop [smin smax umin umax])
 
 (define_insn "<expander><mode>2<exec>"
-  [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand"    "=  v")
-	(bitunop:VEC_1REG_INT_MODE
-	  (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" "vSvB")))]
+  [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "gcn_valu_dst_operand"    "=  v")
+	(bitunop:VEC_ALL1REG_INT_MODE
+	  (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_valu_src0_operand" "vSvB")))]
   ""
   "v_<mnemonic>0\t%0, %1"
   [(set_attr "type" "vop1")
    (set_attr "length" "8")])
 
 (define_insn "<expander><mode>3<exec>"
-  [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "=  v,RD")
-	(bitop:VEC_1REG_INT_MODE
-	  (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
+  [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "gcn_valu_dst_operand" "=  v,RD")
+	(bitop:VEC_ALL1REG_INT_MODE
+	  (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_valu_src0_operand"
 								  "%  v, 0")
-	  (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
+	  (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_valu_src1com_operand"
 								  "vSvB, v")))]
   ""
   "@
@@ -1967,6 +1971,27 @@ 
   [(set_attr "type" "vmult,ds")
    (set_attr "length" "16,8")])
 
+(define_expand "<expander><mode>3"  ; Sub-dword shift by scalar, via V64SI.
+  [(set (match_operand:VEC_SUBDWORD_MODE 0 "register_operand"  "= v")
+	(shiftop:VEC_SUBDWORD_MODE
+	  (match_operand:VEC_SUBDWORD_MODE 1 "gcn_alu_operand" "  v")
+	  (vec_duplicate:VEC_SUBDWORD_MODE
+	    (match_operand:SI 2 "gcn_alu_operand"	       "SvB"))))]
+  ""
+  {
+    enum {ashift, lshiftrt, ashiftrt};  /* Values for the code test.  */
+    bool unsignedp = (<code> == lshiftrt);  /* Zero-extend iff logical.  */
+    rtx insi1 = gen_reg_rtx (V64SImode);
+    rtx insi2 = gen_reg_rtx (SImode);
+    rtx outsi = gen_reg_rtx (V64SImode);
+
+    convert_move (insi1, operands[1], unsignedp);  /* Widen the input.  */
+    convert_move (insi2, operands[2], unsignedp);  /* SI to SI.  */
+    emit_insn (gen_<expander>v64si3 (outsi, insi1, insi2));
+    convert_move (operands[0], outsi, unsignedp);  /* Narrow the result.  */
+    DONE;
+  })
+
 (define_insn "<expander>v64si3<exec>"
   [(set (match_operand:V64SI 0 "register_operand"  "= v")
 	(shiftop:V64SI
@@ -1978,6 +2003,26 @@ 
   [(set_attr "type" "vop2")
    (set_attr "length" "8")])
 
+(define_expand "v<expander><mode>3"  ; Sub-dword per-lane shift, via V64SI.
+  [(set (match_operand:VEC_SUBDWORD_MODE 0 "register_operand"  "=v")
+	(shiftop:VEC_SUBDWORD_MODE
+	  (match_operand:VEC_SUBDWORD_MODE 1 "gcn_alu_operand" " v")
+	  (match_operand:VEC_SUBDWORD_MODE 2 "gcn_alu_operand" "vB")))]
+  ""
+  {
+    enum {ashift, lshiftrt, ashiftrt};  /* Values for the code test.  */
+    bool unsignedp = (<code> == lshiftrt);  /* Zero-extend iff logical.  */
+    rtx insi1 = gen_reg_rtx (V64SImode);
+    rtx insi2 = gen_reg_rtx (V64SImode);
+    rtx outsi = gen_reg_rtx (V64SImode);
+
+    convert_move (insi1, operands[1], unsignedp);  /* Widen the input.  */
+    convert_move (insi2, operands[2], unsignedp);  /* Widen the counts.  */
+    emit_insn (gen_v<expander>v64si3 (outsi, insi1, insi2));
+    convert_move (operands[0], outsi, unsignedp);  /* Narrow the result.  */
+    DONE;
+  })
+
 (define_insn "v<expander>v64si3<exec>"
   [(set (match_operand:V64SI 0 "register_operand"  "=v")
 	(shiftop:V64SI
@@ -1988,13 +2033,31 @@ 
   [(set_attr "type" "vop2")
    (set_attr "length" "8")])
 
-(define_insn "<expander><mode>3<exec>"
-  [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "=  v,RD")
-	(minmaxop:VEC_1REG_INT_MODE
-	  (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
-								  "%  v, 0")
-	  (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
-								  "vSvB, v")))]
+(define_expand "<expander><mode>3"  ; Sub-dword min/max, done in V64SI.
+  [(set (match_operand:VEC_SUBDWORD_MODE 0 "gcn_valu_dst_operand")
+	(minmaxop:VEC_SUBDWORD_MODE
+	  (match_operand:VEC_SUBDWORD_MODE 1 "gcn_valu_src0_operand")
+	  (match_operand:VEC_SUBDWORD_MODE 2 "gcn_valu_src1com_operand")))]
+  ""
+  {
+    enum {smin, umin, smax, umax};  /* Values for the code test.  */
+    bool unsignedp = (<code> == umax || <code> == umin);  /* Unsigned ops.  */
+    rtx insi1 = gen_reg_rtx (V64SImode);
+    rtx insi2 = gen_reg_rtx (V64SImode);
+    rtx outsi = gen_reg_rtx (V64SImode);
+
+    convert_move (insi1, operands[1], unsignedp);  /* Widen the inputs.  */
+    convert_move (insi2, operands[2], unsignedp);
+    emit_insn (gen_<code>v64si3 (outsi, insi1, insi2));
+    convert_move (operands[0], outsi, unsignedp);  /* Narrow the result.  */
+    DONE;
+  })
+
+(define_insn "<expander>v64si3<exec>"
+  [(set (match_operand:V64SI 0 "gcn_valu_dst_operand"	    "=  v,RD")
+	(minmaxop:V64SI
+	  (match_operand:V64SI 1 "gcn_valu_src0_operand"    "%  v, 0")
+	  (match_operand:V64SI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
   ""
   "@
    v_<mnemonic>0\t%0, %2, %1
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index d8b49dfd640..1bd3330f90b 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -319,7 +319,7 @@ 
    (smax "max%i")
    (umin "min%u")
    (umax "max%u")
-   (not "not%b")
+   (not "not%B")
    (popcount "bcnt_u32%b")])
 
 (define_code_attr bare_mnemonic