[05/46] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2

Message ID 20190201211809.963-6-hjl.tools@gmail.com
State New
Headers show
Series
  • Implement MMX intrinsics with SSE
Related show

Commit Message

H.J. Lu Feb. 1, 2019, 9:17 p.m.
Emulate MMX packsswb/packssdw/packuswb with SSE packsswb/packssdw/packuswb
plus moving bits 64:95 to bits 32:63 in SSE register.  Only SSE register
source operand is allowed.

	PR target/89021
	* config/i386/constraints.md (Yx): Any SSE register if MMX is
	disabled in 64-bit mode.
	(Yy): Any EVEX encodable SSE register for AVX512VL target,
	otherwise any SSE register if MMX is disabled in 64-bit mode.
	* config/i386/i386-protos.h (ix86_move_vector_high_sse_to_mmx):
	New prototype.
	(ix86_split_mmx_pack): Likewise.
	* config/i386/i386.c (ix86_move_vector_high_sse_to_mmx): New
	function.
	(ix86_split_mmx_pack): Likewise.
	* config/i386/mmx.md (any_s_truncate): New code iterator.
	(s_trunsuffix): New code attr.
	(mmx_packsswb): Removed.
	(mmx_packssdw): Likewise.
	(mmx_packuswb): Likewise.
	(mmx_pack<s_trunsuffix>swb): New define_insn_and_split to emulate
	MMX packsswb/packuswb with SSE2.
	(mmx_packssdw): Likewise.
---
 gcc/config/i386/constraints.md | 10 ++++++
 gcc/config/i386/i386-protos.h  |  3 ++
 gcc/config/i386/i386.c         | 54 +++++++++++++++++++++++++++++++
 gcc/config/i386/mmx.md         | 59 +++++++++++++++++-----------------
 4 files changed, 97 insertions(+), 29 deletions(-)

-- 
2.20.1

Patch

diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 33921aea267..6e9244ad77f 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -110,6 +110,9 @@ 
 ;;  v	any EVEX encodable SSE register for AVX512VL target,
 ;;	otherwise any SSE register
 ;;  h	EVEX encodable SSE register with number factor of four
+;;  x	SSE register if MMX is disabled in 64-bit mode
+;;  y	any EVEX encodable SSE register for AVX512VL target, otherwise
+;;      any SSE register if MMX is disabled in 64-bit mode
 
 (define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
  "First SSE register (@code{%xmm0}).")
@@ -146,6 +149,13 @@ 
  "TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
  "@internal For AVX512VL, any EVEX encodable SSE register (@code{%xmm0-%xmm31}), otherwise any SSE register.")
 
+(define_register_constraint "Yx" "TARGET_MMX_WITH_SSE ? SSE_REGS : NO_REGS"
+ "@internal Any SSE register if MMX is disabled in 64-bit mode.")
+
+(define_register_constraint "Yy"
+ "TARGET_MMX_WITH_SSE ? (TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS) : NO_REGS"
+ "@internal Any EVEX encodable SSE register for AVX512VL target, otherwise any SSE register if MMX is disabled in 64-bit mode.")
+
 ;; We use the B prefix to denote any number of internal operands:
 ;;  f  FLAGS_REG
 ;;  g  GOT memory operand.
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 2d600173917..bb96a420a85 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -200,6 +200,9 @@  extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, rtx, rtx);
 
 extern rtx ix86_split_stack_guard (void);
 
+extern void ix86_move_vector_high_sse_to_mmx (rtx);
+extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
 #endif	/* TREE_CODE  */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 4e67abe8764..fde32983fa2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19952,6 +19952,60 @@  ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
     gcc_unreachable ();
 }
 
+/* Move bits 64:95 to bits 32:63.  */
+
+void
+ix86_move_vector_high_sse_to_mmx (rtx op)
+{
+  rtx mask = gen_rtx_PARALLEL (VOIDmode,
+			       gen_rtvec (4, GEN_INT (0), GEN_INT (2),
+					  GEN_INT (0), GEN_INT (0)));
+  rtx dest = gen_rtx_REG (V4SImode, REGNO (op));
+  op = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+  rtx insn = gen_rtx_SET (dest, op);
+  emit_insn (insn);
+}
+
+/* Split MMX pack with signed/unsigned saturation with SSE/SSE2.  */
+
+void
+ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+
+  machine_mode dmode = GET_MODE (op0);
+  machine_mode smode = GET_MODE (op1);
+  machine_mode inner_dmode = GET_MODE_INNER (dmode);
+  machine_mode inner_smode = GET_MODE_INNER (smode);
+
+  /* Get the corresponding SSE mode for destination.  */
+  int nunits = 16 / GET_MODE_SIZE (inner_dmode);
+  machine_mode sse_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+					    nunits).require ();
+  machine_mode sse_half_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+						 nunits / 2).require ();
+
+  /* Get the corresponding SSE mode for source.  */
+  nunits = 16 / GET_MODE_SIZE (inner_smode);
+  machine_mode sse_smode = mode_for_vector (GET_MODE_INNER (smode),
+					    nunits).require ();
+
+  /* Generate SSE pack with signed/unsigned saturation.  */
+  rtx dest = gen_rtx_REG (sse_dmode, REGNO (op0));
+  op1 = gen_rtx_REG (sse_smode, REGNO (op1));
+  op2 = gen_rtx_REG (sse_smode, REGNO (op2));
+
+  op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1);
+  op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2);
+  rtx insn = gen_rtx_SET (dest, gen_rtx_VEC_CONCAT (sse_dmode,
+						    op1, op2));
+  emit_insn (insn);
+
+  ix86_move_vector_high_sse_to_mmx (op0);
+}
+
 /* Helper function of ix86_fixup_binary_operands to canonicalize
    operand order.  Returns true if the operands should be swapped.  */
 
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 0d44aa60c79..c183f949a7c 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -58,6 +58,11 @@ 
 ;; Mapping from integer vector mode to mnemonic suffix
 (define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (V1DI "q")])
 
+;; Used in signed and unsigned truncations with saturation.
+(define_code_iterator any_s_truncate [ss_truncate us_truncate])
+;; Instruction suffix for truncations with saturation.
+(define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")])
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Move patterns
@@ -1046,41 +1051,37 @@ 
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_insn "mmx_packsswb"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_pack<s_trunsuffix>swb"
+  [(set (match_operand:V8QI 0 "register_operand" "=y,Yx,Yy")
 	(vec_concat:V8QI
-	  (ss_truncate:V4QI
-	    (match_operand:V4HI 1 "register_operand" "0"))
-	  (ss_truncate:V4QI
-	    (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
-  "TARGET_MMX"
-  "packsswb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
+	  (any_s_truncate:V4QI
+	    (match_operand:V4HI 1 "register_operand" "0,0,Yy"))
+	  (any_s_truncate:V4QI
+	    (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy"))))]
+  "TARGET_MMX_INSNS"
+  "pack<s_trunsuffix>swb\t{%2, %0|%0, %2}"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+  "ix86_split_mmx_pack (operands, <any_s_truncate:CODE>);"
+  [(set_attr "isa" "*,noavx,avx")
+   (set_attr "type" "mmxshft,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_packssdw"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_packssdw"
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
 	(vec_concat:V4HI
 	  (ss_truncate:V2HI
-	    (match_operand:V2SI 1 "register_operand" "0"))
+	    (match_operand:V2SI 1 "register_operand" "0,0,Yy"))
 	  (ss_truncate:V2HI
-	    (match_operand:V2SI 2 "nonimmediate_operand" "ym"))))]
-  "TARGET_MMX"
+	    (match_operand:V2SI 2 "nonimmediate_operand" "ym,Yx,Yy"))))]
+  "TARGET_MMX_INSNS"
   "packssdw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-(define_insn "mmx_packuswb"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-	(vec_concat:V8QI
-	  (us_truncate:V4QI
-	    (match_operand:V4HI 1 "register_operand" "0"))
-	  (us_truncate:V4QI
-	    (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
-  "TARGET_MMX"
-  "packuswb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+  "ix86_split_mmx_pack (operands, SS_TRUNCATE);"
+  [(set_attr "isa" "*,noavx,avx")
+   (set_attr "type" "mmxshft,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_punpckhbw"
   [(set (match_operand:V8QI 0 "register_operand" "=y")