[01/43] i386: Allow 64-bit vector modes in SSE registers

Message ID 20190210001947.27278-2-hjl.tools@gmail.com
State New
Headers show
Series
  • V3: Emulate MMX intrinsics with SSE
Related show

Commit Message

H.J. Lu Feb. 10, 2019, 12:19 a.m.
In 64-bit mode, SSE2 can be used to emulate MMX instructions without
3DNOW.  We can use SSE2 to support 64-bit vectors.

	PR target/89021
	* config/i386/i386.c (ix86_set_reg_reg_cost): Also support
	VALID_MMX_WITH_SSE_REG_MODE.
	(ix86_vector_mode_supported_p): Likewise.
	* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
	(TARGET_MMX_WITH_SSE_P): Likewise.
	(VALID_MMX_WITH_SSE_REG_MODE): Likewise.
---
 gcc/config/i386/i386.c |  3 +++
 gcc/config/i386/i386.h | 14 ++++++++++++++
 2 files changed, 17 insertions(+)

-- 
2.20.1

Comments

Uros Bizjak Feb. 10, 2019, 9:43 a.m. | #1
On 2/10/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> In 64-bit mode, SSE2 can be used to emulate MMX instructions without

> 3DNOW.  We can use SSE2 to support 64-bit vectors.

>

> 	PR target/89021

> 	* config/i386/i386.c (ix86_set_reg_reg_cost): Also support

> 	VALID_MMX_WITH_SSE_REG_MODE.

> 	(ix86_vector_mode_supported_p): Likewise.

> 	* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.

> 	(TARGET_MMX_WITH_SSE_P): Likewise.

> 	(VALID_MMX_WITH_SSE_REG_MODE): Likewise.

> ---

>  gcc/config/i386/i386.c |  3 +++

>  gcc/config/i386/i386.h | 14 ++++++++++++++

>  2 files changed, 17 insertions(+)

>

> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c

> index 12bc7926f86..ba02c26c8b2 100644

> --- a/gcc/config/i386/i386.c

> +++ b/gcc/config/i386/i386.c

> @@ -40235,6 +40235,7 @@ ix86_set_reg_reg_cost (machine_mode mode)

>  	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))

>  	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))

>  	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))

> +	  || (TARGET_MMX_WITH_SSE && VALID_MMX_WITH_SSE_REG_MODE (mode))

>  	  || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))


With V2SFmode out of the way (see below), we can finally use

(TARGET_MMX || TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode).

This is a cost function, and we do have DImode and SImode in SSE registers.

>  	units = GET_MODE_SIZE (mode);

>      }

> @@ -44057,6 +44058,8 @@ ix86_vector_mode_supported_p (machine_mode mode)

>      return true;

>    if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))

>      return true;

> +  if (TARGET_MMX_WITH_SSE && VALID_MMX_WITH_SSE_REG_MODE (mode))

> +    return true;


Assuming the middle end won't ask for scalar modes, and with V2SFmode out
of the way, we can also use

(TARGET_MMX || TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode)

here.

>    if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))

>      return true;

>    if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))

> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h

> index 83b025e0cf5..3ae0900caa0 100644

> --- a/gcc/config/i386/i386.h

> +++ b/gcc/config/i386/i386.h

> @@ -201,6 +201,13 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see

>  #define TARGET_16BIT	TARGET_CODE16

>  #define TARGET_16BIT_P(x)	TARGET_CODE16_P(x)

>

> +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.

> +   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.  */

> +#define TARGET_MMX_WITH_SSE \

> +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)

> +#define TARGET_MMX_WITH_SSE_P(x) \

> +  (TARGET_64BIT_P (x) && TARGET_SSE2_P (x) && !TARGET_3DNOW_P (x))


The above is not acceptable; the choice of native MMX should not
depend on the -m3dnow flag. So, instead of the FIXME, please leave the
partial conversion of V2SF mode out of the patchset; V2SF values should
still live in MMX registers.

Actually, -m3dnow is a dead-end, deprecated instruction set, so I see no
reason to emulate V2SF at all. SSE doesn't have native V2SF
instructions, and emulating reciprocals will trap due to 0.0 in the
high two elements. There are also hard-to-emulate reciprocal step
instructions.

Also, the purpose of the patchset is to convert MMX builtins, since
SSE builtins depend on them, so at the end we can avoid enabling MMX
registers with -msse, thus making -mmmx orthogonal to -msse. We
don't want to sneak in an autovectorization of V2SF with the patchset.

>  #include "config/vxworks-dummy.h"

>

>  #include "config/i386/i386-opts.h"

> @@ -1143,6 +1150,13 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);

>     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode	\

>     || (MODE) == TFmode || (MODE) == V1TImode)

>

> +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we

> +   want to include only 8-byte vector modes, like V2SFmode, but not


No, we don't want to include V2SF mode vectors.

> +   DImode nor SImode.  */

> +#define VALID_MMX_WITH_SSE_REG_MODE(MODE)				\

> +  ((MODE) == V1DImode || (MODE) == V8QImode || (MODE) == V4HImode	\

> +   || (MODE) == V2SImode || (MODE) == V2SFmode)


Without V2SFmode, the above definition is unneeded.

Uros.

>  #define VALID_SSE2_REG_MODE(MODE)					\

>    ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\

>     || (MODE) == V2DImode || (MODE) == DFmode)

> --

> 2.20.1

>

>

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 12bc7926f86..ba02c26c8b2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -40235,6 +40235,7 @@  ix86_set_reg_reg_cost (machine_mode mode)
 	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
 	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
 	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
+	  || (TARGET_MMX_WITH_SSE && VALID_MMX_WITH_SSE_REG_MODE (mode))
 	  || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
 	units = GET_MODE_SIZE (mode);
     }
@@ -44057,6 +44058,8 @@  ix86_vector_mode_supported_p (machine_mode mode)
     return true;
   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
     return true;
+  if (TARGET_MMX_WITH_SSE && VALID_MMX_WITH_SSE_REG_MODE (mode))
+    return true;
   if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
     return true;
   if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 83b025e0cf5..3ae0900caa0 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -201,6 +201,13 @@  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_16BIT	TARGET_CODE16
 #define TARGET_16BIT_P(x)	TARGET_CODE16_P(x)
 
+/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
+   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.  */
+#define TARGET_MMX_WITH_SSE \
+  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
+#define TARGET_MMX_WITH_SSE_P(x) \
+  (TARGET_64BIT_P (x) && TARGET_SSE2_P (x) && !TARGET_3DNOW_P (x))
+
 #include "config/vxworks-dummy.h"
 
 #include "config/i386/i386-opts.h"
@@ -1143,6 +1150,13 @@  extern const char *host_detect_local_cpu (int argc, const char **argv);
    || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode	\
    || (MODE) == TFmode || (MODE) == V1TImode)
 
+/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
+   want to include only 8-byte vector modes, like V2SFmode, but not
+   DImode nor SImode.  */
+#define VALID_MMX_WITH_SSE_REG_MODE(MODE)				\
+  ((MODE) == V1DImode || (MODE) == V8QImode || (MODE) == V4HImode	\
+   || (MODE) == V2SImode || (MODE) == V2SFmode)
+
 #define VALID_SSE2_REG_MODE(MODE)					\
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\
    || (MODE) == V2DImode || (MODE) == DFmode)