[01/43] i386: Allow 64-bit vector modes in SSE registers

Message ID 20190209132352.1828-2-hjl.tools@gmail.com
State New
Headers show
Series
  • V2: Emulate MMX intrinsics with SSE
Related show

Commit Message

H.J. Lu Feb. 9, 2019, 1:23 p.m.
In 64-bit mode, SSE2 can be used to emulate MMX instructions without
3DNOW.  We can use SSE2 to support 64-bit vectors.

	PR target/89021
	* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
	* config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector
	modes for TARGET_MMX_WITH_SSE.
	(SSE_REG_MODE_P): Likewise.
---
 gcc/config/i386/i386.h | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

-- 
2.20.1

Comments

Uros Bizjak Feb. 9, 2019, 2:09 p.m. | #1
On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> In 64-bit mode, SSE2 can be used to emulate MMX instructions without

> 3DNOW.  We can use SSE2 to support 64-bit vectors.

>

> 	PR target/89021

> 	* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.

> 	* config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector

> 	modes for TARGET_MMX_WITH_SSE.

> 	(SSE_REG_MODE_P): Likewise.

> ---

>  gcc/config/i386/i386.h | 20 ++++++++++++++++++--

>  1 file changed, 18 insertions(+), 2 deletions(-)

>

> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h

> index 83b025e0cf5..c1df3ec3326 100644

> --- a/gcc/config/i386/i386.h

> +++ b/gcc/config/i386/i386.h

> @@ -585,6 +585,11 @@ extern unsigned char

> ix86_arch_features[X86_ARCH_LAST];

>

>  #define TARGET_FISTTP		(TARGET_SSE3 && TARGET_80387)

>

> +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.

> +   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.  */

> +#define TARGET_MMX_WITH_SSE \

> +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)

> +

>  extern unsigned char x86_prefetch_sse;

>  #define TARGET_PREFETCH_SSE	x86_prefetch_sse

>

> @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int argc,

> const char **argv);

>     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode	\

>     || (MODE) == TFmode || (MODE) == V1TImode)

>

> +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we

> +   want to include 8-byte vector modes, like V2SFmode, but not DImode

> +   nor SImode.  */


This is strange, since we already allow all MMX modes in SSE
registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
return:

return ((TARGET_AVX
             && VALID_AVX256_REG_OR_OI_MODE (mode))
            || VALID_SSE_REG_MODE (mode)
            || VALID_SSE2_REG_MODE (mode)
            || VALID_MMX_REG_MODE (mode)
            || VALID_MMX_REG_MODE_3DNOW (mode));

I'd expect that changed VALID_SSE2_REG_MODE affects only
ix86_vector_mode_supported_p when MMX is disabled and perhaps
ix86_set_reg_reg_cost cost function.

Are there any concrete issues when allowing all MMX (including 3DNOW?)
modes in VALID_SSE2_REG_MODE?

Uros.

>  #define VALID_SSE2_REG_MODE(MODE)					\

>    ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\

> -   || (MODE) == V2DImode || (MODE) == DFmode)

> +   || (MODE) == V2DImode || (MODE) == DFmode				\

> +   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode	\

> +			       || (MODE) == V4HImode			\

> +			       || (MODE) == V2SImode			\

> +			       || (MODE) == V2SFmode)))

>

>  #define VALID_SSE_REG_MODE(MODE)					\

>    ((MODE) == V1TImode || (MODE) == TImode				\

> @@ -1188,7 +1200,11 @@ extern const char *host_detect_local_cpu (int argc,

> const char **argv);

>     || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode	\

>     || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode	\

>     || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode	\

> -   || (MODE) == V16SFmode)

> +   || (MODE) == V16SFmode						\

> +   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode	\

> +			       || (MODE) == V4HImode			\

> +			       || (MODE) == V2SImode			\

> +			       || (MODE) == V2SFmode)))

>

>  #define X87_FLOAT_MODE_P(MODE)	\

>    (TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) ==

> XFmode))

> --

> 2.20.1

>

>
H.J. Lu Feb. 9, 2019, 2:31 p.m. | #2
On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>

> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:

> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without

> > 3DNOW.  We can use SSE2 to support 64-bit vectors.

> >

> >       PR target/89021

> >       * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.

> >       * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector

> >       modes for TARGET_MMX_WITH_SSE.

> >       (SSE_REG_MODE_P): Likewise.

> > ---

> >  gcc/config/i386/i386.h | 20 ++++++++++++++++++--

> >  1 file changed, 18 insertions(+), 2 deletions(-)

> >

> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h

> > index 83b025e0cf5..c1df3ec3326 100644

> > --- a/gcc/config/i386/i386.h

> > +++ b/gcc/config/i386/i386.h

> > @@ -585,6 +585,11 @@ extern unsigned char

> > ix86_arch_features[X86_ARCH_LAST];

> >

> >  #define TARGET_FISTTP                (TARGET_SSE3 && TARGET_80387)

> >

> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.

> > +   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.  */

> > +#define TARGET_MMX_WITH_SSE \

> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)

> > +

> >  extern unsigned char x86_prefetch_sse;

> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse

> >

> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int argc,

> > const char **argv);

> >     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \

> >     || (MODE) == TFmode || (MODE) == V1TImode)

> >

> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we

> > +   want to include 8-byte vector modes, like V2SFmode, but not DImode

> > +   nor SImode.  */

>

> This is strange, since we already allow all MMX modes in SSE

> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we

> return:

>

> return ((TARGET_AVX

>              && VALID_AVX256_REG_OR_OI_MODE (mode))

>             || VALID_SSE_REG_MODE (mode)

>             || VALID_SSE2_REG_MODE (mode)

>             || VALID_MMX_REG_MODE (mode)

>             || VALID_MMX_REG_MODE_3DNOW (mode));

>

> I'd expect that changed VALID_SSE2_REG_MODE affects only

> ix86_vector_mode_supported_p when MMX is disabled and perhaps

> ix86_set_reg_reg_cost cost function.

>

> Are there any concrete issues when allowing all MMX (including 3DNOW?)

> modes in VALID_SSE2_REG_MODE?


The problem is with DImode and SImode.  All other vector modes,  including
V2SF is OK.  With DImode and SImode, I got following regressions:

FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias
cannot be created; target is discardable"
FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[ \t]*\n
FAIL: go test misc/cgo/testcarchive

 gcc.dg/pr39323-3.c  is due to

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261

and

/* Decide whether a variable of mode MODE should be 128 bit aligned.  */
#define ALIGN_MODE_128(MODE) \
 ((MODE) == XFmode || SSE_REG_MODE_P (MODE))

SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different
places.   i386 backend may not be prepared to deal them in SSE_REG_MODE_P
nor VALID_SSE2_REG_MODE.

> Uros.

>

> >  #define VALID_SSE2_REG_MODE(MODE)                                    \

> >    ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode   \

> > -   || (MODE) == V2DImode || (MODE) == DFmode)

> > +   || (MODE) == V2DImode || (MODE) == DFmode                         \

> > +   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode      \

> > +                            || (MODE) == V4HImode                    \

> > +                            || (MODE) == V2SImode                    \

> > +                            || (MODE) == V2SFmode)))

> >

> >  #define VALID_SSE_REG_MODE(MODE)                                     \

> >    ((MODE) == V1TImode || (MODE) == TImode                            \

> > @@ -1188,7 +1200,11 @@ extern const char *host_detect_local_cpu (int argc,

> > const char **argv);

> >     || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode \

> >     || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode        \

> >     || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode       \

> > -   || (MODE) == V16SFmode)

> > +   || (MODE) == V16SFmode                                            \

> > +   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode      \

> > +                            || (MODE) == V4HImode                    \

> > +                            || (MODE) == V2SImode                    \

> > +                            || (MODE) == V2SFmode)))

> >

> >  #define X87_FLOAT_MODE_P(MODE)       \

> >    (TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) ==

> > XFmode))

> > --

> > 2.20.1

> >

> >




-- 
H.J.
Uros Bizjak Feb. 9, 2019, 3:03 p.m. | #3
On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote:

>>

>> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:

>> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without

>> > 3DNOW.  We can use SSE2 to support 64-bit vectors.

>> >

>> >       PR target/89021

>> >       * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.

>> >       * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector

>> >       modes for TARGET_MMX_WITH_SSE.

>> >       (SSE_REG_MODE_P): Likewise.

>> > ---

>> >  gcc/config/i386/i386.h | 20 ++++++++++++++++++--

>> >  1 file changed, 18 insertions(+), 2 deletions(-)

>> >

>> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h

>> > index 83b025e0cf5..c1df3ec3326 100644

>> > --- a/gcc/config/i386/i386.h

>> > +++ b/gcc/config/i386/i386.h

>> > @@ -585,6 +585,11 @@ extern unsigned char

>> > ix86_arch_features[X86_ARCH_LAST];

>> >

>> >  #define TARGET_FISTTP                (TARGET_SSE3 && TARGET_80387)

>> >

>> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.

>> > +   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.

>> > */

>> > +#define TARGET_MMX_WITH_SSE \

>> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)

>> > +

>> >  extern unsigned char x86_prefetch_sse;

>> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse

>> >

>> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int

>> > argc,

>> > const char **argv);

>> >     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \

>> >     || (MODE) == TFmode || (MODE) == V1TImode)

>> >

>> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we

>> > +   want to include 8-byte vector modes, like V2SFmode, but not DImode

>> > +   nor SImode.  */

>>

>> This is strange, since we already allow all MMX modes in SSE

>> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we

>> return:

>>

>> return ((TARGET_AVX

>>              && VALID_AVX256_REG_OR_OI_MODE (mode))

>>             || VALID_SSE_REG_MODE (mode)

>>             || VALID_SSE2_REG_MODE (mode)

>>             || VALID_MMX_REG_MODE (mode)

>>             || VALID_MMX_REG_MODE_3DNOW (mode));

>>

>> I'd expect that changed VALID_SSE2_REG_MODE affects only

>> ix86_vector_mode_supported_p when MMX is disabled and perhaps

>> ix86_set_reg_reg_cost cost function.

>>

>> Are there any concrete issues when allowing all MMX (including 3DNOW?)

>> modes in VALID_SSE2_REG_MODE?

>

> The problem is with DImode and SImode.  All other vector modes,  including

> V2SF is OK.  With DImode and SImode, I got following regressions:

>

> FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias

> cannot be created; target is discardable"

> FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[ \t]*\n

> FAIL: go test misc/cgo/testcarchive

>

>  gcc.dg/pr39323-3.c  is due to

>

> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261

>

> and

>

> /* Decide whether a variable of mode MODE should be 128 bit aligned.  */

> #define ALIGN_MODE_128(MODE) \

>  ((MODE) == XFmode || SSE_REG_MODE_P (MODE))


Hm, this is a bit worrying, we don't want to introduce ABI
incompatibilites w.r.t. alignment. We still need to be ABI compatible
for MMX values and emit unaligned loads/stores when necessary.

> SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different

> places.   i386 backend may not be prepared to deal them in SSE_REG_MODE_P

> nor VALID_SSE2_REG_MODE.


I think we have to review the usage of these two changed defines to
prevent any ABI issues or other hidden issues.

Uros.
H.J. Lu Feb. 9, 2019, 3:07 p.m. | #4
On Sat, Feb 9, 2019 at 7:03 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>

> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:

> > On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote:

> >>

> >> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:

> >> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without

> >> > 3DNOW.  We can use SSE2 to support 64-bit vectors.

> >> >

> >> >       PR target/89021

> >> >       * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.

> >> >       * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector

> >> >       modes for TARGET_MMX_WITH_SSE.

> >> >       (SSE_REG_MODE_P): Likewise.

> >> > ---

> >> >  gcc/config/i386/i386.h | 20 ++++++++++++++++++--

> >> >  1 file changed, 18 insertions(+), 2 deletions(-)

> >> >

> >> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h

> >> > index 83b025e0cf5..c1df3ec3326 100644

> >> > --- a/gcc/config/i386/i386.h

> >> > +++ b/gcc/config/i386/i386.h

> >> > @@ -585,6 +585,11 @@ extern unsigned char

> >> > ix86_arch_features[X86_ARCH_LAST];

> >> >

> >> >  #define TARGET_FISTTP                (TARGET_SSE3 && TARGET_80387)

> >> >

> >> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.

> >> > +   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.

> >> > */

> >> > +#define TARGET_MMX_WITH_SSE \

> >> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)

> >> > +

> >> >  extern unsigned char x86_prefetch_sse;

> >> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse

> >> >

> >> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int

> >> > argc,

> >> > const char **argv);

> >> >     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \

> >> >     || (MODE) == TFmode || (MODE) == V1TImode)

> >> >

> >> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we

> >> > +   want to include 8-byte vector modes, like V2SFmode, but not DImode

> >> > +   nor SImode.  */

> >>

> >> This is strange, since we already allow all MMX modes in SSE

> >> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we

> >> return:

> >>

> >> return ((TARGET_AVX

> >>              && VALID_AVX256_REG_OR_OI_MODE (mode))

> >>             || VALID_SSE_REG_MODE (mode)

> >>             || VALID_SSE2_REG_MODE (mode)

> >>             || VALID_MMX_REG_MODE (mode)

> >>             || VALID_MMX_REG_MODE_3DNOW (mode));

> >>

> >> I'd expect that changed VALID_SSE2_REG_MODE affects only

> >> ix86_vector_mode_supported_p when MMX is disabled and perhaps

> >> ix86_set_reg_reg_cost cost function.

> >>

> >> Are there any concrete issues when allowing all MMX (including 3DNOW?)

> >> modes in VALID_SSE2_REG_MODE?

> >

> > The problem is with DImode and SImode.  All other vector modes,  including

> > V2SF is OK.  With DImode and SImode, I got following regressions:

> >

> > FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias

> > cannot be created; target is discardable"

> > FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[ \t]*\n

> > FAIL: go test misc/cgo/testcarchive

> >

> >  gcc.dg/pr39323-3.c  is due to

> >

> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261

> >

> > and

> >

> > /* Decide whether a variable of mode MODE should be 128 bit aligned.  */

> > #define ALIGN_MODE_128(MODE) \

> >  ((MODE) == XFmode || SSE_REG_MODE_P (MODE))

>

> Hm, this is a bit worrying, we don't want to introduce ABI

> incompatibilites w.r.t. alignment. We still need to be ABI compatible

> for MMX values and emit unaligned loads/stores when necessary.


We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.
And I don't think we should put DI and SI in them.

> > SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different

> > places.   i386 backend may not be prepared to deal them in SSE_REG_MODE_P

> > nor VALID_SSE2_REG_MODE.

>

> I think we have to review the usage of these two changed defines to

> prevent any ABI issues or other hidden issues.

>


Absolutely.

-- 
H.J.
Uros Bizjak Feb. 9, 2019, 6:27 p.m. | #5
On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Sat, Feb 9, 2019 at 7:03 AM Uros Bizjak <ubizjak@gmail.com> wrote:

>>

>> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:

>> > On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote:

>> >>

>> >> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:

>> >> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without

>> >> > 3DNOW.  We can use SSE2 to support 64-bit vectors.

>> >> >

>> >> >       PR target/89021

>> >> >       * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.

>> >> >       * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit

>> >> > vector

>> >> >       modes for TARGET_MMX_WITH_SSE.

>> >> >       (SSE_REG_MODE_P): Likewise.

>> >> > ---

>> >> >  gcc/config/i386/i386.h | 20 ++++++++++++++++++--

>> >> >  1 file changed, 18 insertions(+), 2 deletions(-)

>> >> >

>> >> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h

>> >> > index 83b025e0cf5..c1df3ec3326 100644

>> >> > --- a/gcc/config/i386/i386.h

>> >> > +++ b/gcc/config/i386/i386.h

>> >> > @@ -585,6 +585,11 @@ extern unsigned char

>> >> > ix86_arch_features[X86_ARCH_LAST];

>> >> >

>> >> >  #define TARGET_FISTTP                (TARGET_SSE3 && TARGET_80387)

>> >> >

>> >> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.

>> >> > +   FIXME: All 3DNOW patterns needs to be updated with SSE

>> >> > emulation.

>> >> > */

>> >> > +#define TARGET_MMX_WITH_SSE \

>> >> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)

>> >> > +

>> >> >  extern unsigned char x86_prefetch_sse;

>> >> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse

>> >> >

>> >> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int

>> >> > argc,

>> >> > const char **argv);

>> >> >     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode

>> >> > \

>> >> >     || (MODE) == TFmode || (MODE) == V1TImode)

>> >> >

>> >> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since

>> >> > we

>> >> > +   want to include 8-byte vector modes, like V2SFmode, but not

>> >> > DImode

>> >> > +   nor SImode.  */

>> >>

>> >> This is strange, since we already allow all MMX modes in SSE

>> >> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we

>> >> return:

>> >>

>> >> return ((TARGET_AVX

>> >>              && VALID_AVX256_REG_OR_OI_MODE (mode))

>> >>             || VALID_SSE_REG_MODE (mode)

>> >>             || VALID_SSE2_REG_MODE (mode)

>> >>             || VALID_MMX_REG_MODE (mode)

>> >>             || VALID_MMX_REG_MODE_3DNOW (mode));

>> >>

>> >> I'd expect that changed VALID_SSE2_REG_MODE affects only

>> >> ix86_vector_mode_supported_p when MMX is disabled and perhaps

>> >> ix86_set_reg_reg_cost cost function.

>> >>

>> >> Are there any concrete issues when allowing all MMX (including 3DNOW?)

>> >> modes in VALID_SSE2_REG_MODE?

>> >

>> > The problem is with DImode and SImode.  All other vector modes,

>> > including

>> > V2SF is OK.  With DImode and SImode, I got following regressions:

>> >

>> > FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias

>> > cannot be created; target is discardable"

>> > FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[

>> > \t]*\n

>> > FAIL: go test misc/cgo/testcarchive

>> >

>> >  gcc.dg/pr39323-3.c  is due to

>> >

>> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261

>> >

>> > and

>> >

>> > /* Decide whether a variable of mode MODE should be 128 bit aligned.

>> > */

>> > #define ALIGN_MODE_128(MODE) \

>> >  ((MODE) == XFmode || SSE_REG_MODE_P (MODE))

>>

>> Hm, this is a bit worrying, we don't want to introduce ABI

>> incompatibilites w.r.t. alignment. We still need to be ABI compatible

>> for MMX values and emit unaligned loads/stores when necessary.

>

> We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.

> And I don't think we should put DI and SI in them.


Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they
are and ammend usage sites with e.g. (TARGET_MMX_WITH_SSE &&
VALID_MMX_REG_MODE (...))? This is much more fine-grained comparing to
a big-hammer approach of changing wide-used defines like
SSE_REG_MODE_P and VALID_SSE2_REG_MODE. As an example,
ix86_hard_regno_mode_ok already includes all MMX modes for SSE_REG_P,
while mentioned ALIGN_MODE_128 would be wrong when SSE_REG_MODE_P is
changed.

Uros.


>

>> > SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different

>> > places.   i386 backend may not be prepared to deal them in

>> > SSE_REG_MODE_P

>> > nor VALID_SSE2_REG_MODE.

>>

>> I think we have to review the usage of these two changed defines to

>> prevent any ABI issues or other hidden issues.

>>

>

> Absolutely.

>

> --

> H.J.

>
H.J. Lu Feb. 9, 2019, 6:32 p.m. | #6
On Sat, Feb 9, 2019 at 10:27 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>

> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:

> > On Sat, Feb 9, 2019 at 7:03 AM Uros Bizjak <ubizjak@gmail.com> wrote:

> >>

> >> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:

> >> > On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote:

> >> >>

> >> >> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:

> >> >> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without

> >> >> > 3DNOW.  We can use SSE2 to support 64-bit vectors.

> >> >> >

> >> >> >       PR target/89021

> >> >> >       * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.

> >> >> >       * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit

> >> >> > vector

> >> >> >       modes for TARGET_MMX_WITH_SSE.

> >> >> >       (SSE_REG_MODE_P): Likewise.

> >> >> > ---

> >> >> >  gcc/config/i386/i386.h | 20 ++++++++++++++++++--

> >> >> >  1 file changed, 18 insertions(+), 2 deletions(-)

> >> >> >

> >> >> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h

> >> >> > index 83b025e0cf5..c1df3ec3326 100644

> >> >> > --- a/gcc/config/i386/i386.h

> >> >> > +++ b/gcc/config/i386/i386.h

> >> >> > @@ -585,6 +585,11 @@ extern unsigned char

> >> >> > ix86_arch_features[X86_ARCH_LAST];

> >> >> >

> >> >> >  #define TARGET_FISTTP                (TARGET_SSE3 && TARGET_80387)

> >> >> >

> >> >> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.

> >> >> > +   FIXME: All 3DNOW patterns needs to be updated with SSE

> >> >> > emulation.

> >> >> > */

> >> >> > +#define TARGET_MMX_WITH_SSE \

> >> >> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)

> >> >> > +

> >> >> >  extern unsigned char x86_prefetch_sse;

> >> >> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse

> >> >> >

> >> >> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int

> >> >> > argc,

> >> >> > const char **argv);

> >> >> >     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode

> >> >> > \

> >> >> >     || (MODE) == TFmode || (MODE) == V1TImode)

> >> >> >

> >> >> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since

> >> >> > we

> >> >> > +   want to include 8-byte vector modes, like V2SFmode, but not

> >> >> > DImode

> >> >> > +   nor SImode.  */

> >> >>

> >> >> This is strange, since we already allow all MMX modes in SSE

> >> >> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we

> >> >> return:

> >> >>

> >> >> return ((TARGET_AVX

> >> >>              && VALID_AVX256_REG_OR_OI_MODE (mode))

> >> >>             || VALID_SSE_REG_MODE (mode)

> >> >>             || VALID_SSE2_REG_MODE (mode)

> >> >>             || VALID_MMX_REG_MODE (mode)

> >> >>             || VALID_MMX_REG_MODE_3DNOW (mode));

> >> >>

> >> >> I'd expect that changed VALID_SSE2_REG_MODE affects only

> >> >> ix86_vector_mode_supported_p when MMX is disabled and perhaps

> >> >> ix86_set_reg_reg_cost cost function.

> >> >>

> >> >> Are there any concrete issues when allowing all MMX (including 3DNOW?)

> >> >> modes in VALID_SSE2_REG_MODE?

> >> >

> >> > The problem is with DImode and SImode.  All other vector modes,

> >> > including

> >> > V2SF is OK.  With DImode and SImode, I got following regressions:

> >> >

> >> > FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias

> >> > cannot be created; target is discardable"

> >> > FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[

> >> > \t]*\n

> >> > FAIL: go test misc/cgo/testcarchive

> >> >

> >> >  gcc.dg/pr39323-3.c  is due to

> >> >

> >> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261

> >> >

> >> > and

> >> >

> >> > /* Decide whether a variable of mode MODE should be 128 bit aligned.

> >> > */

> >> > #define ALIGN_MODE_128(MODE) \

> >> >  ((MODE) == XFmode || SSE_REG_MODE_P (MODE))

> >>

> >> Hm, this is a bit worrying, we don't want to introduce ABI

> >> incompatibilites w.r.t. alignment. We still need to be ABI compatible

> >> for MMX values and emit unaligned loads/stores when necessary.

> >

> > We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.

> > And I don't think we should put DI and SI in them.

>

> Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they

> are and ammend usage sites with e.g. (TARGET_MMX_WITH_SSE &&

> VALID_MMX_REG_MODE (...))? This is much more fine-grained comparing to


Not VALID_MMX_REG_MODE since it includes SI/DI, but not V2SF.
We only want 8-byte vector modes here.

> a big-hammer approach of changing wide-used defines like

> SSE_REG_MODE_P and VALID_SSE2_REG_MODE. As an example,

> ix86_hard_regno_mode_ok already includes all MMX modes for SSE_REG_P,

> while mentioned ALIGN_MODE_128 would be wrong when SSE_REG_MODE_P is

> changed.


I will give it a try.

-- 
H.J.
Uros Bizjak Feb. 9, 2019, 6:41 p.m. | #7
On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
>> >> Hm, this is a bit worrying, we don't want to introduce ABI

>> >> incompatibilites w.r.t. alignment. We still need to be ABI compatible

>> >> for MMX values and emit unaligned loads/stores when necessary.

>> >

>> > We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.

>> > And I don't think we should put DI and SI in them.

>>

>> Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they

>> are and ammend usage sites with e.g. (TARGET_MMX_WITH_SSE &&

>> VALID_MMX_REG_MODE (...))? This is much more fine-grained comparing to

>

> Not VALID_MMX_REG_MODE since it includes SI/DI, but not V2SF.

> We only want 8-byte vector modes here.


Well, I'm not forcing VALID_MMX_REG_MODE here, it is just an example;
the important part is in the addition of (TARGET_MMX_WITH_SSE &&
some_modes). Surely, we don't want to align SImode to 128 bits in
ALIGN_MODE_128.

Uros.

>> a big-hammer approach of changing wide-used defines like

>> SSE_REG_MODE_P and VALID_SSE2_REG_MODE. As an example,

>> ix86_hard_regno_mode_ok already includes all MMX modes for SSE_REG_P,

>> while mentioned ALIGN_MODE_128 would be wrong when SSE_REG_MODE_P is

>> changed.

>

> I will give it a try.

>

> --

> H.J.

>
H.J. Lu Feb. 9, 2019, 6:52 p.m. | #8
On Sat, Feb 9, 2019 at 10:41 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>

> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:

> >> >> Hm, this is a bit worrying, we don't want to introduce ABI

> >> >> incompatibilites w.r.t. alignment. We still need to be ABI compatible

> >> >> for MMX values and emit unaligned loads/stores when necessary.

> >> >

> >> > We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.

> >> > And I don't think we should put DI and SI in them.

> >>

> >> Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they

> >> are and ammend usage sites with e.g. (TARGET_MMX_WITH_SSE &&

> >> VALID_MMX_REG_MODE (...))? This is much more fine-grained comparing to

> >

> > Not VALID_MMX_REG_MODE since it includes SI/DI, but not V2SF.

> > We only want 8-byte vector modes here.

>

> Well, I'm not forcing VALID_MMX_REG_MODE here, it is just an example;

> the important part is in the addition of (TARGET_MMX_WITH_SSE &&

> some_modes). Surely, we don't want to align SImode to 128 bits in

> ALIGN_MODE_128.

>


I am testing this.

-- 
H.J.
From 1a3a4c4d2e133d99c6671788a8475efe39804dbb Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Thu, 24 Jan 2019 08:27:41 -0800
Subject: [PATCH] i386: Allow 64-bit vector modes in SSE registers

In 64-bit mode, SSE2 can be used to emulate MMX instructions without
3DNOW.  We can use SSE2 to support 64-bit vectors.

	PR target/89021
	* config/i386/i386.c (ix86_set_reg_reg_cost): Also support
	VALID_MMX_WITH_SSE_REG_MODE.
	(ix86_vector_mode_supported_p): Likewise.
	* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
	(TARGET_MMX_WITH_SSE_P): Likewise.
	(VALID_MMX_WITH_SSE_REG_MODE): Likewise.
---
 gcc/config/i386/i386.c |  3 +++
 gcc/config/i386/i386.h | 14 ++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 12bc7926f86..ba02c26c8b2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -40235,6 +40235,7 @@ ix86_set_reg_reg_cost (machine_mode mode)
 	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
 	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
 	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
+	  || (TARGET_MMX_WITH_SSE && VALID_MMX_WITH_SSE_REG_MODE (mode))
 	  || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
 	units = GET_MODE_SIZE (mode);
     }
@@ -44057,6 +44058,8 @@ ix86_vector_mode_supported_p (machine_mode mode)
     return true;
   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
     return true;
+  if (TARGET_MMX_WITH_SSE && VALID_MMX_WITH_SSE_REG_MODE (mode))
+    return true;
   if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
     return true;
   if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 83b025e0cf5..f75fd426293 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -201,6 +201,13 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_16BIT	TARGET_CODE16
 #define TARGET_16BIT_P(x)	TARGET_CODE16_P(x)
 
+/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
+   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.  */
+#define TARGET_MMX_WITH_SSE \
+  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
+#define TARGET_MMX_WITH_SSE_P(x) \
+  (TARGET_64BIT_P (x) && TARGET_SSE2_P (x) && !TARGET_3DNOW_P (x))
+
 #include "config/vxworks-dummy.h"
 
 #include "config/i386/i386-opts.h"
@@ -1143,6 +1150,13 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
    || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode	\
    || (MODE) == TFmode || (MODE) == V1TImode)
 
+/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
+   want to include 8-byte vector modes, like V2SFmode, but not DImode
+   nor SImode.  */
+#define VALID_MMX_WITH_SSE_REG_MODE(MODE)				\
+  ((MODE) == V1DImode || (MODE) == V8QImode || (MODE) == V4HImode	\
+   || (MODE) == V2SImode || (MODE) == V2SFmode)
+
 #define VALID_SSE2_REG_MODE(MODE)					\
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\
    || (MODE) == V2DImode || (MODE) == DFmode)

Patch

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 83b025e0cf5..c1df3ec3326 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -585,6 +585,11 @@  extern unsigned char ix86_arch_features[X86_ARCH_LAST];
 
 #define TARGET_FISTTP		(TARGET_SSE3 && TARGET_80387)
 
+/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
+   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.  */
+#define TARGET_MMX_WITH_SSE \
+  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
+
 extern unsigned char x86_prefetch_sse;
 #define TARGET_PREFETCH_SSE	x86_prefetch_sse
 
@@ -1143,9 +1148,16 @@  extern const char *host_detect_local_cpu (int argc, const char **argv);
    || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode	\
    || (MODE) == TFmode || (MODE) == V1TImode)
 
+/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
+   want to include 8-byte vector modes, like V2SFmode, but not DImode
+   nor SImode.  */
 #define VALID_SSE2_REG_MODE(MODE)					\
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\
-   || (MODE) == V2DImode || (MODE) == DFmode)
+   || (MODE) == V2DImode || (MODE) == DFmode				\
+   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode	\
+			       || (MODE) == V4HImode			\
+			       || (MODE) == V2SImode			\
+			       || (MODE) == V2SFmode)))
 
 #define VALID_SSE_REG_MODE(MODE)					\
   ((MODE) == V1TImode || (MODE) == TImode				\
@@ -1188,7 +1200,11 @@  extern const char *host_detect_local_cpu (int argc, const char **argv);
    || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode	\
    || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode	\
    || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode	\
-   || (MODE) == V16SFmode)
+   || (MODE) == V16SFmode						\
+   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode	\
+			       || (MODE) == V4HImode			\
+			       || (MODE) == V2SImode			\
+			       || (MODE) == V2SFmode)))
 
 #define X87_FLOAT_MODE_P(MODE)	\
   (TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode))