x32: Add addr32 prefix to UNSPEC_VSIBADDR instructions

Message ID 20190228191011.22517-1-hjl.tools@gmail.com
State New
Headers show
Series
  • x32: Add addr32 prefix to UNSPEC_VSIBADDR instructions
Related show

Commit Message

H.J. Lu Feb. 28, 2019, 7:10 p.m.
32-bit indices in VSIB address are sign-extended to 64 bits.  In x32,
when 32-bit indices are used as addresses, like in

vgatherdps %ymm7, 0(,%ymm9,1), %ymm6

32-bit indices, 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010 which
is invalid address.  Add addr32 prefix to UNSPEC_VSIBADDR instructions
for x32 if there is no base register nor symbol.

This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with

-Ofast -funroll-loops -march=haswell

gcc/

	PR target/89523
	* config/i386/i386.c (ix86_print_operand): Also handle '_' to
	add addr32 prefix if required.
	(ix86_print_operand_punct_valid_p): Allow '_'.
	* config/i386/sse.md (*avx512pf_gatherpf<mode>sf_mask): Prepend
	"%_".
	(*avx512pf_gatherpf<mode>df_mask): Likewise.
	(*avx512pf_scatterpf<mode>sf_mask): Likewise.
	(*avx512pf_scatterpf<mode>df_mask): Likewise.
	(*avx2_gathersi<mode>): Likewise.
	(*avx2_gathersi<mode>_2): Likewise.
	(*avx2_gatherdi<mode>): Likewise.
	(*avx2_gatherdi<mode>_2): Likewise.
	(*avx2_gatherdi<mode>_3): Likewise.
	(*avx2_gatherdi<mode>_4): Likewise.
	(*avx512f_gathersi<mode>): Likewise.
	(*avx512f_gathersi<mode>_2): Likewise.
	(*avx512f_gatherdi<mode>): Likewise.
	(*avx512f_gatherdi<mode>_2): Likewise.
	(*avx512f_scattersi<mode>): Likewise.
	(*avx512f_scatterdi<mode>): Likewise.

gcc/testsuite/

	PR target/89523
	* gcc.target/i386/pr89523-1.c: New test.
	* gcc.target/i386/pr89523-2.c: Likewise.
	* gcc.target/i386/pr89523-3.c: Likewise.
	* gcc.target/i386/pr89523-4.c: Likewise.
	* gcc.target/i386/pr89523-5.c: Likewise.
	* gcc.target/i386/pr89523-6.c: Likewise.
	* gcc.target/i386/pr89523-7.c: Likewise.
	* gcc.target/i386/pr89523-8.c: Likewise.
	* gcc.target/i386/pr89523-9.c: Likewise.

xxx
---
 gcc/config/i386/i386.c                    | 39 ++++++++++++++++++-
 gcc/config/i386/sse.md                    | 46 +++++++++++------------
 gcc/testsuite/gcc.target/i386/pr89523-1.c | 24 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-2.c | 17 +++++++++
 gcc/testsuite/gcc.target/i386/pr89523-3.c | 17 +++++++++
 gcc/testsuite/gcc.target/i386/pr89523-4.c | 16 ++++++++
 gcc/testsuite/gcc.target/i386/pr89523-5.c | 18 +++++++++
 gcc/testsuite/gcc.target/i386/pr89523-6.c | 17 +++++++++
 gcc/testsuite/gcc.target/i386/pr89523-7.c | 19 ++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-8.c | 19 ++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-9.c | 16 ++++++++
 11 files changed, 224 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-9.c

-- 
2.20.1

Comments

Uros Bizjak March 3, 2019, 5:27 p.m. | #1
On Thu, Feb 28, 2019 at 8:10 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>

> 32-bit indices in VSIB address are sign-extended to 64 bits.  In x32,

> when 32-bit indices are used as addresses, like in

>

> vgatherdps %ymm7, 0(,%ymm9,1), %ymm6

>

> 32-bit indices, 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010 which

> is invalid address.  Add addr32 prefix to UNSPEC_VSIBADDR instructions

> for x32 if there is no base register nor symbol.

>

> This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with

>

> -Ofast -funroll-loops -march=haswell


1. Testcases 2 to 9 fail on fedora-29 with:

In file included from /usr/include/features.h:452,
                 from /usr/include/bits/libc-header-start.h:33,
                 from /usr/include/stdlib.h:25,
                 from /ssd/uros/gcc-build-fast/gcc/include/mm_malloc.h:27,
                 from /ssd/uros/gcc-build-fast/gcc/include/xmmintrin.h:34,
                 from /ssd/uros/gcc-build-fast/gcc/include/immintrin.h:29,
                 from
/home/uros/gcc-svn/trunk/gcc/testsuite/gcc.target/i386/pr89523-2.c:7:
/usr/include/gnu/stubs.h:13:11: fatal error: gnu/stubs-x32.h: No such
file or directory

2. Does the patch work with -maddress-mode={short,long}?

3. The implementation is wrong. You should use operand substitution
with VSIB address as operand, not substitution without operand.

4. The PR is not a regression.

Uros.

>

> gcc/

>

>         PR target/89523

>         * config/i386/i386.c (ix86_print_operand): Also handle '_' to

>         add addr32 prefix if required.

>         (ix86_print_operand_punct_valid_p): Allow '_'.

>         * config/i386/sse.md (*avx512pf_gatherpf<mode>sf_mask): Prepend

>         "%_".

>         (*avx512pf_gatherpf<mode>df_mask): Likewise.

>         (*avx512pf_scatterpf<mode>sf_mask): Likewise.

>         (*avx512pf_scatterpf<mode>df_mask): Likewise.

>         (*avx2_gathersi<mode>): Likewise.

>         (*avx2_gathersi<mode>_2): Likewise.

>         (*avx2_gatherdi<mode>): Likewise.

>         (*avx2_gatherdi<mode>_2): Likewise.

>         (*avx2_gatherdi<mode>_3): Likewise.

>         (*avx2_gatherdi<mode>_4): Likewise.

>         (*avx512f_gathersi<mode>): Likewise.

>         (*avx512f_gathersi<mode>_2): Likewise.

>         (*avx512f_gatherdi<mode>): Likewise.

>         (*avx512f_gatherdi<mode>_2): Likewise.

>         (*avx512f_scattersi<mode>): Likewise.

>         (*avx512f_scatterdi<mode>): Likewise.

>

> gcc/testsuite/

>

>         PR target/89523

>         * gcc.target/i386/pr89523-1.c: New test.

>         * gcc.target/i386/pr89523-2.c: Likewise.

>         * gcc.target/i386/pr89523-3.c: Likewise.

>         * gcc.target/i386/pr89523-4.c: Likewise.

>         * gcc.target/i386/pr89523-5.c: Likewise.

>         * gcc.target/i386/pr89523-6.c: Likewise.

>         * gcc.target/i386/pr89523-7.c: Likewise.

>         * gcc.target/i386/pr89523-8.c: Likewise.

>         * gcc.target/i386/pr89523-9.c: Likewise.

>

> xxx

> ---

>  gcc/config/i386/i386.c                    | 39 ++++++++++++++++++-

>  gcc/config/i386/sse.md                    | 46 +++++++++++------------

>  gcc/testsuite/gcc.target/i386/pr89523-1.c | 24 ++++++++++++

>  gcc/testsuite/gcc.target/i386/pr89523-2.c | 17 +++++++++

>  gcc/testsuite/gcc.target/i386/pr89523-3.c | 17 +++++++++

>  gcc/testsuite/gcc.target/i386/pr89523-4.c | 16 ++++++++

>  gcc/testsuite/gcc.target/i386/pr89523-5.c | 18 +++++++++

>  gcc/testsuite/gcc.target/i386/pr89523-6.c | 17 +++++++++

>  gcc/testsuite/gcc.target/i386/pr89523-7.c | 19 ++++++++++

>  gcc/testsuite/gcc.target/i386/pr89523-8.c | 19 ++++++++++

>  gcc/testsuite/gcc.target/i386/pr89523-9.c | 16 ++++++++

>  11 files changed, 224 insertions(+), 24 deletions(-)

>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1.c

>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-2.c

>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-3.c

>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-4.c

>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-5.c

>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-6.c

>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-7.c

>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-8.c

>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-9.c

>

> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c

> index b8357a7db5d..336696136de 100644

> --- a/gcc/config/i386/i386.c

> +++ b/gcc/config/i386/i386.c

> @@ -17805,6 +17805,7 @@ print_reg (rtx x, int code, FILE *file)

>     ~ -- print "i" if TARGET_AVX2, "f" otherwise.

>     ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode

>     ! -- print NOTRACK prefix for jxx/call/ret instructions if required.

> +   _ -- print addr32 prefix if required.

>   */

>

>  void

> @@ -18356,6 +18357,42 @@ ix86_print_operand (FILE *file, rtx x, int code)

>             fputs ("addr32 ", file);

>           return;

>

> +       case '_':

> +         if (TARGET_X32)

> +           {

> +             subrtx_var_iterator::array_type array;

> +             FOR_EACH_SUBRTX_VAR (iter, array,

> +                                  PATTERN (current_output_insn), ALL)

> +               {

> +                 rtx addr = *iter;

> +                 if (!MEM_P (addr))

> +                   continue;

> +                 addr = XEXP (addr, 0);

> +                 if (GET_CODE (addr) == UNSPEC

> +                     && XINT (addr, 1) == UNSPEC_VSIBADDR)

> +                   {

> +                     /* NB: 32-bit indices in VSIB address are

> +                        sign-extended to 64 bits. In x32, if 32-bit

> +                        address 0xf7fa3010 is sign-extended to

> +                        0xfffffffff7fa3010 which is invalid address.

> +                        Add addr32 prefix if there is no base register

> +                        nor symbol.  */

> +                     bool ok;

> +                     struct ix86_address parts;

> +                     ok = ix86_decompose_address (XVECEXP (addr, 0, 0),

> +                                                  &parts);

> +                     gcc_assert (ok && parts.index == NULL_RTX);

> +                     if (parts.base == NULL_RTX

> +                         && (parts.disp == NULL_RTX

> +                             || !symbolic_operand (parts.disp,

> +                                                   GET_MODE (parts.disp))))

> +                       fputs ("addr32 ", file);

> +                     break;

> +                   }

> +               }

> +           }

> +         return;

> +

>         case '!':

>           if (ix86_notrack_prefixed_insn_p (current_output_insn))

>             fputs ("notrack ", file);

> @@ -18507,7 +18544,7 @@ static bool

>  ix86_print_operand_punct_valid_p (unsigned char code)

>  {

>    return (code == '*' || code == '+' || code == '&' || code == ';'

> -         || code == '~' || code == '^' || code == '!');

> +         || code == '~' || code == '^' || code == '!' || code == '_');

>  }

>

>  /* Print a memory operand whose address is ADDR.  */

> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md

> index ac299495b2c..13692e47123 100644

> --- a/gcc/config/i386/sse.md

> +++ b/gcc/config/i386/sse.md

> @@ -17401,9 +17401,9 @@

>      case 3:

>        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as

>          gas changed what it requires incompatibly.  */

> -      return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

> +      return "%_vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

>      case 2:

> -      return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

> +      return "%_vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

>      default:

>        gcc_unreachable ();

>      }

> @@ -17448,9 +17448,9 @@

>      case 3:

>        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as

>          gas changed what it requires incompatibly.  */

> -      return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";

> +      return "%_vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";

>      case 2:

> -      return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";

> +      return "%_vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";

>      default:

>        gcc_unreachable ();

>      }

> @@ -17496,10 +17496,10 @@

>      case 7:

>        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as

>          gas changed what it requires incompatibly.  */

> -      return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

> +      return "%_vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

>      case 2:

>      case 6:

> -      return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

> +      return "%_vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

>      default:

>        gcc_unreachable ();

>      }

> @@ -17545,10 +17545,10 @@

>      case 7:

>        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as

>          gas changed what it requires incompatibly.  */

> -      return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";

> +      return "%_vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";

>      case 2:

>      case 6:

> -      return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";

> +      return "%_vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";

>      default:

>        gcc_unreachable ();

>      }

> @@ -20292,7 +20292,7 @@

>           UNSPEC_GATHER))

>     (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]

>    "TARGET_AVX2"

> -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"

> +  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"

>    [(set_attr "type" "ssemov")

>     (set_attr "prefix" "vex")

>     (set_attr "mode" "<sseinsnmode>")])

> @@ -20312,7 +20312,7 @@

>           UNSPEC_GATHER))

>     (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]

>    "TARGET_AVX2"

> -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"

> +  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"

>    [(set_attr "type" "ssemov")

>     (set_attr "prefix" "vex")

>     (set_attr "mode" "<sseinsnmode>")])

> @@ -20353,7 +20353,7 @@

>           UNSPEC_GATHER))

>     (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]

>    "TARGET_AVX2"

> -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"

> +  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"

>    [(set_attr "type" "ssemov")

>     (set_attr "prefix" "vex")

>     (set_attr "mode" "<sseinsnmode>")])

> @@ -20375,8 +20375,8 @@

>    "TARGET_AVX2"

>  {

>    if (<MODE>mode != <VEC_GATHER_SRCDI>mode)

> -    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";

> -  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";

> +    return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";

> +  return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";

>  }

>    [(set_attr "type" "ssemov")

>     (set_attr "prefix" "vex")

> @@ -20400,7 +20400,7 @@

>                      (const_int 2) (const_int 3)])))

>     (clobber (match_scratch:VI4F_256 1 "=&x"))]

>    "TARGET_AVX2"

> -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"

> +  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"

>    [(set_attr "type" "ssemov")

>     (set_attr "prefix" "vex")

>     (set_attr "mode" "<sseinsnmode>")])

> @@ -20423,7 +20423,7 @@

>                      (const_int 2) (const_int 3)])))

>     (clobber (match_scratch:VI4F_256 1 "=&x"))]

>    "TARGET_AVX2"

> -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"

> +  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"

>    [(set_attr "type" "ssemov")

>     (set_attr "prefix" "vex")

>     (set_attr "mode" "<sseinsnmode>")])

> @@ -20463,7 +20463,7 @@

>    "TARGET_AVX512F"

>  ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as

>  ;; gas changed what it requires incompatibly.

> -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"

> +  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"

>    [(set_attr "type" "ssemov")

>     (set_attr "prefix" "evex")

>     (set_attr "mode" "<sseinsnmode>")])

> @@ -20484,7 +20484,7 @@

>    "TARGET_AVX512F"

>  ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as

>  ;; gas changed what it requires incompatibly.

> -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"

> +  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"

>    [(set_attr "type" "ssemov")

>     (set_attr "prefix" "evex")

>     (set_attr "mode" "<sseinsnmode>")])

> @@ -20525,7 +20525,7 @@

>    "TARGET_AVX512F"

>  ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as

>  ;; gas changed what it requires incompatibly.

> -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"

> +  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"

>    [(set_attr "type" "ssemov")

>     (set_attr "prefix" "evex")

>     (set_attr "mode" "<sseinsnmode>")])

> @@ -20550,11 +20550,11 @@

>    if (<MODE>mode != <VEC_GATHER_SRCDI>mode)

>      {

>        if (<MODE_SIZE> != 64)

> -       return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";

> +       return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";

>        else

> -       return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";

> +       return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";

>      }

> -  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";

> +  return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";

>  }

>    [(set_attr "type" "ssemov")

>     (set_attr "prefix" "evex")

> @@ -20593,7 +20593,7 @@

>    "TARGET_AVX512F"

>  ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as

>  ;; gas changed what it requires incompatibly.

> -  "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"

> +  "%_v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"

>    [(set_attr "type" "ssemov")

>     (set_attr "prefix" "evex")

>     (set_attr "mode" "<sseinsnmode>")])

> @@ -20631,7 +20631,7 @@

>    "TARGET_AVX512F"

>  ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as

>  ;; gas changed what it requires incompatibly.

> -  "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"

> +  "%_v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"

>    [(set_attr "type" "ssemov")

>     (set_attr "prefix" "evex")

>     (set_attr "mode" "<sseinsnmode>")])

> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1.c b/gcc/testsuite/gcc.target/i386/pr89523-1.c

> new file mode 100644

> index 00000000000..f7ed24d1592

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/pr89523-1.c

> @@ -0,0 +1,24 @@

> +/* { dg-do compile { target { ! ia32 } } } */

> +/* { dg-require-effective-target maybe_x32 } */

> +/* { dg-options "-mx32 -Ofast -funroll-loops -march=haswell" } */

> +/* { dg-final { scan-assembler-not "\tvgather" } } */

> +/* { dg-final { scan-assembler "addr32 vgather" } } */

> +

> +void foo (void);

> +

> +extern float *ncost;

> +

> +float

> +bar (int type, int num)

> +{

> +  int i;

> +  float cost;

> +

> +  cost = 0;

> +  for (i = 0; i < num; i++)

> +    if (type)

> +      cost += ncost[i];

> +    else

> +      foo ();

> +  return (cost);

> +}

> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-2.c b/gcc/testsuite/gcc.target/i386/pr89523-2.c

> new file mode 100644

> index 00000000000..7423f579b5e

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/pr89523-2.c

> @@ -0,0 +1,17 @@

> +/* { dg-do compile { target { ! ia32 } } } */

> +/* { dg-require-effective-target maybe_x32 } */

> +/* { dg-options "-mx32 -O2 -march=haswell" } */

> +/* { dg-final { scan-assembler "\tvgather" } } */

> +/* { dg-final { scan-assembler-not "addr32 vgather" } } */

> +

> +#include <immintrin.h>

> +

> +__m128d x;

> +double *base;

> +__m128i idx;

> +

> +void extern

> +avx2_test (void)

> +{

> +  x = _mm_i32gather_pd (base, idx, 1);

> +}

> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-3.c b/gcc/testsuite/gcc.target/i386/pr89523-3.c

> new file mode 100644

> index 00000000000..606f9aac659

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/pr89523-3.c

> @@ -0,0 +1,17 @@

> +/* { dg-do compile { target { ! ia32 } } } */

> +/* { dg-require-effective-target maybe_x32 } */

> +/* { dg-options "-mx32 -O2 -march=haswell" } */

> +/* { dg-final { scan-assembler "\tvgather" } } */

> +/* { dg-final { scan-assembler-not "addr32 vgather" } } */

> +

> +#include <immintrin.h>

> +

> +__m128d x;

> +double *base;

> +__m128i idx;

> +

> +void extern

> +avx2_test (void)

> +{

> +  x = _mm_i64gather_pd (base, idx, 1);

> +}

> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-4.c b/gcc/testsuite/gcc.target/i386/pr89523-4.c

> new file mode 100644

> index 00000000000..155b818191f

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/pr89523-4.c

> @@ -0,0 +1,16 @@

> +/* { dg-do compile { target { ! ia32 } } } */

> +/* { dg-require-effective-target maybe_x32 } */

> +/* { dg-options "-mx32 -O2 -march=haswell" } */

> +/* { dg-final { scan-assembler-not "\tvgather" } } */

> +/* { dg-final { scan-assembler "addr32 vgather" } } */

> +

> +#include <immintrin.h>

> +

> +__m128d x;

> +__m128i idx;

> +

> +void extern

> +avx2_test (void)

> +{

> +  x = _mm_i32gather_pd (NULL, idx, 1);

> +}

> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-5.c b/gcc/testsuite/gcc.target/i386/pr89523-5.c

> new file mode 100644

> index 00000000000..11210ff8f78

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/pr89523-5.c

> @@ -0,0 +1,18 @@

> +/* { dg-do compile { target { ! ia32 } } } */

> +/* { dg-require-effective-target maybe_x32 } */

> +/* { dg-options "-mx32 -O2 -mavx512pf" } */

> +/* { dg-final { scan-assembler "\tvgather" } } */

> +/* { dg-final { scan-assembler-not "addr32 vgather" } } */

> +

> +#include <immintrin.h>

> +

> +volatile __m256i idx;

> +volatile __mmask8 m8;

> +void *base;

> +

> +void extern

> +avx512pf_test (void)

> +{

> +  _mm512_prefetch_i32gather_pd (idx, base, 8, _MM_HINT_T0);

> +  _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, _MM_HINT_T0);

> +}

> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-6.c b/gcc/testsuite/gcc.target/i386/pr89523-6.c

> new file mode 100644

> index 00000000000..0254ad435e3

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/pr89523-6.c

> @@ -0,0 +1,17 @@

> +/* { dg-do compile { target { ! ia32 } } } */

> +/* { dg-require-effective-target maybe_x32 } */

> +/* { dg-options "-mx32 -O2 -mavx512pf" } */

> +/* { dg-final { scan-assembler-not "\tvgather" } } */

> +/* { dg-final { scan-assembler "addr32 vgather" } } */

> +

> +#include <immintrin.h>

> +

> +volatile __m256i idx;

> +volatile __mmask8 m8;

> +

> +void extern

> +avx512pf_test (void)

> +{

> +  _mm512_prefetch_i32gather_pd (idx, NULL, 8, _MM_HINT_T0);

> +  _mm512_mask_prefetch_i32gather_pd (idx, m8, NULL, 8, _MM_HINT_T0);

> +}

> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-7.c b/gcc/testsuite/gcc.target/i386/pr89523-7.c

> new file mode 100644

> index 00000000000..1c357bc8505

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/pr89523-7.c

> @@ -0,0 +1,19 @@

> +/* { dg-do compile { target { ! ia32 } } } */

> +/* { dg-require-effective-target maybe_x32 } */

> +/* { dg-options "-mx32 -O2 -mavx512f" } */

> +/* { dg-final { scan-assembler "\tvscatter" } } */

> +/* { dg-final { scan-assembler-not "addr32 vscatter" } } */

> +

> +#include <immintrin.h>

> +

> +volatile __m512d src;

> +volatile __m256i idx;

> +volatile __mmask8 m8;

> +double *addr;

> +

> +void extern

> +avx512f_test (void)

> +{

> +  _mm512_i32scatter_pd (addr, idx, src, 8);

> +  _mm512_mask_i32scatter_pd (addr, m8, idx, src, 8);

> +}

> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-8.c b/gcc/testsuite/gcc.target/i386/pr89523-8.c

> new file mode 100644

> index 00000000000..37b0a0bebb7

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/pr89523-8.c

> @@ -0,0 +1,19 @@

> +/* { dg-do compile { target { ! ia32 } } } */

> +/* { dg-require-effective-target maybe_x32 } */

> +/* { dg-options "-mx32 -O2 -mavx512f" } */

> +/* { dg-final { scan-assembler "\tvscatter" } } */

> +/* { dg-final { scan-assembler-not "addr32 vscatter" } } */

> +

> +#include <immintrin.h>

> +

> +volatile __m512d src;

> +volatile __m512i idx;

> +volatile __mmask8 m8;

> +double *addr;

> +

> +void extern

> +avx512f_test (void)

> +{

> +  _mm512_i64scatter_pd (addr, idx, src, 8);

> +  _mm512_mask_i64scatter_pd (addr, m8, idx, src, 8);

> +}

> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-9.c b/gcc/testsuite/gcc.target/i386/pr89523-9.c

> new file mode 100644

> index 00000000000..a878f1e9efb

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/pr89523-9.c

> @@ -0,0 +1,16 @@

> +/* { dg-do compile { target { ! ia32 } } } */

> +/* { dg-require-effective-target maybe_x32 } */

> +/* { dg-options "-mx32 -O2 -mavx512f" } */

> +/* { dg-final { scan-assembler-not "\tvscatter" } } */

> +/* { dg-final { scan-assembler "addr32 vscatter" } } */

> +

> +#include <immintrin.h>

> +

> +volatile __m512d src;

> +volatile __m256i idx;

> +

> +void extern

> +avx512f_test (void)

> +{

> +  _mm512_i32scatter_pd (NULL, idx, src, 8);

> +}

> --

> 2.20.1

>
H.J. Lu March 3, 2019, 9:17 p.m. | #2
On Sun, Mar 3, 2019 at 9:27 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>

> On Thu, Feb 28, 2019 at 8:10 PM H.J. Lu <hjl.tools@gmail.com> wrote:

> >

> > 32-bit indices in VSIB address are sign-extended to 64 bits.  In x32,

> > when 32-bit indices are used as addresses, like in

> >

> > vgatherdps %ymm7, 0(,%ymm9,1), %ymm6

> >

> > 32-bit indices, 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010 which

> > is invalid address.  Add addr32 prefix to UNSPEC_VSIBADDR instructions

> > for x32 if there is no base register nor symbol.

> >

> > This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with

> >

> > -Ofast -funroll-loops -march=haswell

>

> 1. Testcases 2 to 9 fail on fedora-29 with:

>

> In file included from /usr/include/features.h:452,

>                  from /usr/include/bits/libc-header-start.h:33,

>                  from /usr/include/stdlib.h:25,

>                  from /ssd/uros/gcc-build-fast/gcc/include/mm_malloc.h:27,

>                  from /ssd/uros/gcc-build-fast/gcc/include/xmmintrin.h:34,

>                  from /ssd/uros/gcc-build-fast/gcc/include/immintrin.h:29,

>                  from

> /home/uros/gcc-svn/trunk/gcc/testsuite/gcc.target/i386/pr89523-2.c:7:

> /usr/include/gnu/stubs.h:13:11: fatal error: gnu/stubs-x32.h: No such

> file or directory


I will update tests to remove  "#include immintrin.h"

> 2. Does the patch work with -maddress-mode={short,long}?


Yes.

> 3. The implementation is wrong. You should use operand substitution

> with VSIB address as operand, not substitution without operand.


How can I add an addr32 prefix with operand substitution?  This is
very similar to "%^".  My updated patch will use "%^".

> 4. The PR is not a regression.


Correct.

H.J.
> Uros.

>

> >

> > gcc/

> >

> >         PR target/89523

> >         * config/i386/i386.c (ix86_print_operand): Also handle '_' to

> >         add addr32 prefix if required.

> >         (ix86_print_operand_punct_valid_p): Allow '_'.

> >         * config/i386/sse.md (*avx512pf_gatherpf<mode>sf_mask): Prepend

> >         "%_".

> >         (*avx512pf_gatherpf<mode>df_mask): Likewise.

> >         (*avx512pf_scatterpf<mode>sf_mask): Likewise.

> >         (*avx512pf_scatterpf<mode>df_mask): Likewise.

> >         (*avx2_gathersi<mode>): Likewise.

> >         (*avx2_gathersi<mode>_2): Likewise.

> >         (*avx2_gatherdi<mode>): Likewise.

> >         (*avx2_gatherdi<mode>_2): Likewise.

> >         (*avx2_gatherdi<mode>_3): Likewise.

> >         (*avx2_gatherdi<mode>_4): Likewise.

> >         (*avx512f_gathersi<mode>): Likewise.

> >         (*avx512f_gathersi<mode>_2): Likewise.

> >         (*avx512f_gatherdi<mode>): Likewise.

> >         (*avx512f_gatherdi<mode>_2): Likewise.

> >         (*avx512f_scattersi<mode>): Likewise.

> >         (*avx512f_scatterdi<mode>): Likewise.

> >

> > gcc/testsuite/

> >

> >         PR target/89523

> >         * gcc.target/i386/pr89523-1.c: New test.

> >         * gcc.target/i386/pr89523-2.c: Likewise.

> >         * gcc.target/i386/pr89523-3.c: Likewise.

> >         * gcc.target/i386/pr89523-4.c: Likewise.

> >         * gcc.target/i386/pr89523-5.c: Likewise.

> >         * gcc.target/i386/pr89523-6.c: Likewise.

> >         * gcc.target/i386/pr89523-7.c: Likewise.

> >         * gcc.target/i386/pr89523-8.c: Likewise.

> >         * gcc.target/i386/pr89523-9.c: Likewise.

> >

> > xxx

> > ---

> >  gcc/config/i386/i386.c                    | 39 ++++++++++++++++++-

> >  gcc/config/i386/sse.md                    | 46 +++++++++++------------

> >  gcc/testsuite/gcc.target/i386/pr89523-1.c | 24 ++++++++++++

> >  gcc/testsuite/gcc.target/i386/pr89523-2.c | 17 +++++++++

> >  gcc/testsuite/gcc.target/i386/pr89523-3.c | 17 +++++++++

> >  gcc/testsuite/gcc.target/i386/pr89523-4.c | 16 ++++++++

> >  gcc/testsuite/gcc.target/i386/pr89523-5.c | 18 +++++++++

> >  gcc/testsuite/gcc.target/i386/pr89523-6.c | 17 +++++++++

> >  gcc/testsuite/gcc.target/i386/pr89523-7.c | 19 ++++++++++

> >  gcc/testsuite/gcc.target/i386/pr89523-8.c | 19 ++++++++++

> >  gcc/testsuite/gcc.target/i386/pr89523-9.c | 16 ++++++++

> >  11 files changed, 224 insertions(+), 24 deletions(-)

> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1.c

> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-2.c

> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-3.c

> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-4.c

> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-5.c

> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-6.c

> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-7.c

> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-8.c

> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-9.c

> >

> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c

> > index b8357a7db5d..336696136de 100644

> > --- a/gcc/config/i386/i386.c

> > +++ b/gcc/config/i386/i386.c

> > @@ -17805,6 +17805,7 @@ print_reg (rtx x, int code, FILE *file)

> >     ~ -- print "i" if TARGET_AVX2, "f" otherwise.

> >     ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode

> >     ! -- print NOTRACK prefix for jxx/call/ret instructions if required.

> > +   _ -- print addr32 prefix if required.

> >   */

> >

> >  void

> > @@ -18356,6 +18357,42 @@ ix86_print_operand (FILE *file, rtx x, int code)

> >             fputs ("addr32 ", file);

> >           return;

> >

> > +       case '_':

> > +         if (TARGET_X32)

> > +           {

> > +             subrtx_var_iterator::array_type array;

> > +             FOR_EACH_SUBRTX_VAR (iter, array,

> > +                                  PATTERN (current_output_insn), ALL)

> > +               {

> > +                 rtx addr = *iter;

> > +                 if (!MEM_P (addr))

> > +                   continue;

> > +                 addr = XEXP (addr, 0);

> > +                 if (GET_CODE (addr) == UNSPEC

> > +                     && XINT (addr, 1) == UNSPEC_VSIBADDR)

> > +                   {

> > +                     /* NB: 32-bit indices in VSIB address are

> > +                        sign-extended to 64 bits. In x32, if 32-bit

> > +                        address 0xf7fa3010 is sign-extended to

> > +                        0xfffffffff7fa3010 which is invalid address.

> > +                        Add addr32 prefix if there is no base register

> > +                        nor symbol.  */

> > +                     bool ok;

> > +                     struct ix86_address parts;

> > +                     ok = ix86_decompose_address (XVECEXP (addr, 0, 0),

> > +                                                  &parts);

> > +                     gcc_assert (ok && parts.index == NULL_RTX);

> > +                     if (parts.base == NULL_RTX

> > +                         && (parts.disp == NULL_RTX

> > +                             || !symbolic_operand (parts.disp,

> > +                                                   GET_MODE (parts.disp))))

> > +                       fputs ("addr32 ", file);

> > +                     break;

> > +                   }

> > +               }

> > +           }

> > +         return;

> > +

> >         case '!':

> >           if (ix86_notrack_prefixed_insn_p (current_output_insn))

> >             fputs ("notrack ", file);

> > @@ -18507,7 +18544,7 @@ static bool

> >  ix86_print_operand_punct_valid_p (unsigned char code)

> >  {

> >    return (code == '*' || code == '+' || code == '&' || code == ';'

> > -         || code == '~' || code == '^' || code == '!');

> > +         || code == '~' || code == '^' || code == '!' || code == '_');

> >  }

> >

> >  /* Print a memory operand whose address is ADDR.  */

> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md

> > index ac299495b2c..13692e47123 100644

> > --- a/gcc/config/i386/sse.md

> > +++ b/gcc/config/i386/sse.md

> > @@ -17401,9 +17401,9 @@

> >      case 3:

> >        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as

> >          gas changed what it requires incompatibly.  */

> > -      return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

> > +      return "%_vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

> >      case 2:

> > -      return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

> > +      return "%_vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

> >      default:

> >        gcc_unreachable ();

> >      }

> > @@ -17448,9 +17448,9 @@

> >      case 3:

> >        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as

> >          gas changed what it requires incompatibly.  */

> > -      return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";

> > +      return "%_vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";

> >      case 2:

> > -      return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";

> > +      return "%_vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";

> >      default:

> >        gcc_unreachable ();

> >      }

> > @@ -17496,10 +17496,10 @@

> >      case 7:

> >        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as

> >          gas changed what it requires incompatibly.  */

> > -      return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

> > +      return "%_vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

> >      case 2:

> >      case 6:

> > -      return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

> > +      return "%_vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

> >      default:

> >        gcc_unreachable ();

> >      }

> > @@ -17545,10 +17545,10 @@

> >      case 7:

> >        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as

> >          gas changed what it requires incompatibly.  */

> > -      return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";

> > +      return "%_vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";

> >      case 2:

> >      case 6:

> > -      return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";

> > +      return "%_vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";

> >      default:

> >        gcc_unreachable ();

> >      }

> > @@ -20292,7 +20292,7 @@

> >           UNSPEC_GATHER))

> >     (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]

> >    "TARGET_AVX2"

> > -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"

> > +  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"

> >    [(set_attr "type" "ssemov")

> >     (set_attr "prefix" "vex")

> >     (set_attr "mode" "<sseinsnmode>")])

> > @@ -20312,7 +20312,7 @@

> >           UNSPEC_GATHER))

> >     (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]

> >    "TARGET_AVX2"

> > -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"

> > +  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"

> >    [(set_attr "type" "ssemov")

> >     (set_attr "prefix" "vex")

> >     (set_attr "mode" "<sseinsnmode>")])

> > @@ -20353,7 +20353,7 @@

> >           UNSPEC_GATHER))

> >     (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]

> >    "TARGET_AVX2"

> > -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"

> > +  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"

> >    [(set_attr "type" "ssemov")

> >     (set_attr "prefix" "vex")

> >     (set_attr "mode" "<sseinsnmode>")])

> > @@ -20375,8 +20375,8 @@

> >    "TARGET_AVX2"

> >  {

> >    if (<MODE>mode != <VEC_GATHER_SRCDI>mode)

> > -    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";

> > -  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";

> > +    return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";

> > +  return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";

> >  }

> >    [(set_attr "type" "ssemov")

> >     (set_attr "prefix" "vex")

> > @@ -20400,7 +20400,7 @@

> >                      (const_int 2) (const_int 3)])))

> >     (clobber (match_scratch:VI4F_256 1 "=&x"))]

> >    "TARGET_AVX2"

> > -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"

> > +  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"

> >    [(set_attr "type" "ssemov")

> >     (set_attr "prefix" "vex")

> >     (set_attr "mode" "<sseinsnmode>")])

> > @@ -20423,7 +20423,7 @@

> >                      (const_int 2) (const_int 3)])))

> >     (clobber (match_scratch:VI4F_256 1 "=&x"))]

> >    "TARGET_AVX2"

> > -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"

> > +  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"

> >    [(set_attr "type" "ssemov")

> >     (set_attr "prefix" "vex")

> >     (set_attr "mode" "<sseinsnmode>")])

> > @@ -20463,7 +20463,7 @@

> >    "TARGET_AVX512F"

> >  ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as

> >  ;; gas changed what it requires incompatibly.

> > -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"

> > +  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"

> >    [(set_attr "type" "ssemov")

> >     (set_attr "prefix" "evex")

> >     (set_attr "mode" "<sseinsnmode>")])

> > @@ -20484,7 +20484,7 @@

> >    "TARGET_AVX512F"

> >  ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as

> >  ;; gas changed what it requires incompatibly.

> > -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"

> > +  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"

> >    [(set_attr "type" "ssemov")

> >     (set_attr "prefix" "evex")

> >     (set_attr "mode" "<sseinsnmode>")])

> > @@ -20525,7 +20525,7 @@

> >    "TARGET_AVX512F"

> >  ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as

> >  ;; gas changed what it requires incompatibly.

> > -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"

> > +  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"

> >    [(set_attr "type" "ssemov")

> >     (set_attr "prefix" "evex")

> >     (set_attr "mode" "<sseinsnmode>")])

> > @@ -20550,11 +20550,11 @@

> >    if (<MODE>mode != <VEC_GATHER_SRCDI>mode)

> >      {

> >        if (<MODE_SIZE> != 64)

> > -       return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";

> > +       return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";

> >        else

> > -       return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";

> > +       return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";

> >      }

> > -  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";

> > +  return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";

> >  }

> >    [(set_attr "type" "ssemov")

> >     (set_attr "prefix" "evex")

> > @@ -20593,7 +20593,7 @@

> >    "TARGET_AVX512F"

> >  ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as

> >  ;; gas changed what it requires incompatibly.

> > -  "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"

> > +  "%_v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"

> >    [(set_attr "type" "ssemov")

> >     (set_attr "prefix" "evex")

> >     (set_attr "mode" "<sseinsnmode>")])

> > @@ -20631,7 +20631,7 @@

> >    "TARGET_AVX512F"

> >  ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as

> >  ;; gas changed what it requires incompatibly.

> > -  "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"

> > +  "%_v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"

> >    [(set_attr "type" "ssemov")

> >     (set_attr "prefix" "evex")

> >     (set_attr "mode" "<sseinsnmode>")])

> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1.c b/gcc/testsuite/gcc.target/i386/pr89523-1.c

> > new file mode 100644

> > index 00000000000..f7ed24d1592

> > --- /dev/null

> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-1.c

> > @@ -0,0 +1,24 @@

> > +/* { dg-do compile { target { ! ia32 } } } */

> > +/* { dg-require-effective-target maybe_x32 } */

> > +/* { dg-options "-mx32 -Ofast -funroll-loops -march=haswell" } */

> > +/* { dg-final { scan-assembler-not "\tvgather" } } */

> > +/* { dg-final { scan-assembler "addr32 vgather" } } */

> > +

> > +void foo (void);

> > +

> > +extern float *ncost;

> > +

> > +float

> > +bar (int type, int num)

> > +{

> > +  int i;

> > +  float cost;

> > +

> > +  cost = 0;

> > +  for (i = 0; i < num; i++)

> > +    if (type)

> > +      cost += ncost[i];

> > +    else

> > +      foo ();

> > +  return (cost);

> > +}

> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-2.c b/gcc/testsuite/gcc.target/i386/pr89523-2.c

> > new file mode 100644

> > index 00000000000..7423f579b5e

> > --- /dev/null

> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-2.c

> > @@ -0,0 +1,17 @@

> > +/* { dg-do compile { target { ! ia32 } } } */

> > +/* { dg-require-effective-target maybe_x32 } */

> > +/* { dg-options "-mx32 -O2 -march=haswell" } */

> > +/* { dg-final { scan-assembler "\tvgather" } } */

> > +/* { dg-final { scan-assembler-not "addr32 vgather" } } */

> > +

> > +#include <immintrin.h>

> > +

> > +__m128d x;

> > +double *base;

> > +__m128i idx;

> > +

> > +void extern

> > +avx2_test (void)

> > +{

> > +  x = _mm_i32gather_pd (base, idx, 1);

> > +}

> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-3.c b/gcc/testsuite/gcc.target/i386/pr89523-3.c

> > new file mode 100644

> > index 00000000000..606f9aac659

> > --- /dev/null

> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-3.c

> > @@ -0,0 +1,17 @@

> > +/* { dg-do compile { target { ! ia32 } } } */

> > +/* { dg-require-effective-target maybe_x32 } */

> > +/* { dg-options "-mx32 -O2 -march=haswell" } */

> > +/* { dg-final { scan-assembler "\tvgather" } } */

> > +/* { dg-final { scan-assembler-not "addr32 vgather" } } */

> > +

> > +#include <immintrin.h>

> > +

> > +__m128d x;

> > +double *base;

> > +__m128i idx;

> > +

> > +void extern

> > +avx2_test (void)

> > +{

> > +  x = _mm_i64gather_pd (base, idx, 1);

> > +}

> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-4.c b/gcc/testsuite/gcc.target/i386/pr89523-4.c

> > new file mode 100644

> > index 00000000000..155b818191f

> > --- /dev/null

> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-4.c

> > @@ -0,0 +1,16 @@

> > +/* { dg-do compile { target { ! ia32 } } } */

> > +/* { dg-require-effective-target maybe_x32 } */

> > +/* { dg-options "-mx32 -O2 -march=haswell" } */

> > +/* { dg-final { scan-assembler-not "\tvgather" } } */

> > +/* { dg-final { scan-assembler "addr32 vgather" } } */

> > +

> > +#include <immintrin.h>

> > +

> > +__m128d x;

> > +__m128i idx;

> > +

> > +void extern

> > +avx2_test (void)

> > +{

> > +  x = _mm_i32gather_pd (NULL, idx, 1);

> > +}

> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-5.c b/gcc/testsuite/gcc.target/i386/pr89523-5.c

> > new file mode 100644

> > index 00000000000..11210ff8f78

> > --- /dev/null

> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-5.c

> > @@ -0,0 +1,18 @@

> > +/* { dg-do compile { target { ! ia32 } } } */

> > +/* { dg-require-effective-target maybe_x32 } */

> > +/* { dg-options "-mx32 -O2 -mavx512pf" } */

> > +/* { dg-final { scan-assembler "\tvgather" } } */

> > +/* { dg-final { scan-assembler-not "addr32 vgather" } } */

> > +

> > +#include <immintrin.h>

> > +

> > +volatile __m256i idx;

> > +volatile __mmask8 m8;

> > +void *base;

> > +

> > +void extern

> > +avx512pf_test (void)

> > +{

> > +  _mm512_prefetch_i32gather_pd (idx, base, 8, _MM_HINT_T0);

> > +  _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, _MM_HINT_T0);

> > +}

> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-6.c b/gcc/testsuite/gcc.target/i386/pr89523-6.c

> > new file mode 100644

> > index 00000000000..0254ad435e3

> > --- /dev/null

> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-6.c

> > @@ -0,0 +1,17 @@

> > +/* { dg-do compile { target { ! ia32 } } } */

> > +/* { dg-require-effective-target maybe_x32 } */

> > +/* { dg-options "-mx32 -O2 -mavx512pf" } */

> > +/* { dg-final { scan-assembler-not "\tvgather" } } */

> > +/* { dg-final { scan-assembler "addr32 vgather" } } */

> > +

> > +#include <immintrin.h>

> > +

> > +volatile __m256i idx;

> > +volatile __mmask8 m8;

> > +

> > +void extern

> > +avx512pf_test (void)

> > +{

> > +  _mm512_prefetch_i32gather_pd (idx, NULL, 8, _MM_HINT_T0);

> > +  _mm512_mask_prefetch_i32gather_pd (idx, m8, NULL, 8, _MM_HINT_T0);

> > +}

> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-7.c b/gcc/testsuite/gcc.target/i386/pr89523-7.c

> > new file mode 100644

> > index 00000000000..1c357bc8505

> > --- /dev/null

> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-7.c

> > @@ -0,0 +1,19 @@

> > +/* { dg-do compile { target { ! ia32 } } } */

> > +/* { dg-require-effective-target maybe_x32 } */

> > +/* { dg-options "-mx32 -O2 -mavx512f" } */

> > +/* { dg-final { scan-assembler "\tvscatter" } } */

> > +/* { dg-final { scan-assembler-not "addr32 vscatter" } } */

> > +

> > +#include <immintrin.h>

> > +

> > +volatile __m512d src;

> > +volatile __m256i idx;

> > +volatile __mmask8 m8;

> > +double *addr;

> > +

> > +void extern

> > +avx512f_test (void)

> > +{

> > +  _mm512_i32scatter_pd (addr, idx, src, 8);

> > +  _mm512_mask_i32scatter_pd (addr, m8, idx, src, 8);

> > +}

> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-8.c b/gcc/testsuite/gcc.target/i386/pr89523-8.c

> > new file mode 100644

> > index 00000000000..37b0a0bebb7

> > --- /dev/null

> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-8.c

> > @@ -0,0 +1,19 @@

> > +/* { dg-do compile { target { ! ia32 } } } */

> > +/* { dg-require-effective-target maybe_x32 } */

> > +/* { dg-options "-mx32 -O2 -mavx512f" } */

> > +/* { dg-final { scan-assembler "\tvscatter" } } */

> > +/* { dg-final { scan-assembler-not "addr32 vscatter" } } */

> > +

> > +#include <immintrin.h>

> > +

> > +volatile __m512d src;

> > +volatile __m512i idx;

> > +volatile __mmask8 m8;

> > +double *addr;

> > +

> > +void extern

> > +avx512f_test (void)

> > +{

> > +  _mm512_i64scatter_pd (addr, idx, src, 8);

> > +  _mm512_mask_i64scatter_pd (addr, m8, idx, src, 8);

> > +}

> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-9.c b/gcc/testsuite/gcc.target/i386/pr89523-9.c

> > new file mode 100644

> > index 00000000000..a878f1e9efb

> > --- /dev/null

> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-9.c

> > @@ -0,0 +1,16 @@

> > +/* { dg-do compile { target { ! ia32 } } } */

> > +/* { dg-require-effective-target maybe_x32 } */

> > +/* { dg-options "-mx32 -O2 -mavx512f" } */

> > +/* { dg-final { scan-assembler-not "\tvscatter" } } */

> > +/* { dg-final { scan-assembler "addr32 vscatter" } } */

> > +

> > +#include <immintrin.h>

> > +

> > +volatile __m512d src;

> > +volatile __m256i idx;

> > +

> > +void extern

> > +avx512f_test (void)

> > +{

> > +  _mm512_i32scatter_pd (NULL, idx, src, 8);

> > +}

> > --

> > 2.20.1

> >




-- 
H.J.
Uros Bizjak March 3, 2019, 9:34 p.m. | #3
On Sun, Mar 3, 2019 at 10:18 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>

> On Sun, Mar 3, 2019 at 9:27 AM Uros Bizjak <ubizjak@gmail.com> wrote:

> >

> > On Thu, Feb 28, 2019 at 8:10 PM H.J. Lu <hjl.tools@gmail.com> wrote:

> > >

> > > 32-bit indices in VSIB address are sign-extended to 64 bits.  In x32,

> > > when 32-bit indices are used as addresses, like in

> > >

> > > vgatherdps %ymm7, 0(,%ymm9,1), %ymm6

> > >

> > > 32-bit indices, 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010 which

> > > is invalid address.  Add addr32 prefix to UNSPEC_VSIBADDR instructions

> > > for x32 if there is no base register nor symbol.

> > >

> > > This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with

> > >

> > > -Ofast -funroll-loops -march=haswell

> >

> > 1. Testcases 2 to 9 fail on fedora-29 with:

> >

> > In file included from /usr/include/features.h:452,

> >                  from /usr/include/bits/libc-header-start.h:33,

> >                  from /usr/include/stdlib.h:25,

> >                  from /ssd/uros/gcc-build-fast/gcc/include/mm_malloc.h:27,

> >                  from /ssd/uros/gcc-build-fast/gcc/include/xmmintrin.h:34,

> >                  from /ssd/uros/gcc-build-fast/gcc/include/immintrin.h:29,

> >                  from

> > /home/uros/gcc-svn/trunk/gcc/testsuite/gcc.target/i386/pr89523-2.c:7:

> > /usr/include/gnu/stubs.h:13:11: fatal error: gnu/stubs-x32.h: No such

> > file or directory

>

> I will update tests to remove  "#include immintrin.h"

>

> > 2. Does the patch work with -maddress-mode={short,long}?

>

> Yes.

>

> > 3. The implementation is wrong. You should use operand substitution

> > with VSIB address as operand, not substitution without operand.

>

> How can I add an addr32 prefix with operand substitution?  This is

> very similar to "%^".  My updated patch will use "%^".


Yes, using %^ is what I think would be the optimal solution. Other
than that, in your proposed patch, operand-less %_ scans the entire
current_output_insn to dig to the UNSPEC_VSIBADDR. You can just use
operand substitution, and do e.g. "%X2vgatherpf0..." where 'X'
processes operand 2 (vsib_address_operand) and conditionally outputs
addr32.

BTW: In a new version of the patch, please specify what is changed
from the previous version. Otherwise, review of a new version is more
or less a guesswork what changed.

Uros.

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index b8357a7db5d..336696136de 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -17805,6 +17805,7 @@  print_reg (rtx x, int code, FILE *file)
    ~ -- print "i" if TARGET_AVX2, "f" otherwise.
    ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
    ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
+   _ -- print addr32 prefix if required.
  */
 
 void
@@ -18356,6 +18357,42 @@  ix86_print_operand (FILE *file, rtx x, int code)
 	    fputs ("addr32 ", file);
 	  return;
 
+	case '_':
+	  if (TARGET_X32)
+	    {
+	      subrtx_var_iterator::array_type array;
+	      FOR_EACH_SUBRTX_VAR (iter, array,
+				   PATTERN (current_output_insn), ALL)
+		{
+		  rtx addr = *iter;
+		  if (!MEM_P (addr))
+		    continue;
+		  addr = XEXP (addr, 0);
+		  if (GET_CODE (addr) == UNSPEC
+		      && XINT (addr, 1) == UNSPEC_VSIBADDR)
+		    {
+		      /* NB: 32-bit indices in VSIB address are
+			 sign-extended to 64 bits. In x32, if 32-bit
+			 address 0xf7fa3010 is sign-extended to
+			 0xfffffffff7fa3010 which is invalid address.
+			 Add addr32 prefix if there is no base register
+			 nor symbol.  */
+		      bool ok;
+		      struct ix86_address parts;
+		      ok = ix86_decompose_address (XVECEXP (addr, 0, 0),
+						   &parts);
+		      gcc_assert (ok && parts.index == NULL_RTX);
+		      if (parts.base == NULL_RTX
+			  && (parts.disp == NULL_RTX
+			      || !symbolic_operand (parts.disp,
+						    GET_MODE (parts.disp))))
+			fputs ("addr32 ", file);
+		      break;
+		    }
+		}
+	    }
+	  return;
+
 	case '!':
 	  if (ix86_notrack_prefixed_insn_p (current_output_insn))
 	    fputs ("notrack ", file);
@@ -18507,7 +18544,7 @@  static bool
 ix86_print_operand_punct_valid_p (unsigned char code)
 {
   return (code == '*' || code == '+' || code == '&' || code == ';'
-	  || code == '~' || code == '^' || code == '!');
+	  || code == '~' || code == '^' || code == '!' || code == '_');
 }
 
 /* Print a memory operand whose address is ADDR.  */
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ac299495b2c..13692e47123 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17401,9 +17401,9 @@ 
     case 3:
       /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 	 gas changed what it requires incompatibly.  */
-      return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
+      return "%_vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
-      return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
+      return "%_vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -17448,9 +17448,9 @@ 
     case 3:
       /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 	 gas changed what it requires incompatibly.  */
-      return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
+      return "%_vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
-      return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
+      return "%_vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -17496,10 +17496,10 @@ 
     case 7:
       /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 	 gas changed what it requires incompatibly.  */
-      return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
+      return "%_vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
     case 6:
-      return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
+      return "%_vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -17545,10 +17545,10 @@ 
     case 7:
       /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 	 gas changed what it requires incompatibly.  */
-      return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
+      return "%_vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
     case 6:
-      return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
+      return "%_vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -20292,7 +20292,7 @@ 
 	  UNSPEC_GATHER))
    (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
+  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20312,7 +20312,7 @@ 
 	  UNSPEC_GATHER))
    (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
+  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20353,7 +20353,7 @@ 
 	  UNSPEC_GATHER))
    (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
+  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20375,8 +20375,8 @@ 
   "TARGET_AVX2"
 {
   if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
-    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
-  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
+    return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
+  return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
 }
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
@@ -20400,7 +20400,7 @@ 
 		     (const_int 2) (const_int 3)])))
    (clobber (match_scratch:VI4F_256 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
+  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20423,7 +20423,7 @@ 
 		     (const_int 2) (const_int 3)])))
    (clobber (match_scratch:VI4F_256 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
+  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20463,7 +20463,7 @@ 
   "TARGET_AVX512F"
 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
+  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20484,7 +20484,7 @@ 
   "TARGET_AVX512F"
 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
+  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20525,7 +20525,7 @@ 
   "TARGET_AVX512F"
 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
+  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20550,11 +20550,11 @@ 
   if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
     {
       if (<MODE_SIZE> != 64)
-	return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
+	return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
       else
-	return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
+	return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
     }
-  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
+  return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
 }
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
@@ -20593,7 +20593,7 @@ 
   "TARGET_AVX512F"
 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
+  "%_v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20631,7 +20631,7 @@ 
   "TARGET_AVX512F"
 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
+  "%_v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1.c b/gcc/testsuite/gcc.target/i386/pr89523-1.c
new file mode 100644
index 00000000000..f7ed24d1592
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-1.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -Ofast -funroll-loops -march=haswell" } */
+/* { dg-final { scan-assembler-not "\tvgather" } } */
+/* { dg-final { scan-assembler "addr32 vgather" } } */
+
+void foo (void);
+
+extern float *ncost;
+
+float
+bar (int type, int num)
+{
+  int i;
+  float cost;
+
+  cost = 0;
+  for (i = 0; i < num; i++)
+    if (type)
+      cost += ncost[i];
+    else
+      foo ();
+  return (cost);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-2.c b/gcc/testsuite/gcc.target/i386/pr89523-2.c
new file mode 100644
index 00000000000..7423f579b5e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-2.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -march=haswell" } */
+/* { dg-final { scan-assembler "\tvgather" } } */
+/* { dg-final { scan-assembler-not "addr32 vgather" } } */
+
+#include <immintrin.h>
+
+__m128d x;
+double *base;
+__m128i idx;
+
+void extern
+avx2_test (void)
+{
+  x = _mm_i32gather_pd (base, idx, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-3.c b/gcc/testsuite/gcc.target/i386/pr89523-3.c
new file mode 100644
index 00000000000..606f9aac659
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-3.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -march=haswell" } */
+/* { dg-final { scan-assembler "\tvgather" } } */
+/* { dg-final { scan-assembler-not "addr32 vgather" } } */
+
+#include <immintrin.h>
+
+__m128d x;
+double *base;
+__m128i idx;
+
+void extern
+avx2_test (void)
+{
+  x = _mm_i64gather_pd (base, idx, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-4.c b/gcc/testsuite/gcc.target/i386/pr89523-4.c
new file mode 100644
index 00000000000..155b818191f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-4.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "\tvgather" } } */
+/* { dg-final { scan-assembler "addr32 vgather" } } */
+
+#include <immintrin.h>
+
+__m128d x;
+__m128i idx;
+
+void extern
+avx2_test (void)
+{
+  x = _mm_i32gather_pd (NULL, idx, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-5.c b/gcc/testsuite/gcc.target/i386/pr89523-5.c
new file mode 100644
index 00000000000..11210ff8f78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-5.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512pf" } */
+/* { dg-final { scan-assembler "\tvgather" } } */
+/* { dg-final { scan-assembler-not "addr32 vgather" } } */
+
+#include <immintrin.h>
+
+volatile __m256i idx;
+volatile __mmask8 m8;
+void *base;
+
+void extern
+avx512pf_test (void)
+{
+  _mm512_prefetch_i32gather_pd (idx, base, 8, _MM_HINT_T0);
+  _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, _MM_HINT_T0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-6.c b/gcc/testsuite/gcc.target/i386/pr89523-6.c
new file mode 100644
index 00000000000..0254ad435e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-6.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512pf" } */
+/* { dg-final { scan-assembler-not "\tvgather" } } */
+/* { dg-final { scan-assembler "addr32 vgather" } } */
+
+#include <immintrin.h>
+
+volatile __m256i idx;
+volatile __mmask8 m8;
+
+void extern
+avx512pf_test (void)
+{
+  _mm512_prefetch_i32gather_pd (idx, NULL, 8, _MM_HINT_T0);
+  _mm512_mask_prefetch_i32gather_pd (idx, m8, NULL, 8, _MM_HINT_T0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-7.c b/gcc/testsuite/gcc.target/i386/pr89523-7.c
new file mode 100644
index 00000000000..1c357bc8505
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-7.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512f" } */
+/* { dg-final { scan-assembler "\tvscatter" } } */
+/* { dg-final { scan-assembler-not "addr32 vscatter" } } */
+
+#include <immintrin.h>
+
+volatile __m512d src;
+volatile __m256i idx;
+volatile __mmask8 m8;
+double *addr;
+
+void extern
+avx512f_test (void)
+{
+  _mm512_i32scatter_pd (addr, idx, src, 8);
+  _mm512_mask_i32scatter_pd (addr, m8, idx, src, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-8.c b/gcc/testsuite/gcc.target/i386/pr89523-8.c
new file mode 100644
index 00000000000..37b0a0bebb7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-8.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512f" } */
+/* { dg-final { scan-assembler "\tvscatter" } } */
+/* { dg-final { scan-assembler-not "addr32 vscatter" } } */
+
+#include <immintrin.h>
+
+volatile __m512d src;
+volatile __m512i idx;
+volatile __mmask8 m8;
+double *addr;
+
+void extern
+avx512f_test (void)
+{
+  _mm512_i64scatter_pd (addr, idx, src, 8);
+  _mm512_mask_i64scatter_pd (addr, m8, idx, src, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-9.c b/gcc/testsuite/gcc.target/i386/pr89523-9.c
new file mode 100644
index 00000000000..a878f1e9efb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-9.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512f" } */
+/* { dg-final { scan-assembler-not "\tvscatter" } } */
+/* { dg-final { scan-assembler "addr32 vscatter" } } */
+
+#include <immintrin.h>
+
+volatile __m512d src;
+volatile __m256i idx;
+
+void extern
+avx512f_test (void)
+{
+  _mm512_i32scatter_pd (NULL, idx, src, 8);
+}