x86: Don't compile pr82735-[345].c for x32

Message ID CAMe9rOpS8rd2uR2xWUYWkJAXAzY+91ejdu6=Udb3yhABU8P6Sg@mail.gmail.com
State New
Headers show
Series
  • x86: Don't compile pr82735-[345].c for x32
Related show

Commit Message

H.J. Lu via Gcc-patches June 7, 2021, 7:04 p.m.
On Thu, Jun 3, 2021 at 11:31 PM Hongtao Liu via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>

> On Fri, Jun 4, 2021 at 2:27 PM Uros Bizjak via Gcc-patches

> <gcc-patches@gcc.gnu.org> wrote:

> >

> > On Thu, Jun 3, 2021 at 8:54 AM liuhongt <hongtao.liu@intel.com> wrote:

> > >

> > > When __builtin_ia32_vzeroupper is called explicitly, the corresponding

> > > vzeroupper pattern does not carry any CLOBBERS or SETs before LRA,

> > > which leads to incorrect optimization in pass_reload. In order to

> > > solve this problem, this patch refines instructions as call_insns in

> > > which the call has a special vzeroupper ABI.

> > >

> > > gcc/ChangeLog:

> > >

> > >         PR target/82735

> > >         * config/i386/i386-expand.c (ix86_expand_builtin): Remove

> > >         assignment of cfun->machine->has_explicit_vzeroupper.

> > >         * config/i386/i386-features.c

> > >         (ix86_add_reg_usage_to_vzerouppers): Delete.

> > >         (ix86_add_reg_usage_to_vzeroupper): Ditto.

> > >         (rest_of_handle_insert_vzeroupper): Remove

> > >         ix86_add_reg_usage_to_vzerouppers, add df_analyze at the end

> > >         of the function.

> > >         (gate): Remove cfun->machine->has_explicit_vzeroupper.

> > >         * config/i386/i386-protos.h (ix86_expand_avx_vzeroupper):

> > >         Declared.

> > >         * config/i386/i386.c (ix86_insn_callee_abi): New function.

> > >         (ix86_initialize_callee_abi): Ditto.

> > >         (ix86_expand_avx_vzeroupper): Ditto.

> > >         (ix86_hard_regno_call_part_clobbered): Adjust for vzeroupper

> > >         ABI.

> > >         (TARGET_INSN_CALLEE_ABI): Define as ix86_insn_callee_abi.

> > >         (ix86_emit_mode_set): Call ix86_expand_avx_vzeroupper

> > >         directly.

> > >         * config/i386/i386.h (struct GTY(()) machine_function): Delete

> > >         has_explicit_vzeroupper.

> > >         * config/i386/i386.md (enum unspec): New member

> > >         UNSPEC_CALLEE_ABI.

> > >         (I386_DEFAULT,I386_VZEROUPPER,I386_UNKNOWN): New

> > >         define_constants for insn callee abi index.

> > >         * config/i386/predicates.md (vzeroupper_pattern): Adjust.

> > >         * config/i386/sse.md (UNSPECV_VZEROUPPER): Deleted.

> > >         (avx_vzeroupper): Call ix86_expand_avx_vzeroupper.

> > >         (*avx_vzeroupper): Rename to ..

> > >         (avx_vzeroupper_callee_abi): .. this, and adjust pattern as

> > >         call_insn which has a special vzeroupper ABI.

> > >         (*avx_vzeroupper_1): Deleted.

> > >

> > > gcc/testsuite/ChangeLog:

> > >

> > >         PR target/82735

> > >         * gcc.target/i386/pr82735-1.c: New test.

> > >         * gcc.target/i386/pr82735-2.c: New test.

> > >         * gcc.target/i386/pr82735-3.c: New test.

> > >         * gcc.target/i386/pr82735-4.c: New test.

> > >         * gcc.target/i386/pr82735-5.c: New test.

> >

> > LGTM, with a small nit below.

> >

> > Thanks,

> > Uros.

> >

> > > ---

> > >  gcc/config/i386/i386-expand.c             |  4 -

> > >  gcc/config/i386/i386-features.c           | 99 +++--------------------

> > >  gcc/config/i386/i386-protos.h             |  1 +

> > >  gcc/config/i386/i386.c                    | 55 ++++++++++++-

> > >  gcc/config/i386/i386.h                    |  4 -

> > >  gcc/config/i386/i386.md                   | 10 +++

> > >  gcc/config/i386/predicates.md             |  5 +-

> > >  gcc/config/i386/sse.md                    | 59 ++++----------

> > >  gcc/testsuite/gcc.target/i386/pr82735-1.c | 29 +++++++

> > >  gcc/testsuite/gcc.target/i386/pr82735-2.c | 22 +++++

> > >  gcc/testsuite/gcc.target/i386/pr82735-3.c |  5 ++

> > >  gcc/testsuite/gcc.target/i386/pr82735-4.c | 48 +++++++++++

> > >  gcc/testsuite/gcc.target/i386/pr82735-5.c | 54 +++++++++++++

> > >  13 files changed, 252 insertions(+), 143 deletions(-)

> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr82735-1.c

> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr82735-2.c

> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr82735-3.c

> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr82735-4.c

> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr82735-5.c

> > >

> > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c

> > > index 9f3d41955a2..d25d59aa4e7 100644

> > > --- a/gcc/config/i386/i386-expand.c

> > > +++ b/gcc/config/i386/i386-expand.c

> > > @@ -13282,10 +13282,6 @@ rdseed_step:

> > >

> > >        return 0;

> > >

> > > -    case IX86_BUILTIN_VZEROUPPER:

> > > -      cfun->machine->has_explicit_vzeroupper = true;

> > > -      break;

> > > -

> > >      default:

> > >        break;

> > >      }

> > > diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c

> > > index 77783a154b6..a25769ae478 100644

> > > --- a/gcc/config/i386/i386-features.c

> > > +++ b/gcc/config/i386/i386-features.c

> > > @@ -1768,92 +1768,22 @@ convert_scalars_to_vector (bool timode_p)

> > >    return 0;

> > >  }

> > >

> > > -/* Modify the vzeroupper pattern in INSN so that it describes the effect

> > > -   that the instruction has on the SSE registers.  LIVE_REGS are the set

> > > -   of registers that are live across the instruction.

> > > -

> > > -   For a live register R we use:

> > > -

> > > -     (set (reg:V2DF R) (reg:V2DF R))

> > > -

> > > -   which preserves the low 128 bits but clobbers the upper bits.  */

> > > -

> > > -static void

> > > -ix86_add_reg_usage_to_vzeroupper (rtx_insn *insn, bitmap live_regs)

> > > -{

> > > -  rtx pattern = PATTERN (insn);

> > > -  unsigned int nregs = TARGET_64BIT ? 16 : 8;

> > > -  unsigned int npats = nregs;

> > > -  for (unsigned int i = 0; i < nregs; ++i)

> > > -    {

> > > -      unsigned int regno = GET_SSE_REGNO (i);

> > > -      if (!bitmap_bit_p (live_regs, regno))

> > > -       npats--;

> > > -    }

> > > -  if (npats == 0)

> > > -    return;

> > > -  rtvec vec = rtvec_alloc (npats + 1);

> > > -  RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);

> > > -  for (unsigned int i = 0, j = 0; i < nregs; ++i)

> > > -    {

> > > -      unsigned int regno = GET_SSE_REGNO (i);

> > > -      if (!bitmap_bit_p (live_regs, regno))

> > > -       continue;

> > > -      rtx reg = gen_rtx_REG (V2DImode, regno);

> > > -      ++j;

> > > -      RTVEC_ELT (vec, j) = gen_rtx_SET (reg, reg);

> > > -    }

> > > -  XVEC (pattern, 0) = vec;

> > > -  INSN_CODE (insn) = -1;

> > > -  df_insn_rescan (insn);

> > > -}

> > > -

> > > -/* Walk the vzeroupper instructions in the function and annotate them

> > > -   with the effect that they have on the SSE registers.  */

> > > -

> > > -static void

> > > -ix86_add_reg_usage_to_vzerouppers (void)

> > > -{

> > > -  basic_block bb;

> > > -  rtx_insn *insn;

> > > -  auto_bitmap live_regs;

> > > -

> > > -  df_analyze ();

> > > -  FOR_EACH_BB_FN (bb, cfun)

> > > -    {

> > > -      bitmap_copy (live_regs, df_get_live_out (bb));

> > > -      df_simulate_initialize_backwards (bb, live_regs);

> > > -      FOR_BB_INSNS_REVERSE (bb, insn)

> > > -       {

> > > -         if (!NONDEBUG_INSN_P (insn))

> > > -           continue;

> > > -         if (vzeroupper_pattern (PATTERN (insn), VOIDmode))

> > > -           ix86_add_reg_usage_to_vzeroupper (insn, live_regs);

> > > -         df_simulate_one_insn_backwards (bb, insn, live_regs);

> > > -       }

> > > -    }

> > > -}

> > > -

> > >  static unsigned int

> > >  rest_of_handle_insert_vzeroupper (void)

> > >  {

> > > -  if (TARGET_VZEROUPPER

> > > -      && flag_expensive_optimizations

> > > -      && !optimize_size)

> > > -    {

> > > -      /* vzeroupper instructions are inserted immediately after reload to

> > > -        account for possible spills from 256bit or 512bit registers.  The pass

> > > -        reuses mode switching infrastructure by re-running mode insertion

> > > -        pass, so disable entities that have already been processed.  */

> > > -      for (int i = 0; i < MAX_386_ENTITIES; i++)

> > > -       ix86_optimize_mode_switching[i] = 0;

> > > +  /* vzeroupper instructions are inserted immediately after reload to

> > > +     account for possible spills from 256bit or 512bit registers.  The pass

> > > +     reuses mode switching infrastructure by re-running mode insertion

> > > +     pass, so disable entities that have already been processed.  */

> > > +  for (int i = 0; i < MAX_386_ENTITIES; i++)

> > > +    ix86_optimize_mode_switching[i] = 0;

> > >

> > > -      ix86_optimize_mode_switching[AVX_U128] = 1;

> > > +  ix86_optimize_mode_switching[AVX_U128] = 1;

> > >

> > > -      /* Call optimize_mode_switching.  */

> > > -      g->get_passes ()->execute_pass_mode_switching ();

> > > -    }

> > > -  ix86_add_reg_usage_to_vzerouppers ();

> > > +  /* Call optimize_mode_switching.  */

> > > +  g->get_passes ()->execute_pass_mode_switching ();

> > > +

> > > +  df_analyze ();

> > >    return 0;

> > >  }

> > >

> > > @@ -1882,11 +1812,8 @@ public:

> > >    /* opt_pass methods: */

> > >    virtual bool gate (function *)

> > >      {

> > > -      return TARGET_AVX

> > > -            && ((TARGET_VZEROUPPER

> > > -                 && flag_expensive_optimizations

> > > -                 && !optimize_size)

> > > -                || cfun->machine->has_explicit_vzeroupper);

> > > +      return TARGET_AVX && TARGET_VZEROUPPER

> > > +       && flag_expensive_optimizations && !optimize_size;

> > >      }

> > >

> > >    virtual unsigned int execute (function *)

> > > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h

> > > index 7782cf1163f..e6ac9390777 100644

> > > --- a/gcc/config/i386/i386-protos.h

> > > +++ b/gcc/config/i386/i386-protos.h

> > > @@ -216,6 +216,7 @@ extern rtx ix86_split_stack_guard (void);

> > >  extern void ix86_move_vector_high_sse_to_mmx (rtx);

> > >  extern void ix86_split_mmx_pack (rtx[], enum rtx_code);

> > >  extern void ix86_split_mmx_punpck (rtx[], bool);

> > > +extern void ix86_expand_avx_vzeroupper (void);

> > >

> > >  #ifdef TREE_CODE

> > >  extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);

> > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c

> > > index 743d8a25fe3..f0b66dd0d56 100644

> > > --- a/gcc/config/i386/i386.c

> > > +++ b/gcc/config/i386/i386.c

> > > @@ -14426,7 +14426,7 @@ ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,

> > >        break;

> > >      case AVX_U128:

> > >        if (mode == AVX_U128_CLEAN)

> > > -       emit_insn (gen_avx_vzeroupper ());

> > > +       ix86_expand_avx_vzeroupper ();

> > >        break;

> > >      case I387_ROUNDEVEN:

> > >      case I387_TRUNC:

> > > @@ -19494,15 +19494,63 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)

> > >    return false;

> > >  }

> > >

> > > +/* Implement TARGET_INSN_CALLEE_ABI.  */

> > > +

> > > +const predefined_function_abi &

> > > +ix86_insn_callee_abi (const rtx_insn *insn)

> > > +{

> > > +  unsigned int abi_id = 0;

> > > +  rtx pat = PATTERN (insn);

> > > +  if (vzeroupper_pattern (pat, VOIDmode))

> > > +    abi_id = I386_VZEROUPPER;

> > > +

> > > +  return function_abis[abi_id];

> > > +}

> > > +

> > > +/* Initialize function_abis with corresponding abi_id,

> > > +   currently only handle vzeroupper.  */

> > > +void

> > > +ix86_initialize_callee_abi (unsigned int abi_id)

> > > +{

> > > +  gcc_assert (abi_id == I386_VZEROUPPER);

> > > +  predefined_function_abi &vzeroupper_abi = function_abis[abi_id];

> > > +  if (!vzeroupper_abi.initialized_p ())

> > > +    {

> > > +      HARD_REG_SET full_reg_clobbers;

> > > +      CLEAR_HARD_REG_SET (full_reg_clobbers);

> > > +      vzeroupper_abi.initialize (I386_VZEROUPPER, full_reg_clobbers);

> > > +    }

> > > +}

> > > +

> > > +void

> > > +ix86_expand_avx_vzeroupper (void)

> > > +{

> > > +  /* Initialize vzeroupper_abi here.  */

> > > +  ix86_initialize_callee_abi (I386_VZEROUPPER);

> > > +  rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());

> > > +  /* Return false for non-local goto in can_nonlocal_goto.  */

> > > +  make_reg_eh_region_note (insn, 0, INT_MIN);

> > > +  /* Flag used for call_insn indicates it's a fake call.  */

> > > +  RTX_FLAG (insn, used) = 1;

> > > +}

> > > +

> > > +

> > >  /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The only ABI that

> > >     saves SSE registers across calls is Win64 (thus no need to check the

> > >     current ABI here), and with AVX enabled Win64 only guarantees that

> > >     the low 16 bytes are saved.  */

> > >

> > >  static bool

> > > -ix86_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,

> > > +ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,

> > >                                      machine_mode mode)

> > >  {

> > > +  /* Special ABI for vzeroupper which only clobber higher part of sse regs.  */

> > > +  if (abi_id == I386_VZEROUPPER)

> > > +      return (GET_MODE_SIZE (mode) > 16

> > > +             && ((TARGET_64BIT

> > > +                  && (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG)))

> > > +                 || (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG))));

> > > +

> > >    return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;

> > >  }

> > >

> > > @@ -23916,6 +23964,9 @@ ix86_run_selftests (void)

> > >  #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \

> > >    ix86_hard_regno_call_part_clobbered

> > >

> > > +#undef TARGET_INSN_CALLEE_ABI

> > > +#define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi

> > > +

> > >  #undef TARGET_CAN_CHANGE_MODE_CLASS

> > >  #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class

> > >

> > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h

> > > index 53d503fc6e0..919d0b2418a 100644

> > > --- a/gcc/config/i386/i386.h

> > > +++ b/gcc/config/i386/i386.h

> > > @@ -2659,10 +2659,6 @@ struct GTY(()) machine_function {

> > >    /* True if the function needs a stack frame.  */

> > >    BOOL_BITFIELD stack_frame_required : 1;

> > >

> > > -  /* True if __builtin_ia32_vzeroupper () has been expanded in current

> > > -     function.  */

> > > -  BOOL_BITFIELD has_explicit_vzeroupper : 1;

> > > -

> > >    /* True if we should act silently, rather than raise an error for

> > >       invalid calls.  */

> > >    BOOL_BITFIELD silent_p : 1;

> > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md

> > > index 2fc8fae30f3..5d9f5aa39ac 100644

> > > --- a/gcc/config/i386/i386.md

> > > +++ b/gcc/config/i386/i386.md

> > > @@ -191,6 +191,10 @@ (define_c_enum "unspec" [

> > >    ;; For MOVDIRI and MOVDIR64B support

> > >    UNSPEC_MOVDIRI

> > >    UNSPEC_MOVDIR64B

> > > +

> > > +  ;; For insn_callee_abi:

> > > +  UNSPEC_CALLEE_ABI

> > > +

> > >  ])

> > >

> > >  (define_c_enum "unspecv" [

> > > @@ -447,6 +451,12 @@ (define_constants

> > >     (FIRST_PSEUDO_REG           76)

> > >    ])

> > >

> > > +;; Insn callee abi index.

> > > +(define_constants

> > > +  [(I386_DEFAULT       0)

> > > +   (I386_VZEROUPPER    1)

> > > +   (I386_UNKNOWN       2)])

> >

> > Please name these ABI_DEFAULT, ABI_VZEROUPPER and ABI_UNKNOWN.

> Yes, thanks for the review.

> I'll hold this patch until the middle-end part is approved.

> >

> > > +

> > >  ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls

> > >  ;; from i386.c.

> > >

> > > diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md

> > > index abd307ebdb8..8b787553f32 100644

> > > --- a/gcc/config/i386/predicates.md

> > > +++ b/gcc/config/i386/predicates.md

> > > @@ -1599,8 +1599,9 @@ (define_predicate "vzeroall_pattern"

> > >  ;; return true if OP is a vzeroupper pattern.

> > >  (define_predicate "vzeroupper_pattern"

> > >    (and (match_code "parallel")

> > > -       (match_code "unspec_volatile" "a")

> > > -       (match_test "XINT (XVECEXP (op, 0, 0), 1) == UNSPECV_VZEROUPPER")))

> > > +       (match_code "unspec" "b")

> > > +       (match_test "XINT (XVECEXP (op, 0, 1), 1) == UNSPEC_CALLEE_ABI")

> > > +       (match_test "INTVAL (XVECEXP (XVECEXP (op, 0, 1), 0, 0)) == I386_VZEROUPPER")))

> > >

> > >  ;; Return true if OP is an addsub vec_merge operation

> > >  (define_predicate "addsub_vm_operator"

> > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md

> > > index a4503ddcb73..949347a3247 100644

> > > --- a/gcc/config/i386/sse.md

> > > +++ b/gcc/config/i386/sse.md

> > > @@ -205,7 +205,6 @@ (define_c_enum "unspecv" [

> > >    UNSPECV_MONITOR

> > >    UNSPECV_MWAIT

> > >    UNSPECV_VZEROALL

> > > -  UNSPECV_VZEROUPPER

> > >

> > >    ;; For KEYLOCKER

> > >    UNSPECV_LOADIWKEY

> > > @@ -20857,14 +20856,22 @@ (define_insn "*avx_vzeroall"

> > >  ;; if the upper 128bits are unused.  Initially we expand the instructions

> > >  ;; as though they had no effect on the SSE registers, but later add SETs and

> > >  ;; CLOBBERs to the PARALLEL to model the real effect.

> > > +

> > >  (define_expand "avx_vzeroupper"

> > > -  [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]

> > > -  "TARGET_AVX")

> > > +  [(parallel [(call (mem:QI (const_int 0))

> > > +                   (const_int 0))

> > > +            (unspec [(const_int I386_VZEROUPPER)] UNSPEC_CALLEE_ABI)])]

> > > +  "TARGET_AVX"

> > > +{

> > > +  ix86_expand_avx_vzeroupper ();

> > > +  DONE;

> > > +})

> > >

> > > -(define_insn "*avx_vzeroupper"

> > > -  [(match_parallel 0 "vzeroupper_pattern"

> > > -     [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]

> > > -  "TARGET_AVX && XVECLEN (operands[0], 0) == (TARGET_64BIT ? 16 : 8) + 1"

> > > +(define_insn "avx_vzeroupper_callee_abi"

> > > +  [(call (mem:QI (const_int 0))

> > > +        (const_int 0))

> > > +    (unspec [(const_int I386_VZEROUPPER)] UNSPEC_CALLEE_ABI)]

> > > +  "TARGET_AVX"

> > >    "vzeroupper"

> > >    [(set_attr "type" "sse")

> > >     (set_attr "modrm" "0")

> > > @@ -20873,44 +20880,6 @@ (define_insn "*avx_vzeroupper"

> > >     (set_attr "btver2_decode" "vector")

> > >     (set_attr "mode" "OI")])

> > >

> > > -(define_insn_and_split "*avx_vzeroupper_1"

> > > -  [(match_parallel 0 "vzeroupper_pattern"

> > > -     [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]

> > > -  "TARGET_AVX && XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1"

> > > -  "#"

> > > -  "&& epilogue_completed"

> > > -  [(match_dup 0)]

> > > -{

> > > -  /* For IPA-RA purposes, make it clear the instruction clobbers

> > > -     even XMM registers not mentioned explicitly in the pattern.  */

> > > -  unsigned int nregs = TARGET_64BIT ? 16 : 8;

> > > -  unsigned int npats = XVECLEN (operands[0], 0);

> > > -  rtvec vec = rtvec_alloc (nregs + 1);

> > > -  RTVEC_ELT (vec, 0) = XVECEXP (operands[0], 0, 0);

> > > -  for (unsigned int i = 0, j = 1; i < nregs; ++i)

> > > -    {

> > > -      unsigned int regno = GET_SSE_REGNO (i);

> > > -      if (j < npats

> > > -         && REGNO (SET_DEST (XVECEXP (operands[0], 0, j))) == regno)

> > > -       {

> > > -         RTVEC_ELT (vec, i + 1) = XVECEXP (operands[0], 0, j);

> > > -         j++;

> > > -       }

> > > -      else

> > > -       {

> > > -         rtx reg = gen_rtx_REG (V2DImode, regno);

> > > -         RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);

> > > -       }

> > > -    }

> > > -  operands[0] = gen_rtx_PARALLEL (VOIDmode, vec);

> > > -}

> > > -  [(set_attr "type" "sse")

> > > -   (set_attr "modrm" "0")

> > > -   (set_attr "memory" "none")

> > > -   (set_attr "prefix" "vex")

> > > -   (set_attr "btver2_decode" "vector")

> > > -   (set_attr "mode" "OI")])

> > > -

> > >  (define_mode_attr pbroadcast_evex_isa

> > >    [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")

> > >     (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")

> > > diff --git a/gcc/testsuite/gcc.target/i386/pr82735-1.c b/gcc/testsuite/gcc.target/i386/pr82735-1.c

> > > new file mode 100644

> > > index 00000000000..1a63b9ae9c9

> > > --- /dev/null

> > > +++ b/gcc/testsuite/gcc.target/i386/pr82735-1.c

> > > @@ -0,0 +1,29 @@

> > > +/* { dg-do run } */

> > > +/* { dg-options "-O2 -mavx" } */

> > > +/* { dg-require-effective-target avx } */

> > > +

> > > +#include "avx-check.h"

> > > +

> > > +void

> > > +__attribute__ ((noipa))

> > > +mtest(char *dest)

> > > +{

> > > +  __m256i ymm1 = _mm256_set1_epi8((char)0x1);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32), ymm1);

> > > +  _mm256_zeroupper();

> > > +  __m256i ymm2 = _mm256_set1_epi8((char)0x1);

> > > +  _mm256_storeu_si256((__m256i *)dest, ymm2);

> > > +}

> > > +

> > > +void

> > > +avx_test ()

> > > +{

> > > +  char buf[64];

> > > +  for (int i = 0; i != 64; i++)

> > > +    buf[i] = 2;

> > > +  mtest (buf);

> > > +

> > > +  for (int i = 0; i < 32; ++i)

> > > +    if (buf[i] != 1)

> > > +      __builtin_abort ();

> > > +}

> > > diff --git a/gcc/testsuite/gcc.target/i386/pr82735-2.c b/gcc/testsuite/gcc.target/i386/pr82735-2.c

> > > new file mode 100644

> > > index 00000000000..ac9d006f794

> > > --- /dev/null

> > > +++ b/gcc/testsuite/gcc.target/i386/pr82735-2.c

> > > @@ -0,0 +1,22 @@

> > > +/* { dg-do compile } */

> > > +/* { dg-options "-mavx -O2" } */

> > > +

> > > +#include <immintrin.h>

> > > +

> > > +void test(char *dest)

> > > +{

> > > +  /* xmm1 can be propagated to xmm2 by CSE.  */

> > > +  __m128i xmm1 = _mm_set_epi8(0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,

> > > +                             0x9, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16);

> > > +  _mm_storeu_si128((__m128i *)(dest + 32), xmm1);

> > > +  _mm256_zeroupper();

> > > +  _mm256_zeroupper();

> > > +  _mm256_zeroupper();

> > > +  _mm256_zeroupper();

> > > +  _mm256_zeroupper();

> > > +  __m128i xmm2 = xmm1;

> > > +  _mm_storeu_si128((__m128i *)dest, xmm2);

> > > +}

> > > +

> > > +/* Darwin local constant symbol is "lC0", ELF targets ".LC0" */

> > > +/* { dg-final { scan-assembler-times {(?n)vmovdqa\t\.?[Ll]C0[^,]*, %xmm[0-9]} 1 } } */

> > > diff --git a/gcc/testsuite/gcc.target/i386/pr82735-3.c b/gcc/testsuite/gcc.target/i386/pr82735-3.c

> > > new file mode 100644

> > > index 00000000000..e3f801e6924

> > > --- /dev/null

> > > +++ b/gcc/testsuite/gcc.target/i386/pr82735-3.c

> > > @@ -0,0 +1,5 @@

> > > +/* { dg-do compile } */

> > > +/* { dg-options "-mavx -O2 -mabi=ms" } */

> > > +/* { dg-final { scan-assembler-not {(?n)xmm([6-9]|1[0-5])} } } */

> > > +

> > > +#include "pr82735-2.c"

> > > diff --git a/gcc/testsuite/gcc.target/i386/pr82735-4.c b/gcc/testsuite/gcc.target/i386/pr82735-4.c

> > > new file mode 100644

> > > index 00000000000..78c0a6cb2c8

> > > --- /dev/null

> > > +++ b/gcc/testsuite/gcc.target/i386/pr82735-4.c

> > > @@ -0,0 +1,48 @@

> > > +/* { dg-do compile { target { ! ia32 } } }  */

> > > +/* { dg-options "-mavx -O2 -mabi=ms -mno-avx512f -masm=att" } */

> > > +/* { dg-final { scan-assembler-times {(?n)(?:vmovdqa[1-9]*|vmovap[sd])[\t ]*%xmm[0-9]+, [0-9]*\(%rsp\)} 10 } } */

> > > +/* { dg-final { scan-assembler-times {(?n)(?:vmovdqa[1-9]*|vmovap[sd])[\t ]*[0-9]*\(%rsp\), %xmm[0-9]+} 10 } } */

> > > +

> > > +#include <immintrin.h>

> > > +

> > > +void test(char *dest)

> > > +{

> > > +  __m256i ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15;

> > > +  asm volatile ("vmovdqa\t%%ymm0, %0\n\t"

> > > +               "vmovdqa\t%%ymm0, %1\n\t"

> > > +               "vmovdqa\t%%ymm0, %2\n\t"

> > > +               "vmovdqa\t%%ymm0, %3\n\t"

> > > +               "vmovdqa\t%%ymm0, %4\n\t"

> > > +               "vmovdqa\t%%ymm0, %5\n\t"

> > > +               "vmovdqa\t%%ymm0, %6\n\t"

> > > +               "vmovdqa\t%%ymm0, %7\n\t"

> > > +               "vmovdqa\t%%ymm0, %8\n\t"

> > > +               "vmovdqa\t%%ymm0, %9\n\t"

> > > +               "vmovdqa\t%%ymm0, %10\n\t"

> > > +               "vmovdqa\t%%ymm0, %11\n\t"

> > > +               "vmovdqa\t%%ymm0, %12\n\t"

> > > +               "vmovdqa\t%%ymm0, %13\n\t"

> > > +               "vmovdqa\t%%ymm0, %14\n\t"

> > > +               "vmovdqa\t%%ymm0, %15\n\t"

> > > +               : "=v" (ymm1), "=v" (ymm2), "=v"(ymm3), "=v" (ymm4), "=v" (ymm5),

> > > +                 "=v" (ymm6), "=v" (ymm7), "=v"(ymm8), "=v" (ymm9), "=v" (ymm10),

> > > +                 "=v" (ymm11), "=v" (ymm12), "=v"(ymm13), "=v" (ymm14), "=v" (ymm15),

> > > +                 "=v"(ymm0)

> > > +               ::);

> > > +  _mm256_zeroupper();

> > > +  _mm256_storeu_si256((__m256i *)dest, ymm1);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32), ymm2);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 2), ymm3);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 3), ymm4);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 4), ymm5);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 5), ymm6);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 6), ymm7);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 7), ymm8);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 8), ymm9);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 9), ymm10);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 10), ymm11);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 11), ymm12);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 12), ymm13);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 13), ymm14);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 14), ymm15);

> > > +}

> > > diff --git a/gcc/testsuite/gcc.target/i386/pr82735-5.c b/gcc/testsuite/gcc.target/i386/pr82735-5.c

> > > new file mode 100644

> > > index 00000000000..2a58cbe52d0

> > > --- /dev/null

> > > +++ b/gcc/testsuite/gcc.target/i386/pr82735-5.c

> > > @@ -0,0 +1,54 @@

> > > +/* { dg-do compile { target { ! ia32 } } }  */

> > > +/* { dg-options "-mavx -O2 -mabi=ms -mno-avx512f -masm=att" } */

> > > +/* { dg-final { scan-assembler-times {(?n)(?:vmovdqa[1-9]*|vmovap[sd])[\t ]*%xmm[0-9]+, [0-9]*\(%rsp\)} 10 } } */

> > > +/* { dg-final { scan-assembler-times {(?n)(?:vmovdqa[1-9]*|vmovap[sd])[\t ]*[0-9]*\(%rsp\), %xmm[0-9]+} 10 } } */

> > > +

> > > +#include <immintrin.h>

> > > +

> > > +void test(char *dest)

> > > +{

> > > +  __m256i ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15;

> > > +  asm volatile ("vmovdqa\t%%ymm0, %0\n\t"

> > > +               "vmovdqa\t%%ymm0, %1\n\t"

> > > +               "vmovdqa\t%%ymm0, %2\n\t"

> > > +               "vmovdqa\t%%ymm0, %3\n\t"

> > > +               "vmovdqa\t%%ymm0, %4\n\t"

> > > +               "vmovdqa\t%%ymm0, %5\n\t"

> > > +               "vmovdqa\t%%ymm0, %6\n\t"

> > > +               "vmovdqa\t%%ymm0, %7\n\t"

> > > +               "vmovdqa\t%%ymm0, %8\n\t"

> > > +               "vmovdqa\t%%ymm0, %9\n\t"

> > > +               "vmovdqa\t%%ymm0, %10\n\t"

> > > +               "vmovdqa\t%%ymm0, %11\n\t"

> > > +               "vmovdqa\t%%ymm0, %12\n\t"

> > > +               "vmovdqa\t%%ymm0, %13\n\t"

> > > +               "vmovdqa\t%%ymm0, %14\n\t"

> > > +               "vmovdqa\t%%ymm0, %15\n\t"

> > > +               : "=v" (ymm1), "=v" (ymm2), "=v"(ymm3), "=v" (ymm4), "=v" (ymm5),

> > > +                 "=v" (ymm6), "=v" (ymm7), "=v"(ymm8), "=v" (ymm9), "=v" (ymm10),

> > > +                 "=v" (ymm11), "=v" (ymm12), "=v"(ymm13), "=v" (ymm14), "=v" (ymm15),

> > > +                 "=v"(ymm0)

> > > +               ::);

> > > +  _mm256_zeroupper();

> > > +  _mm256_zeroupper();

> > > +  _mm256_zeroupper();

> > > +  _mm256_zeroupper();

> > > +  _mm256_zeroupper();

> > > +  _mm256_zeroupper();

> > > +  _mm256_zeroupper();

> > > +  _mm256_storeu_si256((__m256i *)dest, ymm1);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32), ymm2);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 2), ymm3);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 3), ymm4);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 4), ymm5);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 5), ymm6);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 6), ymm7);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 7), ymm8);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 8), ymm9);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 9), ymm10);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 10), ymm11);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 11), ymm12);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 12), ymm13);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 13), ymm14);

> > > +  _mm256_storeu_si256((__m256i *)(dest + 32 * 14), ymm15);

> > > +}

> > > --

> > > 2.18.1

> > >


Since -mabi=ms isn't compatible with x32, skip pr82735-[345].c for x32.
I am checking it into the master branch.

-- 
H.J.

Patch

From 5e646e1c2ebc8339260a2a44e99f7f04446e9120 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Mon, 7 Jun 2021 11:43:25 -0700
Subject: [PATCH] x86: Don't compile pr82735-[345].c for x32

Since -mabi=ms isn't compatible with x32, skip pr82735-[345].c for x32.

	PR target/82735
	* gcc.target/i386/pr82735-3.c: Don't compile for x32.
	* gcc.target/i386/pr82735-4.c: Likewise.
	* gcc.target/i386/pr82735-5.c: Likewise.
---
 gcc/testsuite/gcc.target/i386/pr82735-3.c | 2 +-
 gcc/testsuite/gcc.target/i386/pr82735-4.c | 2 +-
 gcc/testsuite/gcc.target/i386/pr82735-5.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr82735-3.c b/gcc/testsuite/gcc.target/i386/pr82735-3.c
index e3f801e6924..854087d81a0 100644
--- a/gcc/testsuite/gcc.target/i386/pr82735-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr82735-3.c
@@ -1,4 +1,4 @@ 
-/* { dg-do compile } */
+/* { dg-do compile { target { ! x32 } } } */
 /* { dg-options "-mavx -O2 -mabi=ms" } */
 /* { dg-final { scan-assembler-not {(?n)xmm([6-9]|1[0-5])} } } */
 
diff --git a/gcc/testsuite/gcc.target/i386/pr82735-4.c b/gcc/testsuite/gcc.target/i386/pr82735-4.c
index 78c0a6cb2c8..2218ffc08ab 100644
--- a/gcc/testsuite/gcc.target/i386/pr82735-4.c
+++ b/gcc/testsuite/gcc.target/i386/pr82735-4.c
@@ -1,4 +1,4 @@ 
-/* { dg-do compile { target { ! ia32 } } }  */
+/* { dg-do compile { target { lp64 } } }  */
 /* { dg-options "-mavx -O2 -mabi=ms -mno-avx512f -masm=att" } */
 /* { dg-final { scan-assembler-times {(?n)(?:vmovdqa[1-9]*|vmovap[sd])[\t ]*%xmm[0-9]+, [0-9]*\(%rsp\)} 10 } } */
 /* { dg-final { scan-assembler-times {(?n)(?:vmovdqa[1-9]*|vmovap[sd])[\t ]*[0-9]*\(%rsp\), %xmm[0-9]+} 10 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82735-5.c b/gcc/testsuite/gcc.target/i386/pr82735-5.c
index 2a58cbe52d0..a9ef05f4e7b 100644
--- a/gcc/testsuite/gcc.target/i386/pr82735-5.c
+++ b/gcc/testsuite/gcc.target/i386/pr82735-5.c
@@ -1,4 +1,4 @@ 
-/* { dg-do compile { target { ! ia32 } } }  */
+/* { dg-do compile { target { lp64 } } }  */
 /* { dg-options "-mavx -O2 -mabi=ms -mno-avx512f -masm=att" } */
 /* { dg-final { scan-assembler-times {(?n)(?:vmovdqa[1-9]*|vmovap[sd])[\t ]*%xmm[0-9]+, [0-9]*\(%rsp\)} 10 } } */
 /* { dg-final { scan-assembler-times {(?n)(?:vmovdqa[1-9]*|vmovap[sd])[\t ]*[0-9]*\(%rsp\), %xmm[0-9]+} 10 } } */
-- 
2.31.1