V3: [PATCH] Update preferred_stack_boundary only when expanding function call

Message ID CAMe9rOp5sTEUtSw2c_-+GHBBKUS9u2Tr_RHJSD7XbjosVqkidQ@mail.gmail.com
State New
Headers show
Series
  • V3: [PATCH] Update preferred_stack_boundary only when expanding function call
Related show

Commit Message

H.J. Lu June 13, 2019, 10:27 p.m.
On Sat, Jun 8, 2019 at 12:14 AM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>

> "H.J. Lu" <hjl.tools@gmail.com> writes:

> > On Fri, Jun 7, 2019 at 1:22 AM Richard Biener <rguenther@suse.de> wrote:

> >>

> >> On Fri, 7 Jun 2019, Richard Sandiford wrote:

> >>

> >> > "H.J. Lu" <hjl.tools@gmail.com> writes:

> >> > > locate_and_pad_parm is called when expanding function call from

> >> > > initialize_argument_information and when generating function body

> >> > > from assign_parm_find_entry_rtl:

> >> > >

> >> > >   /* Remember if the outgoing parameter requires extra alignment on the

> >> > >      calling function side.  */

> >> > >   if (crtl->stack_alignment_needed < boundary)

> >> > >     crtl->stack_alignment_needed = boundary;

> >> > >   if (crtl->preferred_stack_boundary < boundary)

> >> > >     crtl->preferred_stack_boundary = boundary;

> >> > >

> >> > > stack_alignment_needed and preferred_stack_boundary should be updated

> >> > > only when expanding function call, not when generating function body.

> >> > > Add an argument, outgoing_p, to locate_and_pad_parm to indicate for

> >> > > expanding function call.  Update stack_alignment_needed and

> >> > > preferred_stack_boundary if the parameter is passed on stack and only

> >> > > when expanding function call.

> >> > >

> >> > > Tested on Linux/x86-64.

> >> > >

> >> > > OK for trunk?

> >> > >

> >> > > Thanks.

> >> > >

> >> > > --

> >> > > H.J.

> >> > >

> >> > > From e91e20ad8e10373db2c6d8f99a3da0bbf46c5c34 Mon Sep 17 00:00:00 2001

> >> > > From: "H.J. Lu" <hjl.tools@gmail.com>

> >> > > Date: Wed, 5 Jun 2019 12:55:19 -0700

> >> > > Subject: [PATCH] Update preferred_stack_boundary only when expanding function

> >> > >  call

> >> > >

> >> > > locate_and_pad_parm is called when expanding function call from

> >> > > initialize_argument_information and when generating function body

> >> > > from assign_parm_find_entry_rtl:

> >> > >

> >> > >   /* Remember if the outgoing parameter requires extra alignment on the

> >> > >      calling function side.  */

> >> > >   if (crtl->stack_alignment_needed < boundary)

> >> > >     crtl->stack_alignment_needed = boundary;

> >> > >   if (crtl->preferred_stack_boundary < boundary)

> >> > >     crtl->preferred_stack_boundary = boundary;

> >> > >

> >> > > stack_alignment_needed and preferred_stack_boundary should be updated

> >> > > only when expanding function call, not when generating function body.

> >> > > Add an argument, outgoing_p, to locate_and_pad_parm to indicate for

> >> > > expanding function call.  Update stack_alignment_needed and

> >> > > preferred_stack_boundary if the parameter is passed on stack and only

> >> > > when expanding function call.

> >> > >

> >> > > Tested on Linux/x86-64.

> >> > >

> >> > > gcc/

> >> > >

> >> > >     PR rtl-optimization/90765

> >> > >     * function.c (assign_parm_find_entry_rtl): Pass false to

> >> > >     locate_and_pad_parm.

> >> > >     (locate_and_pad_parm): Add an argument, outgoing_p, to indicate

> >> > >     for expanding function call.  Update stack_alignment_needed and

> >> > >     preferred_stack_boundary only if outgoing_p is true and the

> >> > >     the parameter is passed on stack.

> >> > >     * function.h (locate_and_pad_parm): Add an argument, outgoing_p,

> >> > >     defaulting to true.

> >> > >

> >> > > gcc/testsuite/

> >> > >

> >> > >     PR rtl-optimization/90765

> >> > >     * gcc.target/i386/pr90765-1.c: New test.

> >> > >     * gcc.target/i386/pr90765-2.c: Likewise.

> >> > > ---

> >> > >  gcc/function.c                            | 21 +++++++++++++--------

> >> > >  gcc/function.h                            |  3 ++-

> >> > >  gcc/testsuite/gcc.target/i386/pr90765-1.c | 11 +++++++++++

> >> > >  gcc/testsuite/gcc.target/i386/pr90765-2.c | 18 ++++++++++++++++++

> >> > >  4 files changed, 44 insertions(+), 9 deletions(-)

> >> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-1.c

> >> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-2.c

> >> > >

> >> > > diff --git a/gcc/function.c b/gcc/function.c

> >> > > index e30ee259bec..9b6673f6f0d 100644

> >> > > --- a/gcc/function.c

> >> > > +++ b/gcc/function.c

> >> > > @@ -2601,7 +2601,7 @@ assign_parm_find_entry_rtl (struct assign_parm_data_all *all,

> >> > >    locate_and_pad_parm (data->promoted_mode, data->passed_type, in_regs,

> >> > >                    all->reg_parm_stack_space,

> >> > >                    entry_parm ? data->partial : 0, current_function_decl,

> >> > > -                  &all->stack_args_size, &data->locate);

> >> > > +                  &all->stack_args_size, &data->locate, false);

> >> > >

> >> > >    /* Update parm_stack_boundary if this parameter is passed in the

> >> > >       stack.  */

> >> > > @@ -3954,7 +3954,8 @@ locate_and_pad_parm (machine_mode passed_mode, tree type, int in_regs,

> >> > >                  int reg_parm_stack_space, int partial,

> >> > >                  tree fndecl ATTRIBUTE_UNUSED,

> >> > >                  struct args_size *initial_offset_ptr,

> >> > > -                struct locate_and_pad_arg_data *locate)

> >> > > +                struct locate_and_pad_arg_data *locate,

> >> > > +                bool outgoing_p)

> >> > >  {

> >> > >    tree sizetree;

> >> > >    pad_direction where_pad;

> >> > > @@ -4021,12 +4022,16 @@ locate_and_pad_parm (machine_mode passed_mode, tree type, int in_regs,

> >> > >     }

> >> > >      }

> >> > >

> >> > > -  /* Remember if the outgoing parameter requires extra alignment on the

> >> > > -     calling function side.  */

> >> > > -  if (crtl->stack_alignment_needed < boundary)

> >> > > -    crtl->stack_alignment_needed = boundary;

> >> > > -  if (crtl->preferred_stack_boundary < boundary)

> >> > > -    crtl->preferred_stack_boundary = boundary;

> >> > > +  if (outgoing_p && !in_regs)

> >> > > +    {

> >> > > +      /* Remember if the outgoing parameter requires extra alignment on

> >> > > +    the calling function side if this parameter is passed in the

> >> > > +    stack.  */

> >> > > +      if (crtl->stack_alignment_needed < boundary)

> >> > > +   crtl->stack_alignment_needed = boundary;

> >> > > +      if (crtl->preferred_stack_boundary < boundary)

> >> > > +   crtl->preferred_stack_boundary = boundary;

> >> > > +    }

> >> >

> >> > Just my opinion, but I think this shows that the code isn't really

> >> > in the right place.  IMO locate_and_pad_parm should just describe

> >> > the ABI and is too low-level to be modifying global state like this.

> >> >

> >> > I think we could instead do this by walking over the argument vectors

> >> > in a subroutine of expand_call and emit_library_call_value_1.

> >> > expand_call is also where we do:

> >> >

> >> >   /* Ensure current function's preferred stack boundary is at least

> >> >      what we need.  Stack alignment may also increase preferred stack

> >> >      boundary.  */

> >> >   if (crtl->preferred_stack_boundary < preferred_stack_boundary)

> >> >     crtl->preferred_stack_boundary = preferred_stack_boundary;

> >> >   else

> >> >     preferred_stack_boundary = crtl->preferred_stack_boundary;

> >>

> >> Agreed - that would be much cleaner.

> >>

> >

> > Here is the updated patch to add update_stack_alignment_for_call.

> >

> > Tested on Linux/x86-64.    OK for trunk?

> >

> > Thanks.

> >

> > --

> > H.J.

> >

> > From 00d81edc1e62c43c2084483df055ea68f4047679 Mon Sep 17 00:00:00 2001

> > From: "H.J. Lu" <hjl.tools@gmail.com>

> > Date: Wed, 5 Jun 2019 12:55:19 -0700

> > Subject: [PATCH] Update preferred_stack_boundary only when expanding function

> >  call

> >

> > locate_and_pad_parm is called when expanding function call from

> > initialize_argument_information and when generating function body

> > from assign_parm_find_entry_rtl:

> >

> >   /* Remember if the outgoing parameter requires extra alignment on the

> >      calling function side.  */

> >   if (crtl->stack_alignment_needed < boundary)

> >     crtl->stack_alignment_needed = boundary;

> >   if (crtl->preferred_stack_boundary < boundary)

> >     crtl->preferred_stack_boundary = boundary;

> >

> > stack_alignment_needed and preferred_stack_boundary should be updated

> > only when expanding function call, not when generating function body.

> >

> > Add update_stack_alignment_for_call to update stack alignment when

> > outgoing parameter is passed in the stack.

> >

> > gcc/

> >

> >       PR rtl-optimization/90765

> >       * calls.c (update_stack_alignment_for_call): New function.

> >       (initialize_argument_information): Call

> >       update_stack_alignment_for_call when outgoing parameter is

> >       passed in the stack.

> >       (emit_library_call_value_1): Likewise.

> >       * function.c (locate_and_pad_parm): Don't update

> >       stack_alignment_needed and preferred_stack_boundary.

> >

> > gcc/testsuite/

> >

> >       PR rtl-optimization/90765

> >       * gcc.target/i386/pr90765-1.c: New test.

> >       * gcc.target/i386/pr90765-2.c: Likewise.

> > ---

> >  gcc/calls.c                               | 33 ++++++++++++++++++-----

> >  gcc/function.c                            |  7 -----

> >  gcc/testsuite/gcc.target/i386/pr90765-1.c | 11 ++++++++

> >  gcc/testsuite/gcc.target/i386/pr90765-2.c | 18 +++++++++++++

> >  4 files changed, 55 insertions(+), 14 deletions(-)

> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-1.c

> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-2.c

> >

> > diff --git a/gcc/calls.c b/gcc/calls.c

> > index c8a42680041..8ba82fbb6c0 100644

> > --- a/gcc/calls.c

> > +++ b/gcc/calls.c

> > @@ -1841,6 +1841,19 @@ maybe_complain_about_tail_call (tree call_expr, const char *reason)

> >    error_at (EXPR_LOCATION (call_expr), "cannot tail-call: %s", reason);

> >  }

> >

> > +/* Update stack alignment when the parameter is passed in the stack

> > +   since the outgoing parameter requires extra alignment on the calling

> > +   function side. */

> > +

> > +static void

> > +update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate)

> > +{

> > +  if (crtl->stack_alignment_needed < locate->boundary)

> > +    crtl->stack_alignment_needed = locate->boundary;

> > +  if (crtl->preferred_stack_boundary < locate->boundary)

> > +    crtl->preferred_stack_boundary = locate->boundary;

> > +}

> > +

> >  /* Fill in ARGS_SIZE and ARGS array based on the parameters found in

> >     CALL_EXPR EXP.

> >

> > @@ -2162,15 +2175,18 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED,

> >        if (args[i].reg == 0 || args[i].partial != 0

> >              || reg_parm_stack_space > 0

> >              || args[i].pass_on_stack)

> > -     locate_and_pad_parm (mode, type,

> > +     {

> > +       locate_and_pad_parm (mode, type,

> >  #ifdef STACK_PARMS_IN_REG_PARM_AREA

> > -                          1,

> > +                            1,

> >  #else

> > -                          args[i].reg != 0,

> > +                            args[i].reg != 0,

> >  #endif

> > -                          reg_parm_stack_space,

> > -                          args[i].pass_on_stack ? 0 : args[i].partial,

> > -                          fndecl, args_size, &args[i].locate);

> > +                            reg_parm_stack_space,

> > +                            args[i].pass_on_stack ? 0 : args[i].partial,

> > +                            fndecl, args_size, &args[i].locate);

> > +       update_stack_alignment_for_call (&args[i].locate);

> > +     }

> >  #ifdef BLOCK_REG_PADDING

> >        else

> >       /* The argument is passed entirely in registers.  See at which

> > @@ -4861,7 +4877,10 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,

> >

> >        if (argvec[count].reg == 0 || argvec[count].partial != 0

> >         || reg_parm_stack_space > 0)

> > -     args_size.constant += argvec[count].locate.size.constant;

> > +     {

> > +       args_size.constant += argvec[count].locate.size.constant;

> > +       update_stack_alignment_for_call (&argvec[count].locate);

> > +     }

> >

> >        targetm.calls.function_arg_advance (args_so_far, Pmode, (tree) 0, true);

>

> There are two calls to locate_and_pad_parm in emit_library_call_value_1,

> but I think this only handles the second.

>

> IMO it'd be clearer/safer to iterate over the argument vector separately

> rather than do it on the fly.  In the case of expand_call it would make

> sense to do it near the crtl->preferred_stack_boundary stuff quoted above,

> so that this is all in one place.

>


Fixed.

There is the updated patch.   Tested on Linux/x86-64.  OK for trunk?

Thanks.

-- 
H.J.

Comments

Richard Sandiford June 14, 2019, 3:31 p.m. | #1
"H.J. Lu" <hjl.tools@gmail.com> writes:
> diff --git a/gcc/calls.c b/gcc/calls.c

> index c8a42680041..6ab138e7bb0 100644

> --- a/gcc/calls.c

> +++ b/gcc/calls.c

> @@ -3226,6 +3226,19 @@ can_implement_as_sibling_call_p (tree exp,

>    return true;

>  }

>  

> +/* Update stack alignment when the parameter is passed in the stack

> +   since the outgoing parameter requires extra alignment on the calling

> +   function side. */

> +

> +static void

> +update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate)

> +{

> +  if (crtl->stack_alignment_needed < locate->boundary)

> +    crtl->stack_alignment_needed = locate->boundary;

> +  if (crtl->preferred_stack_boundary < locate->boundary)

> +    crtl->preferred_stack_boundary = locate->boundary;

> +}

> +

>  /* Generate all the code for a CALL_EXPR exp

>     and return an rtx for its value.

>     Store the value in TARGET (specified as an rtx) if convenient.

> @@ -3703,6 +3716,12 @@ expand_call (tree exp, rtx target, int ignore)

>    /* Ensure current function's preferred stack boundary is at least

>       what we need.  Stack alignment may also increase preferred stack

>       boundary.  */

> +  for (i = 0; i < num_actuals; i++)

> +    if (reg_parm_stack_space > 0

> +	|| args[i].reg == 0

> +	|| args[i].partial != 0

> +	|| args[i].pass_on_stack)

> +      update_stack_alignment_for_call (&args[i].locate);

>    if (crtl->preferred_stack_boundary < preferred_stack_boundary)

>      crtl->preferred_stack_boundary = preferred_stack_boundary;

>    else

> @@ -4961,6 +4980,12 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,

>        targetm.calls.function_arg_advance (args_so_far, mode, (tree) 0, true);

>      }

>  

> +  for (int i = 0; i < nargs; i++)

> +    if (reg_parm_stack_space > 0

> +	|| argvec[i].reg == 0

> +	|| argvec[i].partial != 0)

> +      update_stack_alignment_for_call (&argvec[i].locate);

> +


It's safe to test argvec[i].pass_on_stack here too, since the vector
is initialised to zeros.  So I think we should move the "if"s into the
new function:

static void
update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate)
{
  if (reg_parm_stack_space > 0
      || locate->reg == 0
      || locate->partial != 0
      || locate->pass_on_stack)
    {
      if (crtl->stack_alignment_needed < locate->boundary)
	crtl->stack_alignment_needed = locate->boundary;
      if (crtl->preferred_stack_boundary < locate->boundary)
	crtl->preferred_stack_boundary = locate->boundary;
    }
}

OK with that change, thanks.

Richard
H.J. Lu June 14, 2019, 3:53 p.m. | #2
On Fri, Jun 14, 2019 at 8:31 AM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>

> "H.J. Lu" <hjl.tools@gmail.com> writes:

> > diff --git a/gcc/calls.c b/gcc/calls.c

> > index c8a42680041..6ab138e7bb0 100644

> > --- a/gcc/calls.c

> > +++ b/gcc/calls.c

> > @@ -3226,6 +3226,19 @@ can_implement_as_sibling_call_p (tree exp,

> >    return true;

> >  }

> >

> > +/* Update stack alignment when the parameter is passed in the stack

> > +   since the outgoing parameter requires extra alignment on the calling

> > +   function side. */

> > +

> > +static void

> > +update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate)

> > +{

> > +  if (crtl->stack_alignment_needed < locate->boundary)

> > +    crtl->stack_alignment_needed = locate->boundary;

> > +  if (crtl->preferred_stack_boundary < locate->boundary)

> > +    crtl->preferred_stack_boundary = locate->boundary;

> > +}

> > +

> >  /* Generate all the code for a CALL_EXPR exp

> >     and return an rtx for its value.

> >     Store the value in TARGET (specified as an rtx) if convenient.

> > @@ -3703,6 +3716,12 @@ expand_call (tree exp, rtx target, int ignore)

> >    /* Ensure current function's preferred stack boundary is at least

> >       what we need.  Stack alignment may also increase preferred stack

> >       boundary.  */

> > +  for (i = 0; i < num_actuals; i++)

> > +    if (reg_parm_stack_space > 0

> > +     || args[i].reg == 0

> > +     || args[i].partial != 0

> > +     || args[i].pass_on_stack)

> > +      update_stack_alignment_for_call (&args[i].locate);

> >    if (crtl->preferred_stack_boundary < preferred_stack_boundary)

> >      crtl->preferred_stack_boundary = preferred_stack_boundary;

> >    else

> > @@ -4961,6 +4980,12 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,

> >        targetm.calls.function_arg_advance (args_so_far, mode, (tree) 0, true);

> >      }

> >

> > +  for (int i = 0; i < nargs; i++)

> > +    if (reg_parm_stack_space > 0

> > +     || argvec[i].reg == 0

> > +     || argvec[i].partial != 0)

> > +      update_stack_alignment_for_call (&argvec[i].locate);

> > +

>

> It's safe to test argvec[i].pass_on_stack here too, since the vector


There is no pass_on_stack in argvec:

  struct arg
  {
    rtx value;
    machine_mode mode;
    rtx reg;
    int partial;
    struct locate_and_pad_arg_data locate;
    rtx save_area;
  };
  struct arg *argvec;

> is initialised to zeros.  So I think we should move the "if"s into the

> new function:

>

> static void

> update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate)

> {

>   if (reg_parm_stack_space > 0

>       || locate->reg == 0

>       || locate->partial != 0

>       || locate->pass_on_stack)


Since we have

struct locate_and_pad_arg_data
{
  /* Size of this argument on the stack, rounded up for any padding it
     gets.  If REG_PARM_STACK_SPACE is defined, then register parms are
     counted here, otherwise they aren't.  */
  struct args_size size;
  /* Offset of this argument from beginning of stack-args.  */
  struct args_size offset;
  /* Offset to the start of the stack slot.  Different from OFFSET
     if this arg pads downward.  */
  struct args_size slot_offset;
  /* The amount that the stack pointer needs to be adjusted to
     force alignment for the next argument.  */
  struct args_size alignment_pad;
  /* Which way we should pad this arg.  */
  pad_direction where_pad;
  /* slot_offset is at least this aligned.  */
  unsigned int boundary;
};

we can't check reg, partial nor pass_on_stack here.

>     {

>       if (crtl->stack_alignment_needed < locate->boundary)

>         crtl->stack_alignment_needed = locate->boundary;

>       if (crtl->preferred_stack_boundary < locate->boundary)

>         crtl->preferred_stack_boundary = locate->boundary;

>     }

> }

>

> OK with that change, thanks.

>


Is my original patch OK?

Thanks.

-- 
H.J.
Richard Sandiford June 14, 2019, 4:01 p.m. | #3
"H.J. Lu" <hjl.tools@gmail.com> writes:
> On Fri, Jun 14, 2019 at 8:31 AM Richard Sandiford

> <richard.sandiford@arm.com> wrote:

>>

>> "H.J. Lu" <hjl.tools@gmail.com> writes:

>> > diff --git a/gcc/calls.c b/gcc/calls.c

>> > index c8a42680041..6ab138e7bb0 100644

>> > --- a/gcc/calls.c

>> > +++ b/gcc/calls.c

>> > @@ -3226,6 +3226,19 @@ can_implement_as_sibling_call_p (tree exp,

>> >    return true;

>> >  }

>> >

>> > +/* Update stack alignment when the parameter is passed in the stack

>> > +   since the outgoing parameter requires extra alignment on the calling

>> > +   function side. */

>> > +

>> > +static void

>> > +update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate)

>> > +{

>> > +  if (crtl->stack_alignment_needed < locate->boundary)

>> > +    crtl->stack_alignment_needed = locate->boundary;

>> > +  if (crtl->preferred_stack_boundary < locate->boundary)

>> > +    crtl->preferred_stack_boundary = locate->boundary;

>> > +}

>> > +

>> >  /* Generate all the code for a CALL_EXPR exp

>> >     and return an rtx for its value.

>> >     Store the value in TARGET (specified as an rtx) if convenient.

>> > @@ -3703,6 +3716,12 @@ expand_call (tree exp, rtx target, int ignore)

>> >    /* Ensure current function's preferred stack boundary is at least

>> >       what we need.  Stack alignment may also increase preferred stack

>> >       boundary.  */

>> > +  for (i = 0; i < num_actuals; i++)

>> > +    if (reg_parm_stack_space > 0

>> > +     || args[i].reg == 0

>> > +     || args[i].partial != 0

>> > +     || args[i].pass_on_stack)

>> > +      update_stack_alignment_for_call (&args[i].locate);

>> >    if (crtl->preferred_stack_boundary < preferred_stack_boundary)

>> >      crtl->preferred_stack_boundary = preferred_stack_boundary;

>> >    else

>> > @@ -4961,6 +4980,12 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,

>> >        targetm.calls.function_arg_advance (args_so_far, mode, (tree) 0, true);

>> >      }

>> >

>> > +  for (int i = 0; i < nargs; i++)

>> > +    if (reg_parm_stack_space > 0

>> > +     || argvec[i].reg == 0

>> > +     || argvec[i].partial != 0)

>> > +      update_stack_alignment_for_call (&argvec[i].locate);

>> > +

>>

>> It's safe to test argvec[i].pass_on_stack here too, since the vector

>

> There is no pass_on_stack in argvec:

>

>   struct arg

>   {

>     rtx value;

>     machine_mode mode;

>     rtx reg;

>     int partial;

>     struct locate_and_pad_arg_data locate;

>     rtx save_area;

>   };

>   struct arg *argvec;

>

>> is initialised to zeros.  So I think we should move the "if"s into the

>> new function:

>>

>> static void

>> update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate)

>> {

>>   if (reg_parm_stack_space > 0

>>       || locate->reg == 0

>>       || locate->partial != 0

>>       || locate->pass_on_stack)

>

> Since we have

>

> struct locate_and_pad_arg_data

> {

>   /* Size of this argument on the stack, rounded up for any padding it

>      gets.  If REG_PARM_STACK_SPACE is defined, then register parms are

>      counted here, otherwise they aren't.  */

>   struct args_size size;

>   /* Offset of this argument from beginning of stack-args.  */

>   struct args_size offset;

>   /* Offset to the start of the stack slot.  Different from OFFSET

>      if this arg pads downward.  */

>   struct args_size slot_offset;

>   /* The amount that the stack pointer needs to be adjusted to

>      force alignment for the next argument.  */

>   struct args_size alignment_pad;

>   /* Which way we should pad this arg.  */

>   pad_direction where_pad;

>   /* slot_offset is at least this aligned.  */

>   unsigned int boundary;

> };

>

> we can't check reg, partial nor pass_on_stack here.


Sorry, missed that they were different structures.

>>     {

>>       if (crtl->stack_alignment_needed < locate->boundary)

>>         crtl->stack_alignment_needed = locate->boundary;

>>       if (crtl->preferred_stack_boundary < locate->boundary)

>>         crtl->preferred_stack_boundary = locate->boundary;

>>     }

>> }

>>

>> OK with that change, thanks.

>>

>

> Is my original patch OK?


Yes, thanks.

Richard

Patch

From 053632d7724ad3299008fc83c64c5d401c1b2ebe Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Wed, 5 Jun 2019 12:55:19 -0700
Subject: [PATCH] Update preferred_stack_boundary only when expanding function
 call

locate_and_pad_parm is called when expanding function call from
initialize_argument_information and when generating function body
from assign_parm_find_entry_rtl:

  /* Remember if the outgoing parameter requires extra alignment on the
     calling function side.  */
  if (crtl->stack_alignment_needed < boundary)
    crtl->stack_alignment_needed = boundary;
  if (crtl->preferred_stack_boundary < boundary)
    crtl->preferred_stack_boundary = boundary;

stack_alignment_needed and preferred_stack_boundary should be updated
only when expanding function call, not when generating function body.

Add update_stack_alignment_for_call to update stack alignment when
outgoing parameter is passed in the stack.

gcc/

	PR rtl-optimization/90765
	* calls.c (update_stack_alignment_for_call): New function.
	(expand_call): Call update_stack_alignment_for_call when
	outgoing parameter is passed in the stack.
	(emit_library_call_value_1): Likewise.
	* function.c (locate_and_pad_parm): Don't update
	stack_alignment_needed and preferred_stack_boundary.

gcc/testsuite/

	PR rtl-optimization/90765
	* gcc.target/i386/pr90765-1.c: New test.
	* gcc.target/i386/pr90765-2.c: Likewise.
---
 gcc/calls.c                               | 25 +++++++++++++++++++++++
 gcc/function.c                            |  7 -------
 gcc/testsuite/gcc.target/i386/pr90765-1.c | 11 ++++++++++
 gcc/testsuite/gcc.target/i386/pr90765-2.c | 18 ++++++++++++++++
 4 files changed, 54 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-2.c

diff --git a/gcc/calls.c b/gcc/calls.c
index c8a42680041..6ab138e7bb0 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -3226,6 +3226,19 @@  can_implement_as_sibling_call_p (tree exp,
   return true;
 }
 
+/* Update stack alignment when the parameter is passed in the stack
+   since the outgoing parameter requires extra alignment on the calling
+   function side. */
+
+static void
+update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate)
+{
+  if (crtl->stack_alignment_needed < locate->boundary)
+    crtl->stack_alignment_needed = locate->boundary;
+  if (crtl->preferred_stack_boundary < locate->boundary)
+    crtl->preferred_stack_boundary = locate->boundary;
+}
+
 /* Generate all the code for a CALL_EXPR exp
    and return an rtx for its value.
    Store the value in TARGET (specified as an rtx) if convenient.
@@ -3703,6 +3716,12 @@  expand_call (tree exp, rtx target, int ignore)
   /* Ensure current function's preferred stack boundary is at least
      what we need.  Stack alignment may also increase preferred stack
      boundary.  */
+  for (i = 0; i < num_actuals; i++)
+    if (reg_parm_stack_space > 0
+	|| args[i].reg == 0
+	|| args[i].partial != 0
+	|| args[i].pass_on_stack)
+      update_stack_alignment_for_call (&args[i].locate);
   if (crtl->preferred_stack_boundary < preferred_stack_boundary)
     crtl->preferred_stack_boundary = preferred_stack_boundary;
   else
@@ -4961,6 +4980,12 @@  emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
       targetm.calls.function_arg_advance (args_so_far, mode, (tree) 0, true);
     }
 
+  for (int i = 0; i < nargs; i++)
+    if (reg_parm_stack_space > 0
+	|| argvec[i].reg == 0
+	|| argvec[i].partial != 0)
+      update_stack_alignment_for_call (&argvec[i].locate);
+
   /* If this machine requires an external definition for library
      functions, write one out.  */
   assemble_external_libcall (fun);
diff --git a/gcc/function.c b/gcc/function.c
index e30ee259bec..45b65dc0fd2 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -4021,13 +4021,6 @@  locate_and_pad_parm (machine_mode passed_mode, tree type, int in_regs,
 	}
     }
 
-  /* Remember if the outgoing parameter requires extra alignment on the
-     calling function side.  */
-  if (crtl->stack_alignment_needed < boundary)
-    crtl->stack_alignment_needed = boundary;
-  if (crtl->preferred_stack_boundary < boundary)
-    crtl->preferred_stack_boundary = boundary;
-
   if (ARGS_GROW_DOWNWARD)
     {
       locate->slot_offset.constant = -initial_offset_ptr->constant;
diff --git a/gcc/testsuite/gcc.target/i386/pr90765-1.c b/gcc/testsuite/gcc.target/i386/pr90765-1.c
new file mode 100644
index 00000000000..178c3ff8054
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90765-1.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-not "and\[lq\]?\[\\t \]*\\$-64,\[\\t \]*%\[re\]?sp" } } */
+
+typedef int __v16si __attribute__ ((__vector_size__ (64)));
+
+void
+foo (__v16si x, int i0, int i1, int i2, int i3, int i4, int i5, __v16si *p)
+{
+  *p = x;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr90765-2.c b/gcc/testsuite/gcc.target/i386/pr90765-2.c
new file mode 100644
index 00000000000..45cf1f03747
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90765-2.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler "and\[lq\]?\[\\t \]*\\$-64,\[\\t \]*%\[re\]?sp" } } */
+/* { dg-skip-if "" { x86_64-*-mingw* } } */
+
+typedef int __v16si __attribute__ ((__vector_size__ (64)));
+
+extern void foo (__v16si, __v16si, __v16si, __v16si, __v16si, __v16si,
+		 __v16si, __v16si, __v16si, int, int, int, int, int,
+		 int, __v16si *);
+
+extern __v16si x, y;
+
+void
+bar (void)
+{
+  foo (x, x, x, x, x, x, x, x, x, 0, 1, 2, 3, 4, 5, &y);
+}
-- 
2.20.1