V2: [PATCH] Update preferred_stack_boundary only when expanding function call

Message ID CAMe9rOoYJhMwQgjmN5bxrtx=t8iKxTPYH0HvTcQ5R3y7kFQo0g@mail.gmail.com
State New
Headers show
Series
  • V2: [PATCH] Update preferred_stack_boundary only when expanding function call
Related show

Commit Message

H.J. Lu June 7, 2019, 6:15 p.m.
On Fri, Jun 7, 2019 at 1:22 AM Richard Biener <rguenther@suse.de> wrote:
>

> On Fri, 7 Jun 2019, Richard Sandiford wrote:

>

> > "H.J. Lu" <hjl.tools@gmail.com> writes:

> > > locate_and_pad_parm is called when expanding function call from

> > > initialize_argument_information and when generating function body

> > > from assign_parm_find_entry_rtl:

> > >

> > >   /* Remember if the outgoing parameter requires extra alignment on the

> > >      calling function side.  */

> > >   if (crtl->stack_alignment_needed < boundary)

> > >     crtl->stack_alignment_needed = boundary;

> > >   if (crtl->preferred_stack_boundary < boundary)

> > >     crtl->preferred_stack_boundary = boundary;

> > >

> > > stack_alignment_needed and preferred_stack_boundary should be updated

> > > only when expanding function call, not when generating function body.

> > > Add an argument, outgoing_p, to locate_and_pad_parm to indicate for

> > > expanding function call.  Update stack_alignment_needed and

> > > preferred_stack_boundary if the parameter is passed on stack and only

> > > when expanding function call.

> > >

> > > Tested on Linux/x86-64.

> > >

> > > OK for trunk?

> > >

> > > Thanks.

> > >

> > > --

> > > H.J.

> > >

> > > From e91e20ad8e10373db2c6d8f99a3da0bbf46c5c34 Mon Sep 17 00:00:00 2001

> > > From: "H.J. Lu" <hjl.tools@gmail.com>

> > > Date: Wed, 5 Jun 2019 12:55:19 -0700

> > > Subject: [PATCH] Update preferred_stack_boundary only when expanding function

> > >  call

> > >

> > > locate_and_pad_parm is called when expanding function call from

> > > initialize_argument_information and when generating function body

> > > from assign_parm_find_entry_rtl:

> > >

> > >   /* Remember if the outgoing parameter requires extra alignment on the

> > >      calling function side.  */

> > >   if (crtl->stack_alignment_needed < boundary)

> > >     crtl->stack_alignment_needed = boundary;

> > >   if (crtl->preferred_stack_boundary < boundary)

> > >     crtl->preferred_stack_boundary = boundary;

> > >

> > > stack_alignment_needed and preferred_stack_boundary should be updated

> > > only when expanding function call, not when generating function body.

> > > Add an argument, outgoing_p, to locate_and_pad_parm to indicate for

> > > expanding function call.  Update stack_alignment_needed and

> > > preferred_stack_boundary if the parameter is passed on stack and only

> > > when expanding function call.

> > >

> > > Tested on Linux/x86-64.

> > >

> > > gcc/

> > >

> > >     PR rtl-optimization/90765

> > >     * function.c (assign_parm_find_entry_rtl): Pass false to

> > >     locate_and_pad_parm.

> > >     (locate_and_pad_parm): Add an argument, outgoing_p, to indicate

> > >     for expanding function call.  Update stack_alignment_needed and

> > >     preferred_stack_boundary only if outgoing_p is true and the

> > >     the parameter is passed on stack.

> > >     * function.h (locate_and_pad_parm): Add an argument, outgoing_p,

> > >     defaulting to true.

> > >

> > > gcc/testsuite/

> > >

> > >     PR rtl-optimization/90765

> > >     * gcc.target/i386/pr90765-1.c: New test.

> > >     * gcc.target/i386/pr90765-2.c: Likewise.

> > > ---

> > >  gcc/function.c                            | 21 +++++++++++++--------

> > >  gcc/function.h                            |  3 ++-

> > >  gcc/testsuite/gcc.target/i386/pr90765-1.c | 11 +++++++++++

> > >  gcc/testsuite/gcc.target/i386/pr90765-2.c | 18 ++++++++++++++++++

> > >  4 files changed, 44 insertions(+), 9 deletions(-)

> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-1.c

> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-2.c

> > >

> > > diff --git a/gcc/function.c b/gcc/function.c

> > > index e30ee259bec..9b6673f6f0d 100644

> > > --- a/gcc/function.c

> > > +++ b/gcc/function.c

> > > @@ -2601,7 +2601,7 @@ assign_parm_find_entry_rtl (struct assign_parm_data_all *all,

> > >    locate_and_pad_parm (data->promoted_mode, data->passed_type, in_regs,

> > >                    all->reg_parm_stack_space,

> > >                    entry_parm ? data->partial : 0, current_function_decl,

> > > -                  &all->stack_args_size, &data->locate);

> > > +                  &all->stack_args_size, &data->locate, false);

> > >

> > >    /* Update parm_stack_boundary if this parameter is passed in the

> > >       stack.  */

> > > @@ -3954,7 +3954,8 @@ locate_and_pad_parm (machine_mode passed_mode, tree type, int in_regs,

> > >                  int reg_parm_stack_space, int partial,

> > >                  tree fndecl ATTRIBUTE_UNUSED,

> > >                  struct args_size *initial_offset_ptr,

> > > -                struct locate_and_pad_arg_data *locate)

> > > +                struct locate_and_pad_arg_data *locate,

> > > +                bool outgoing_p)

> > >  {

> > >    tree sizetree;

> > >    pad_direction where_pad;

> > > @@ -4021,12 +4022,16 @@ locate_and_pad_parm (machine_mode passed_mode, tree type, int in_regs,

> > >     }

> > >      }

> > >

> > > -  /* Remember if the outgoing parameter requires extra alignment on the

> > > -     calling function side.  */

> > > -  if (crtl->stack_alignment_needed < boundary)

> > > -    crtl->stack_alignment_needed = boundary;

> > > -  if (crtl->preferred_stack_boundary < boundary)

> > > -    crtl->preferred_stack_boundary = boundary;

> > > +  if (outgoing_p && !in_regs)

> > > +    {

> > > +      /* Remember if the outgoing parameter requires extra alignment on

> > > +    the calling function side if this parameter is passed in the

> > > +    stack.  */

> > > +      if (crtl->stack_alignment_needed < boundary)

> > > +   crtl->stack_alignment_needed = boundary;

> > > +      if (crtl->preferred_stack_boundary < boundary)

> > > +   crtl->preferred_stack_boundary = boundary;

> > > +    }

> >

> > Just my opinion, but I think this shows that the code isn't really

> > in the right place.  IMO locate_and_pad_parm should just describe

> > the ABI and is too low-level to be modifying global state like this.

> >

> > I think we could instead do this by walking over the argument vectors

> > in a subroutine of expand_call and emit_library_call_value_1.

> > expand_call is also where we do:

> >

> >   /* Ensure current function's preferred stack boundary is at least

> >      what we need.  Stack alignment may also increase preferred stack

> >      boundary.  */

> >   if (crtl->preferred_stack_boundary < preferred_stack_boundary)

> >     crtl->preferred_stack_boundary = preferred_stack_boundary;

> >   else

> >     preferred_stack_boundary = crtl->preferred_stack_boundary;

>

> Agreed - that would be much cleaner.

>


Here is the updated patch to add update_stack_alignment_for_call.

Tested on Linux/x86-64.    OK for trunk?

Thanks.

-- 
H.J.

Comments

Richard Sandiford June 8, 2019, 7:14 a.m. | #1
"H.J. Lu" <hjl.tools@gmail.com> writes:
> On Fri, Jun 7, 2019 at 1:22 AM Richard Biener <rguenther@suse.de> wrote:

>>

>> On Fri, 7 Jun 2019, Richard Sandiford wrote:

>>

>> > "H.J. Lu" <hjl.tools@gmail.com> writes:

>> > > locate_and_pad_parm is called when expanding function call from

>> > > initialize_argument_information and when generating function body

>> > > from assign_parm_find_entry_rtl:

>> > >

>> > >   /* Remember if the outgoing parameter requires extra alignment on the

>> > >      calling function side.  */

>> > >   if (crtl->stack_alignment_needed < boundary)

>> > >     crtl->stack_alignment_needed = boundary;

>> > >   if (crtl->preferred_stack_boundary < boundary)

>> > >     crtl->preferred_stack_boundary = boundary;

>> > >

>> > > stack_alignment_needed and preferred_stack_boundary should be updated

>> > > only when expanding function call, not when generating function body.

>> > > Add an argument, outgoing_p, to locate_and_pad_parm to indicate for

>> > > expanding function call.  Update stack_alignment_needed and

>> > > preferred_stack_boundary if the parameter is passed on stack and only

>> > > when expanding function call.

>> > >

>> > > Tested on Linux/x86-64.

>> > >

>> > > OK for trunk?

>> > >

>> > > Thanks.

>> > >

>> > > --

>> > > H.J.

>> > >

>> > > From e91e20ad8e10373db2c6d8f99a3da0bbf46c5c34 Mon Sep 17 00:00:00 2001

>> > > From: "H.J. Lu" <hjl.tools@gmail.com>

>> > > Date: Wed, 5 Jun 2019 12:55:19 -0700

>> > > Subject: [PATCH] Update preferred_stack_boundary only when expanding function

>> > >  call

>> > >

>> > > locate_and_pad_parm is called when expanding function call from

>> > > initialize_argument_information and when generating function body

>> > > from assign_parm_find_entry_rtl:

>> > >

>> > >   /* Remember if the outgoing parameter requires extra alignment on the

>> > >      calling function side.  */

>> > >   if (crtl->stack_alignment_needed < boundary)

>> > >     crtl->stack_alignment_needed = boundary;

>> > >   if (crtl->preferred_stack_boundary < boundary)

>> > >     crtl->preferred_stack_boundary = boundary;

>> > >

>> > > stack_alignment_needed and preferred_stack_boundary should be updated

>> > > only when expanding function call, not when generating function body.

>> > > Add an argument, outgoing_p, to locate_and_pad_parm to indicate for

>> > > expanding function call.  Update stack_alignment_needed and

>> > > preferred_stack_boundary if the parameter is passed on stack and only

>> > > when expanding function call.

>> > >

>> > > Tested on Linux/x86-64.

>> > >

>> > > gcc/

>> > >

>> > >     PR rtl-optimization/90765

>> > >     * function.c (assign_parm_find_entry_rtl): Pass false to

>> > >     locate_and_pad_parm.

>> > >     (locate_and_pad_parm): Add an argument, outgoing_p, to indicate

>> > >     for expanding function call.  Update stack_alignment_needed and

>> > >     preferred_stack_boundary only if outgoing_p is true and the

>> > >     the parameter is passed on stack.

>> > >     * function.h (locate_and_pad_parm): Add an argument, outgoing_p,

>> > >     defaulting to true.

>> > >

>> > > gcc/testsuite/

>> > >

>> > >     PR rtl-optimization/90765

>> > >     * gcc.target/i386/pr90765-1.c: New test.

>> > >     * gcc.target/i386/pr90765-2.c: Likewise.

>> > > ---

>> > >  gcc/function.c                            | 21 +++++++++++++--------

>> > >  gcc/function.h                            |  3 ++-

>> > >  gcc/testsuite/gcc.target/i386/pr90765-1.c | 11 +++++++++++

>> > >  gcc/testsuite/gcc.target/i386/pr90765-2.c | 18 ++++++++++++++++++

>> > >  4 files changed, 44 insertions(+), 9 deletions(-)

>> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-1.c

>> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-2.c

>> > >

>> > > diff --git a/gcc/function.c b/gcc/function.c

>> > > index e30ee259bec..9b6673f6f0d 100644

>> > > --- a/gcc/function.c

>> > > +++ b/gcc/function.c

>> > > @@ -2601,7 +2601,7 @@ assign_parm_find_entry_rtl (struct assign_parm_data_all *all,

>> > >    locate_and_pad_parm (data->promoted_mode, data->passed_type, in_regs,

>> > >                    all->reg_parm_stack_space,

>> > >                    entry_parm ? data->partial : 0, current_function_decl,

>> > > -                  &all->stack_args_size, &data->locate);

>> > > +                  &all->stack_args_size, &data->locate, false);

>> > >

>> > >    /* Update parm_stack_boundary if this parameter is passed in the

>> > >       stack.  */

>> > > @@ -3954,7 +3954,8 @@ locate_and_pad_parm (machine_mode passed_mode, tree type, int in_regs,

>> > >                  int reg_parm_stack_space, int partial,

>> > >                  tree fndecl ATTRIBUTE_UNUSED,

>> > >                  struct args_size *initial_offset_ptr,

>> > > -                struct locate_and_pad_arg_data *locate)

>> > > +                struct locate_and_pad_arg_data *locate,

>> > > +                bool outgoing_p)

>> > >  {

>> > >    tree sizetree;

>> > >    pad_direction where_pad;

>> > > @@ -4021,12 +4022,16 @@ locate_and_pad_parm (machine_mode passed_mode, tree type, int in_regs,

>> > >     }

>> > >      }

>> > >

>> > > -  /* Remember if the outgoing parameter requires extra alignment on the

>> > > -     calling function side.  */

>> > > -  if (crtl->stack_alignment_needed < boundary)

>> > > -    crtl->stack_alignment_needed = boundary;

>> > > -  if (crtl->preferred_stack_boundary < boundary)

>> > > -    crtl->preferred_stack_boundary = boundary;

>> > > +  if (outgoing_p && !in_regs)

>> > > +    {

>> > > +      /* Remember if the outgoing parameter requires extra alignment on

>> > > +    the calling function side if this parameter is passed in the

>> > > +    stack.  */

>> > > +      if (crtl->stack_alignment_needed < boundary)

>> > > +   crtl->stack_alignment_needed = boundary;

>> > > +      if (crtl->preferred_stack_boundary < boundary)

>> > > +   crtl->preferred_stack_boundary = boundary;

>> > > +    }

>> >

>> > Just my opinion, but I think this shows that the code isn't really

>> > in the right place.  IMO locate_and_pad_parm should just describe

>> > the ABI and is too low-level to be modifying global state like this.

>> >

>> > I think we could instead do this by walking over the argument vectors

>> > in a subroutine of expand_call and emit_library_call_value_1.

>> > expand_call is also where we do:

>> >

>> >   /* Ensure current function's preferred stack boundary is at least

>> >      what we need.  Stack alignment may also increase preferred stack

>> >      boundary.  */

>> >   if (crtl->preferred_stack_boundary < preferred_stack_boundary)

>> >     crtl->preferred_stack_boundary = preferred_stack_boundary;

>> >   else

>> >     preferred_stack_boundary = crtl->preferred_stack_boundary;

>>

>> Agreed - that would be much cleaner.

>>

>

> Here is the updated patch to add update_stack_alignment_for_call.

>

> Tested on Linux/x86-64.    OK for trunk?

>

> Thanks.

>

> -- 

> H.J.

>

> From 00d81edc1e62c43c2084483df055ea68f4047679 Mon Sep 17 00:00:00 2001

> From: "H.J. Lu" <hjl.tools@gmail.com>

> Date: Wed, 5 Jun 2019 12:55:19 -0700

> Subject: [PATCH] Update preferred_stack_boundary only when expanding function

>  call

>

> locate_and_pad_parm is called when expanding function call from

> initialize_argument_information and when generating function body

> from assign_parm_find_entry_rtl:

>

>   /* Remember if the outgoing parameter requires extra alignment on the

>      calling function side.  */

>   if (crtl->stack_alignment_needed < boundary)

>     crtl->stack_alignment_needed = boundary;

>   if (crtl->preferred_stack_boundary < boundary)

>     crtl->preferred_stack_boundary = boundary;

>

> stack_alignment_needed and preferred_stack_boundary should be updated

> only when expanding function call, not when generating function body.

>

> Add update_stack_alignment_for_call to update stack alignment when

> outgoing parameter is passed in the stack.

>

> gcc/

>

> 	PR rtl-optimization/90765

> 	* calls.c (update_stack_alignment_for_call): New function.

> 	(initialize_argument_information): Call

> 	update_stack_alignment_for_call when outgoing parameter is

> 	passed in the stack.

> 	(emit_library_call_value_1): Likewise.

> 	* function.c (locate_and_pad_parm): Don't update

> 	stack_alignment_needed and preferred_stack_boundary.

>

> gcc/testsuite/

>

> 	PR rtl-optimization/90765

> 	* gcc.target/i386/pr90765-1.c: New test.

> 	* gcc.target/i386/pr90765-2.c: Likewise.

> ---

>  gcc/calls.c                               | 33 ++++++++++++++++++-----

>  gcc/function.c                            |  7 -----

>  gcc/testsuite/gcc.target/i386/pr90765-1.c | 11 ++++++++

>  gcc/testsuite/gcc.target/i386/pr90765-2.c | 18 +++++++++++++

>  4 files changed, 55 insertions(+), 14 deletions(-)

>  create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-1.c

>  create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-2.c

>

> diff --git a/gcc/calls.c b/gcc/calls.c

> index c8a42680041..8ba82fbb6c0 100644

> --- a/gcc/calls.c

> +++ b/gcc/calls.c

> @@ -1841,6 +1841,19 @@ maybe_complain_about_tail_call (tree call_expr, const char *reason)

>    error_at (EXPR_LOCATION (call_expr), "cannot tail-call: %s", reason);

>  }

>  

> +/* Update stack alignment when the parameter is passed in the stack

> +   since the outgoing parameter requires extra alignment on the calling

> +   function side. */

> +

> +static void

> +update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate)

> +{

> +  if (crtl->stack_alignment_needed < locate->boundary)

> +    crtl->stack_alignment_needed = locate->boundary;

> +  if (crtl->preferred_stack_boundary < locate->boundary)

> +    crtl->preferred_stack_boundary = locate->boundary;

> +}

> +

>  /* Fill in ARGS_SIZE and ARGS array based on the parameters found in

>     CALL_EXPR EXP.

>  

> @@ -2162,15 +2175,18 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED,

>        if (args[i].reg == 0 || args[i].partial != 0

>  	       || reg_parm_stack_space > 0

>  	       || args[i].pass_on_stack)

> -	locate_and_pad_parm (mode, type,

> +	{

> +	  locate_and_pad_parm (mode, type,

>  #ifdef STACK_PARMS_IN_REG_PARM_AREA

> -			     1,

> +			       1,

>  #else

> -			     args[i].reg != 0,

> +			       args[i].reg != 0,

>  #endif

> -			     reg_parm_stack_space,

> -			     args[i].pass_on_stack ? 0 : args[i].partial,

> -			     fndecl, args_size, &args[i].locate);

> +			       reg_parm_stack_space,

> +			       args[i].pass_on_stack ? 0 : args[i].partial,

> +			       fndecl, args_size, &args[i].locate);

> +	  update_stack_alignment_for_call (&args[i].locate);

> +	}

>  #ifdef BLOCK_REG_PADDING

>        else

>  	/* The argument is passed entirely in registers.  See at which

> @@ -4861,7 +4877,10 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,

>  

>        if (argvec[count].reg == 0 || argvec[count].partial != 0

>  	  || reg_parm_stack_space > 0)

> -	args_size.constant += argvec[count].locate.size.constant;

> +	{

> +	  args_size.constant += argvec[count].locate.size.constant;

> +	  update_stack_alignment_for_call (&argvec[count].locate);

> +	}

>  

>        targetm.calls.function_arg_advance (args_so_far, Pmode, (tree) 0, true);


There are two calls to locate_and_pad_parm in emit_library_call_value_1,
but I think this only handles the second.

IMO it'd be clearer/safer to iterate over the argument vector separately
rather than do it on the fly.  In the case of expand_call it would make
sense to do it near the crtl->preferred_stack_boundary stuff quoted above,
so that this is all in one place.

Thanks,
Richard

>  

> diff --git a/gcc/function.c b/gcc/function.c

> index e30ee259bec..45b65dc0fd2 100644

> --- a/gcc/function.c

> +++ b/gcc/function.c

> @@ -4021,13 +4021,6 @@ locate_and_pad_parm (machine_mode passed_mode, tree type, int in_regs,

>  	}

>      }

>  

> -  /* Remember if the outgoing parameter requires extra alignment on the

> -     calling function side.  */

> -  if (crtl->stack_alignment_needed < boundary)

> -    crtl->stack_alignment_needed = boundary;

> -  if (crtl->preferred_stack_boundary < boundary)

> -    crtl->preferred_stack_boundary = boundary;

> -

>    if (ARGS_GROW_DOWNWARD)

>      {

>        locate->slot_offset.constant = -initial_offset_ptr->constant;

> diff --git a/gcc/testsuite/gcc.target/i386/pr90765-1.c b/gcc/testsuite/gcc.target/i386/pr90765-1.c

> new file mode 100644

> index 00000000000..178c3ff8054

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/pr90765-1.c

> @@ -0,0 +1,11 @@

> +/* { dg-do compile } */

> +/* { dg-options "-O2 -mavx512f" } */

> +/* { dg-final { scan-assembler-not "and\[lq\]?\[\\t \]*\\$-64,\[\\t \]*%\[re\]?sp" } } */

> +

> +typedef int __v16si __attribute__ ((__vector_size__ (64)));

> +

> +void

> +foo (__v16si x, int i0, int i1, int i2, int i3, int i4, int i5, __v16si *p)

> +{

> +  *p = x;

> +}

> diff --git a/gcc/testsuite/gcc.target/i386/pr90765-2.c b/gcc/testsuite/gcc.target/i386/pr90765-2.c

> new file mode 100644

> index 00000000000..45cf1f03747

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/pr90765-2.c

> @@ -0,0 +1,18 @@

> +/* { dg-do compile } */

> +/* { dg-options "-O2 -mavx512f" } */

> +/* { dg-final { scan-assembler "and\[lq\]?\[\\t \]*\\$-64,\[\\t \]*%\[re\]?sp" } } */

> +/* { dg-skip-if "" { x86_64-*-mingw* } } */

> +

> +typedef int __v16si __attribute__ ((__vector_size__ (64)));

> +

> +extern void foo (__v16si, __v16si, __v16si, __v16si, __v16si, __v16si,

> +		 __v16si, __v16si, __v16si, int, int, int, int, int,

> +		 int, __v16si *);

> +

> +extern __v16si x, y;

> +

> +void

> +bar (void)

> +{

> +  foo (x, x, x, x, x, x, x, x, x, 0, 1, 2, 3, 4, 5, &y);

> +}

Patch

From 00d81edc1e62c43c2084483df055ea68f4047679 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Wed, 5 Jun 2019 12:55:19 -0700
Subject: [PATCH] Update preferred_stack_boundary only when expanding function
 call

locate_and_pad_parm is called when expanding function call from
initialize_argument_information and when generating function body
from assign_parm_find_entry_rtl:

  /* Remember if the outgoing parameter requires extra alignment on the
     calling function side.  */
  if (crtl->stack_alignment_needed < boundary)
    crtl->stack_alignment_needed = boundary;
  if (crtl->preferred_stack_boundary < boundary)
    crtl->preferred_stack_boundary = boundary;

stack_alignment_needed and preferred_stack_boundary should be updated
only when expanding function call, not when generating function body.

Add update_stack_alignment_for_call to update stack alignment when
outgoing parameter is passed in the stack.

gcc/

	PR rtl-optimization/90765
	* calls.c (update_stack_alignment_for_call): New function.
	(initialize_argument_information): Call
	update_stack_alignment_for_call when outgoing parameter is
	passed in the stack.
	(emit_library_call_value_1): Likewise.
	* function.c (locate_and_pad_parm): Don't update
	stack_alignment_needed and preferred_stack_boundary.

gcc/testsuite/

	PR rtl-optimization/90765
	* gcc.target/i386/pr90765-1.c: New test.
	* gcc.target/i386/pr90765-2.c: Likewise.
---
 gcc/calls.c                               | 33 ++++++++++++++++++-----
 gcc/function.c                            |  7 -----
 gcc/testsuite/gcc.target/i386/pr90765-1.c | 11 ++++++++
 gcc/testsuite/gcc.target/i386/pr90765-2.c | 18 +++++++++++++
 4 files changed, 55 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-2.c

diff --git a/gcc/calls.c b/gcc/calls.c
index c8a42680041..8ba82fbb6c0 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -1841,6 +1841,19 @@  maybe_complain_about_tail_call (tree call_expr, const char *reason)
   error_at (EXPR_LOCATION (call_expr), "cannot tail-call: %s", reason);
 }
 
+/* Update stack alignment when the parameter is passed in the stack
+   since the outgoing parameter requires extra alignment on the calling
+   function side. */
+
+static void
+update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate)
+{
+  if (crtl->stack_alignment_needed < locate->boundary)
+    crtl->stack_alignment_needed = locate->boundary;
+  if (crtl->preferred_stack_boundary < locate->boundary)
+    crtl->preferred_stack_boundary = locate->boundary;
+}
+
 /* Fill in ARGS_SIZE and ARGS array based on the parameters found in
    CALL_EXPR EXP.
 
@@ -2162,15 +2175,18 @@  initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED,
       if (args[i].reg == 0 || args[i].partial != 0
 	       || reg_parm_stack_space > 0
 	       || args[i].pass_on_stack)
-	locate_and_pad_parm (mode, type,
+	{
+	  locate_and_pad_parm (mode, type,
 #ifdef STACK_PARMS_IN_REG_PARM_AREA
-			     1,
+			       1,
 #else
-			     args[i].reg != 0,
+			       args[i].reg != 0,
 #endif
-			     reg_parm_stack_space,
-			     args[i].pass_on_stack ? 0 : args[i].partial,
-			     fndecl, args_size, &args[i].locate);
+			       reg_parm_stack_space,
+			       args[i].pass_on_stack ? 0 : args[i].partial,
+			       fndecl, args_size, &args[i].locate);
+	  update_stack_alignment_for_call (&args[i].locate);
+	}
 #ifdef BLOCK_REG_PADDING
       else
 	/* The argument is passed entirely in registers.  See at which
@@ -4861,7 +4877,10 @@  emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
 
       if (argvec[count].reg == 0 || argvec[count].partial != 0
 	  || reg_parm_stack_space > 0)
-	args_size.constant += argvec[count].locate.size.constant;
+	{
+	  args_size.constant += argvec[count].locate.size.constant;
+	  update_stack_alignment_for_call (&argvec[count].locate);
+	}
 
       targetm.calls.function_arg_advance (args_so_far, Pmode, (tree) 0, true);
 
diff --git a/gcc/function.c b/gcc/function.c
index e30ee259bec..45b65dc0fd2 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -4021,13 +4021,6 @@  locate_and_pad_parm (machine_mode passed_mode, tree type, int in_regs,
 	}
     }
 
-  /* Remember if the outgoing parameter requires extra alignment on the
-     calling function side.  */
-  if (crtl->stack_alignment_needed < boundary)
-    crtl->stack_alignment_needed = boundary;
-  if (crtl->preferred_stack_boundary < boundary)
-    crtl->preferred_stack_boundary = boundary;
-
   if (ARGS_GROW_DOWNWARD)
     {
       locate->slot_offset.constant = -initial_offset_ptr->constant;
diff --git a/gcc/testsuite/gcc.target/i386/pr90765-1.c b/gcc/testsuite/gcc.target/i386/pr90765-1.c
new file mode 100644
index 00000000000..178c3ff8054
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90765-1.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-not "and\[lq\]?\[\\t \]*\\$-64,\[\\t \]*%\[re\]?sp" } } */
+
+typedef int __v16si __attribute__ ((__vector_size__ (64)));
+
+void
+foo (__v16si x, int i0, int i1, int i2, int i3, int i4, int i5, __v16si *p)
+{
+  *p = x;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr90765-2.c b/gcc/testsuite/gcc.target/i386/pr90765-2.c
new file mode 100644
index 00000000000..45cf1f03747
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90765-2.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler "and\[lq\]?\[\\t \]*\\$-64,\[\\t \]*%\[re\]?sp" } } */
+/* { dg-skip-if "" { x86_64-*-mingw* } } */
+
+typedef int __v16si __attribute__ ((__vector_size__ (64)));
+
+extern void foo (__v16si, __v16si, __v16si, __v16si, __v16si, __v16si,
+		 __v16si, __v16si, __v16si, int, int, int, int, int,
+		 int, __v16si *);
+
+extern __v16si x, y;
+
+void
+bar (void)
+{
+  foo (x, x, x, x, x, x, x, x, x, 0, 1, 2, 3, 4, 5, &y);
+}
-- 
2.20.1