i386: Also adjust stack frame for stack slot alignment

Message ID 20180109212946.GA23168@gmail.com
State New
Headers show
Series
  • i386: Also adjust stack frame for stack slot alignment
Related show

Commit Message

H.J. Lu Jan. 9, 2018, 9:29 p.m.
We should also adjust stack_realign_offset for the largest alignment of
stack slot actually used when stack realignment isn't needed.  This is
required to keep stack frame properly aligned to satisfy the largest
alignment of stack slots.

Tested on Linux/i686 and Linux/x86-64.

OK for trunk?

Thanks.

H.J.
---
gcc/

	PR target/83735
	* config/i386/i386.c (ix86_compute_frame_layout): Always adjust
	stack_realign_offset for the largest alignment of stack slot
	actually used.
	(ix86_find_max_used_stack_alignment): New function.
	(ix86_finalize_stack_frame_flags): Use it.  Set
	max_used_stack_alignment if we don't realign stack.
	* config/i386/i386.h (machine_function): Add
	max_used_stack_alignment.

gcc/testsuite/

	PR target/83735
	* gcc.target/i386/pr83735.c: New test.
---
 gcc/config/i386/i386.c                  | 120 +++++++++++++++++++-------------
 gcc/config/i386/i386.h                  |   3 +
 gcc/testsuite/gcc.target/i386/pr83735.c |  55 +++++++++++++++
 3 files changed, 131 insertions(+), 47 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr83735.c

-- 
2.14.3

Comments

Uros Bizjak Jan. 10, 2018, 7:04 a.m. | #1
On Tue, Jan 9, 2018 at 10:29 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
> We should also adjust stack_realign_offset for the largest alignment of

> stack slot actually used when stack realignment isn't needed.  This is

> required to keep stack frame properly aligned to satisfy the largest

> alignment of stack slots.

>

> Tested on Linux/i686 and Linux/x86-64.

>

> OK for trunk?

>

> Thanks.

>

> H.J.

> ---

> gcc/

>

>         PR target/83735

>         * config/i386/i386.c (ix86_compute_frame_layout): Always adjust

>         stack_realign_offset for the largest alignment of stack slot

>         actually used.

>         (ix86_find_max_used_stack_alignment): New function.

>         (ix86_finalize_stack_frame_flags): Use it.  Set

>         max_used_stack_alignment if we don't realign stack.

>         * config/i386/i386.h (machine_function): Add

>         max_used_stack_alignment.

>

> gcc/testsuite/

>

>         PR target/83735

>         * gcc.target/i386/pr83735.c: New test.


LGTM, but you know this part better than I.

Thanks,
Uros.

> ---

>  gcc/config/i386/i386.c                  | 120 +++++++++++++++++++-------------

>  gcc/config/i386/i386.h                  |   3 +

>  gcc/testsuite/gcc.target/i386/pr83735.c |  55 +++++++++++++++

>  3 files changed, 131 insertions(+), 47 deletions(-)

>  create mode 100644 gcc/testsuite/gcc.target/i386/pr83735.c

>

> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c

> index 8696f931806..8c08394ede7 100644

> --- a/gcc/config/i386/i386.c

> +++ b/gcc/config/i386/i386.c

> @@ -11259,7 +11259,11 @@ ix86_compute_frame_layout (void)

>    /* Calculate the size of the va-arg area (not including padding, if any).  */

>    frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;

>

> -  if (stack_realign_fp)

> +  /* Also adjust stack_realign_offset for the largest alignment of

> +     stack slot actually used.  */

> +  if (stack_realign_fp

> +      || (cfun->machine->max_used_stack_alignment != 0

> +         && (offset % cfun->machine->max_used_stack_alignment) != 0))

>      {

>        /* We may need a 16-byte aligned stack for the remainder of the

>          register save area, but the stack frame for the local function

> @@ -12668,6 +12672,62 @@ output_probe_stack_range (rtx reg, rtx end)

>    return "";

>  }

>

> +/* Return true if stack frame is required.  Update STACK_ALIGNMENT

> +   to the largest alignment, in bits, of stack slot used if stack

> +   frame is required and CHECK_STACK_SLOT is true.  */

> +

> +static bool

> +ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,

> +                                   bool check_stack_slot)

> +{

> +  HARD_REG_SET set_up_by_prologue, prologue_used;

> +  basic_block bb;

> +

> +  CLEAR_HARD_REG_SET (prologue_used);

> +  CLEAR_HARD_REG_SET (set_up_by_prologue);

> +  add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);

> +  add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);

> +  add_to_hard_reg_set (&set_up_by_prologue, Pmode,

> +                      HARD_FRAME_POINTER_REGNUM);

> +

> +  /* The preferred stack alignment is the minimum stack alignment.  */

> +  if (stack_alignment > crtl->preferred_stack_boundary)

> +    stack_alignment = crtl->preferred_stack_boundary;

> +

> +  bool require_stack_frame = false;

> +

> +  FOR_EACH_BB_FN (bb, cfun)

> +    {

> +      rtx_insn *insn;

> +      FOR_BB_INSNS (bb, insn)

> +       if (NONDEBUG_INSN_P (insn)

> +           && requires_stack_frame_p (insn, prologue_used,

> +                                      set_up_by_prologue))

> +         {

> +           require_stack_frame = true;

> +

> +           if (check_stack_slot)

> +             {

> +               /* Find the maximum stack alignment.  */

> +               subrtx_iterator::array_type array;

> +               FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)

> +                 if (MEM_P (*iter)

> +                     && (reg_mentioned_p (stack_pointer_rtx,

> +                                          *iter)

> +                         || reg_mentioned_p (frame_pointer_rtx,

> +                                             *iter)))

> +                   {

> +                     unsigned int alignment = MEM_ALIGN (*iter);

> +                     if (alignment > stack_alignment)

> +                       stack_alignment = alignment;

> +                   }

> +             }

> +         }

> +    }

> +

> +  return require_stack_frame;

> +}

> +

>  /* Finalize stack_realign_needed and frame_pointer_needed flags, which

>     will guide prologue/epilogue to be generated in correct form.  */

>

> @@ -12718,52 +12778,8 @@ ix86_finalize_stack_frame_flags (void)

>        && ix86_nsaved_sseregs () == 0

>        && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)

>      {

> -      HARD_REG_SET set_up_by_prologue, prologue_used;

> -      basic_block bb;

> -

> -      CLEAR_HARD_REG_SET (prologue_used);

> -      CLEAR_HARD_REG_SET (set_up_by_prologue);

> -      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);

> -      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);

> -      add_to_hard_reg_set (&set_up_by_prologue, Pmode,

> -                          HARD_FRAME_POINTER_REGNUM);

> -

> -      /* The preferred stack alignment is the minimum stack alignment.  */

> -      if (stack_alignment > crtl->preferred_stack_boundary)

> -       stack_alignment = crtl->preferred_stack_boundary;

> -

> -      bool require_stack_frame = false;

> -

> -      FOR_EACH_BB_FN (bb, cfun)

> -        {

> -          rtx_insn *insn;

> -         FOR_BB_INSNS (bb, insn)

> -           if (NONDEBUG_INSN_P (insn)

> -               && requires_stack_frame_p (insn, prologue_used,

> -                                          set_up_by_prologue))

> -             {

> -               require_stack_frame = true;

> -

> -               if (stack_realign)

> -                 {

> -                   /* Find the maximum stack alignment.  */

> -                   subrtx_iterator::array_type array;

> -                   FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)

> -                     if (MEM_P (*iter)

> -                         && (reg_mentioned_p (stack_pointer_rtx,

> -                                              *iter)

> -                             || reg_mentioned_p (frame_pointer_rtx,

> -                                                 *iter)))

> -                       {

> -                         unsigned int alignment = MEM_ALIGN (*iter);

> -                         if (alignment > stack_alignment)

> -                           stack_alignment = alignment;

> -                       }

> -                 }

> -             }

> -       }

> -

> -      if (require_stack_frame)

> +      if (ix86_find_max_used_stack_alignment (stack_alignment,

> +                                             stack_realign))

>         {

>           /* Stack frame is required.  If stack alignment needed is less

>              than incoming stack boundary, don't realign stack.  */

> @@ -12851,6 +12867,16 @@ ix86_finalize_stack_frame_flags (void)

>           recompute_frame_layout_p = true;

>         }

>      }

> +  else if (crtl->max_used_stack_slot_alignment

> +          > crtl->preferred_stack_boundary)

> +    {

> +      /* We don't need to realign stack.  But we still need to keep

> +        stack frame properly aligned to satisfy the largest alignment

> +        of stack slots.  */

> +      if (ix86_find_max_used_stack_alignment (stack_alignment, true))

> +       cfun->machine->max_used_stack_alignment

> +         = stack_alignment / BITS_PER_UNIT;

> +    }

>

>    if (crtl->stack_realign_needed != stack_realign)

>      recompute_frame_layout_p = true;

> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h

> index 93b7a2c5915..38f264e36b2 100644

> --- a/gcc/config/i386/i386.h

> +++ b/gcc/config/i386/i386.h

> @@ -2596,6 +2596,9 @@ struct GTY(()) machine_function {

>    /* Nonzero if the function places outgoing arguments on stack.  */

>    BOOL_BITFIELD outgoing_args_on_stack : 1;

>

> +  /* The largest alignment, in bytes, of stack slot actually used.  */

> +  unsigned int max_used_stack_alignment;

> +

>    /* During prologue/epilogue generation, the current frame state.

>       Otherwise, the frame state at the end of the prologue.  */

>    struct machine_frame_state fs;

> diff --git a/gcc/testsuite/gcc.target/i386/pr83735.c b/gcc/testsuite/gcc.target/i386/pr83735.c

> new file mode 100644

> index 00000000000..786c90a5839

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/pr83735.c

> @@ -0,0 +1,55 @@

> +/* { dg-do run } */

> +/* { dg-require-effective-target avx } */

> +/* { dg-options "-O3 -mavx" } */

> +

> +#include <stdlib.h>

> +#include "cpuid.h"

> +#include "m256-check.h"

> +#include "avx-os-support.h"

> +

> +static void __attribute__((constructor))

> +check_avx (void)

> +{

> +  unsigned int eax, ebx, ecx, edx;

> +

> +  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))

> +    exit (0);

> +

> +  /* Run AVX test only if host has AVX support.  */

> +  if (((ecx & (bit_AVX | bit_OSXSAVE)) == (bit_AVX | bit_OSXSAVE))

> +      && avx_os_support ())

> +    return;

> +

> +  exit (0);

> +}

> +

> +struct S

> +{

> +  short b;

> +  long c;

> +  char d;

> +  long e;

> +  unsigned:8;

> +};

> +

> +int f, h, k, l;

> +int g[10];

> +volatile struct S j;

> +char m;

> +

> +int

> +main (void)

> +{

> +  int i;

> +  struct S n;

> +  for (i = 0; i < 6; i++)

> +    {

> +      for (f = 0; f < 10; f++)

> +       g[f] = 4;

> +      n = j;

> +      h = m == 0 ? 1 : 5 % m;

> +      if (l)

> +       n.b = k;

> +    }

> +  return n.b;

> +}

> --

> 2.14.3

>

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 8696f931806..8c08394ede7 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -11259,7 +11259,11 @@  ix86_compute_frame_layout (void)
   /* Calculate the size of the va-arg area (not including padding, if any).  */
   frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
 
-  if (stack_realign_fp)
+  /* Also adjust stack_realign_offset for the largest alignment of
+     stack slot actually used.  */
+  if (stack_realign_fp
+      || (cfun->machine->max_used_stack_alignment != 0
+	  && (offset % cfun->machine->max_used_stack_alignment) != 0))
     {
       /* We may need a 16-byte aligned stack for the remainder of the
 	 register save area, but the stack frame for the local function
@@ -12668,6 +12672,62 @@  output_probe_stack_range (rtx reg, rtx end)
   return "";
 }
 
+/* Return true if stack frame is required.  Update STACK_ALIGNMENT
+   to the largest alignment, in bits, of stack slot used if stack
+   frame is required and CHECK_STACK_SLOT is true.  */
+
+static bool
+ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
+				    bool check_stack_slot)
+{
+  HARD_REG_SET set_up_by_prologue, prologue_used;
+  basic_block bb;
+
+  CLEAR_HARD_REG_SET (prologue_used);
+  CLEAR_HARD_REG_SET (set_up_by_prologue);
+  add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
+  add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
+  add_to_hard_reg_set (&set_up_by_prologue, Pmode,
+		       HARD_FRAME_POINTER_REGNUM);
+
+  /* The preferred stack alignment is the minimum stack alignment.  */
+  if (stack_alignment > crtl->preferred_stack_boundary)
+    stack_alignment = crtl->preferred_stack_boundary;
+
+  bool require_stack_frame = false;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      rtx_insn *insn;
+      FOR_BB_INSNS (bb, insn)
+	if (NONDEBUG_INSN_P (insn)
+	    && requires_stack_frame_p (insn, prologue_used,
+				       set_up_by_prologue))
+	  {
+	    require_stack_frame = true;
+
+	    if (check_stack_slot)
+	      {
+		/* Find the maximum stack alignment.  */
+		subrtx_iterator::array_type array;
+		FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
+		  if (MEM_P (*iter)
+		      && (reg_mentioned_p (stack_pointer_rtx,
+					   *iter)
+			  || reg_mentioned_p (frame_pointer_rtx,
+					      *iter)))
+		    {
+		      unsigned int alignment = MEM_ALIGN (*iter);
+		      if (alignment > stack_alignment)
+			stack_alignment = alignment;
+		    }
+	      }
+	  }
+    }
+
+  return require_stack_frame;
+}
+
 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
    will guide prologue/epilogue to be generated in correct form.  */
 
@@ -12718,52 +12778,8 @@  ix86_finalize_stack_frame_flags (void)
       && ix86_nsaved_sseregs () == 0
       && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
     {
-      HARD_REG_SET set_up_by_prologue, prologue_used;
-      basic_block bb;
-
-      CLEAR_HARD_REG_SET (prologue_used);
-      CLEAR_HARD_REG_SET (set_up_by_prologue);
-      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
-      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
-      add_to_hard_reg_set (&set_up_by_prologue, Pmode,
-			   HARD_FRAME_POINTER_REGNUM);
-
-      /* The preferred stack alignment is the minimum stack alignment.  */
-      if (stack_alignment > crtl->preferred_stack_boundary)
-	stack_alignment = crtl->preferred_stack_boundary;
-
-      bool require_stack_frame = false;
-
-      FOR_EACH_BB_FN (bb, cfun)
-        {
-          rtx_insn *insn;
-	  FOR_BB_INSNS (bb, insn)
-	    if (NONDEBUG_INSN_P (insn)
-		&& requires_stack_frame_p (insn, prologue_used,
-					   set_up_by_prologue))
-	      {
-		require_stack_frame = true;
-
-		if (stack_realign)
-		  {
-		    /* Find the maximum stack alignment.  */
-		    subrtx_iterator::array_type array;
-		    FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
-		      if (MEM_P (*iter)
-			  && (reg_mentioned_p (stack_pointer_rtx,
-					       *iter)
-			      || reg_mentioned_p (frame_pointer_rtx,
-						  *iter)))
-			{
-			  unsigned int alignment = MEM_ALIGN (*iter);
-			  if (alignment > stack_alignment)
-			    stack_alignment = alignment;
-			}
-		  }
-	      }
-	}
-
-      if (require_stack_frame)
+      if (ix86_find_max_used_stack_alignment (stack_alignment,
+					      stack_realign))
 	{
 	  /* Stack frame is required.  If stack alignment needed is less
 	     than incoming stack boundary, don't realign stack.  */
@@ -12851,6 +12867,16 @@  ix86_finalize_stack_frame_flags (void)
 	  recompute_frame_layout_p = true;
 	}
     }
+  else if (crtl->max_used_stack_slot_alignment
+	   > crtl->preferred_stack_boundary)
+    {
+      /* We don't need to realign stack.  But we still need to keep
+	 stack frame properly aligned to satisfy the largest alignment
+	 of stack slots.  */
+      if (ix86_find_max_used_stack_alignment (stack_alignment, true))
+	cfun->machine->max_used_stack_alignment
+	  = stack_alignment / BITS_PER_UNIT;
+    }
 
   if (crtl->stack_realign_needed != stack_realign)
     recompute_frame_layout_p = true;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 93b7a2c5915..38f264e36b2 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2596,6 +2596,9 @@  struct GTY(()) machine_function {
   /* Nonzero if the function places outgoing arguments on stack.  */
   BOOL_BITFIELD outgoing_args_on_stack : 1;
 
+  /* The largest alignment, in bytes, of stack slot actually used.  */
+  unsigned int max_used_stack_alignment;
+
   /* During prologue/epilogue generation, the current frame state.
      Otherwise, the frame state at the end of the prologue.  */
   struct machine_frame_state fs;
diff --git a/gcc/testsuite/gcc.target/i386/pr83735.c b/gcc/testsuite/gcc.target/i386/pr83735.c
new file mode 100644
index 00000000000..786c90a5839
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr83735.c
@@ -0,0 +1,55 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -mavx" } */
+
+#include <stdlib.h>
+#include "cpuid.h"
+#include "m256-check.h"
+#include "avx-os-support.h"
+
+static void __attribute__((constructor))
+check_avx (void)
+{
+  unsigned int eax, ebx, ecx, edx;
+ 
+  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+    exit (0);
+
+  /* Run AVX test only if host has AVX support.  */
+  if (((ecx & (bit_AVX | bit_OSXSAVE)) == (bit_AVX | bit_OSXSAVE))
+      && avx_os_support ())
+    return;
+
+  exit (0);
+}
+
+struct S
+{
+  short b;
+  long c;
+  char d;
+  long e;
+  unsigned:8;
+};
+
+int f, h, k, l;
+int g[10];
+volatile struct S j;
+char m;
+
+int
+main (void)
+{
+  int i;
+  struct S n;
+  for (i = 0; i < 6; i++)
+    {
+      for (f = 0; f < 10; f++)
+	g[f] = 4;
+      n = j;
+      h = m == 0 ? 1 : 5 % m;
+      if (l)
+	n.b = k;
+    }
+  return n.b;
+}