x86: Skip EXT_REX_SSE_REG_P for vzeroupper optimization

Message ID 20200620230617.1001430-1-hjl.tools@gmail.com
State New
Headers show
Series
  • x86: Skip EXT_REX_SSE_REG_P for vzeroupper optimization
Related show

Commit Message

H.J. Lu via Gcc-patches June 20, 2020, 11:06 p.m.
Skip EXT_REX_SSE_REG_P for vzeroupper optimization since the upper 16 vector
registers don't trigger the SSE <-> AVX transition penalty.

gcc/

	PR target/95791
	* config/i386/i386.c (ix86_check_avx_upper_register): Skip
	EXT_REX_SSE_REG_P.

gcc/testsuite/

	PR target/95791
	* gcc.target/i386/pr95791.c: New test.
---
 gcc/config/i386/i386.c                  |  4 +++-
 gcc/testsuite/gcc.target/i386/pr95791.c | 10 ++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95791.c

-- 
2.26.2

Comments

Richard Biener via Gcc-patches June 22, 2020, 9:21 a.m. | #1
On Sun, Jun 21, 2020 at 1:06 AM H.J. Lu via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>

> Skip EXT_REX_SSE_REG_P for vzeroupper optimization since upper 16 vector

> registers don't trigger SSE <-> AVX transition penalty.


OK.

Richard.

> gcc/

>

>         PR target/95791

>         * config/i386/i386.c (ix86_dirflag_mode_needed): Skip

>         EXT_REX_SSE_REG_P.

>

> gcc/testsuite/

>

>         PR target/95791

>         * gcc.target/i386/pr95791.c: New test.

> ---

>  gcc/config/i386/i386.c                  |  4 +++-

>  gcc/testsuite/gcc.target/i386/pr95791.c | 10 ++++++++++

>  2 files changed, 13 insertions(+), 1 deletion(-)

>  create mode 100644 gcc/testsuite/gcc.target/i386/pr95791.c

>

> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c

> index 3b776c08a22..37aaa49996d 100644

> --- a/gcc/config/i386/i386.c

> +++ b/gcc/config/i386/i386.c

> @@ -13846,7 +13846,9 @@ ix86_dirflag_mode_needed (rtx_insn *insn)

>  static bool

>  ix86_check_avx_upper_register (const_rtx exp)

>  {

> -  return SSE_REG_P (exp) && GET_MODE_BITSIZE (GET_MODE (exp)) > 128;

> +  return (SSE_REG_P (exp)

> +         && !EXT_REX_SSE_REG_P (exp)

> +         && GET_MODE_BITSIZE (GET_MODE (exp)) > 128);

>  }

>

>  /* Return needed mode for entity in optimize_mode_switching pass.  */

> diff --git a/gcc/testsuite/gcc.target/i386/pr95791.c b/gcc/testsuite/gcc.target/i386/pr95791.c

> new file mode 100644

> index 00000000000..26a96d47354

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/pr95791.c

> @@ -0,0 +1,10 @@

> +/* { dg-do compile { target { ! ia32 } } } */

> +/* { dg-options "-O2 -mavx512f -mvzeroupper" } */

> +

> +void

> +f(void)

> +{

> +  __asm__ __volatile__("" ::: "zmm16");

> +}

> +

> +/* { dg-final { scan-assembler-not "vzeroupper" } } */

> --

> 2.26.2

>

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3b776c08a22..37aaa49996d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -13846,7 +13846,9 @@  ix86_dirflag_mode_needed (rtx_insn *insn)
 static bool
 ix86_check_avx_upper_register (const_rtx exp)
 {
-  return SSE_REG_P (exp) && GET_MODE_BITSIZE (GET_MODE (exp)) > 128;
+  return (SSE_REG_P (exp)
+	  && !EXT_REX_SSE_REG_P (exp)
+	  && GET_MODE_BITSIZE (GET_MODE (exp)) > 128);
 }
 
 /* Return needed mode for entity in optimize_mode_switching pass.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr95791.c b/gcc/testsuite/gcc.target/i386/pr95791.c
new file mode 100644
index 00000000000..26a96d47354
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95791.c
@@ -0,0 +1,10 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mvzeroupper" } */
+
+void
+f(void)
+{
+  __asm__ __volatile__("" ::: "zmm16");
+}
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */