i386: Enable AVX/AVX512 features only if supported by OSXSAVE

Message ID 20180329124309.GA12667@intel.com
State New
Series
  • i386: Enable AVX/AVX512 features only if supported by OSXSAVE

Commit Message

H.J. Lu March 29, 2018, 12:43 p.m.
Enable AVX and AVX512 features only if their states are supported by
OSXSAVE.
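
For reference, a minimal standalone sketch of the check this patch adds
(it mirrors the patch's own XGETBV sequence; the helper name is purely
illustrative):

  /* Sketch only: read XCR0 (the XFEATURE_ENABLED_MASK register) with
     xgetbv and test whether the requested state components are enabled
     by the OS.  Only valid after CPUID has reported OSXSAVE.  */
  static int
  xcr0_states_enabled (unsigned int mask)
  {
    unsigned int xcrlow, xcrhigh;
    /* xgetbv encoded as raw bytes so it assembles without -mxsave.  */
    asm (".byte 0x0f, 0x01, 0xd0"
         : "=a" (xcrlow), "=d" (xcrhigh)
         : "c" (0));
    return (xcrlow & mask) == mask;
  }

AVX needs the SSE (0x2) and YMM (0x4) state bits enabled; AVX512F
additionally needs the OPMASK (0x20), ZMM (0x40) and HI_ZMM (0x80) bits.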

OK for trunk and release branches?


H.J.
---
	PR target/85100
	* config/i386/cpuinfo.c (XCR_XFEATURE_ENABLED_MASK): New.
	(XSTATE_FP): Likewise.
	(XSTATE_SSE): Likewise.
	(XSTATE_YMM): Likewise.
	(XSTATE_OPMASK): Likewise.
	(XSTATE_ZMM): Likewise.
	(XSTATE_HI_ZMM): Likewise.
	(XCR_AVX_ENABLED_MASK): Likewise.
	(XCR_AVX512F_ENABLED_MASK): Likewise.
	(get_available_features): Enable AVX and AVX512 features only
	if their states are supported by OSXSAVE.
---
 libgcc/config/i386/cpuinfo.c | 134 +++++++++++++++++++++++++++++--------------
 1 file changed, 90 insertions(+), 44 deletions(-)

-- 
2.14.3

Comments

Uros Bizjak March 29, 2018, 1:05 p.m. | #1
On Thu, Mar 29, 2018 at 2:43 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
> Enable AVX and AVX512 features only if their states are supported by
> OSXSAVE.
>
> OK for trunk and release branches?
>
>
> H.J.
> ---
>         PR target/85100
>         * config/i386/cpuinfo.c (XCR_XFEATURE_ENABLED_MASK): New.
>         (XSTATE_FP): Likewise.
>         (XSTATE_SSE): Likewise.
>         (XSTATE_YMM): Likewise.
>         (XSTATE_OPMASK): Likewise.
>         (XSTATE_ZMM): Likewise.
>         (XSTATE_HI_ZMM): Likewise.
>         (XCR_AVX_ENABLED_MASK): Likewise.
>         (XCR_AVX512F_ENABLED_MASK): Likewise.
>         (get_available_features): Enable AVX and AVX512 features only
>         if their states are supported by OSXSAVE.

OK for trunk and release branches after a couple of days without
problems in trunk.

Thanks,
Uros.

Ilya Verbin March 30, 2018, 5:19 p.m. | #2
This check will always disable AVX-512 on macOS, because they
implemented on-demand support:
https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/osfmk/i386/fpu.c#L176

(I'm not against this change, just for information).
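
For illustration only (not part of the patch, and not something libgcc
could portably rely on): on macOS one could in principle ask the kernel
directly instead of reading XCR0, since XCR0 may not advertise the ZMM
state bits until a thread has actually used AVX-512.  The sysctl key
below is an assumption.

  #include <stddef.h>
  #include <sys/sysctl.h>

  /* Hypothetical macOS-only fallback; the "hw.optional.avx512f" name
     is assumed, not taken from this patch or from libgcc.  */
  static int
  darwin_avx512f_supported (void)
  {
    int supported = 0;
    size_t len = sizeof (supported);
    if (sysctlbyname ("hw.optional.avx512f", &supported, &len, NULL, 0) != 0)
      return 0;
    return supported;
  }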

2018-03-29 16:05 GMT+03:00 Uros Bizjak <ubizjak@gmail.com>:
> On Thu, Mar 29, 2018 at 2:43 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>> Enable AVX and AVX512 features only if their states are supported by
>> OSXSAVE.

  -- Ilya
H.J. Lu March 30, 2018, 5:56 p.m. | #3
On Fri, Mar 30, 2018 at 10:19 AM, Ilya Verbin <iverbin@gmail.com> wrote:
> This check will always disable AVX-512 on macOS, because they
> implemented on-demand support:
> https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/osfmk/i386/fpu.c#L176
>

Isn't xsaveopt designed for this?

-- 
H.J.
Ilya Verbin March 30, 2018, 6:09 p.m. | #4
2018-03-30 20:56 GMT+03:00 H.J. Lu <hjl.tools@gmail.com>:
> On Fri, Mar 30, 2018 at 10:19 AM, Ilya Verbin <iverbin@gmail.com> wrote:
>> This check will always disable AVX-512 on macOS, because they
>> implemented on-demand support:
>> https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/osfmk/i386/fpu.c#L176
>>
>
> Isn't xsaveopt designed for this?

Maybe the goal was to reduce the size of the XSAVE state area allocated
by default for each thread.
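
To illustrate that point (a sketch, not part of the patch): the
per-thread XSAVE area size the OS has to provide can be read from CPUID
leaf 0xD, and it grows once the AVX-512 state components are enabled in
XCR0.

  #include <cpuid.h>

  /* CPUID.(EAX=0xD, ECX=0): EBX is the XSAVE area size required for the
     state components currently enabled in XCR0; ECX is the size needed
     if every supported component were enabled.  */
  static unsigned int
  xsave_area_size_for_enabled_state (void)
  {
    unsigned int eax, ebx, ecx, edx;
    if (!__get_cpuid_count (0xd, 0, &eax, &ebx, &ecx, &edx))
      return 0;
    return ebx;
  }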

  -- Ilya

Patch

diff --git a/libgcc/config/i386/cpuinfo.c b/libgcc/config/i386/cpuinfo.c
index 4eb3f5cd944..1dac110a79a 100644
--- a/libgcc/config/i386/cpuinfo.c
+++ b/libgcc/config/i386/cpuinfo.c
@@ -240,6 +240,40 @@  get_available_features (unsigned int ecx, unsigned int edx,
   unsigned int features = 0;
   unsigned int features2 = 0;
 
+  /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv.  */
+#define XCR_XFEATURE_ENABLED_MASK	0x0
+#define XSTATE_FP			0x1
+#define XSTATE_SSE			0x2
+#define XSTATE_YMM			0x4
+#define XSTATE_OPMASK			0x20
+#define XSTATE_ZMM			0x40
+#define XSTATE_HI_ZMM			0x80
+
+#define XCR_AVX_ENABLED_MASK \
+  (XSTATE_SSE | XSTATE_YMM)
+#define XCR_AVX512F_ENABLED_MASK \
+  (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
+
+  /* Check if AVX and AVX512 are usable.  */
+  int avx_usable = 0;
+  int avx512_usable = 0;
+  if ((ecx & bit_OSXSAVE))
+    {
+      /* Check if XMM, YMM, OPMASK, upper 256 bits of ZMM0-ZMM15 and
+         ZMM16-ZMM31 states are supported by OSXSAVE.  */
+      unsigned int xcrlow;
+      unsigned int xcrhigh;
+      asm (".byte 0x0f, 0x01, 0xd0"
+	   : "=a" (xcrlow), "=d" (xcrhigh)
+	   : "c" (XCR_XFEATURE_ENABLED_MASK));
+      if ((xcrlow & XCR_AVX_ENABLED_MASK) == XCR_AVX_ENABLED_MASK)
+	{
+	  avx_usable = 1;
+	  avx512_usable = ((xcrlow & XCR_AVX512F_ENABLED_MASK)
+			   == XCR_AVX512F_ENABLED_MASK);
+	}
+    }
+
 #define set_feature(f) \
   if (f < 32) features |= (1U << f); else features2 |= (1U << (f - 32))
 
@@ -265,10 +299,13 @@  get_available_features (unsigned int ecx, unsigned int edx,
     set_feature (FEATURE_SSE4_1);
   if (ecx & bit_SSE4_2)
     set_feature (FEATURE_SSE4_2);
-  if (ecx & bit_AVX)
-    set_feature (FEATURE_AVX);
-  if (ecx & bit_FMA)
-    set_feature (FEATURE_FMA);
+  if (avx_usable)
+    {
+      if (ecx & bit_AVX)
+	set_feature (FEATURE_AVX);
+      if (ecx & bit_FMA)
+	set_feature (FEATURE_FMA);
+    }
 
   /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */
   if (max_cpuid_level >= 7)
@@ -276,44 +313,50 @@  get_available_features (unsigned int ecx, unsigned int edx,
       __cpuid_count (7, 0, eax, ebx, ecx, edx);
       if (ebx & bit_BMI)
 	set_feature (FEATURE_BMI);
-      if (ebx & bit_AVX2)
-	set_feature (FEATURE_AVX2);
+      if (avx_usable)
+	{
+	  if (ebx & bit_AVX2)
+	    set_feature (FEATURE_AVX2);
+	}
       if (ebx & bit_BMI2)
 	set_feature (FEATURE_BMI2);
-      if (ebx & bit_AVX512F)
-	set_feature (FEATURE_AVX512F);
-      if (ebx & bit_AVX512VL)
-	set_feature (FEATURE_AVX512VL);
-      if (ebx & bit_AVX512BW)
-	set_feature (FEATURE_AVX512BW);
-      if (ebx & bit_AVX512DQ)
-	set_feature (FEATURE_AVX512DQ);
-      if (ebx & bit_AVX512CD)
-	set_feature (FEATURE_AVX512CD);
-      if (ebx & bit_AVX512PF)
-	set_feature (FEATURE_AVX512PF);
-      if (ebx & bit_AVX512ER)
-	set_feature (FEATURE_AVX512ER);
-      if (ebx & bit_AVX512IFMA)
-	set_feature (FEATURE_AVX512IFMA);
-      if (ecx & bit_AVX512VBMI)
-	set_feature (FEATURE_AVX512VBMI);
-      if (ecx & bit_AVX512VBMI2)
-	set_feature (FEATURE_AVX512VBMI2);
-      if (ecx & bit_GFNI)
-	set_feature (FEATURE_GFNI);
-      if (ecx & bit_VPCLMULQDQ)
-	set_feature (FEATURE_VPCLMULQDQ);
-      if (ecx & bit_AVX512VNNI)
-	set_feature (FEATURE_AVX512VNNI);
-      if (ecx & bit_AVX512BITALG)
-	set_feature (FEATURE_AVX512BITALG);
-      if (ecx & bit_AVX512VPOPCNTDQ)
-	set_feature (FEATURE_AVX512VPOPCNTDQ);
-      if (edx & bit_AVX5124VNNIW)
-	set_feature (FEATURE_AVX5124VNNIW);
-      if (edx & bit_AVX5124FMAPS)
-	set_feature (FEATURE_AVX5124FMAPS);
+      if (avx512_usable)
+	{
+	  if (ebx & bit_AVX512F)
+	    set_feature (FEATURE_AVX512F);
+	  if (ebx & bit_AVX512VL)
+	    set_feature (FEATURE_AVX512VL);
+	  if (ebx & bit_AVX512BW)
+	    set_feature (FEATURE_AVX512BW);
+	  if (ebx & bit_AVX512DQ)
+	    set_feature (FEATURE_AVX512DQ);
+	  if (ebx & bit_AVX512CD)
+	    set_feature (FEATURE_AVX512CD);
+	  if (ebx & bit_AVX512PF)
+	    set_feature (FEATURE_AVX512PF);
+	  if (ebx & bit_AVX512ER)
+	    set_feature (FEATURE_AVX512ER);
+	  if (ebx & bit_AVX512IFMA)
+	    set_feature (FEATURE_AVX512IFMA);
+	  if (ecx & bit_AVX512VBMI)
+	    set_feature (FEATURE_AVX512VBMI);
+	  if (ecx & bit_AVX512VBMI2)
+	    set_feature (FEATURE_AVX512VBMI2);
+	  if (ecx & bit_GFNI)
+	    set_feature (FEATURE_GFNI);
+	  if (ecx & bit_VPCLMULQDQ)
+	    set_feature (FEATURE_VPCLMULQDQ);
+	  if (ecx & bit_AVX512VNNI)
+	    set_feature (FEATURE_AVX512VNNI);
+	  if (ecx & bit_AVX512BITALG)
+	    set_feature (FEATURE_AVX512BITALG);
+	  if (ecx & bit_AVX512VPOPCNTDQ)
+	    set_feature (FEATURE_AVX512VPOPCNTDQ);
+	  if (edx & bit_AVX5124VNNIW)
+	    set_feature (FEATURE_AVX5124VNNIW);
+	  if (edx & bit_AVX5124FMAPS)
+	    set_feature (FEATURE_AVX5124FMAPS);
+	}
     }
 
   /* Check cpuid level of extended features.  */
@@ -325,10 +368,13 @@  get_available_features (unsigned int ecx, unsigned int edx,
 
       if (ecx & bit_SSE4a)
 	set_feature (FEATURE_SSE4_A);
-      if (ecx & bit_FMA4)
-	set_feature (FEATURE_FMA4);
-      if (ecx & bit_XOP)
-	set_feature (FEATURE_XOP);
+      if (avx_usable)
+	{
+	  if (ecx & bit_FMA4)
+	    set_feature (FEATURE_FMA4);
+	  if (ecx & bit_XOP)
+	    set_feature (FEATURE_XOP);
+	}
     }
     
   __cpu_model.__cpu_features[0] = features;