x86: fix/improve vgf2p8affine*qb insns

Message ID 5D1484FD020000780023B737@prv1-mh.provo.novell.com
State New
Headers show
Series
  • x86: fix/improve vgf2p8affine*qb insns
Related show

Commit Message

Jan Beulich June 27, 2019, 8:57 a.m.
- the affine transformations are not commutative (the two source
  operands have entirely different meaning)
- there's no need for three alternatives
- the nonimmediate_operand/Bm combination can better be vector_operand/m

gcc/
2019-06-27  Jan Beulich  <jbeulich@suse.com>

	* config/i386/sse.md (vgf2p8affineinvqb_<mode><mask_name>,
	vgf2p8affineqb_<mode><mask_name>): Drop % constraint modifier.
	Eliminate redundant alternative.  Use vector_operand plus "m"
	constraint.

gcc/testsuite/
2019-06-27  Jan Beulich  <jbeulich@suse.com>

	* gcc.target/i386/gfni-5.c: New.

---
On top of this (in further patches) it looks like the Bm constraint could be
dropped altogether. At the very least its combination with vector_operand is
pointless, because both resolve to the same vector_memory_operand. Same goes
for round{,_saeonly}_nimm_predicate, which too resolves to vector_operand.

Comments

Uros Bizjak June 27, 2019, 10:20 a.m. | #1
On Thu, Jun 27, 2019 at 10:57 AM Jan Beulich <JBeulich@suse.com> wrote:
>

> - the affine transformations are not commutative (the two source

>   operands have entirely different meaning)

> - there's no need for three alternatives

> - the nonimmediate_operand/Bm combination can better be vector_operand/m

>

> gcc/

> 2019-06-27  Jan Beulich  <jbeulich@suse.com>

>

>         * config/i386/sse.md (vgf2p8affineinvqb_<mode><mask_name>,

>         vgf2p8affineqb_<mode><mask_name>): Drop % constraint modifier.

>         Eliminate redundant alternative.  Use vector_operand plus "m"

>         constraint.


Please just drop % modifier and use vector_operand here. IIRC,
register allocator operates on constraints, it doesn't care for
predicates. But predicates shouldn't be more constrained than
constraints. So, having "m" instead of "Bm" is a bad idea with
vector_operand.

Uros.
>

> gcc/testsuite/

> 2019-06-27  Jan Beulich  <jbeulich@suse.com>

>

>         * gcc.target/i386/gfni-5.c: New.

>

> ---

> On top of this (in further patches) it looks like the Bm constraint could be

> dropped altogether. At the very least its combination with vector_operand is

> pointless, because both resolve to the same vector_memory_operand. Same goes

> for round{,_saeonly}_nimm_predicate, which too resolves to vector_operand.

>

> --- a/gcc/config/i386/sse.md

> +++ b/gcc/config/i386/sse.md

> @@ -22072,56 +22072,53 @@

>    "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")

>

>  (define_insn "vgf2p8affineinvqb_<mode><mask_name>"

> -  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")

> +  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")

>         (unspec:VI1_AVX512F

> -         [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")

> -          (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")

> -          (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]

> +         [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")

> +          (match_operand:VI1_AVX512F 2 "vector_operand" "xm,vm")

> +          (match_operand:QI 3 "const_0_to_255_operand" "n,n")]

>           UNSPEC_GF2P8AFFINEINV))]

>    "TARGET_GFNI"

>    "@

>     gf2p8affineinvqb\t{%3, %2, %0| %0, %2, %3}

> -   vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}

>     vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"

> -  [(set_attr "isa" "noavx,avx,avx512f")

> -   (set_attr "prefix_data16" "1,*,*")

> +  [(set_attr "isa" "noavx,avx")

> +   (set_attr "prefix_data16" "1,*")

>     (set_attr "prefix_extra" "1")

> -   (set_attr "prefix" "orig,maybe_evex,evex")

> +   (set_attr "prefix" "orig,maybe_evex")

>     (set_attr "mode" "<sseinsnmode>")])

>

>  (define_insn "vgf2p8affineqb_<mode><mask_name>"

> -  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")

> +  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")

>         (unspec:VI1_AVX512F

> -         [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")

> -          (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")

> -          (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]

> +         [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")

> +          (match_operand:VI1_AVX512F 2 "vector_operand" "xm,vm")

> +          (match_operand:QI 3 "const_0_to_255_operand" "n,n")]

>           UNSPEC_GF2P8AFFINE))]

>    "TARGET_GFNI"

>    "@

>     gf2p8affineqb\t{%3, %2, %0| %0, %2, %3}

> -   vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}

>     vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"

> -  [(set_attr "isa" "noavx,avx,avx512f")

> -   (set_attr "prefix_data16" "1,*,*")

> +  [(set_attr "isa" "noavx,avx")

> +   (set_attr "prefix_data16" "1,*")

>     (set_attr "prefix_extra" "1")

> -   (set_attr "prefix" "orig,maybe_evex,evex")

> +   (set_attr "prefix" "orig,maybe_evex")

>     (set_attr "mode" "<sseinsnmode>")])

>

>  (define_insn "vgf2p8mulb_<mode><mask_name>"

> -  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")

> +  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")

>         (unspec:VI1_AVX512F

> -         [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")

> -          (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")]

> +         [(match_operand:VI1_AVX512F 1 "register_operand" "%0,v")

> +          (match_operand:VI1_AVX512F 2 "vector_operand" "xm,vm")]

>           UNSPEC_GF2P8MUL))]

>    "TARGET_GFNI"

>    "@

>     gf2p8mulb\t{%2, %0| %0, %2}

> -   vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}

>     vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}"

> -  [(set_attr "isa" "noavx,avx,avx512f")

> -   (set_attr "prefix_data16" "1,*,*")

> +  [(set_attr "isa" "noavx,avx")

> +   (set_attr "prefix_data16" "1,*")

>     (set_attr "prefix_extra" "1")

> -   (set_attr "prefix" "orig,maybe_evex,evex")

> +   (set_attr "prefix" "orig,maybe_evex")

>     (set_attr "mode" "<sseinsnmode>")])

>

>  (define_insn "vpshrd_<mode><mask_name>"

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/gfni-5.c

> @@ -0,0 +1,19 @@

> +/* { dg-do compile } */

> +/* { dg-options "-O2 -msse2 -mgfni" } */

> +

> +typedef char __attribute__((vector_size(16))) v16qi_t;

> +

> +v16qi_t test16a (v16qi_t x, v16qi_t a)

> +{

> +  asm volatile ("" : "+m" (a));

> +  return __builtin_ia32_vgf2p8affineqb_v16qi (x, a, 0);

> +}

> +

> +v16qi_t test16b (v16qi_t x, v16qi_t a)

> +{

> +  asm volatile ("" : "+m" (x));

> +  return __builtin_ia32_vgf2p8affineqb_v16qi (x, a, 0);

> +}

> +

> +/* { dg-final { scan-assembler-times "gf2p8affineqb\[ \t].*\\(" 1 } } */

> +/* { dg-final { scan-assembler-times "gf2p8affineqb\[ \t].*%xmm.*%xmm" 1 } } */

>

>

>
Jan Beulich June 27, 2019, 10:49 a.m. | #2
>>> On 27.06.19 at 12:20, <ubizjak@gmail.com> wrote:

> On Thu, Jun 27, 2019 at 10:57 AM Jan Beulich <JBeulich@suse.com> wrote:

>>

>> - the affine transformations are not commutative (the two source

>>   operands have entirely different meaning)

>> - there's no need for three alternatives

>> - the nonimmediate_operand/Bm combination can better be vector_operand/m

>>

>> gcc/

>> 2019-06-27  Jan Beulich  <jbeulich@suse.com>

>>

>>         * config/i386/sse.md (vgf2p8affineinvqb_<mode><mask_name>,

>>         vgf2p8affineqb_<mode><mask_name>): Drop % constraint modifier.

>>         Eliminate redundant alternative.  Use vector_operand plus "m"

>>         constraint.

> 

> Please just drop % modifier and use vector_operand here. IIRC,

> register allocator operates on constraints, it doesn't care for

> predicates. But predicates shouldn't be more constrained than

> constraints. So, having "m" instead of "Bm" is a bad idea with

> vector_operand.


Well, putting back the Bm is easy (if that's really needed). But do
you also mean me to put back to redundant 3rd alternative?

Jan
Uros Bizjak June 27, 2019, 10:58 a.m. | #3
On Thu, Jun 27, 2019 at 12:49 PM Jan Beulich <JBeulich@suse.com> wrote:
>

> >>> On 27.06.19 at 12:20, <ubizjak@gmail.com> wrote:

> > On Thu, Jun 27, 2019 at 10:57 AM Jan Beulich <JBeulich@suse.com> wrote:

> >>

> >> - the affine transformations are not commutative (the two source

> >>   operands have entirely different meaning)

> >> - there's no need for three alternatives

> >> - the nonimmediate_operand/Bm combination can better be vector_operand/m

> >>

> >> gcc/

> >> 2019-06-27  Jan Beulich  <jbeulich@suse.com>

> >>

> >>         * config/i386/sse.md (vgf2p8affineinvqb_<mode><mask_name>,

> >>         vgf2p8affineqb_<mode><mask_name>): Drop % constraint modifier.

> >>         Eliminate redundant alternative.  Use vector_operand plus "m"

> >>         constraint.

> >

> > Please just drop % modifier and use vector_operand here. IIRC,

> > register allocator operates on constraints, it doesn't care for

> > predicates. But predicates shouldn't be more constrained than

> > constraints. So, having "m" instead of "Bm" is a bad idea with

> > vector_operand.

>

> Well, putting back the Bm is easy (if that's really needed). But do

> you also mean me to put back to redundant 3rd alternative?


It is not redundant, "x" and "v" are different register constraints.

Uros.
Jan Beulich June 27, 2019, 11:39 a.m. | #4
>>> On 27.06.19 at 12:58, <ubizjak@gmail.com> wrote:

> On Thu, Jun 27, 2019 at 12:49 PM Jan Beulich <JBeulich@suse.com> wrote:

>>

>> >>> On 27.06.19 at 12:20, <ubizjak@gmail.com> wrote:

>> > On Thu, Jun 27, 2019 at 10:57 AM Jan Beulich <JBeulich@suse.com> wrote:

>> >>

>> >> - the affine transformations are not commutative (the two source

>> >>   operands have entirely different meaning)

>> >> - there's no need for three alternatives

>> >> - the nonimmediate_operand/Bm combination can better be vector_operand/m

>> >>

>> >> gcc/

>> >> 2019-06-27  Jan Beulich  <jbeulich@suse.com>

>> >>

>> >>         * config/i386/sse.md (vgf2p8affineinvqb_<mode><mask_name>,

>> >>         vgf2p8affineqb_<mode><mask_name>): Drop % constraint modifier.

>> >>         Eliminate redundant alternative.  Use vector_operand plus "m"

>> >>         constraint.

>> >

>> > Please just drop % modifier and use vector_operand here. IIRC,

>> > register allocator operates on constraints, it doesn't care for

>> > predicates. But predicates shouldn't be more constrained than

>> > constraints. So, having "m" instead of "Bm" is a bad idea with

>> > vector_operand.

>>

>> Well, putting back the Bm is easy (if that's really needed). But do

>> you also mean me to put back to redundant 3rd alternative?

> 

> It is not redundant, "x" and "v" are different register constraints.


Well, yes, "v" is covering a wider set than "x". But only if AVX512F
is enabled. The original combinations ("x", "avx") and ("v", "avx512f")
are thus effectively the same as the new ("v", "avx"). But yes - I
guess I'll split this from the actual bug fix.

Jan
Uros Bizjak June 27, 2019, 11:42 a.m. | #5
On Thu, Jun 27, 2019 at 1:39 PM Jan Beulich <JBeulich@suse.com> wrote:
>

> >>> On 27.06.19 at 12:58, <ubizjak@gmail.com> wrote:

> > On Thu, Jun 27, 2019 at 12:49 PM Jan Beulich <JBeulich@suse.com> wrote:

> >>

> >> >>> On 27.06.19 at 12:20, <ubizjak@gmail.com> wrote:

> >> > On Thu, Jun 27, 2019 at 10:57 AM Jan Beulich <JBeulich@suse.com> wrote:

> >> >>

> >> >> - the affine transformations are not commutative (the two source

> >> >>   operands have entirely different meaning)

> >> >> - there's no need for three alternatives

> >> >> - the nonimmediate_operand/Bm combination can better be vector_operand/m

> >> >>

> >> >> gcc/

> >> >> 2019-06-27  Jan Beulich  <jbeulich@suse.com>

> >> >>

> >> >>         * config/i386/sse.md (vgf2p8affineinvqb_<mode><mask_name>,

> >> >>         vgf2p8affineqb_<mode><mask_name>): Drop % constraint modifier.

> >> >>         Eliminate redundant alternative.  Use vector_operand plus "m"

> >> >>         constraint.

> >> >

> >> > Please just drop % modifier and use vector_operand here. IIRC,

> >> > register allocator operates on constraints, it doesn't care for

> >> > predicates. But predicates shouldn't be more constrained than

> >> > constraints. So, having "m" instead of "Bm" is a bad idea with

> >> > vector_operand.

> >>

> >> Well, putting back the Bm is easy (if that's really needed). But do

> >> you also mean me to put back to redundant 3rd alternative?

> >

> > It is not redundant, "x" and "v" are different register constraints.

>

> Well, yes, "v" is covering a wider set than "x". But only if AVX512F

> is enabled. The original combinations ("x", "avx") and ("v", "avx512f")

> are thus effectively the same as the new ("v", "avx"). But yes - I

> guess I'll split this from the actual bug fix.


No, you are correct. Please use "v", and remove the redundant alternative.

Uros.

Patch

--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -22072,56 +22072,53 @@ 
   "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
 
 (define_insn "vgf2p8affineinvqb_<mode><mask_name>"
-  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
+  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
 	(unspec:VI1_AVX512F
-	  [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
-	   (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
-	   (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
+	  [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
+	   (match_operand:VI1_AVX512F 2 "vector_operand" "xm,vm")
+	   (match_operand:QI 3 "const_0_to_255_operand" "n,n")]
 	  UNSPEC_GF2P8AFFINEINV))]
   "TARGET_GFNI"
   "@
    gf2p8affineinvqb\t{%3, %2, %0| %0, %2, %3}
-   vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}
    vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
-  [(set_attr "isa" "noavx,avx,avx512f")
-   (set_attr "prefix_data16" "1,*,*")
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "prefix_data16" "1,*")
    (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "orig,maybe_evex,evex")
+   (set_attr "prefix" "orig,maybe_evex")
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "vgf2p8affineqb_<mode><mask_name>"
-  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
+  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
 	(unspec:VI1_AVX512F
-	  [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
-	   (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
-	   (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
+	  [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
+	   (match_operand:VI1_AVX512F 2 "vector_operand" "xm,vm")
+	   (match_operand:QI 3 "const_0_to_255_operand" "n,n")]
 	  UNSPEC_GF2P8AFFINE))]
   "TARGET_GFNI"
   "@
    gf2p8affineqb\t{%3, %2, %0| %0, %2, %3}
-   vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}
    vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
-  [(set_attr "isa" "noavx,avx,avx512f")
-   (set_attr "prefix_data16" "1,*,*")
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "prefix_data16" "1,*")
    (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "orig,maybe_evex,evex")
+   (set_attr "prefix" "orig,maybe_evex")
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "vgf2p8mulb_<mode><mask_name>"
-  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
+  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
 	(unspec:VI1_AVX512F
-	  [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
-	   (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")]
+	  [(match_operand:VI1_AVX512F 1 "register_operand" "%0,v")
+	   (match_operand:VI1_AVX512F 2 "vector_operand" "xm,vm")]
 	  UNSPEC_GF2P8MUL))]
   "TARGET_GFNI"
   "@
    gf2p8mulb\t{%2, %0| %0, %2}
-   vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}
    vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}"
-  [(set_attr "isa" "noavx,avx,avx512f")
-   (set_attr "prefix_data16" "1,*,*")
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "prefix_data16" "1,*")
    (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "orig,maybe_evex,evex")
+   (set_attr "prefix" "orig,maybe_evex")
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "vpshrd_<mode><mask_name>"
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/gfni-5.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mgfni" } */
+
+typedef char __attribute__((vector_size(16))) v16qi_t;
+
+v16qi_t test16a (v16qi_t x, v16qi_t a)
+{
+  asm volatile ("" : "+m" (a));
+  return __builtin_ia32_vgf2p8affineqb_v16qi (x, a, 0);
+}
+
+v16qi_t test16b (v16qi_t x, v16qi_t a)
+{
+  asm volatile ("" : "+m" (x));
+  return __builtin_ia32_vgf2p8affineqb_v16qi (x, a, 0);
+}
+
+/* { dg-final { scan-assembler-times "gf2p8affineqb\[ \t].*\\(" 1 } } */
+/* { dg-final { scan-assembler-times "gf2p8affineqb\[ \t].*%xmm.*%xmm" 1 } } */