Fix *{add,sub,and,ior,xor}v*3 patterns on AVX512* (PR target/84524)

Message ID 20180223164402.GD5867@tucnak
State New
Headers show
Series
  • Fix *{add,sub,and,ior,xor}v*3 patterns on AVX512* (PR target/84524)
Related show

Commit Message

Jakub Jelinek Feb. 23, 2018, 4:44 p.m.
Hi!

The following testcase is miscompiled with -O3 -mavx512bw, the combiner
sees there isn't just *xorv32hi3 define_insn with no masking, but that
there is one with masking as well (with the same name) and uses it,
but 1) such instruction doesn't really exist, for masking we only have
VPXORD and VPXORQ, there is no VPXORW or VPXORB 2) the pattern emits
just non-masked string.  In tmp-mddump.md we can see:
;; ../../gcc/config/i386/sse.md: 11825
(define_insn ("*xorv32hi3")
     [
        (set (match_operand:V32HI 0 ("register_operand") ("=x,x,v"))
            (xor:V32HI (match_operand:V32HI 1 ("vector_operand") ("%0,x,v"))
                (match_operand:V32HI 2 ("vector_operand") ("xBm,xm,vm"))))
    ] ("(TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))) && (TARGET_AVX512F)") ("*{
...
      ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
and
;; ../../gcc/config/i386/sse.md: 11825
(define_insn ("*xorv32hi3")
     [
        (set (match_operand:V32HI 0 ("register_operand") ("=x,x,v"))
            (vec_merge:V32HI (xor:V32HI (match_operand:V32HI 1 ("vector_operand") ("%0,x,v"))
                    (match_operand:V32HI 2 ("vector_operand") ("xBm,xm,vm")))
                (match_operand:V32HI 3 ("vector_move_operand") ("0C,0C,0C"))
                (match_operand:SI 4 ("register_operand") ("Yk,Yk,Yk"))))
    ] ("(TARGET_AVX512F) && ((TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))) && (TARGET_AVX512F))") ("*{
...
      ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
For the any_logic patterns we want masking only on the V*[SD][IF]mode patterns
and not on V*[HQ]Imode ones and most of the pattern does it right, except
there was <mask_prefix3> in set_attr rather than just orig,vex and even
a single subst attribute anywhere forces substitution.

I'd done
grep 'define_insn ' tmp-mddump.md | sort | uniq -c | sort -n | grep -v '^ *1 ' | less
to look for other occurrences of similar bug and indeed found another
pattern where we don't really want masking, in that case not because masking
would not be provided for that operation, but because we have explicit
masked define_insn for it, as we need two different for different sets of
modes - e.g. for *addv32hi3 there is one non-masked and one incorrect
masked define_insn created from sse.md: 10085 and explicit *addv32hi3_mask
from sse.md: 10114, the incorrect masked one has wrong condition and comes
first.  There are many other define_insns with duplicated (or more) names
in the above command, but most of them seemed due to :P iterator which is
probably fine for same named patterns, there will be just one pattern
enabled for 32-bit and one for 64-bit, so no need to obfuscate the names.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
This isn't a regression (except perhaps with -O3 -march=native on SKX),
but it is a serious wrong-code.

2018-02-23  Jakub Jelinek  <jakub@redhat.com>

	PR target/84524
	* config/i386/sse.md (*<code><mode>3): Replace <mask_prefix3> with
	orig,vex.
	(*<plusminus_insn><mode>3): Likewise.  Remove <mask_operand3> uses.

	* gcc.c-torture/execute/pr84524.c: New test.
	* gcc.target/i386/avx512bw-pr84524.c: New test.


	Jakub

Patch

--- gcc/config/i386/sse.md.jj	2018-02-13 09:31:24.769607162 +0100
+++ gcc/config/i386/sse.md	2018-02-23 11:51:00.271477979 +0100
@@ -10090,11 +10090,11 @@  (define_insn "*<plusminus_insn><mode>3"
   "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
   "@
    p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
-   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sseiadd")
    (set_attr "prefix_data16" "1,*")
-   (set_attr "prefix" "<mask_prefix3>")
+   (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "*<plusminus_insn><mode>3_mask"
@@ -11899,7 +11899,7 @@  (define_insn "*<code><mode>3"
 	    (eq_attr "mode" "TI"))
        (const_string "1")
        (const_string "*")))
-   (set_attr "prefix" "<mask_prefix3>,evex")
+   (set_attr "prefix" "orig,vex,evex")
    (set (attr "mode")
 	(cond [(and (match_test "<MODE_SIZE> == 16")
 		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
--- gcc/testsuite/gcc.c-torture/execute/pr84524.c.jj	2018-02-23 11:54:51.913492631 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr84524.c	2018-02-23 11:59:55.467511836 +0100
@@ -0,0 +1,41 @@ 
+/* PR target/84524 */
+
+__attribute__((noipa)) void
+foo (unsigned short *x)
+{
+  unsigned short i, v;
+  unsigned char j;
+  for (i = 0; i < 256; i++)
+    {
+      v = i << 8;
+      for (j = 0; j < 8; j++)
+	if (v & 0x8000)
+	  v = (v << 1) ^ 0x1021;
+	else
+	  v = v << 1;
+      x[i] = v;
+    }
+}
+
+int
+main ()
+{
+  unsigned short a[256];
+
+  foo (a);
+  for (int i = 0; i < 256; i++)
+    {
+      unsigned short v = i << 8;
+      for (int j = 0; j < 8; j++)
+	{
+	  asm volatile ("" : "+r" (v));
+	  if (v & 0x8000)
+	    v = (v << 1) ^ 0x1021;
+	  else
+	    v = v << 1;
+	}
+      if (a[i] != v)
+	__builtin_abort ();
+    }
+  return 0;
+}
--- gcc/testsuite/gcc.target/i386/avx512bw-pr84524.c.jj	2018-02-23 11:58:16.919505601 +0100
+++ gcc/testsuite/gcc.target/i386/avx512bw-pr84524.c	2018-02-23 11:58:57.377508169 +0100
@@ -0,0 +1,14 @@ 
+/* PR target/84524 */
+/* { dg-do run { target avx512bw } } */
+/* { dg-options "-O3 -mavx512bw" } */
+
+#include "avx512bw-check.h"
+
+#define main() do_main()
+#include "../../gcc.c-torture/execute/pr84524.c"
+
+static void
+avx512bw_test (void)
+{
+  do_main ();
+}