i386: Introduce peephole2 to use flags from CMPXCHG more [PR96189]

Message ID CAFULd4ZNGofEmOZ1417w0Um=dvaRx4cuhWr+fdnxgMP1=qELyA@mail.gmail.com
State New
Headers show
Series
  • i386: Introduce peephole2 to use flags from CMPXCHG more [PR96189]
Related show

Commit Message

Uros Bizjak via Gcc-patches July 15, 2020, 8:05 p.m.
The CMPXCHG instruction sets the ZF flag if the values in the destination
operand and the EAX register are equal; otherwise the ZF flag is cleared and
the value from the destination operand is loaded into EAX. The following assembly:

        movl    %esi, %eax
        lock cmpxchgl   %edx, (%rdi)
        cmpl    %esi, %eax
        sete    %al

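can be optimized by removing the unneeded comparison, since the ZF flag set by
CMPXCHG already reflects its result: a set ZF flag signals that EAX was not
updated (it still equals the expected value), while a cleared ZF flag signals
that EAX was loaded with a different value.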

2020-07-15  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:
    PR target/96189
    * config/i386/sync.md
    (peephole2 to remove unneeded compare after CMPXCHG): New pattern.

gcc/testsuite/ChangeLog:
    PR target/96189
    * gcc.target/i386/pr96189.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.

Patch

diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index 9ab5456b227..d203e9d1ecb 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -594,6 +594,41 @@ 
   "TARGET_CMPXCHG"
   "lock{%;} %K4cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
 
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand")
+	(match_operand:SWI 1 "general_operand"))
+   (parallel [(set (match_dup 0)
+		   (unspec_volatile:SWI
+		     [(match_operand:SWI 2 "memory_operand")
+		      (match_dup 0)
+		      (match_operand:SWI 3 "register_operand")
+		      (match_operand:SI 4 "const_int_operand")]
+		     UNSPECV_CMPXCHG))
+	      (set (match_dup 2)
+		   (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
+	      (set (reg:CCZ FLAGS_REG)
+		   (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])
+   (set (reg:CCZ FLAGS_REG)
+	(compare:CCZ (match_operand:SWI 5 "register_operand")
+		     (match_operand:SWI 6 "general_operand")))]
+  "(rtx_equal_p (operands[0], operands[5])
+    && rtx_equal_p (operands[1], operands[6]))
+   || (rtx_equal_p (operands[0], operands[6])
+       && rtx_equal_p (operands[1], operands[5]))"
+  [(set (match_dup 0)
+	(match_dup 1))
+   (parallel [(set (match_dup 0)
+		   (unspec_volatile:SWI
+		     [(match_dup 2)
+		      (match_dup 0)
+		      (match_dup 3)
+		      (match_dup 4)]
+		     UNSPECV_CMPXCHG))
+	      (set (match_dup 2)
+		   (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
+	      (set (reg:CCZ FLAGS_REG)
+		   (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])])
+
 ;; For operand 2 nonmemory_operand predicate is used instead of
 ;; register_operand to allow combiner to better optimize atomic
 ;; additions of constants.
diff --git a/gcc/testsuite/gcc.target/i386/pr96189.c b/gcc/testsuite/gcc.target/i386/pr96189.c
new file mode 100644
index 00000000000..1505e483b94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr96189.c
@@ -0,0 +1,12 @@ 
+/* PR target/96189 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "\tcmpb\t" } } */
+
+_Bool
+foo (unsigned char *x, unsigned char y, unsigned char z)
+{
+  unsigned char y_old = y;
+  __atomic_compare_exchange_n (x, &y, z, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+  return y == y_old;
+}