[nios2,committed] correct <mul>sidi3 costs

Message ID 038b9b5f-533c-6b4a-3622-95d2df04965b@codesourcery.com
State New
Headers show
Series
  • [nios2,committed] correct <mul>sidi3 costs
Related show

Commit Message

Sandra Loosemore Nov. 3, 2018, 6:24 p.m.
PR target/87079 reported that with -Os, the nios2 back end was emitting 
an inferior code sequence for widening multiply instead of using mulx. 
I tracked this down to the rtx costs hook not recognizing the RTL 
pattern for <mul>sidi3, so it overestimated the cost of that pattern.

I've been aware for a while that the RTX costs computation in the nios2 
back end is far from optimal, or even correct :-P, but giving it a complete 
overhaul is a pretty big project requiring benchmarking, etc., as well as 
unit tests.  I don't want the perfect to be the enemy of the good, so 
I've checked in the attached patch to fix this issue and add the test 
cases (both -Os and -O2 variants).

-Sandra

Patch

Index: gcc/config/nios2/nios2.c
===================================================================
--- gcc/config/nios2/nios2.c	(revision 265561)
+++ gcc/config/nios2/nios2.c	(working copy)
@@ -1539,6 +1539,19 @@  nios2_rtx_costs (rtx x, machine_mode mod
 	    *total = COSTS_N_INSNS (2);  /* Latency adjustment.  */
 	  else 
 	    *total = COSTS_N_INSNS (1);
+	  if (TARGET_HAS_MULX && GET_MODE (x) == DImode)
+	    {
+	      enum rtx_code c0 = GET_CODE (XEXP (x, 0));
+	      enum rtx_code c1 = GET_CODE (XEXP (x, 1));
+	      if ((c0 == SIGN_EXTEND && c1 == SIGN_EXTEND)
+		  || (c0 == ZERO_EXTEND && c1 == ZERO_EXTEND))
+		/* This is the <mul>sidi3 pattern, which expands into 4 insns,
+		   2 multiplies and 2 moves.  */
+		{
+		  *total = *total * 2 + COSTS_N_INSNS (2);
+		  return true;
+		}
+	    }
           return false;
         }
 
Index: gcc/testsuite/gcc.target/nios2/pr87079-1.c
===================================================================
--- gcc/testsuite/gcc.target/nios2/pr87079-1.c	(nonexistent)
+++ gcc/testsuite/gcc.target/nios2/pr87079-1.c	(working copy)
@@ -0,0 +1,34 @@ 
+/* { dg-do compile } */
+/* { dg-options "-Os -mhw-div -mhw-mul -mhw-mulx" } */
+
+#include <stdint.h>
+#include <stddef.h>
+
+void foo(const uint8_t* str, uint32_t* res)
+{
+  uint32_t rdVal0, rdVal1, rdVal2;
+  rdVal0 = rdVal1 = rdVal2 = 0;
+  unsigned c;
+  for (;;) {
+    c = *str++;
+    unsigned dig = c - '0';
+    if (dig > 9)
+      break; // stop at the first character that is not a decimal digit
+    uint64_t x10;
+
+    x10 = (uint64_t)rdVal0*10 + dig;
+    rdVal0 = (uint32_t)x10;
+    dig = (uint32_t)(x10 >> 32);
+
+    x10 = (uint64_t)rdVal1*10 + dig;
+    rdVal1 = (uint32_t)x10;
+    dig = (uint32_t)(x10 >> 32);
+
+    rdVal2 = rdVal2*10 + dig;
+  }
+  res[0] = rdVal0;
+  res[1] = rdVal1;
+  res[2] = rdVal2;
+}
+
+/* { dg-final { scan-assembler-times "mulxuu\t" 2 } } */
Index: gcc/testsuite/gcc.target/nios2/pr87079-2.c
===================================================================
--- gcc/testsuite/gcc.target/nios2/pr87079-2.c	(nonexistent)
+++ gcc/testsuite/gcc.target/nios2/pr87079-2.c	(working copy)
@@ -0,0 +1,34 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mhw-div -mhw-mul -mhw-mulx" } */
+
+#include <stdint.h>
+#include <stddef.h>
+
+void foo(const uint8_t* str, uint32_t* res)
+{
+  uint32_t rdVal0, rdVal1, rdVal2;
+  rdVal0 = rdVal1 = rdVal2 = 0;
+  unsigned c;
+  for (;;) {
+    c = *str++;
+    unsigned dig = c - '0';
+    if (dig > 9)
+      break; // stop at the first character that is not a decimal digit
+    uint64_t x10;
+
+    x10 = (uint64_t)rdVal0*10 + dig;
+    rdVal0 = (uint32_t)x10;
+    dig = (uint32_t)(x10 >> 32);
+
+    x10 = (uint64_t)rdVal1*10 + dig;
+    rdVal1 = (uint32_t)x10;
+    dig = (uint32_t)(x10 >> 32);
+
+    rdVal2 = rdVal2*10 + dig;
+  }
+  res[0] = rdVal0;
+  res[1] = rdVal1;
+  res[2] = rdVal2;
+}
+
+/* { dg-final { scan-assembler-times "mulxuu\t" 2 } } */