Fix i?86/x86_64 pre-SSE4.1 rint expansion (PR target/81906)

Message ID 20171207164831.GM2353@tucnak
State New
Headers show
Series
  • Fix i?86/x86_64 pre-SSE4.1 rint expansion (PR target/81906)
Related show

Commit Message

Jakub Jelinek Dec. 7, 2017, 4:48 p.m.
Hi!

As mentioned in the PR, the code emitted by ix86_expand_rint
doesn't work with rounding to +/- infinity.
When flag_rounding_math is set, this patch adjusts the expansion to do
something that works well even in that case (it should be just one insn longer).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-12-07  Joseph Myers  <joseph@codesourcery.com>
	    Alexander Monakov  <amonakov@ispras.ru>
	    Jakub Jelinek  <jakub@redhat.com>

	PR target/81906
	* config/i386/i386.c (ix86_expand_rint): Handle flag_rounding_math.

	* gcc.target/i386/pr81906.c: New test.


	Jakub

Comments

Uros Bizjak Dec. 7, 2017, 5:23 p.m. | #1
On Thu, Dec 7, 2017 at 5:48 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!

>

> As mentioned in the PR, the code emitted by ix86_expand_rint

> doesn't work with rounding to +/- infinity.

> This patch adjusts it if flag_rounding_math to do something that works

> well even for that case (should be just one insn longer).

>

> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

>

> 2017-12-07  Joseph Myers  <joseph@codesourcery.com>

>             Alexander Monakov  <amonakov@ispras.ru>

>             Jakub Jelinek  <jakub@redhat.com>

>

>         PR target/81906

>         * config/i386/i386.c (ix86_expand_rint): Handle flag_rounding_math.

>

>         * gcc.target/i386/pr81906.c: New test.


OK for trunk and release branches.

Thanks,
Uros.

> --- gcc/config/i386/i386.c.jj   2017-12-05 10:15:31.000000000 +0100

> +++ gcc/config/i386/i386.c      2017-12-07 11:58:15.159881741 +0100

> @@ -44255,8 +44255,7 @@ ix86_expand_lfloorceil (rtx op0, rtx op1

>    emit_move_insn (op0, ireg);

>  }

>

> -/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the

> -   result in OPERAND0.  */

> +/* Expand rint rounding OPERAND1 and storing the result in OPERAND0.  */

>  void

>  ix86_expand_rint (rtx operand0, rtx operand1)

>  {

> @@ -44264,11 +44263,17 @@ ix86_expand_rint (rtx operand0, rtx oper

>         xa = fabs (operand1);

>          if (!isless (xa, 2**52))

>           return operand1;

> -        xa = xa + 2**52 - 2**52;

> +        two52 = 2**52;

> +        if (flag_rounding_math)

> +         {

> +           two52 = copysign (two52, operand1);

> +           xa = operand1;

> +         }

> +        xa = xa + two52 - two52;

>          return copysign (xa, operand1);

>     */

>    machine_mode mode = GET_MODE (operand0);

> -  rtx res, xa, TWO52, mask;

> +  rtx res, xa, TWO52, two52, mask;

>    rtx_code_label *label;

>

>    res = gen_reg_rtx (mode);

> @@ -44281,8 +44286,16 @@ ix86_expand_rint (rtx operand0, rtx oper

>    TWO52 = ix86_gen_TWO52 (mode);

>    label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

>

> -  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);

> -  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

> +  two52 = TWO52;

> +  if (flag_rounding_math)

> +    {

> +      two52 = gen_reg_rtx (mode);

> +      ix86_sse_copysign_to_positive (two52, TWO52, res, mask);

> +      xa = res;

> +    }

> +

> +  xa = expand_simple_binop (mode, PLUS, xa, two52, NULL_RTX, 0, OPTAB_DIRECT);

> +  xa = expand_simple_binop (mode, MINUS, xa, two52, xa, 0, OPTAB_DIRECT);

>

>    ix86_sse_copysign_to_positive (res, xa, res, mask);

>

> --- gcc/testsuite/gcc.target/i386/pr81906.c.jj  2017-12-07 11:38:06.730812658 +0100

> +++ gcc/testsuite/gcc.target/i386/pr81906.c     2017-12-07 11:38:14.488716544 +0100

> @@ -0,0 +1,37 @@

> +/* PR target/81906 */

> +/* { dg-do run { target *-*-linux* *-*-gnu* } }

> +/* { dg-options "-O2 -frounding-math" } */

> +

> +#include <fenv.h>

> +

> +int

> +main ()

> +{

> +  #define N 12

> +  double a[N] = { 2.0, 2.25, 2.5, 2.75, 3.5, -2.0, -2.25, -2.5, -2.75, -3.5, 0x2.0p53, -0x2.0p53 };

> +  double b[N], c[N], d[N], e[N];

> +  double be[N] = { 2.0, 2.0, 2.0, 3.0, 4.0, -2.0, -2.0, -2.0, -3.0, -4.0, 0x2.0p53, -0x2.0p53 };

> +  double ce[N] = { 2.0, 2.0, 2.0, 2.0, 3.0, -2.0, -3.0, -3.0, -3.0, -4.0, 0x2.0p53, -0x2.0p53 };

> +  double de[N] = { 2.0, 3.0, 3.0, 3.0, 4.0, -2.0, -2.0, -2.0, -2.0, -3.0, 0x2.0p53, -0x2.0p53 };

> +  double ee[N] = { 2.0, 2.0, 2.0, 2.0, 3.0, -2.0, -2.0, -2.0, -2.0, -3.0, 0x2.0p53, -0x2.0p53 };

> +  asm volatile ("" : : "g" (a), "g" (be), "g" (ce), "g" (de), "g" (ee) : "memory");

> +

> +  int i;

> +  fesetround (FE_TONEAREST);

> +  for (i = 0; i < N; ++i)

> +    b[i] = __builtin_rint (a[i]);

> +  fesetround (FE_DOWNWARD);

> +  for (i = 0; i < N; ++i)

> +    c[i] = __builtin_rint (a[i]);

> +  fesetround (FE_UPWARD);

> +  for (i = 0; i < N; ++i)

> +    d[i] = __builtin_rint (a[i]);

> +  fesetround (FE_TOWARDZERO);

> +  for (i = 0; i < N; ++i)

> +    e[i] = __builtin_rint (a[i]);

> +  fesetround (FE_TONEAREST);

> +  for (i = 0; i < N; ++i)

> +    if (b[i] != be[i] || c[i] != ce[i] || d[i] != de[i] || e[i] != ee[i])

> +      __builtin_abort ();

> +  return 0;

> +}

>

>         Jakub

Patch

--- gcc/config/i386/i386.c.jj	2017-12-05 10:15:31.000000000 +0100
+++ gcc/config/i386/i386.c	2017-12-07 11:58:15.159881741 +0100
@@ -44255,8 +44255,7 @@  ix86_expand_lfloorceil (rtx op0, rtx op1
   emit_move_insn (op0, ireg);
 }
 
-/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
-   result in OPERAND0.  */
+/* Expand rint rounding OPERAND1 and storing the result in OPERAND0.  */
 void
 ix86_expand_rint (rtx operand0, rtx operand1)
 {
@@ -44264,11 +44263,17 @@  ix86_expand_rint (rtx operand0, rtx oper
 	xa = fabs (operand1);
         if (!isless (xa, 2**52))
 	  return operand1;
-        xa = xa + 2**52 - 2**52;
+        two52 = 2**52;
+        if (flag_rounding_math)
+	  {
+	    two52 = copysign (two52, operand1);
+	    xa = operand1;
+	  }
+        xa = xa + two52 - two52;
         return copysign (xa, operand1);
    */
   machine_mode mode = GET_MODE (operand0);
-  rtx res, xa, TWO52, mask;
+  rtx res, xa, TWO52, two52, mask;
   rtx_code_label *label;
 
   res = gen_reg_rtx (mode);
@@ -44281,8 +44286,16 @@  ix86_expand_rint (rtx operand0, rtx oper
   TWO52 = ix86_gen_TWO52 (mode);
   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
 
-  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
-  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
+  two52 = TWO52;
+  if (flag_rounding_math)
+    {
+      two52 = gen_reg_rtx (mode);
+      ix86_sse_copysign_to_positive (two52, TWO52, res, mask);
+      xa = res;
+    }
+
+  xa = expand_simple_binop (mode, PLUS, xa, two52, NULL_RTX, 0, OPTAB_DIRECT);
+  xa = expand_simple_binop (mode, MINUS, xa, two52, xa, 0, OPTAB_DIRECT);
 
   ix86_sse_copysign_to_positive (res, xa, res, mask);
 
--- gcc/testsuite/gcc.target/i386/pr81906.c.jj	2017-12-07 11:38:06.730812658 +0100
+++ gcc/testsuite/gcc.target/i386/pr81906.c	2017-12-07 11:38:14.488716544 +0100
@@ -0,0 +1,37 @@ 
+/* PR target/81906 */
+/* { dg-do run { target *-*-linux* *-*-gnu* } } */
+/* { dg-options "-O2 -frounding-math" } */
+
+#include <fenv.h>
+
+int
+main ()
+{
+  #define N 12
+  double a[N] = { 2.0, 2.25, 2.5, 2.75, 3.5, -2.0, -2.25, -2.5, -2.75, -3.5, 0x2.0p53, -0x2.0p53 };
+  double b[N], c[N], d[N], e[N];
+  double be[N] = { 2.0, 2.0, 2.0, 3.0, 4.0, -2.0, -2.0, -2.0, -3.0, -4.0, 0x2.0p53, -0x2.0p53 };
+  double ce[N] = { 2.0, 2.0, 2.0, 2.0, 3.0, -2.0, -3.0, -3.0, -3.0, -4.0, 0x2.0p53, -0x2.0p53 };
+  double de[N] = { 2.0, 3.0, 3.0, 3.0, 4.0, -2.0, -2.0, -2.0, -2.0, -3.0, 0x2.0p53, -0x2.0p53 };
+  double ee[N] = { 2.0, 2.0, 2.0, 2.0, 3.0, -2.0, -2.0, -2.0, -2.0, -3.0, 0x2.0p53, -0x2.0p53 };
+  asm volatile ("" : : "g" (a), "g" (be), "g" (ce), "g" (de), "g" (ee) : "memory");
+
+  int i;
+  fesetround (FE_TONEAREST);
+  for (i = 0; i < N; ++i)
+    b[i] = __builtin_rint (a[i]);
+  fesetround (FE_DOWNWARD);
+  for (i = 0; i < N; ++i)
+    c[i] = __builtin_rint (a[i]);
+  fesetround (FE_UPWARD);
+  for (i = 0; i < N; ++i)
+    d[i] = __builtin_rint (a[i]);
+  fesetround (FE_TOWARDZERO);
+  for (i = 0; i < N; ++i)
+    e[i] = __builtin_rint (a[i]);
+  fesetround (FE_TONEAREST);
+  for (i = 0; i < N; ++i)
+    if (b[i] != be[i] || c[i] != ce[i] || d[i] != de[i] || e[i] != ee[i])
+      __builtin_abort ();
+  return 0;
+}