[v2,middle-end/i386] : Fix PR88502, Inline built-in asinh, acosh, atanh for -ffast-math

Message ID CAFULd4ZS_ekgOGhjF79jqJeZK+JBDpfVe0W0e9e9jKCVxst1xg@mail.gmail.com
State New
Headers show
Series
  • [v2,middle-end/i386] : Fix PR88502, Inline built-in asinh, acosh, atanh for -ffast-math
Related show

Commit Message

Uros Bizjak Dec. 17, 2018, 3:51 p.m.
On Mon, Dec 17, 2018 at 9:26 AM Richard Biener <rguenther@suse.de> wrote:
>
> On Mon, 17 Dec 2018, Uros Bizjak wrote:
>
> > ... and the patch.
>
> middle-end parts are OK.
>
> > On Mon, Dec 17, 2018 at 8:58 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> > >
> > > Attached patch inlines calls to asinh{,f}, acosh{,f,l} and atanh{,f,l}
> > > using x87 XFmode arithmetic. In the patch, I left out asinhl due to
> > > its reduced input argument range, but perhaps it could be added back,
> > > since we are expanding under flag_unsafe_math_optimizations. The
> > > expanders are modelled after the removed inlines in glibc [1] (which
> > > also include asinhl, with a comment mentioning its reduced input
> > > argument range).


Thinking a bit more about the reduced input range of asinhl: we have a
similar situation with the trigonometric functions, where the argument
range is reduced to +/-2^63. So I have committed version 2 of the
patch, which also expands asinhl.

2018-12-17  Uros Bizjak  <ubizjak@gmail.com>

    PR target/88502
    * internal-fn.def (ACOSH): New.
    (ASINH): Ditto.
    (ATANH): Ditto.
    * optabs.def (acosh_optab): New.
    (asinh_optab): Ditto.
    (atanh_optab): Ditto.
    * config/i386/i386-protos.h (ix86_emit_i387_asinh): New prototype.
    (ix86_emit_i387_acosh): Ditto.
    (ix86_emit_i387_atanh): Ditto.
    * config/i386/i386.c (ix86_emit_i387_asinh): New function.
    (ix86_emit_i387_acosh): Ditto.
    (ix86_emit_i387_atanh): Ditto.
    * config/i386/i386.md (asinhxf2): New expander.
    (asinh<mode>2): Ditto.
    (acoshxf2): Ditto.
    (acosh<mode>2): Ditto.
    (atanhxf2): Ditto.
    (atanh<mode>2): Ditto.

Uros.

Patch

Index: config/i386/i386-protos.h
===================================================================
--- config/i386/i386-protos.h	(revision 267203)
+++ config/i386/i386-protos.h	(working copy)
@@ -170,6 +170,9 @@ 
 extern void x86_emit_floatuns (rtx [2]);
 extern void ix86_emit_fp_unordered_jump (rtx);
 
+extern void ix86_emit_i387_asinh (rtx, rtx);
+extern void ix86_emit_i387_acosh (rtx, rtx);
+extern void ix86_emit_i387_atanh (rtx, rtx);
 extern void ix86_emit_i387_log1p (rtx, rtx);
 extern void ix86_emit_i387_round (rtx, rtx);
 extern void ix86_emit_swdivsf (rtx, rtx, rtx, machine_mode);
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 267203)
+++ config/i386/i386.c	(working copy)
@@ -44054,6 +44054,135 @@ 
   JUMP_LABEL (insn) = label;
 }
 
+/* Output code to perform an asinh XFmode calculation.  */
+
+void ix86_emit_i387_asinh (rtx op0, rtx op1)
+{
+  rtx e1 = gen_reg_rtx (XFmode);
+  rtx e2 = gen_reg_rtx (XFmode);
+  rtx scratch = gen_reg_rtx (HImode);
+  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
+  rtx cst1, tmp;
+  rtx_code_label *jump_label = gen_label_rtx ();
+  rtx_insn *insn;
+
+  /* e2 = sqrt (op1^2 + 1.0) + 1.0 */
+  emit_insn (gen_mulxf3 (e1, op1, op1));
+  cst1 = force_reg (XFmode, CONST1_RTX (XFmode));
+  emit_insn (gen_addxf3 (e2, e1, cst1));
+  emit_insn (gen_sqrtxf2 (e2, e2));
+  emit_insn (gen_addxf3 (e2, e2, cst1));
+
+  /* e1 = e1 / e2 */
+  emit_insn (gen_divxf3 (e1, e1, e2));
+
+  /* scratch = fxam (op1) */
+  emit_insn (gen_fxamxf2_i387 (scratch, op1));
+
+  /* e1 = e1 + |op1| */
+  emit_insn (gen_absxf2 (e2, op1));
+  emit_insn (gen_addxf3 (e1, e1, e2));
+
+  /* e2 = log1p (e1) */
+  ix86_emit_i387_log1p (e2, e1);
+
+  /* flags = signbit (op1) */
+  emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02)));
+
+  /* if (flags) then e2 = -e2 */
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
+			      gen_rtx_EQ (VOIDmode, flags, const0_rtx),
+			      gen_rtx_LABEL_REF (VOIDmode, jump_label),
+			      pc_rtx);
+  insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+  predict_jump (REG_BR_PROB_BASE * 50 / 100);
+  JUMP_LABEL (insn) = jump_label;
+
+  emit_insn (gen_negxf2 (e2, e2));
+
+  emit_label (jump_label);
+  LABEL_NUSES (jump_label) = 1;
+
+  emit_move_insn (op0, e2);
+}
+
+/* Output code to perform an acosh XFmode calculation.  */
+
+void ix86_emit_i387_acosh (rtx op0, rtx op1)
+{
+  rtx e1 = gen_reg_rtx (XFmode);
+  rtx e2 = gen_reg_rtx (XFmode);
+  rtx cst1 = force_reg (XFmode, CONST1_RTX (XFmode));
+
+  /* e2 = sqrt (op1 + 1.0) */
+  emit_insn (gen_addxf3 (e2, op1, cst1));
+  emit_insn (gen_sqrtxf2 (e2, e2));
+
+  /* e1 = sqrt (op1 - 1.0) */
+  emit_insn (gen_subxf3 (e1, op1, cst1));
+  emit_insn (gen_sqrtxf2 (e1, e1));
+
+  /* e1 = e1 * e2 */
+  emit_insn (gen_mulxf3 (e1, e1, e2));
+
+  /* e1 = e1 + op1 */
+  emit_insn (gen_addxf3 (e1, e1, op1));
+
+  /* op0 = log (e1) */
+  emit_insn (gen_logxf2 (op0, e1));
+}
+
+/* Output code to perform an atanh XFmode calculation.  */
+
+void ix86_emit_i387_atanh (rtx op0, rtx op1)
+{
+  rtx e1 = gen_reg_rtx (XFmode);
+  rtx e2 = gen_reg_rtx (XFmode);
+  rtx scratch = gen_reg_rtx (HImode);
+  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
+  rtx half = const_double_from_real_value (dconsthalf, XFmode);
+  rtx cst1, tmp;
+  rtx_code_label *jump_label = gen_label_rtx ();
+  rtx_insn *insn;
+
+  /* scratch = fxam (op1) */
+  emit_insn (gen_fxamxf2_i387 (scratch, op1));
+
+  /* e2 = |op1| */
+  emit_insn (gen_absxf2 (e2, op1));
+
+  /* e1 = -(e2 + e2) / (e2 + 1.0) */
+  cst1 = force_reg (XFmode, CONST1_RTX (XFmode));
+  emit_insn (gen_addxf3 (e1, e2, cst1));
+  emit_insn (gen_addxf3 (e2, e2, e2));
+  emit_insn (gen_negxf2 (e2, e2));
+  emit_insn (gen_divxf3 (e1, e2, e1));
+
+  /* e2 = log1p (e1) */
+  ix86_emit_i387_log1p (e2, e1);
+
+  /* flags = signbit (op1) */
+  emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02)));
+
+  /* if (!flags) then e2 = -e2 */
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
+			      gen_rtx_NE (VOIDmode, flags, const0_rtx),
+			      gen_rtx_LABEL_REF (VOIDmode, jump_label),
+			      pc_rtx);
+  insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+  predict_jump (REG_BR_PROB_BASE * 50 / 100);
+  JUMP_LABEL (insn) = jump_label;
+
+  emit_insn (gen_negxf2 (e2, e2));
+
+  emit_label (jump_label);
+  LABEL_NUSES (jump_label) = 1;
+
+  /* op0 = 0.5 * e2 */
+  half = force_reg (XFmode, half);
+  emit_insn (gen_mulxf3 (op0, e2, half));
+}
+
 /* Output code to perform a log1p XFmode calculation.  */
 
 void ix86_emit_i387_log1p (rtx op0, rtx op1)
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 267203)
+++ config/i386/i386.md	(working copy)
@@ -15483,6 +15483,89 @@ 
   DONE;
 })
 
+(define_expand "asinhxf2"
+  [(use (match_operand:XF 0 "register_operand"))
+   (use (match_operand:XF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_finite_math_only
+   && flag_unsafe_math_optimizations"
+{
+  ix86_emit_i387_asinh (operands[0], operands[1]);
+  DONE;
+})
+
+(define_expand "asinh<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_finite_math_only
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_asinhxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
+  DONE;
+})
+
+(define_expand "acoshxf2"
+  [(use (match_operand:XF 0 "register_operand"))
+   (use (match_operand:XF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  ix86_emit_i387_acosh (operands[0], operands[1]);
+  DONE;
+})
+
+(define_expand "acosh<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_acoshxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
+  DONE;
+})
+
+(define_expand "atanhxf2"
+  [(use (match_operand:XF 0 "register_operand"))
+   (use (match_operand:XF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  ix86_emit_i387_atanh (operands[0], operands[1]);
+  DONE;
+})
+
+(define_expand "atanh<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_atanhxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
+  DONE;
+})
+
 (define_insn "fyl2xxf3_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
         (unspec:XF [(match_operand:XF 1 "register_operand" "0")
Index: internal-fn.def
===================================================================
--- internal-fn.def	(revision 267203)
+++ internal-fn.def	(working copy)
@@ -201,8 +201,11 @@ 
 
 /* Unary math functions.  */
 DEF_INTERNAL_FLT_FN (ACOS, ECF_CONST, acos, unary)
+DEF_INTERNAL_FLT_FN (ACOSH, ECF_CONST, acosh, unary)
 DEF_INTERNAL_FLT_FN (ASIN, ECF_CONST, asin, unary)
+DEF_INTERNAL_FLT_FN (ASINH, ECF_CONST, asinh, unary)
 DEF_INTERNAL_FLT_FN (ATAN, ECF_CONST, atan, unary)
+DEF_INTERNAL_FLT_FN (ATANH, ECF_CONST, atanh, unary)
 DEF_INTERNAL_FLT_FN (COS, ECF_CONST, cos, unary)
 DEF_INTERNAL_FLT_FN (EXP, ECF_CONST, exp, unary)
 DEF_INTERNAL_FLT_FN (EXP10, ECF_CONST, exp10, unary)
Index: optabs.def
===================================================================
--- optabs.def	(revision 267203)
+++ optabs.def	(working copy)
@@ -273,9 +273,12 @@ 
 OPTAB_D (nearbyint_optab, "nearbyint$a2")
 
 OPTAB_D (acos_optab, "acos$a2")
+OPTAB_D (acosh_optab, "acosh$a2")
 OPTAB_D (asin_optab, "asin$a2")
+OPTAB_D (asinh_optab, "asinh$a2")
 OPTAB_D (atan2_optab, "atan2$a3")
 OPTAB_D (atan_optab, "atan$a2")
+OPTAB_D (atanh_optab, "atanh$a2")
 OPTAB_D (copysign_optab, "copysign$F$a3")
 OPTAB_D (xorsign_optab, "xorsign$F$a3")
 OPTAB_D (cos_optab, "cos$a2")