[GCC-8,AArch64] PR target/90075 Prefer bsl/bit/bif for copysignf. (backport GCC-8)

Message ID 52235872-ad5e-4d0b-e2df-14aad10e9c53@arm.com
State New
Headers show
Series
  • [GCC-8,AArch64] PR target/90075 Prefer bsl/bit/bif for copysignf. (backport GCC-8)
Related show

Commit Message

Srinath Parvathaneni April 29, 2019, 4:56 p.m.
Hi All,

This patch fixes the ICE caused in the expand pattern of the copysignf
builtin. It is a backport of r267019 from trunk.

Bootstrapped on aarch64-none-linux-gnu and regression tested on 
aarch64-none-elf.

OK for the GCC 8 branch? If OK, could someone commit the patch on my
behalf, as I don't have commit rights?

*** gcc/ChangeLog ***

2019-04-29  Srinath Parvathaneni  <srinath.parvathaneni@arm.com>

	Backport from mainline
	2018-12-11  Richard Earnshaw  <Richard.Earnshaw@arm.com>

	PR target/37369
	* config/aarch64/iterators.md (sizem1): Add sizes for
	SFmode and DFmode.
	(Vbtype): Add SFmode mapping.
	* config/aarch64/aarch64.md (copysigndf3, copysignsf3): Delete.
	(copysign<GPF:mode>3): New expand pattern.
	(copysign<GPF:mode>3_insn): New insn pattern.

*** gcc/testsuite/ChangeLog ***

2019-04-29  Srinath Parvathaneni  <srinath.parvathaneni@arm.com>

	PR target/90075
	* gcc.target/aarch64/pr90075.c: New test.

Comments

Richard Earnshaw (lists) April 30, 2019, 9:25 a.m. | #1
On 29/04/2019 17:56, Srinath Parvathaneni wrote:
> Hi All,

> 

> This patch fixes the ICE caused in the expand pattern of the copysignf

> builtin. It is a backport of r267019 from trunk.

> 

> Bootstrapped on aarch64-none-linux-gnu and regression tested on 

> aarch64-none-elf.

> 

> OK for the GCC 8 branch? If OK, could someone commit the patch on my

> behalf, as I don't have commit rights?


Applied.

R.

> 

> *** gcc/ChangeLog ***

> 

> 2019-04-29  Srinath Parvathaneni  <srinath.parvathaneni@arm.com>

> 

> 	Backport from mainline

> 	2018-12-11  Richard Earnshaw  <Richard.Earnshaw@arm.com>

> 

> 	PR target/37369

> 	* config/aarch64/iterators.md (sizem1): Add sizes for

> 	SFmode and DFmode.

> 	(Vbtype): Add SFmode mapping.

> 	* config/aarch64/aarch64.md (copysigndf3, copysignsf3): Delete.

> 	(copysign<GPF:mode>3): New expand pattern.

> 	(copysign<GPF:mode>3_insn): New insn pattern.

> 

> *** gcc/testsuite/ChangeLog ***

> 

> 2019-04-29  Srinath Parvathaneni  <srinath.parvathaneni@arm.com>

> 

> 	PR target/90075

> 	* gcc.target/aarch64/pr90075.c: New test.

> 

> 

> 

> rb11110.patch

> 

> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md

> index 32a0e1f3685746b9a7d70239745586d0f0fa7ee1..11c0ef00dcfd5fc41a0eb93c8d5b5bda8a6e6885 100644

> --- a/gcc/config/aarch64/aarch64.md

> +++ b/gcc/config/aarch64/aarch64.md

> @@ -189,6 +189,7 @@

>      UNSPEC_CLASTB

>      UNSPEC_FADDA

>      UNSPEC_REV_SUBREG

> +    UNSPEC_COPYSIGN

>  ])

>  

>  (define_c_enum "unspecv" [

> @@ -5427,49 +5428,48 @@

>  ;;   LDR d2, #(1 << 63)

>  ;;   BSL v2.8b, [y], [x]

>  ;;

> -;; or another, equivalent, sequence using one of BSL/BIT/BIF.

> -;; aarch64_simd_bsldf will select the best suited of these instructions

> -;; to generate based on register allocation, and knows how to partially

> -;; constant fold based on the values of X and Y, so expand through that.

> -

> -(define_expand "copysigndf3"

> -  [(match_operand:DF 0 "register_operand")

> -   (match_operand:DF 1 "register_operand")

> -   (match_operand:DF 2 "register_operand")]

> +;; or another, equivalent, sequence using one of BSL/BIT/BIF.  Because

> +;; we expect these operations to nearly always operate on

> +;; floating-point values, we do not want the operation to be

> +;; simplified into a bit-field insert operation that operates on the

> +;; integer side, since typically that would involve three inter-bank

> +;; register copies.  As we do not expect copysign to be followed by

> +;; other logical operations on the result, it seems preferable to keep

> +;; this as an unspec operation, rather than exposing the underlying

> +;; logic to the compiler.

> +

> +(define_expand "copysign<GPF:mode>3"

> +  [(match_operand:GPF 0 "register_operand")

> +   (match_operand:GPF 1 "register_operand")

> +   (match_operand:GPF 2 "register_operand")]

>    "TARGET_FLOAT && TARGET_SIMD"

>  {

> -  rtx mask = gen_reg_rtx (DImode);

> -  emit_move_insn (mask, GEN_INT (HOST_WIDE_INT_1U << 63));

> -  emit_insn (gen_aarch64_simd_bsldf (operands[0], mask,

> -				     operands[2], operands[1]));

> +  rtx bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);

> +  emit_move_insn (bitmask, GEN_INT (HOST_WIDE_INT_M1U

> +				    << (GET_MODE_BITSIZE (<MODE>mode) - 1)));

> +  emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],

> +				       bitmask));

>    DONE;

>  }

>  )

>  

> -;; As above, but we must first get to a 64-bit value if we wish to use

> -;; aarch64_simd_bslv2sf.

> -

> -(define_expand "copysignsf3"

> -  [(match_operand:SF 0 "register_operand")

> -   (match_operand:SF 1 "register_operand")

> -   (match_operand:SF 2 "register_operand")]

> +(define_insn "copysign<GPF:mode>3_insn"

> +  [(set (match_operand:GPF 0 "register_operand" "=w,w,w,r")

> +	(unspec:GPF [(match_operand:GPF 1 "register_operand" "w,0,w,r")

> +		     (match_operand:GPF 2 "register_operand" "w,w,0,0")

> +		     (match_operand:<V_INT_EQUIV> 3 "register_operand"

> +		      "0,w,w,X")]

> +	 UNSPEC_COPYSIGN))]

>    "TARGET_FLOAT && TARGET_SIMD"

> -{

> -  rtx v_bitmask = gen_reg_rtx (V2SImode);

> -

> -  /* Juggle modes to get us in to a vector mode for BSL.  */

> -  rtx op1 = lowpart_subreg (DImode, operands[1], SFmode);

> -  rtx op2 = lowpart_subreg (V2SFmode, operands[2], SFmode);

> -  rtx tmp = gen_reg_rtx (V2SFmode);

> -  emit_move_insn (v_bitmask,

> -		  aarch64_simd_gen_const_vector_dup (V2SImode,

> -						     HOST_WIDE_INT_M1U << 31));

> -  emit_insn (gen_aarch64_simd_bslv2sf (tmp, v_bitmask, op2, op1));

> -  emit_move_insn (operands[0], lowpart_subreg (SFmode, tmp, V2SFmode));

> -  DONE;

> -}

> +  "@

> +   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>

> +   bit\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>

> +   bif\\t%0.<Vbtype>, %1.<Vbtype>, %3.<Vbtype>

> +   bfxil\\t%<w1>0, %<w1>1, #0, <sizem1>"

> +  [(set_attr "type" "neon_bsl<q>,neon_bsl<q>,neon_bsl<q>,bfm")]

>  )

>  

> +

>  ;; For xorsign (x, y), we want to generate:

>  ;;

>  ;; LDR   d2, #1<<63

> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md

> index 25991d97836498f48ca30b8757ed7963217da417..21d66d36f82cff639fbaf2de70e97e9767b1c748 100644

> --- a/gcc/config/aarch64/iterators.md

> +++ b/gcc/config/aarch64/iterators.md

> @@ -578,7 +578,8 @@

>  (define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])

>  

>  ;; Give the ordinal of the MSB in the mode

> -(define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63")])

> +(define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63")

> +			  (HF "#15") (SF "#31") (DF "#63")])

>  

>  ;; Attribute to describe constants acceptable in logical operations

>  (define_mode_attr lconst [(SI "K") (DI "L")])

> @@ -664,7 +665,7 @@

>  			  (V8HF "16b") (V2SF  "8b")

>  			  (V4SF "16b") (V2DF  "16b")

>  			  (DI   "8b")  (DF    "8b")

> -			  (SI   "8b")])

> +			  (SI   "8b")  (SF    "8b")])

>  

>  ;; Define element mode for each vector mode.

>  (define_mode_attr VEL [(V8QI  "QI") (V16QI "QI") (VNx16QI "QI")

> diff --git a/gcc/testsuite/gcc.target/aarch64/pr90075.c b/gcc/testsuite/gcc.target/aarch64/pr90075.c

> new file mode 100644

> index 0000000000000000000000000000000000000000..cae7e618fc0cc34646e3e68a419b8de0631e6bda

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/aarch64/pr90075.c

> @@ -0,0 +1,21 @@

> +/* { dg-do compile } */

> +/* { dg-additional-options "-O1" } */

> +

> +typedef struct {

> +  float one, two;

> +} twofloats;

> +

> +float

> +bug (twofloats tf)

> +{

> +  float f1, f2;

> +  union {

> +    twofloats tfloats;

> +    float arr[2];

> +  } utfloats;

> +

> +  utfloats.tfloats = tf;

> +  f1 = utfloats.arr[1];

> +  f2 = __builtin_copysignf (0, f1);

> +  return f2;

> +}

>

Patch

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 32a0e1f3685746b9a7d70239745586d0f0fa7ee1..11c0ef00dcfd5fc41a0eb93c8d5b5bda8a6e6885 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -189,6 +189,7 @@ 
     UNSPEC_CLASTB
     UNSPEC_FADDA
     UNSPEC_REV_SUBREG
+    UNSPEC_COPYSIGN
 ])
 
 (define_c_enum "unspecv" [
@@ -5427,49 +5428,48 @@ 
 ;;   LDR d2, #(1 << 63)
 ;;   BSL v2.8b, [y], [x]
 ;;
-;; or another, equivalent, sequence using one of BSL/BIT/BIF.
-;; aarch64_simd_bsldf will select the best suited of these instructions
-;; to generate based on register allocation, and knows how to partially
-;; constant fold based on the values of X and Y, so expand through that.
-
-(define_expand "copysigndf3"
-  [(match_operand:DF 0 "register_operand")
-   (match_operand:DF 1 "register_operand")
-   (match_operand:DF 2 "register_operand")]
+;; or another, equivalent, sequence using one of BSL/BIT/BIF.  Because
+;; we expect these operations to nearly always operate on
+;; floating-point values, we do not want the operation to be
+;; simplified into a bit-field insert operation that operates on the
+;; integer side, since typically that would involve three inter-bank
+;; register copies.  As we do not expect copysign to be followed by
+;; other logical operations on the result, it seems preferable to keep
+;; this as an unspec operation, rather than exposing the underlying
+;; logic to the compiler.
+
+(define_expand "copysign<GPF:mode>3"
+  [(match_operand:GPF 0 "register_operand")
+   (match_operand:GPF 1 "register_operand")
+   (match_operand:GPF 2 "register_operand")]
   "TARGET_FLOAT && TARGET_SIMD"
 {
-  rtx mask = gen_reg_rtx (DImode);
-  emit_move_insn (mask, GEN_INT (HOST_WIDE_INT_1U << 63));
-  emit_insn (gen_aarch64_simd_bsldf (operands[0], mask,
-				     operands[2], operands[1]));
+  rtx bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
+  emit_move_insn (bitmask, GEN_INT (HOST_WIDE_INT_M1U
+				    << (GET_MODE_BITSIZE (<MODE>mode) - 1)));
+  emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
+				       bitmask));
   DONE;
 }
 )
 
-;; As above, but we must first get to a 64-bit value if we wish to use
-;; aarch64_simd_bslv2sf.
-
-(define_expand "copysignsf3"
-  [(match_operand:SF 0 "register_operand")
-   (match_operand:SF 1 "register_operand")
-   (match_operand:SF 2 "register_operand")]
+(define_insn "copysign<GPF:mode>3_insn"
+  [(set (match_operand:GPF 0 "register_operand" "=w,w,w,r")
+	(unspec:GPF [(match_operand:GPF 1 "register_operand" "w,0,w,r")
+		     (match_operand:GPF 2 "register_operand" "w,w,0,0")
+		     (match_operand:<V_INT_EQUIV> 3 "register_operand"
+		      "0,w,w,X")]
+	 UNSPEC_COPYSIGN))]
   "TARGET_FLOAT && TARGET_SIMD"
-{
-  rtx v_bitmask = gen_reg_rtx (V2SImode);
-
-  /* Juggle modes to get us in to a vector mode for BSL.  */
-  rtx op1 = lowpart_subreg (DImode, operands[1], SFmode);
-  rtx op2 = lowpart_subreg (V2SFmode, operands[2], SFmode);
-  rtx tmp = gen_reg_rtx (V2SFmode);
-  emit_move_insn (v_bitmask,
-		  aarch64_simd_gen_const_vector_dup (V2SImode,
-						     HOST_WIDE_INT_M1U << 31));
-  emit_insn (gen_aarch64_simd_bslv2sf (tmp, v_bitmask, op2, op1));
-  emit_move_insn (operands[0], lowpart_subreg (SFmode, tmp, V2SFmode));
-  DONE;
-}
+  "@
+   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
+   bit\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
+   bif\\t%0.<Vbtype>, %1.<Vbtype>, %3.<Vbtype>
+   bfxil\\t%<w1>0, %<w1>1, #0, <sizem1>"
+  [(set_attr "type" "neon_bsl<q>,neon_bsl<q>,neon_bsl<q>,bfm")]
 )
 
+
 ;; For xorsign (x, y), we want to generate:
 ;;
 ;; LDR   d2, #1<<63
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 25991d97836498f48ca30b8757ed7963217da417..21d66d36f82cff639fbaf2de70e97e9767b1c748 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -578,7 +578,8 @@ 
 (define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])
 
 ;; Give the ordinal of the MSB in the mode
-(define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63")])
+(define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63")
+			  (HF "#15") (SF "#31") (DF "#63")])
 
 ;; Attribute to describe constants acceptable in logical operations
 (define_mode_attr lconst [(SI "K") (DI "L")])
@@ -664,7 +665,7 @@ 
 			  (V8HF "16b") (V2SF  "8b")
 			  (V4SF "16b") (V2DF  "16b")
 			  (DI   "8b")  (DF    "8b")
-			  (SI   "8b")])
+			  (SI   "8b")  (SF    "8b")])
 
 ;; Define element mode for each vector mode.
 (define_mode_attr VEL [(V8QI  "QI") (V16QI "QI") (VNx16QI "QI")
diff --git a/gcc/testsuite/gcc.target/aarch64/pr90075.c b/gcc/testsuite/gcc.target/aarch64/pr90075.c
new file mode 100644
index 0000000000000000000000000000000000000000..cae7e618fc0cc34646e3e68a419b8de0631e6bda
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr90075.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-O1" } */
+
+typedef struct {
+  float one, two;
+} twofloats;
+
+float
+bug (twofloats tf)
+{
+  float f1, f2;
+  union {
+    twofloats tfloats;
+    float arr[2];
+  } utfloats;
+
+  utfloats.tfloats = tf;
+  f1 = utfloats.arr[1];
+  f2 = __builtin_copysignf (0, f1);
+  return f2;
+}