rs6000: Split movsf_from_si from high word before reload[PR89310]

Message ID 20200706021757.1118129-1-luoxhu@linux.ibm.com
State New
Headers show
Series
  • rs6000: Split movsf_from_si from high word before reload[PR89310]
Related show

Commit Message

Patrick Palka via Gcc-patches July 6, 2020, 2:17 a.m.
For extracting high part element from DImode register like:

{%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

split it before reload with "and mask" to avoid generating shift right
32 bit then shift left 32 bit.

srdi 3,3,32
sldi 9,3,32
mtvsrd 1,9
xscvspdpn 1,1

=>

rldicr 3,3,0,31
mtvsrd 1,3
xscvspdpn 1,1

gcc/ChangeLog:

2020-07-03  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* config/rs6000/rs6000.md (movsf_from_si2): New
	define_insn_and_split.

gcc/testsuite/ChangeLog:

2020-07-03  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* gcc.target/powerpc/pr89310.c: New test.
---
 gcc/config/rs6000/rs6000.md                | 63 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 ++++++
 2 files changed, 80 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c

-- 
2.21.0.777.g83232e3864

Comments

Segher Boessenkool July 7, 2020, 12:18 a.m. | #1
Hi!

On Sun, Jul 05, 2020 at 09:17:57PM -0500, Xionghu Luo wrote:
> For extracting high part element from DImode register like:

> 

> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

> 

> split it before reload with "and mask" to avoid generating shift right

> 32 bit then shift left 32 bit.

> 

> srdi 3,3,32

> sldi 9,3,32

> mtvsrd 1,9

> xscvspdpn 1,1

> 

> =>

> 

> rldicr 3,3,0,31

> mtvsrd 1,3

> xscvspdpn 1,1


Great :-)

> +;; For extracting high part element from DImode register like:

> +;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

> +;; split it before reload with "and mask" to avoid generating shift right

> +;; 32 bit then shift left 32 bit.

> +(define_insn_and_split "movsf_from_si2"

> +  [(set (match_operand:SF 0 "nonimmediate_operand"

> +	    "=!r,       f,         v,         wa,        m,         Z,

> +	     Z,         wa,        ?r,        !r")

> +	    (unspec:SF [

> +	     (subreg:SI (ashiftrt:DI

> +	       (match_operand:DI 1 "input_operand"

> +	   "m,         m,         wY,        Z,         r,         f,

> +	   wa,        r,         wa,        r")

> +	  (const_int 32)) 0)]

> +		   UNSPEC_SF_FROM_SI))

> +  (clobber (match_scratch:DI 2

> +	    "=X,        X,         X,         X,         X,         X,

> +	    X,         r,         X,         X"))]

> +  "TARGET_NO_SF_SUBREG

> +   && (register_operand (operands[0], SFmode)

> +       && register_operand (operands[1], DImode))"


If the insn condition requires operands 0 and 1 to be register_operands,
it can ask for that in the predicates, instead: not nonimmediate_operand
and input_operand, but just gpc_reg_operand instead.  You can leave out
the impossible alternatives as well (0, 1, 2, 3, 4, 5, 6), leaving just

(define_insn_and_split "movsf_from_si2"
  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r,!r")
	(unspec:SF
	  [(subreg:SI (ashiftrt:DI
			(match_operand:DI 1 "input_operand" "r,wa,r")
			(const_int 32))
		      0)]
	  UNSPEC_SF_FROM_SI)))]
  "TARGET_NO_SF_SUBREG"
  "@
   #
   mfvsrwz %0,%x1
   mr %0,%1"

  "&& !reload_completed
   && vsx_reg_sfsubreg_ok (operands[0], SFmode)"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = gen_reg_rtx (DImode);

You cannot call gen_reg_rtx too late in the pass pipeline.  What we
usually do for such cases is put it as a match_scratch in the pattern,
and then do code like

  if (GET_CODE (operands[2]) == SCRATCH)
    operands[2] = gen_reg_rtx (DImode);

so that it will work both before and after reload.

  /* Avoid split {r155:SI#0=unspec[r133:DI>>0x20#0] 86;clobber scratch;} from PR42745.  */

(This line is too long, btw.)

  if (!SUBREG_P (operands[0]))
    {
      rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
      emit_insn (gen_anddi3 (tmp, op1, mask));
      emit_insn (gen_p8_mtvsrd_sf (op0, tmp));
      emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
      DONE;
    }
  else
    FAIL;
}
  [(set_attr "length" "12,*,*")
   (set_attr "type" "vecfloat,mffgpr,*")
   (set_attr "isa" "p8v,p8v,*")])

I wonder what will happen if you actually do FAIL here...  There then is
no insn alternative that can match, so we ICE?  In that case, just leave
out the whole FAIL thing, it is useless?  You can do a gcc_assert if you
want to check something.

Oh, and maybe you only want to handle GPRs here, not VSRs?  So just the
"r", not the "wa" at all?  What code would it generate for vector regs?

Lots of questions, sorry!


Segher
Patrick Palka via Gcc-patches July 7, 2020, 8:39 a.m. | #2
On 2020/7/7 08:18, Segher Boessenkool wrote:
> Hi!

> 

> On Sun, Jul 05, 2020 at 09:17:57PM -0500, Xionghu Luo wrote:

>> For extracting high part element from DImode register like:

>>

>> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

>>

>> split it before reload with "and mask" to avoid generating shift right

>> 32 bit then shift left 32 bit.

>>

>> srdi 3,3,32

>> sldi 9,3,32

>> mtvsrd 1,9

>> xscvspdpn 1,1

>>

>> =>

>>

>> rldicr 3,3,0,31

>> mtvsrd 1,3

>> xscvspdpn 1,1

> 

> Great :-)

> 

>> +;; For extracting high part element from DImode register like:

>> +;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

>> +;; split it before reload with "and mask" to avoid generating shift right

>> +;; 32 bit then shift left 32 bit.

>> +(define_insn_and_split "movsf_from_si2"

>> +  [(set (match_operand:SF 0 "nonimmediate_operand"

>> +	    "=!r,       f,         v,         wa,        m,         Z,

>> +	     Z,         wa,        ?r,        !r")

>> +	    (unspec:SF [

>> +	     (subreg:SI (ashiftrt:DI

>> +	       (match_operand:DI 1 "input_operand"

>> +	   "m,         m,         wY,        Z,         r,         f,

>> +	   wa,        r,         wa,        r")

>> +	  (const_int 32)) 0)]

>> +		   UNSPEC_SF_FROM_SI))

>> +  (clobber (match_scratch:DI 2

>> +	    "=X,        X,         X,         X,         X,         X,

>> +	    X,         r,         X,         X"))]

>> +  "TARGET_NO_SF_SUBREG

>> +   && (register_operand (operands[0], SFmode)

>> +       && register_operand (operands[1], DImode))"

> 

> If the insn condition requires operands 0 and 1 to be register_operands,

> it can ask for that in the predicates, instead: not nonimmediate_operand

> and input_operand, but just gpc_reg_operand instead.  You can leave out

> the impossible alternatives as well (0, 1, 2, 3, 4, 5, 6), leaving just

> 

> (define_insn_and_split "movsf_from_si2"

>    [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r,!r")

> 	(unspec:SF

> 	  [(subreg:SI (ashiftrt:DI

> 			(match_operand:DI 1 "input_operand" "r,wa,r")

> 			(const_int 32))

> 		      0)]

> 	  UNSPEC_SF_FROM_SI)))]

>    "TARGET_NO_SF_SUBREG"

>    "@

>     #

>     mfvsrwz %0,%x1

>     mr %0,%1"

> 

>    "&& !reload_completed

>     && vsx_reg_sfsubreg_ok (operands[0], SFmode)"

>    [(const_int 0)]

> {

>    rtx op0 = operands[0];

>    rtx op1 = operands[1];

>    rtx tmp = gen_reg_rtx (DImode);

> 

> You cannot call gen_reg_rtx too late in the pass pipeline.  What we

> usually do for such cases is put it as a match_scratch in the pattern,

> and then do code like

> 

>    if (GET_CODE (operands[2]) == SCRATCH)

>      operands[2] = gen_reg_rtx (DImode);

> 

> so that it will work both before and after reload.

> 

>    /* Avoid split {r155:SI#0=unspec[r133:DI>>0x20#0] 86;clobber scratch;} from PR42745.  */

> 

> (This line is too long, btw.)

> 

>    if (!SUBREG_P (operands[0]))

>      {

>        rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);

>        emit_insn (gen_anddi3 (tmp, op1, mask));

>        emit_insn (gen_p8_mtvsrd_sf (op0, tmp));

>        emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));

>        DONE;

>      }

>    else

>      FAIL;

> }

>    [(set_attr "length" "12,*,*")

>     (set_attr "type" "vecfloat,mffgpr,*")

>     (set_attr "isa" "p8v,p8v,*")])

> 

> I wonder what will happen if you actually do FAIL here...  There then is

> no insn alternative that can match, so we ICE?  In that case, just leave

> out the whole FAIL thing, it is useless?  You can do a gcc_assert if you

> want to check something.

> 

> Oh, and maybe you only want to handle GPRs here, not VSRs?  So just the

> "r", not the "wa" at all?  What code would it generate for vector regs?

> 

> Lots of questions, sorry!


Thanks for the nice suggestions of the initial patch contains many issues:),

For this case, %1:SF matches with "=wa"?  And how to construct cases to
match("=?r", "wa") and ("=!r", "r") combinations, please?

Removed lots of copy-paste from "movsf_from_si" and update the patch with 
your comments:


For extracting high part element from DImode register like:

{%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

split it before reload with "and mask" to avoid generating shift right
32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
PR67741, etc.

srdi 3,3,32
sldi 9,3,32
mtvsrd 1,9
xscvspdpn 1,1

=>

rldicr 3,3,0,31
mtvsrd 1,3
xscvspdpn 1,1

Bootstrap and regression tested pass on Power8-LE.

gcc/ChangeLog:

2020-07-07  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* config/rs6000/rs6000.md (movsf_from_si2): New
	define_insn_and_split.

gcc/testsuite/ChangeLog:

2020-07-07  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* gcc.target/powerpc/pr89310.c: New test.
---
 gcc/config/rs6000/rs6000.md                | 40 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 +++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4fcd6a94022..d4af19375aa 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7593,6 +7593,46 @@ (define_insn_and_split "movsf_from_si"
 	    "*,          *,         p9v,       p8v,       *,         *,
 	     p8v,        p8v,       p8v,       *")])
 
+;; For extracting high part element from DImode register like:
+;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
+;; split it before reload with "and mask" to avoid generating shift right
+;; 32 bit then shift left 32 bit.
+(define_insn_and_split "movsf_from_si2"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r,!r")
+	    (unspec:SF [
+	     (subreg:SI (ashiftrt:DI
+	       (match_operand:DI 1 "input_operand" "r,wa,r")
+	       (const_int 32))
+	      0)]
+	     UNSPEC_SF_FROM_SI))
+	    (clobber (match_scratch:DI 2 "=r,X,X"))]
+  "TARGET_NO_SF_SUBREG"
+  "@
+  #
+  mfvsrwz %0,%x1
+  mr %0,%1"
+
+  "&& !reload_completed
+   && vsx_reg_sfsubreg_ok (operands[0], SFmode)"
+  [(const_int 0)]
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+
+  if (GET_CODE (operands[2]) == SCRATCH)
+    op2 = gen_reg_rtx (DImode);
+
+  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
+  emit_insn (gen_anddi3 (op2, op1, mask));
+  emit_insn (gen_p8_mtvsrd_sf (op0, op2));
+  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+  DONE;
+}
+  [(set_attr "length" "12,*,*")
+  (set_attr "type" "vecfloat,mffgpr,*")
+  (set_attr "isa" "p8v,p8v,*")])
+
 
 ;; Move 64-bit binary/decimal floating point
 (define_expand "mov<mode>"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr89310.c b/gcc/testsuite/gcc.target/powerpc/pr89310.c
new file mode 100644
index 00000000000..15e78509246
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct s {
+  int i;
+  float f;
+};
+
+float
+foo (struct s arg)
+{
+  return arg.f;
+}
+
+/* { dg-final { scan-assembler-not {\msrdi\M} } } */
+/* { dg-final { scan-assembler-not {\msldi\M} } } */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */
-- 
2.21.0.777.g83232e3864
Segher Boessenkool July 7, 2020, 9:31 p.m. | #3
Hi!

On Tue, Jul 07, 2020 at 04:39:58PM +0800, luoxhu wrote:
> > Lots of questions, sorry!

> 

> Thanks for the nice suggestions of the initial patch contains many issues:),


Pretty much all of it should *work*, it just can be improved and
simplified quite a bit :-)

> For this case, %1:SF matches with "=wa"?  And how to construct cases to

> match("=?r", "wa") and ("=!r", "r") combinations, please?


operands[0], not operands[1]?

Simple testcases will not put the output into a GPR, unless you force
the compiler to do that, because of the ? and !.

Often you can just do

  asm("#" : "+r"(x));

to force "x" into a GPR at that point of the program.  But there is
nothing stopping the compiler from copying it back to a VSR where it
thinks that is cheaper ;-)

So maybe this pattern should just have the GPR-to-VSR alternative?  It
does not look like the GPR destination variants are useful?

> +  rtx op0 = operands[0];

> +  rtx op1 = operands[1];

> +  rtx op2 = operands[2];


(Please just write out operands[N] everywhere).

> +  if (GET_CODE (operands[2]) == SCRATCH)

> +    op2 = gen_reg_rtx (DImode);

> +

> +  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);

> +  emit_insn (gen_anddi3 (op2, op1, mask));


Groovy :-)

So, it looks like you can remove the ? and ! alternatives, leaving just
the first alternative?


Segher
Patrick Palka via Gcc-patches July 8, 2020, 3:19 a.m. | #4
On 2020/7/8 05:31, Segher Boessenkool wrote:
> Hi!

> 

> On Tue, Jul 07, 2020 at 04:39:58PM +0800, luoxhu wrote:

>>> Lots of questions, sorry!

>>

>> Thanks for the nice suggestions of the initial patch contains many issues:),

> 

> Pretty much all of it should *work*, it just can be improved and

> simplified quite a bit :-)

> 

>> For this case, %1:SF matches with "=wa"?  And how to construct cases to

>> match("=?r", "wa") and ("=!r", "r") combinations, please?

> 

> operands[0], not operands[1]?

> 

> Simple testcases will not put the output into a GPR, unless you force

> the compiler to do that, because of the ? and !.

> 

> Often you can just do

> 

>    asm("#" : "+r"(x));

> 

> to force "x" into a GPR at that point of the program.  But there is

> nothing stopping the compiler from copying it back to a VSR where it

> thinks that is cheaper ;-)

> 

> So maybe this pattern should just have the GPR-to-VSR alternative?  It

> does not look like the GPR destination variants are useful?

> 

>> +  rtx op0 = operands[0];

>> +  rtx op1 = operands[1];

>> +  rtx op2 = operands[2];

> 

> (Please just write out operands[N] everywhere).

> 

>> +  if (GET_CODE (operands[2]) == SCRATCH)

>> +    op2 = gen_reg_rtx (DImode);

>> +

>> +  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);

>> +  emit_insn (gen_anddi3 (op2, op1, mask));

> 

> Groovy :-)

> 

> So, it looks like you can remove the ? and ! alternatives, leaving just

> the first alternative?

> 


Thanks.

V3 Update: Leave only GPR-to-VSR alternative and use operands[N].
Bootstrap and regression tested pass on Power8-LE.


For extracting high part element from DImode register like:

{%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

split it before reload with "and mask" to avoid generating shift right
32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
PR67741, etc.

srdi 3,3,32
sldi 9,3,32
mtvsrd 1,9
xscvspdpn 1,1

=>

rldicr 3,3,0,31
mtvsrd 1,3
xscvspdpn 1,1

gcc/ChangeLog:

2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* config/rs6000/rs6000.md (movsf_from_si2): New
	define_insn_and_split.

gcc/testsuite/ChangeLog:

2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* gcc.target/powerpc/pr89310.c: New test.
---
 gcc/config/rs6000/rs6000.md                | 34 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 +++++++++++
 2 files changed, 51 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4fcd6a94022..02c5171378c 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7593,6 +7593,40 @@ (define_insn_and_split "movsf_from_si"
 	    "*,          *,         p9v,       p8v,       *,         *,
 	     p8v,        p8v,       p8v,       *")])
 
+;; For extracting high part element from DImode register like:
+;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
+;; split it before reload with "and mask" to avoid generating shift right
+;; 32 bit then shift left 32 bit.
+(define_insn_and_split "movsf_from_si2"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
+	    (unspec:SF [
+	     (subreg:SI (ashiftrt:DI
+	       (match_operand:DI 1 "input_operand" "r")
+	       (const_int 32))
+	      0)]
+	     UNSPEC_SF_FROM_SI))
+	    (clobber (match_scratch:DI 2 "=r"))]
+  "TARGET_NO_SF_SUBREG"
+  "@
+  #"
+
+  "&& !reload_completed
+   && vsx_reg_sfsubreg_ok (operands[0], SFmode)"
+  [(const_int 0)]
+{
+  if (GET_CODE (operands[2]) == SCRATCH)
+    operands[2] = gen_reg_rtx (DImode);
+
+  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
+  emit_insn (gen_anddi3 (operands[2], operands[1], mask));
+  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));
+  emit_insn (gen_vsx_xscvspdpn_directmove (operands[0], operands[0]));
+  DONE;
+}
+  [(set_attr "length" "12")
+  (set_attr "type" "vecfloat")
+  (set_attr "isa" "p8v")])
+
 
 ;; Move 64-bit binary/decimal floating point
 (define_expand "mov<mode>"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr89310.c b/gcc/testsuite/gcc.target/powerpc/pr89310.c
new file mode 100644
index 00000000000..15e78509246
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct s {
+  int i;
+  float f;
+};
+
+float
+foo (struct s arg)
+{
+  return arg.f;
+}
+
+/* { dg-final { scan-assembler-not {\msrdi\M} } } */
+/* { dg-final { scan-assembler-not {\msldi\M} } } */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */
-- 
2.21.0.777.g83232e3864
Segher Boessenkool July 8, 2020, 10:43 p.m. | #5
Hi!

On Wed, Jul 08, 2020 at 11:19:21AM +0800, luoxhu wrote:
> For extracting high part element from DImode register like:

> 

> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

> 

> split it before reload with "and mask" to avoid generating shift right

> 32 bit then shift left 32 bit.  This pattern also exists in PR42475 and

> PR67741, etc.


> 2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>

> 

> 	PR rtl-optimization/89310

> 	* config/rs6000/rs6000.md (movsf_from_si2): New

> 	define_insn_and_split.

> 

> gcc/testsuite/ChangeLog:

> 

> 2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>

> 

> 	PR rtl-optimization/89310

> 	* gcc.target/powerpc/pr89310.c: New test.


> +;; For extracting high part element from DImode register like:

> +;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

> +;; split it before reload with "and mask" to avoid generating shift right

> +;; 32 bit then shift left 32 bit.

> +(define_insn_and_split "movsf_from_si2"

> +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")

> +	    (unspec:SF [

> +	     (subreg:SI (ashiftrt:DI


Put the  (ashiftrt:DI  on a separate line as well?  With indent changes,
etc.

> +	       (match_operand:DI 1 "input_operand" "r")

> +	       (const_int 32))

> +	      0)]

> +	     UNSPEC_SF_FROM_SI))

> +	    (clobber (match_scratch:DI 2 "=r"))]

> +  "TARGET_NO_SF_SUBREG"

> +  "@

> +  #"


@ with only one alternative doesn't do anything; so just write
  "#"
please.

> +


And no empty line here.

> +  "&& !reload_completed


Why this?  Why will this not work after reload?  In the very few cases
where you do need this, you usually also need to check for
lra_in_progress.

> +   && vsx_reg_sfsubreg_ok (operands[0], SFmode)"

> +  [(const_int 0)]

> +{

> +  if (GET_CODE (operands[2]) == SCRATCH)

> +    operands[2] = gen_reg_rtx (DImode);

> +

> +  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);


The mask should be different for QI and HI.

> +  emit_insn (gen_anddi3 (operands[2], operands[1], mask));

> +  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));

> +  emit_insn (gen_vsx_xscvspdpn_directmove (operands[0], operands[0]));

> +  DONE;

> +}

> +  [(set_attr "length" "12")

> +  (set_attr "type" "vecfloat")

> +  (set_attr "isa" "p8v")])

> +


No extra whiteline please.


Maybe change it back to just SI?  It won't match often at all for QI or
HI anyway, it seems.  Sorry for that detour.  Should be good with the
above nits fixed :-)


Segher
Patrick Palka via Gcc-patches July 9, 2020, 3:09 a.m. | #6
On 2020/7/9 06:43, Segher Boessenkool wrote:
> Hi!

> 

> On Wed, Jul 08, 2020 at 11:19:21AM +0800, luoxhu wrote:

>> For extracting high part element from DImode register like:

>>

>> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

>>

>> split it before reload with "and mask" to avoid generating shift right

>> 32 bit then shift left 32 bit.  This pattern also exists in PR42475 and

>> PR67741, etc.

> 

>> 2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>

>>

>> 	PR rtl-optimization/89310

>> 	* config/rs6000/rs6000.md (movsf_from_si2): New

>> 	define_insn_and_split.

>>

>> gcc/testsuite/ChangeLog:

>>

>> 2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>

>>

>> 	PR rtl-optimization/89310

>> 	* gcc.target/powerpc/pr89310.c: New test.

> 

>> +;; For extracting high part element from DImode register like:

>> +;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

>> +;; split it before reload with "and mask" to avoid generating shift right

>> +;; 32 bit then shift left 32 bit.

>> +(define_insn_and_split "movsf_from_si2"

>> +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")

>> +	    (unspec:SF [

>> +	     (subreg:SI (ashiftrt:DI

> 

> Put the  (ashiftrt:DI  on a separate line as well?  With indent changes,

> etc.

> 

>> +	       (match_operand:DI 1 "input_operand" "r")

>> +	       (const_int 32))

>> +	      0)]

>> +	     UNSPEC_SF_FROM_SI))

>> +	    (clobber (match_scratch:DI 2 "=r"))]

>> +  "TARGET_NO_SF_SUBREG"

>> +  "@

>> +  #"

> 

> @ with only one alternative doesn't do anything; so just write

>    "#"

> please.

> 

>> +

> 

> And no empty line here.

> 

>> +  "&& !reload_completed

> 

> Why this?  Why will this not work after reload?  In the very few cases

> where you do need this, you usually also need to check for

> lra_in_progress.

> 

>> +   && vsx_reg_sfsubreg_ok (operands[0], SFmode)"

>> +  [(const_int 0)]

>> +{

>> +  if (GET_CODE (operands[2]) == SCRATCH)

>> +    operands[2] = gen_reg_rtx (DImode);

>> +

>> +  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);

> 

> The mask should be different for QI and HI.

> 

>> +  emit_insn (gen_anddi3 (operands[2], operands[1], mask));

>> +  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));

>> +  emit_insn (gen_vsx_xscvspdpn_directmove (operands[0], operands[0]));

>> +  DONE;

>> +}

>> +  [(set_attr "length" "12")

>> +  (set_attr "type" "vecfloat")

>> +  (set_attr "isa" "p8v")])

>> +

> 

> No extra whiteline please.

> 

> 

> Maybe change it back to just SI?  It won't match often at all for QI or

> HI anyway, it seems.  Sorry for that detour.  Should be good with the

> above nits fixed :-)


OK, if I see correctly, subreg of DImode should be SImode and I used
subreg:SI to match only SI, so no need to consider mask for QI and HI? :)

Others are updated: removed reload_completed and adjust format. 


For extracting high part element from DImode register like:

{%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

split it before reload with "and mask" to avoid generating shift right
32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
PR67741, etc.

srdi 3,3,32
sldi 9,3,32
mtvsrd 1,9
xscvspdpn 1,1

=>

rldicr 3,3,0,31
mtvsrd 1,3
xscvspdpn 1,1

Bootstrap and regression tested pass on Power8-LE.

gcc/ChangeLog:

2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* config/rs6000/rs6000.md (movsf_from_si2): New
	define_insn_and_split.

gcc/testsuite/ChangeLog:

2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* gcc.target/powerpc/pr89310.c: New test.
---
 gcc/config/rs6000/rs6000.md                | 31 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 ++++++++++++
 2 files changed, 48 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4fcd6a94022..a493dfd4596 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7593,6 +7593,37 @@ (define_insn_and_split "movsf_from_si"
 	    "*,          *,         p9v,       p8v,       *,         *,
 	     p8v,        p8v,       p8v,       *")])
 
+;; For extracting high part element from DImode register like:
+;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
+;; split it before reload with "and mask" to avoid generating shift right
+;; 32 bit then shift left 32 bit.
+(define_insn_and_split "movsf_from_si2"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
+	    (unspec:SF [
+	     (subreg:SI (
+		 ashiftrt:DI (
+		   match_operand:DI 1 "input_operand" "r")
+		 (const_int 32))
+	      0)]
+	     UNSPEC_SF_FROM_SI))
+	    (clobber (match_scratch:DI 2 "=r"))]
+  "TARGET_NO_SF_SUBREG"
+  "#"
+  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"
+  [(const_int 0)]
+{
+  if (GET_CODE (operands[2]) == SCRATCH)
+    operands[2] = gen_reg_rtx (DImode);
+
+  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
+  emit_insn (gen_anddi3 (operands[2], operands[1], mask));
+  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));
+  emit_insn (gen_vsx_xscvspdpn_directmove (operands[0], operands[0]));
+  DONE;
+}
+  [(set_attr "length" "12")
+  (set_attr "type" "vecfloat")
+  (set_attr "isa" "p8v")])
 
 ;; Move 64-bit binary/decimal floating point
 (define_expand "mov<mode>"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr89310.c b/gcc/testsuite/gcc.target/powerpc/pr89310.c
new file mode 100644
index 00000000000..15e78509246
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct s {
+  int i;
+  float f;
+};
+
+float
+foo (struct s arg)
+{
+  return arg.f;
+}
+
+/* { dg-final { scan-assembler-not {\msrdi\M} } } */
+/* { dg-final { scan-assembler-not {\msldi\M} } } */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */
-- 
2.21.0.777.g83232e3864
Segher Boessenkool July 9, 2020, 7:25 p.m. | #7
Hi!

On Thu, Jul 09, 2020 at 11:09:42AM +0800, luoxhu wrote:
> > Maybe change it back to just SI?  It won't match often at all for QI or

> > HI anyway, it seems.  Sorry for that detour.  Should be good with the

> > above nits fixed :-)

> 

> OK, if I see correctly, subreg of DImode should be SImode and I used

> subreg:SI to match only SI, so no need to consider mask for QI and HI? :)


My problem with it was that the shift amounts won't be 32 for QI etc.?

> +;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

> +;; split it before reload with "and mask" to avoid generating shift right

> +;; 32 bit then shift left 32 bit.

> +(define_insn_and_split "movsf_from_si2"

> +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")

> +	    (unspec:SF [

> +	     (subreg:SI (

> +		 ashiftrt:DI (


Opening parens *start* a line, they never end it.  So:

(define_insn_and_split "movsf_from_si2"
  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
	  (unspec:SF
	    [(subreg:SI
	       (ashiftrt:DI
		 (match_operand:DI 1 "input_operand" "r")
		 (const_int 32))
	      0)]
	    UNSPEC_SF_FROM_SI))
   (clobber (match_scratch:DI 2 "=r"))]

or something like that.  There aren't really any real rules...  The
important points are that nested things should be indented, and things
at the same level should have the same indent (like the outer set and
clobber).  The [ for an unspec is sometimes put at the end of a line,
that reads a little better perhaps.

> +  "TARGET_NO_SF_SUBREG"

> +  "#"

> +  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"


Put this in the insn condition?  And since this is just a predicate,
you can just use it instead of gpc_reg_operand.

(The split condition becomes "&& 1" then, not "").


Segher
Patrick Palka via Gcc-patches July 10, 2020, 1:39 a.m. | #8
Hi,

On 2020/7/10 03:25, Segher Boessenkool wrote:
> Hi!

> 

> On Thu, Jul 09, 2020 at 11:09:42AM +0800, luoxhu wrote:

>>> Maybe change it back to just SI?  It won't match often at all for QI or

>>> HI anyway, it seems.  Sorry for that detour.  Should be good with the

>>> above nits fixed :-)

>>

>> OK, if I see correctly, subreg of DImode should be SImode and I used

>> subreg:SI to match only SI, so no need to consider mask for QI and HI? :)

> 

> My problem with it was that the shift amounts won't be 32 for QI etc.?


But if the subreg:SI only accepts SImode comes in, why do we need handle the
shift amounts for QI/HI? It not using QHSI here...

> 

>> +;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

>> +;; split it before reload with "and mask" to avoid generating shift right

>> +;; 32 bit then shift left 32 bit.

>> +(define_insn_and_split "movsf_from_si2"

>> +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")

>> +	    (unspec:SF [

>> +	     (subreg:SI (

>> +		 ashiftrt:DI (

> 

> Opening parens *start* a line, they never end it.  So:

> 

> (define_insn_and_split "movsf_from_si2"

>    [(set (match_operand:SF 0 "vsx_reg_sfsubreg_ok" "=wa")

> 	  (unspec:SF

> 	    [(subreg:SI

> 	       (ashiftrt:DI

> 		 (match_operand:DI 1 "input_operand" "r")

> 		 (const_int 32))

> 	      0)]

> 	    UNSPEC_SF_FROM_SI))

>     (clobber (match_scratch:DI 2 "=r"))]

> 

> or something like that.  There aren't really any real rules...  The

> important points are that nested things should be indented, and things

> at the same level should have the same indent (like the outer set and

> clobber).  The [ for an unspec is sometimes put at the end of a line,

> that reads a little better perhaps.


OK, seems the md file needs a format tool too...

> 

>> +  "TARGET_NO_SF_SUBREG"

>> +  "#"

>> +  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"

> 

> Put this in the insn condition?  And since this is just a predicate,

> you can just use it instead of gpc_reg_operand.

> 

> (The split condition becomes "&& 1" then, not "").


OK, this seems a bit strange as movsi_from_sf, movsf_from_si and
movdi_from_sf_zero_ext all use it as condition...

And why vsx_reg_sfsubreg_ok allows "SF SUBREGS" and TARGET_NO_SF_SUBREG
"avoid (SUBREG:SF (REG:SI)", I guess they are not the same meaning? (The 
TARGET_NO_SF_SUBREG is also copied from other similar defines.)  Thanks.


Updated patch and correct the title:


Define movsf_from_si2 to extract high part SF element from DImode[PR89310]


For extracting high part element from DImode register like:

{%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

split it before reload with "and mask" to avoid generating shift right
32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
PR67741, etc.

srdi 3,3,32
sldi 9,3,32
mtvsrd 1,9
xscvspdpn 1,1

=>

rldicr 3,3,0,31
mtvsrd 1,3
xscvspdpn 1,1

Bootstrap and regression tested pass on Power8-LE.

gcc/ChangeLog:

2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* config/rs6000/rs6000.md (movsf_from_si2): New
	define_insn_and_split.

gcc/testsuite/ChangeLog:

2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* gcc.target/powerpc/pr89310.c: New test.
---
 gcc/config/rs6000/rs6000.md                | 32 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 ++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4fcd6a94022..2331c84dcbd 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7593,6 +7593,38 @@ (define_insn_and_split "movsf_from_si"
 	    "*,          *,         p9v,       p8v,       *,         *,
 	     p8v,        p8v,       p8v,       *")])
 
+;; For extracting high part element from DImode register like:
+;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
+;; split it before reload with "and mask" to avoid generating shift right
+;; 32 bit then shift left 32 bit.
+(define_insn_and_split "movsf_from_si2"
+  [(set (match_operand:SF 0 "vsx_reg_sfsubreg_ok" "=wa")
+	    (unspec:SF
+	     [(subreg:SI
+	       (ashiftrt:DI
+		(match_operand:DI 1 "input_operand" "r")
+		(const_int 32))
+	       0)]
+	     UNSPEC_SF_FROM_SI))
+  (clobber (match_scratch:DI 2 "=r"))]
+  "TARGET_NO_SF_SUBREG"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (GET_CODE (operands[2]) == SCRATCH)
+    operands[2] = gen_reg_rtx (DImode);
+
+  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
+  emit_insn (gen_anddi3 (operands[2], operands[1], mask));
+  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));
+  emit_insn (gen_vsx_xscvspdpn_directmove (operands[0], operands[0]));
+  DONE;
+}
+  [(set_attr "length" "12")
+  (set_attr "type" "vecfloat")
+  (set_attr "isa" "p8v")])
 
 ;; Move 64-bit binary/decimal floating point
 (define_expand "mov<mode>"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr89310.c b/gcc/testsuite/gcc.target/powerpc/pr89310.c
new file mode 100644
index 00000000000..15e78509246
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct s {
+  int i;
+  float f;
+};
+
+float
+foo (struct s arg)
+{
+  return arg.f;
+}
+
+/* { dg-final { scan-assembler-not {\msrdi\M} } } */
+/* { dg-final { scan-assembler-not {\msldi\M} } } */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */
-- 
2.21.0.777.g83232e3864
Patrick Palka via Gcc-patches July 10, 2020, 7:41 a.m. | #9
On 2020/7/10 03:25, Segher Boessenkool wrote:
> 

>> +  "TARGET_NO_SF_SUBREG"

>> +  "#"

>> +  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"

> 

> Put this in the insn condition?  And since this is just a predicate,

> you can just use it instead of gpc_reg_operand.

> 

> (The split condition becomes "&& 1" then, not "").

> 


+(define_insn_and_split "movsf_from_si2"
+  [(set (match_operand:SF 0 "vsx_reg_sfsubreg_ok" "=wa")
+	    (unspec:SF
+	     [(subreg:SI
+	       (ashiftrt:DI
+		(match_operand:DI 1 "input_operand" "r")
+		(const_int 32))
+	       0)]
+	     UNSPEC_SF_FROM_SI))
+  (clobber (match_scratch:DI 2 "=r"))]
+  "TARGET_NO_SF_SUBREG"
+  "#"
+  "&& 1"
+  [(const_int 0)]

This change is invalid as it will cause an ICE in PR42475.c, reason is:
insn #29 will be combined to insn #40, though split1 is success, but it 
will cause ICE in sched1 as the op[0] is not SFmode.  Without this, #29
won't be combined to #40 as the gpc_reg_operand (operands[0], E_SFmode)
will cause the match fail for subreg:SF (reg:SI 155 [ _4 ]) 0).


pr42475.c.268r.combine:
Trying 29 -> 40:
   29: {r120:SF=unspec[r133:DI>>0x20#0] 86;clobber scratch;}
   40: r155:SI#0=r120:SF
      REG_DEAD r120:SF
Successfully matched this instruction:
(set (subreg:SF (reg:SI 155 [ _4 ]) 0)
    (unspec:SF [
            (subreg:SI (ashiftrt:DI (reg:DI 133)
                    (const_int 32 [0x20])) 0)
        ] UNSPEC_SF_FROM_SI))
allowing combination of insns 29 and 40
original costs 12 + 4 = 16
replacement cost 12
deferring deletion of insn with uid = 29.
modifying insn i3    40: {r155:SI#0=unspec[r133:DI>>0x20#0] 86;clobber scratch;}
      REG_DEAD r133:DI
deferring rescan insn with uid = 40.


pr42475.c.273r.split1:
69: r172:DI=r133:DI&0xffffffff00000000
70: r155:SI#0=unspec[r172:DI] 62
71: r155:SI#0=unspec[r155:SI#0] 103
41: NOTE_INSN_DELETED
42: r157:DI=r155:SI#0<<0x20


pr42475.c.280r.sched1:
pr42475.c: In function ‘bar’:
pr42475.c:20:1: error: unrecognizable insn:
   20 | }
      | ^
(insn 71 70 41 2 (set (subreg:SF (reg:SI 155 [ _4 ]) 0)
        (unspec:SF [
                (subreg:SF (reg:SI 155 [ _4 ]) 0)
            ] UNSPEC_VSX_CVSPDPN)) -1
     (nil))
during RTL pass: sched1
dump file: pr42475.c.280r.sched1
pr42475.c:20:1: internal compiler error: in extract_insn, at recog.c:2294
Please submit a full bug report,
with preprocessed source if appropriate.
See <https://gcc.gnu.org/bugs/> for instructions.


VS not using vsx_reg_sfsubreg_ok as condition:


pr42475.c.268r.combine:
Trying 29 -> 40:
   29: {r120:SF=unspec[r133:DI>>0x20#0] 86;clobber scratch;}
   40: r155:SI#0=r120:SF
      REG_DEAD r120:SF
Failed to match this instruction:
(set (subreg:SF (reg:SI 155 [ _4 ]) 0)
    (unspec:SF [
            (subreg:SI (ashiftrt:DI (reg:DI 133)
                    (const_int 32 [0x20])) 0)
        ] UNSPEC_SF_FROM_SI))


273r.split1:
69: r172:DI=r133:DI&0xffffffff00000000
70: r120:SF=unspec[r172:DI] 62
71: r120:SF=unspec[r120:SF] 103
40: r155:SI#0=r120:SF
	REG_DEAD r120:SF
Segher Boessenkool July 11, 2020, 12:54 a.m. | #10
Hi!

On Fri, Jul 10, 2020 at 09:39:40AM +0800, luoxhu wrote:
> OK, seems the md file needs a format tool too...


Heh.  Just make sure it looks good (that is, does what it looks like),
looks like the rest, etc.  It's hard to do anything nice with unspecs,
[ ] lists do not format well.

> >> +  "TARGET_NO_SF_SUBREG"

> >> +  "#"

> >> +  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"

> > 

> > Put this in the insn condition?  And since this is just a predicate,

> > you can just use it instead of gpc_reg_operand.

> > 

> > (The split condition becomes "&& 1" then, not "").

> 

> OK, this seems a bit strange as movsi_from_sf, movsf_from_si and

> movdi_from_sf_zero_ext all use it as condition...


Since in your case you *always* split, the split condition should be
"always".  There are no alternatives that do not split here.

> And why vsx_reg_sfsubreg_ok allows "SF SUBREGS" and TARGET_NO_SF_SUBREG

> "avoid (SUBREG:SF (REG:SI)", I guess they are not the same meaning? (The 

> TARGET_NO_SF_SUBREG is also copied from other similar defines.)  Thanks.


Good question.  I do not know.

Well...  Since this define_insn* requires p8 *anyway*, we do not need
any of these sf_subreg things?  We always know for each one if it should
be true or false.

> +  "TARGET_NO_SF_SUBREG"


But here we should require p8 some other way, then.

> +  (set_attr "isa" "p8v")])


(This isn't enough, unfortunately).


Segher
Patrick Palka via Gcc-patches July 13, 2020, 6:30 a.m. | #11
Hi,

On 2020/7/11 08:54, Segher Boessenkool wrote:
> Hi!

> 

> On Fri, Jul 10, 2020 at 09:39:40AM +0800, luoxhu wrote:

>> OK, seems the md file needs a format tool too...

> 

> Heh.  Just make sure it looks good (that is, does what it looks like),

> looks like the rest, etc.  It's hard to do anything nice with unspecs,

> [ ] lists do not format well.

> 

>>>> +  "TARGET_NO_SF_SUBREG"

>>>> +  "#"

>>>> +  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"

>>>

>>> Put this in the insn condition?  And since this is just a predicate,

>>> you can just use it instead of gpc_reg_operand.

>>>

>>> (The split condition becomes "&& 1" then, not "").

>>

>> OK, this seems a bit strange as movsi_from_sf, movsf_from_si and

>> movdi_from_sf_zero_ext all use it as condition...

> 

> Since in your case you *always* split, the split condition should be

> "always".  There are no alternatives that do not split here.

> 

>> And why vsx_reg_sfsubreg_ok allows "SF SUBREGS" and TARGET_NO_SF_SUBREG

>> "avoid (SUBREG:SF (REG:SI)", I guess they are not the same meaning? (The

>> TARGET_NO_SF_SUBREG is also copied from other similar defines.)  Thanks.

> 

> Good question.  I do not know.

> 

> Well...  Since this define_insn* requires p8 *anyway*, we do not need

> any of these sf_subreg things?  We always know for each one if it should

> be true or false.


Yes, removed the vsx_reg_sfsubreg_ok check.

> 

>> +  "TARGET_NO_SF_SUBREG"

> 

> But here we should require p8 some other way, then.


TARGET_NO_SF_SUBREG is defined to TARGET_DIRECT_MOVE_64BIT, and
TARGET_DIRECT_MOVE_64BIT is TARGET_DIRECT_MOVE && TARGET_P8_VECTOR && TARGET_POWERPC64
which means TARGET_P8_VECTOR must be true for TARGET_NO_SF_SUBREG.

> 

>> +  (set_attr "isa" "p8v")])

> 

> (This isn't enough, unfortunately).

> 



Updated patch to removed the vsx_reg_sfsubreg_ok and ICE fix:


For extracting high part element from DImode register like:

{%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

split it before reload with "and mask" to avoid generating shift right
32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
PR67741, etc.

srdi 3,3,32
sldi 9,3,32
mtvsrd 1,9
xscvspdpn 1,1

=>

rldicr 3,3,0,31
mtvsrd 1,3
xscvspdpn 1,1

Bootstrap and regression tested pass on Power8-LE.

gcc/ChangeLog:

2020-07-13  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* config/rs6000/rs6000.md (movsf_from_si2): New
	define_insn_and_split.

gcc/testsuite/ChangeLog:

2020-07-13  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* gcc.target/powerpc/pr89310.c: New test.
---
 gcc/config/rs6000/rs6000.md                | 31 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 ++++++++++++
 2 files changed, 48 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4fcd6a94022..480385ed4d2 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7593,6 +7593,37 @@ (define_insn_and_split "movsf_from_si"
 	    "*,          *,         p9v,       p8v,       *,         *,
 	     p8v,        p8v,       p8v,       *")])
 
+;; For extracting high part element from DImode register like:
+;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
+;; split it before reload with "and mask" to avoid generating shift right
+;; 32 bit then shift left 32 bit.
+(define_insn_and_split "movsf_from_si2"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
+	    (unspec:SF
+	     [(subreg:SI
+	       (ashiftrt:DI
+		(match_operand:DI 1 "input_operand" "r")
+		(const_int 32))
+	       0)]
+	     UNSPEC_SF_FROM_SI))
+  (clobber (match_scratch:DI 2 "=r"))]
+  "TARGET_NO_SF_SUBREG"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (GET_CODE (operands[2]) == SCRATCH)
+    operands[2] = gen_reg_rtx (DImode);
+
+  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
+  emit_insn (gen_anddi3 (operands[2], operands[1], mask));
+  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));
+  emit_insn (gen_vsx_xscvspdpn_directmove (operands[0], operands[0]));
+  DONE;
+}
+  [(set_attr "length" "12")
+  (set_attr "type" "vecfloat")
+  (set_attr "isa" "p8v")])
 
 ;; Move 64-bit binary/decimal floating point
 (define_expand "mov<mode>"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr89310.c b/gcc/testsuite/gcc.target/powerpc/pr89310.c
new file mode 100644
index 00000000000..15e78509246
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct s {
+  int i;
+  float f;
+};
+
+float
+foo (struct s arg)
+{
+  return arg.f;
+}
+
+/* { dg-final { scan-assembler-not {\msrdi\M} } } */
+/* { dg-final { scan-assembler-not {\msldi\M} } } */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */
-- 
2.21.0.777.g83232e3864
Patrick Palka via Gcc-patches July 14, 2020, 2:17 p.m. | #12
Unfortunately this patch is eliciting a number of new testsuite
failures, all like

error: unrecognizable insn:
(insn 44 43 45 5 (parallel [
            (set (reg:SI 199)
                (unspec:SI [
                        (reg:SF 202)
                    ] UNSPEC_SI_FROM_SF))
            (clobber (scratch:V4SF))
        ]) "/nasfarm/edelsohn/src/src/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c":70:1
-1
     (nil))
during RTL pass: vregs

for

gcc.dg/vect/vect-alias-check-11.c
gcc.dg/vect/vect-alias-check-12.c
gcc.dg/vect/pr57741-2.c
gcc.dg/vect/pr57741-3.c
gcc.dg/vect/pr89440.c
gcc.target/powerpc/sse-movss-1.c

Thanks, David

On Mon, Jul 13, 2020 at 2:30 AM luoxhu <luoxhu@linux.ibm.com> wrote:
>

> Hi,

>

> On 2020/7/11 08:54, Segher Boessenkool wrote:

> > Hi!

> >

> > On Fri, Jul 10, 2020 at 09:39:40AM +0800, luoxhu wrote:

> >> OK, seems the md file needs a format tool too...

> >

> > Heh.  Just make sure it looks good (that is, does what it looks like),

> > looks like the rest, etc.  It's hard to do anything nice with unspecs,

> > [ ] lists do not format well.

> >

> >>>> +  "TARGET_NO_SF_SUBREG"

> >>>> +  "#"

> >>>> +  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"

> >>>

> >>> Put this in the insn condition?  And since this is just a predicate,

> >>> you can just use it instead of gpc_reg_operand.

> >>>

> >>> (The split condition becomes "&& 1" then, not "").

> >>

> >> OK, this seems a bit strange as movsi_from_sf, movsf_from_si and

> >> movdi_from_sf_zero_ext all use it as condition...

> >

> > Since in your case you *always* split, the split condition should be

> > "always".  There are no alternatives that do not split here.

> >

> >> And why vsx_reg_sfsubreg_ok allows "SF SUBREGS" and TARGET_NO_SF_SUBREG

> >> "avoid (SUBREG:SF (REG:SI)", I guess they are not the same meaning? (The

> >> TARGET_NO_SF_SUBREG is also copied from other similar defines.)  Thanks.

> >

> > Good question.  I do not know.

> >

> > Well...  Since this define_insn* requires p8 *anyway*, we do not need

> > any of these sf_subreg things?  We always know for each one if it should

> > be true or false.

>

> Yes, removed the vsx_reg_sfsubreg_ok check.

>

> >

> >> +  "TARGET_NO_SF_SUBREG"

> >

> > But here we should require p8 some other way, then.

>

> TARGET_NO_SF_SUBREG is defined to TARGET_DIRECT_MOVE_64BIT, and

> TARGET_DIRECT_MOVE_64BIT is TARGET_DIRECT_MOVE && TARGET_P8_VECTOR && TARGET_POWERPC64

> which means TARGET_P8_VECTOR must be true for TARGET_NO_SF_SUBREG.

>

> >

> >> +  (set_attr "isa" "p8v")])

> >

> > (This isn't enough, unfortunately).

> >

>

>

> Updated patch to removed the vsx_reg_sfsubreg_ok and ICE fix:

>

>

> For extracting high part element from DImode register like:

>

> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

>

> split it before reload with "and mask" to avoid generating shift right

> 32 bit then shift left 32 bit.  This pattern also exists in PR42475 and

> PR67741, etc.

>

> srdi 3,3,32

> sldi 9,3,32

> mtvsrd 1,9

> xscvspdpn 1,1

>

> =>

>

> rldicr 3,3,0,31

> mtvsrd 1,3

> xscvspdpn 1,1

>

> Bootstrap and regression tested pass on Power8-LE.

>

> gcc/ChangeLog:

>

> 2020-07-13  Xionghu Luo  <luoxhu@linux.ibm.com>

>

>         PR rtl-optimization/89310

>         * config/rs6000/rs6000.md (movsf_from_si2): New

>         define_insn_and_split.

>

> gcc/testsuite/ChangeLog:

>

> 2020-07-13  Xionghu Luo  <luoxhu@linux.ibm.com>

>

>         PR rtl-optimization/89310

>         * gcc.target/powerpc/pr89310.c: New test.

> ---

>  gcc/config/rs6000/rs6000.md                | 31 ++++++++++++++++++++++

>  gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 ++++++++++++

>  2 files changed, 48 insertions(+)

>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c

>

> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md

> index 4fcd6a94022..480385ed4d2 100644

> --- a/gcc/config/rs6000/rs6000.md

> +++ b/gcc/config/rs6000/rs6000.md

> @@ -7593,6 +7593,37 @@ (define_insn_and_split "movsf_from_si"

>             "*,          *,         p9v,       p8v,       *,         *,

>              p8v,        p8v,       p8v,       *")])

>

> +;; For extracting high part element from DImode register like:

> +;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

> +;; split it before reload with "and mask" to avoid generating shift right

> +;; 32 bit then shift left 32 bit.

> +(define_insn_and_split "movsf_from_si2"

> +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")

> +           (unspec:SF

> +            [(subreg:SI

> +              (ashiftrt:DI

> +               (match_operand:DI 1 "input_operand" "r")

> +               (const_int 32))

> +              0)]

> +            UNSPEC_SF_FROM_SI))

> +  (clobber (match_scratch:DI 2 "=r"))]

> +  "TARGET_NO_SF_SUBREG"

> +  "#"

> +  "&& 1"

> +  [(const_int 0)]

> +{

> +  if (GET_CODE (operands[2]) == SCRATCH)

> +    operands[2] = gen_reg_rtx (DImode);

> +

> +  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);

> +  emit_insn (gen_anddi3 (operands[2], operands[1], mask));

> +  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));

> +  emit_insn (gen_vsx_xscvspdpn_directmove (operands[0], operands[0]));

> +  DONE;

> +}

> +  [(set_attr "length" "12")

> +  (set_attr "type" "vecfloat")

> +  (set_attr "isa" "p8v")])

>

>  ;; Move 64-bit binary/decimal floating point

>  (define_expand "mov<mode>"

> diff --git a/gcc/testsuite/gcc.target/powerpc/pr89310.c b/gcc/testsuite/gcc.target/powerpc/pr89310.c

> new file mode 100644

> index 00000000000..15e78509246

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c

> @@ -0,0 +1,17 @@

> +/* { dg-do compile } */

> +/* { dg-options "-O2" } */

> +

> +struct s {

> +  int i;

> +  float f;

> +};

> +

> +float

> +foo (struct s arg)

> +{

> +  return arg.f;

> +}

> +

> +/* { dg-final { scan-assembler-not {\msrdi\M} } } */

> +/* { dg-final { scan-assembler-not {\msldi\M} } } */

> +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */

> --

> 2.21.0.777.g83232e3864

>

>
Patrick Palka via Gcc-patches July 15, 2020, 3:47 a.m. | #13
Hi David,

On 2020/7/14 22:17, David Edelsohn wrote:
> Unfortunately this patch is eliciting a number of new testsuite

> failures, all like

> 

> error: unrecognizable insn:

> (insn 44 43 45 5 (parallel [

>              (set (reg:SI 199)

>                  (unspec:SI [

>                          (reg:SF 202)

>                      ] UNSPEC_SI_FROM_SF))

>              (clobber (scratch:V4SF))

>          ]) "/nasfarm/edelsohn/src/src/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c":70:1

> -1

>       (nil))

> during RTL pass: vregs

> 

> for

> 

> gcc.dg/vect/vect-alias-check-11.c

> gcc.dg/vect/vect-alias-check-12.c

> gcc.dg/vect/pr57741-2.c

> gcc.dg/vect/pr57741-3.c

> gcc.dg/vect/pr89440.c

> gcc.target/powerpc/sse-movss-1.c


This patch won't match the instruction with a "clobber (scratch:V4SF)",
it only matches "(clobber (match_scratch:DI 2 "=r"))", I guess you are
replying to the other patch?

"[PATCH 2/2] rs6000: Define define_insn_and_split to split unspec sldi+or to rldimi"

Thanks for your fix patch! :)

This patch's regression tested pass on Power8-LE, I re-run these cases on
Power8-LE, and confirmed these could pass, what is your platform please?
BTW, TARGET_NO_SF_SUBREG ensured TARGET_POWERPC64 for this define_insn_and_split.
Thanks.

Xionghu

> 

> Thanks, David

> 

> On Mon, Jul 13, 2020 at 2:30 AM luoxhu <luoxhu@linux.ibm.com> wrote:

>>

>> Hi,

>>

>> On 2020/7/11 08:54, Segher Boessenkool wrote:

>>> Hi!

>>>

>>> On Fri, Jul 10, 2020 at 09:39:40AM +0800, luoxhu wrote:

>>>> OK, seems the md file needs a format tool too...

>>>

>>> Heh.  Just make sure it looks good (that is, does what it looks like),

>>> looks like the rest, etc.  It's hard to do anything nice with unspecs,

>>> [ ] lists do not format well.

>>>

>>>>>> +  "TARGET_NO_SF_SUBREG"

>>>>>> +  "#"

>>>>>> +  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"

>>>>>

>>>>> Put this in the insn condition?  And since this is just a predicate,

>>>>> you can just use it instead of gpc_reg_operand.

>>>>>

>>>>> (The split condition becomes "&& 1" then, not "").

>>>>

>>>> OK, this seems a bit strange as movsi_from_sf, movsf_from_si and

>>>> movdi_from_sf_zero_ext all use it as condition...

>>>

>>> Since in your case you *always* split, the split condition should be

>>> "always".  There are no alternatives that do not split here.

>>>

>>>> And why vsx_reg_sfsubreg_ok allows "SF SUBREGS" and TARGET_NO_SF_SUBREG

>>>> "avoid (SUBREG:SF (REG:SI)", I guess they are not the same meaning? (The

>>>> TARGET_NO_SF_SUBREG is also copied from other similar defines.)  Thanks.

>>>

>>> Good question.  I do not know.

>>>

>>> Well...  Since this define_insn* requires p8 *anyway*, we do not need

>>> any of these sf_subreg things?  We always know for each one if it should

>>> be true or false.

>>

>> Yes, removed the vsx_reg_sfsubreg_ok check.

>>

>>>

>>>> +  "TARGET_NO_SF_SUBREG"

>>>

>>> But here we should require p8 some other way, then.

>>

>> TARGET_NO_SF_SUBREG is defined to TARGET_DIRECT_MOVE_64BIT, and

>> TARGET_DIRECT_MOVE_64BIT is TARGET_DIRECT_MOVE && TARGET_P8_VECTOR && TARGET_POWERPC64

>> which means TARGET_P8_VECTOR must be true for TARGET_NO_SF_SUBREG.

>>

>>>

>>>> +  (set_attr "isa" "p8v")])

>>>

>>> (This isn't enough, unfortunately).

>>>

>>

>>

>> Updated patch to removed the vsx_reg_sfsubreg_ok and ICE fix:

>>

>>

>> For extracting high part element from DImode register like:

>>

>> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

>>

>> split it before reload with "and mask" to avoid generating shift right

>> 32 bit then shift left 32 bit.  This pattern also exists in PR42475 and

>> PR67741, etc.

>>

>> srdi 3,3,32

>> sldi 9,3,32

>> mtvsrd 1,9

>> xscvspdpn 1,1

>>

>> =>

>>

>> rldicr 3,3,0,31

>> mtvsrd 1,3

>> xscvspdpn 1,1

>>

>> Bootstrap and regression tested pass on Power8-LE.

>>

>> gcc/ChangeLog:

>>

>> 2020-07-13  Xionghu Luo  <luoxhu@linux.ibm.com>

>>

>>          PR rtl-optimization/89310

>>          * config/rs6000/rs6000.md (movsf_from_si2): New

>>          define_insn_and_split.

>>

>> gcc/testsuite/ChangeLog:

>>

>> 2020-07-13  Xionghu Luo  <luoxhu@linux.ibm.com>

>>

>>          PR rtl-optimization/89310

>>          * gcc.target/powerpc/pr89310.c: New test.

>> ---

>>   gcc/config/rs6000/rs6000.md                | 31 ++++++++++++++++++++++

>>   gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 ++++++++++++

>>   2 files changed, 48 insertions(+)

>>   create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c

>>

>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md

>> index 4fcd6a94022..480385ed4d2 100644

>> --- a/gcc/config/rs6000/rs6000.md

>> +++ b/gcc/config/rs6000/rs6000.md

>> @@ -7593,6 +7593,37 @@ (define_insn_and_split "movsf_from_si"

>>              "*,          *,         p9v,       p8v,       *,         *,

>>               p8v,        p8v,       p8v,       *")])

>>

>> +;; For extracting high part element from DImode register like:

>> +;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

>> +;; split it before reload with "and mask" to avoid generating shift right

>> +;; 32 bit then shift left 32 bit.

>> +(define_insn_and_split "movsf_from_si2"

>> +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")

>> +           (unspec:SF

>> +            [(subreg:SI

>> +              (ashiftrt:DI

>> +               (match_operand:DI 1 "input_operand" "r")

>> +               (const_int 32))

>> +              0)]

>> +            UNSPEC_SF_FROM_SI))

>> +  (clobber (match_scratch:DI 2 "=r"))]

>> +  "TARGET_NO_SF_SUBREG"

>> +  "#"

>> +  "&& 1"

>> +  [(const_int 0)]

>> +{

>> +  if (GET_CODE (operands[2]) == SCRATCH)

>> +    operands[2] = gen_reg_rtx (DImode);

>> +

>> +  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);

>> +  emit_insn (gen_anddi3 (operands[2], operands[1], mask));

>> +  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));

>> +  emit_insn (gen_vsx_xscvspdpn_directmove (operands[0], operands[0]));

>> +  DONE;

>> +}

>> +  [(set_attr "length" "12")

>> +  (set_attr "type" "vecfloat")

>> +  (set_attr "isa" "p8v")])

>>

>>   ;; Move 64-bit binary/decimal floating point

>>   (define_expand "mov<mode>"

>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr89310.c b/gcc/testsuite/gcc.target/powerpc/pr89310.c

>> new file mode 100644

>> index 00000000000..15e78509246

>> --- /dev/null

>> +++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c

>> @@ -0,0 +1,17 @@

>> +/* { dg-do compile } */

>> +/* { dg-options "-O2" } */

>> +

>> +struct s {

>> +  int i;

>> +  float f;

>> +};

>> +

>> +float

>> +foo (struct s arg)

>> +{

>> +  return arg.f;

>> +}

>> +

>> +/* { dg-final { scan-assembler-not {\msrdi\M} } } */

>> +/* { dg-final { scan-assembler-not {\msldi\M} } } */

>> +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */

>> --

>> 2.21.0.777.g83232e3864

>>

>>
Segher Boessenkool July 20, 2020, 3:31 p.m. | #14
On Mon, Jul 13, 2020 at 02:30:28PM +0800, luoxhu wrote:
> For extracting high part element from DImode register like:

> 

> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

> 

> split it before reload with "and mask" to avoid generating shift right

> 32 bit then shift left 32 bit.  This pattern also exists in PR42475 and

> PR67741, etc.

> 

> srdi 3,3,32

> sldi 9,3,32

> mtvsrd 1,9

> xscvspdpn 1,1

> 

> =>

> 

> rldicr 3,3,0,31

> mtvsrd 1,3

> xscvspdpn 1,1


> 	* config/rs6000/rs6000.md (movsf_from_si2): New

> 	define_insn_and_split.


(That fits on one line).

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c

> @@ -0,0 +1,17 @@

> +/* { dg-do compile } */

> +/* { dg-options "-O2" } */


> +/* { dg-final { scan-assembler-not {\msrdi\M} } } */

> +/* { dg-final { scan-assembler-not {\msldi\M} } } */

> +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */


I'm not sure that works on older cpus?  Please test there, and add
-mdejagnu-cpu=power8 to the dg-options if needed.  Also test on BE please.

Okay for trunk with those last details taking care of.  Thank you!


Segher
Patrick Palka via Gcc-patches July 21, 2020, 3:43 a.m. | #15
On 2020/7/20 23:31, Segher Boessenkool wrote:
> On Mon, Jul 13, 2020 at 02:30:28PM +0800, luoxhu wrote:

>> For extracting high part element from DImode register like:

>>

>> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

>>

>> split it before reload with "and mask" to avoid generating shift right

>> 32 bit then shift left 32 bit.  This pattern also exists in PR42475 and

>> PR67741, etc.

>>

>> srdi 3,3,32

>> sldi 9,3,32

>> mtvsrd 1,9

>> xscvspdpn 1,1

>>

>> =>

>>

>> rldicr 3,3,0,31

>> mtvsrd 1,3

>> xscvspdpn 1,1

> 

>> 	* config/rs6000/rs6000.md (movsf_from_si2): New

>> 	define_insn_and_split.

> 

> (That fits on one line).

> 

>> --- /dev/null

>> +++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c

>> @@ -0,0 +1,17 @@

>> +/* { dg-do compile } */

>> +/* { dg-options "-O2" } */

> 

>> +/* { dg-final { scan-assembler-not {\msrdi\M} } } */

>> +/* { dg-final { scan-assembler-not {\msldi\M} } } */

>> +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */

> 

> I'm not sure that works on older cpus?  Please test there, and add

> -mdejagnu-cpu=power8 to the dg-options if needed.  Also test on BE please.

> 

> Okay for trunk with those last details taking care of.  Thank you!


Thanks for the remind.  Addressed the comments and committed in r11-2245.

Xionghu

Patch

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4fcd6a94022..8d51de07594 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7593,6 +7593,69 @@  (define_insn_and_split "movsf_from_si"
 	    "*,          *,         p9v,       p8v,       *,         *,
 	     p8v,        p8v,       p8v,       *")])
 
+;; For extracting high part element from DImode register like:
+;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
+;; split it before reload with "and mask" to avoid generating shift right
+;; 32 bit then shift left 32 bit.
+(define_insn_and_split "movsf_from_si2"
+  [(set (match_operand:SF 0 "nonimmediate_operand"
+	    "=!r,       f,         v,         wa,        m,         Z,
+	     Z,         wa,        ?r,        !r")
+	    (unspec:SF [
+	     (subreg:SI (ashiftrt:DI
+	       (match_operand:DI 1 "input_operand"
+	   "m,         m,         wY,        Z,         r,         f,
+	   wa,        r,         wa,        r")
+	  (const_int 32)) 0)]
+		   UNSPEC_SF_FROM_SI))
+  (clobber (match_scratch:DI 2
+	    "=X,        X,         X,         X,         X,         X,
+	    X,         r,         X,         X"))]
+  "TARGET_NO_SF_SUBREG
+   && (register_operand (operands[0], SFmode)
+       && register_operand (operands[1], DImode))"
+  "@
+   lwz%U1%X1 %0,%1
+   lfs%U1%X1 %0,%1
+   lxssp %0,%1
+   lxsspx %x0,%y1
+   stw%U0%X0 %1,%0
+   stfiwx %1,%y0
+   stxsiwx %x1,%y0
+   #
+   mfvsrwz %0,%x1
+   mr %0,%1"
+
+  "&& !reload_completed
+   && vsx_reg_sfsubreg_ok (operands[0], SFmode)"
+  [(const_int 0)]
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx tmp = gen_reg_rtx (DImode);
+
+  /* Avoid split {r155:SI#0=unspec[r133:DI>>0x20#0] 86;clobber scratch;} from PR42745.  */
+  if (!SUBREG_P (operands[0]))
+    {
+      rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
+      emit_insn (gen_anddi3 (tmp, op1, mask));
+      emit_insn (gen_p8_mtvsrd_sf (op0, tmp));
+      emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+      DONE;
+    }
+  else
+    FAIL;
+}
+  [(set_attr "length"
+	    "*,          *,         *,         *,         *,         *,
+	     *,          12,        *,         *")
+   (set_attr "type"
+	    "load,       fpload,    fpload,    fpload,    store,     fpstore,
+	     fpstore,    vecfloat,  mffgpr,    *")
+   (set_attr "isa"
+	    "*,          *,         p9v,       p8v,       *,         *,
+	     p8v,        p8v,       p8v,       *")])
+
 
 ;; Move 64-bit binary/decimal floating point
 (define_expand "mov<mode>"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr89310.c b/gcc/testsuite/gcc.target/powerpc/pr89310.c
new file mode 100644
index 00000000000..15e78509246
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct s {
+  int i;
+  float f;
+};
+
+float
+foo (struct s arg)
+{
+  return arg.f;
+}
+
+/* { dg-final { scan-assembler-not {\msrdi\M} } } */
+/* { dg-final { scan-assembler-not {\msldi\M} } } */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */