[ARM,2/x] : MVE ACLE intrinsics framework patch.

Message ID DBBPR08MB4775887B6680D319A7DABCC19B710@DBBPR08MB4775.eurprd08.prod.outlook.com
State New
Headers show
Series
  • [ARM,2/x] : MVE ACLE intrinsics framework patch.
Related show

Commit Message

Srinath Parvathaneni Nov. 14, 2019, 7:12 p.m.
Hello,

This patch is part of the MVE ACLE intrinsics framework.
This patch adds support for updating (reading/writing) the APSR (Application Program Status Register)
and the FPSCR (Floating-point Status and Control Register) for MVE.
This patch also enables the thumb2 mov RTL patterns for MVE.

Please refer to the Arm reference manual [1] for more details.
[1] https://static.docs.arm.com/ddi0553/bh/DDI0553B_h_armv8m_arm.pdf?_ga=2.102521798.659307368.1572453718-1501600630.1548848914

Regression tested on arm-none-eabi and found no regressions.

Ok for trunk?

Thanks,
Srinath

gcc/ChangeLog:

2019-11-11  Andre Vieira  <andre.simoesdiasvieira@arm.com>
	    Mihail Ionescu  <mihail.ionescu@arm.com>
	    Srinath Parvathaneni  <srinath.parvathaneni@arm.com>

	* config/arm/thumb2.md (thumb2_movsfcc_soft_insn): Add check to not allow
	TARGET_HAVE_MVE for this pattern.
	(thumb2_cmse_entry_return): Add TARGET_HAVE_MVE check to update APSR register.
	* config/arm/unspecs.md (UNSPEC_GET_FPSCR): Define.
	(VUNSPEC_GET_FPSCR): Remove.
	* config/arm/vfp.md (thumb2_movhi_vfp): Add TARGET_HAVE_MVE check.
	(thumb2_movhi_fp16): Add TARGET_HAVE_MVE check.
	(thumb2_movsi_vfp): Add TARGET_HAVE_MVE check.
	(movdi_vfp): Add TARGET_HAVE_MVE check.
	(thumb2_movdf_vfp): Add TARGET_HAVE_MVE check.
	(thumb2_movsfcc_vfp): Add TARGET_HAVE_MVE check.
	(thumb2_movdfcc_vfp): Add TARGET_HAVE_MVE check.
	(push_multi_vfp): Add TARGET_HAVE_MVE check.
	(set_fpscr): Add TARGET_HAVE_MVE check.
	(get_fpscr): Add TARGET_HAVE_MVE check.


###############     Attachment also inlined for ease of reply    ###############
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 809461a25da5a8058a8afce972dea0d3131effc0..81afd8fcdc1b0a82493dc0758bce16fa9e5fde20 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -435,10 +435,10 @@
 (define_insn "*cmovsi_insn"
   [(set (match_operand:SI 0 "arm_general_register_operand" "=r,r,r,r,r,r,r")
 	(if_then_else:SI
-	 (match_operator 1 "arm_comparison_operator"
-	  [(match_operand 2 "cc_register" "") (const_int 0)])
-	 (match_operand:SI 3 "arm_reg_or_m1_or_1" "r, r,UM, r,U1,UM,U1")
-	 (match_operand:SI 4 "arm_reg_or_m1_or_1" "r,UM, r,U1, r,UM,U1")))]
+	(match_operator 1 "arm_comparison_operator"
+	 [(match_operand 2 "cc_register" "") (const_int 0)])
+	(match_operand:SI 3 "arm_reg_or_m1_or_1" "r, r,UM, r,U1,UM,U1")
+	(match_operand:SI 4 "arm_reg_or_m1_or_1" "r,UM, r,U1, r,UM,U1")))]
   "TARGET_THUMB2 && TARGET_COND_ARITH
    && (!((operands[3] == const1_rtx && operands[4] == constm1_rtx)
        || (operands[3] == constm1_rtx && operands[4] == const1_rtx)))"
@@ -540,7 +540,7 @@
 			  [(match_operand 4 "cc_register" "") (const_int 0)])
 			 (match_operand:SF 1 "s_register_operand" "0,r")
 			 (match_operand:SF 2 "s_register_operand" "r,0")))]
-  "TARGET_THUMB2 && TARGET_SOFT_FLOAT"
+  "TARGET_THUMB2 && TARGET_SOFT_FLOAT && !TARGET_HAVE_MVE"
   "@
    it\\t%D3\;mov%D3\\t%0, %2
    it\\t%d3\;mov%d3\\t%0, %1"
@@ -1226,7 +1226,7 @@
    ; added to clear the APSR and potentially the FPSCR if VFP is available, so
    ; we adapt the length accordingly.
    (set (attr "length")
-     (if_then_else (match_test "TARGET_HARD_FLOAT")
+     (if_then_else (match_test "TARGET_HARD_FLOAT || TARGET_HAVE_MVE")
       (const_int 34)
       (const_int 8)))
    ; We do not support predicate execution of returns from cmse_nonsecure_entry
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index b3b4f8ee3e2d1bdad968a9dd8ccbc72ded274f48..ac7fe7d0af19f1965356d47d8327e24d410b99bd 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -170,6 +170,7 @@
   UNSPEC_TORC		; Used by the intrinsic form of the iWMMXt TORC instruction.
   UNSPEC_TORVSC		; Used by the intrinsic form of the iWMMXt TORVSC instruction.
   UNSPEC_TEXTRC		; Used by the intrinsic form of the iWMMXt TEXTRC instruction.
+  UNSPEC_GET_FPSCR	; Represent fetch of FPSCR content.
 ])
 
 
@@ -216,7 +217,6 @@
   VUNSPEC_SLX		; Represent a store-register-release-exclusive.
   VUNSPEC_LDA		; Represent a store-register-acquire.
   VUNSPEC_STL		; Represent a store-register-release.
-  VUNSPEC_GET_FPSCR	; Represent fetch of FPSCR content.
   VUNSPEC_SET_FPSCR	; Represent assign of FPSCR content.
   VUNSPEC_PROBE_STACK_RANGE ; Represent stack range probing.
   VUNSPEC_CDP		; Represent the coprocessor cdp instruction.
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 6349c0570540ec25a599166f5d427fcbdbf2af68..461a5d71ca8548cfc61c83f9716249425633ad21 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -74,10 +74,10 @@
 (define_insn "*thumb2_movhi_vfp"
  [(set
    (match_operand:HI 0 "nonimmediate_operand"
-    "=rk, r, l, r, m, r, *t, r, *t")
+    "=rk, r, l, r, m, r, *t, r, *t, Up, r")
    (match_operand:HI 1 "general_operand"
-    "rk, I, Py, n, r, m, r, *t, *t"))]
- "TARGET_THUMB2 && TARGET_HARD_FLOAT
+    "rk, I, Py, n, r, m, r, *t, *t, r, Up"))]
+ "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)
   && !TARGET_VFP_FP16INST
   && (register_operand (operands[0], HImode)
        || register_operand (operands[1], HImode))"
@@ -99,20 +99,24 @@
       return "vmov%?\t%0, %1\t%@ int";
     case 8:
       return "vmov%?.f32\t%0, %1\t%@ int";
+    case 9:
+      return "vmsr%?\t P0, %1\t@ movhi";
+    case 10:
+      return "vmrs%?\t %0, P0\t@ movhi";
     default:
       gcc_unreachable ();
     }
 }
  [(set_attr "predicable" "yes")
   (set_attr "predicable_short_it"
-   "yes, no, yes, no, no, no, no, no, no")
+   "yes, no, yes, no, no, no, no, no, no, no, no")
   (set_attr "type"
    "mov_reg, mov_imm, mov_imm, mov_imm, store_4, load_4,\
-    f_mcr, f_mrc, fmov")
-  (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *")
-  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *")
-  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *")
-  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4")]
+    f_mcr, f_mrc, fmov, mve_move, mve_move")
+  (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *, *, *")
+  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *, *, *")
+  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *, *, *")
+  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4")]
 )
 
 ;; Patterns for HI moves which provide more data transfer instructions when FP16
@@ -170,10 +174,10 @@
 (define_insn "*thumb2_movhi_fp16"
  [(set
    (match_operand:HI 0 "nonimmediate_operand"
-    "=rk, r, l, r, m, r, *t, r, *t")
+    "=rk, r, l, r, m, r, *t, r, *t, Up, r")
    (match_operand:HI 1 "general_operand"
-    "rk, I, Py, n, r, m, r, *t, *t"))]
- "TARGET_THUMB2 && TARGET_VFP_FP16INST
+    "rk, I, Py, n, r, m, r, *t, *t, r, Up"))]
+ "TARGET_THUMB2 && (TARGET_VFP_FP16INST || TARGET_HAVE_MVE)
   && (register_operand (operands[0], HImode)
        || register_operand (operands[1], HImode))"
 {
@@ -194,21 +198,25 @@
       return "vmov.f16\t%0, %1\t%@ int";
     case 8:
       return "vmov%?.f32\t%0, %1\t%@ int";
+    case 9:
+      return "vmsr%?\tP0, %1\t%@ movhi";
+    case 10:
+      return "vmrs%?\t%0, P0\t%@ movhi";
     default:
       gcc_unreachable ();
     }
 }
  [(set_attr "predicable"
-   "yes, yes, yes, yes, yes, yes, no, no, yes")
+   "yes, yes, yes, yes, yes, yes, no, no, yes, yes, yes")
   (set_attr "predicable_short_it"
-   "yes, no, yes, no, no, no, no, no, no")
+   "yes, no, yes, no, no, no, no, no, no, no, no")
   (set_attr "type"
    "mov_reg, mov_imm, mov_imm, mov_imm, store_4, load_4,\
-    f_mcr, f_mrc, fmov")
-  (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *")
-  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *")
-  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *")
-  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4")]
+    f_mcr, f_mrc, fmov, mve_move, mve_move")
+  (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *, *, *")
+  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *, *, *")
+  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *, *, *")
+  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4")]
 )
 
 ;; SImode moves
@@ -258,9 +266,11 @@
 ;; is chosen with length 2 when the instruction is predicated for
 ;; arm_restrict_it.
 (define_insn "*thumb2_movsi_vfp"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,lk*r,m,*t, r,*t,*t,  *Uv")
-	(match_operand:SI 1 "general_operand"	   "rk,I,Py,K,j,mi,lk*r, r,*t,*t,*UvTu,*t"))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l,*hk,m,*m,*t,\
+			     r,*t,*t,*Uv, Up, r")
+	(match_operand:SI 1 "general_operand"	   "rk,I,Py,K,j,mi,*mi,l,*hk,\
+			     r,*t,*t,*UvTu,*t, r, Up"))]
+  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)
    && (   s_register_operand (operands[0], SImode)
        || s_register_operand (operands[1], SImode))"
   "*
@@ -275,43 +285,53 @@
     case 4:
       return \"movw%?\\t%0, %1\";
     case 5:
+    case 6:
       /* Cannot load it directly, split to load it via MOV / MOVT.  */
       if (!MEM_P (operands[1]) && arm_disable_literal_pool)
 	return \"#\";
       return \"ldr%?\\t%0, %1\";
-    case 6:
-      return \"str%?\\t%1, %0\";
     case 7:
-      return \"vmov%?\\t%0, %1\\t%@ int\";
     case 8:
-      return \"vmov%?\\t%0, %1\\t%@ int\";
+      return \"str%?\\t%1, %0\";
     case 9:
+      return \"vmov%?\\t%0, %1\\t%@ int\";
+    case 10:
+      return \"vmov%?\\t%0, %1\\t%@ int\";
+    case 11:
       return \"vmov%?.f32\\t%0, %1\\t%@ int\";
-    case 10: case 11:
+    case 12: case 13:
       return output_move_vfp (operands);
+    case 14:
+      return \"vmsr\\t P0, %1\";
+    case 15:
+      return \"vmrs\\t %0, P0\";
     default:
       gcc_unreachable ();
     }
   "
   [(set_attr "predicable" "yes")
-   (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no")
-   (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load_4,store_4,f_mcr,f_mrc,fmov,f_loads,f_stores")
-   (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4")
-   (set_attr "pool_range"     "*,*,*,*,*,1018,*,*,*,*,1018,*")
-   (set_attr "neg_pool_range" "*,*,*,*,*,   0,*,*,*,*,1008,*")]
+   (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,\
+	     no,no,no")
+   (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load_4,load_4,\
+	     store_4,store_4,f_mcr,f_mrc,fmov,f_loads,f_stores,mve_move,\
+	     mve_move")
+   (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4,4,4")
+   (set_attr "pool_range"     "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*,*,*")
+   (set_attr "neg_pool_range" "*,*,*,*,*,   0,   0,*,*,*,*,*,1008,*,*,*")]
 )
 
 
 ;; DImode moves
 
 (define_insn "*movdi_vfp"
-  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,!r,w,w, Uv")
-	(match_operand:DI 1 "di_operand"	      "r,rDa,Db,Dc,mi,mi,r,r,w,w,UvTu,w"))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT
-   && (   register_operand (operands[0], DImode)
-       || register_operand (operands[1], DImode))
-   && !(TARGET_NEON && CONST_INT_P (operands[1])
-	&& simd_immediate_valid_for_move (operands[1], DImode, NULL, NULL))"
+  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,!r,w,\
+			    w, Uv")
+       (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,r,r,w,w,UvTu,w"))]
+  "TARGET_32BIT && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)
+    && (register_operand (operands[0], DImode)
+	|| register_operand (operands[1], DImode))
+   && !((TARGET_NEON || TARGET_HAVE_MVE) && CONST_INT_P (operands[1])
+       && simd_immediate_valid_for_move (operands[1], DImode, NULL, NULL))"
   "*
   switch (which_alternative)
     {
@@ -390,9 +410,15 @@
     case 6: /* S register from immediate.  */
       return \"vmov.f16\\t%0, %1\t%@ __fp16\";
     case 7: /* S register from memory.  */
-      return \"vld1.16\\t{%z0}, %A1\";
+      if (TARGET_HAVE_MVE)
+	return \"vldr.16\\t%0, %A1\";
+      else
+	return \"vld1.16\\t{%z0}, %A1\";
     case 8: /* Memory from S register.  */
-      return \"vst1.16\\t{%z1}, %A0\";
+      if (TARGET_HAVE_MVE)
+	return \"vstr.16\\t%1, %A0\";
+      else
+	return \"vst1.16\\t{%z1}, %A0\";
     case 9: /* ARM register from constant.  */
       {
 	long bits;
@@ -593,7 +619,7 @@
 (define_insn "*thumb2_movsf_vfp"
   [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t, t  ,Uv,r ,m,t,r")
 	(match_operand:SF 1 "hard_sf_operand"	   " ?r,t,Dv,UvHa,t, mHa,r,t,r"))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT
+  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)
    && (   s_register_operand (operands[0], SFmode)
        || s_register_operand (operands[1], SFmode))"
   "*
@@ -682,7 +708,7 @@
 (define_insn "*thumb2_movdf_vfp"
   [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w  ,Uv,r ,m,w,r")
 	(match_operand:DF 1 "hard_df_operand"		   " ?r,w,Dy,G,UvHa,w, mHa,r, w,r"))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT
+  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)
    && (   register_operand (operands[0], DFmode)
        || register_operand (operands[1], DFmode))"
   "*
@@ -760,7 +786,7 @@
 	    [(match_operand 4 "cc_register" "") (const_int 0)])
 	  (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t")
 	  (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT && !arm_restrict_it"
+  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE) && !arm_restrict_it"
   "@
    it\\t%D3\;vmov%D3.f32\\t%0, %2
    it\\t%d3\;vmov%d3.f32\\t%0, %1
@@ -806,7 +832,8 @@
 	    [(match_operand 4 "cc_register" "") (const_int 0)])
 	  (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w")
 	  (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && !arm_restrict_it"
+  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE) && TARGET_VFP_DOUBLE
+   && !arm_restrict_it"
   "@
    it\\t%D3\;vmov%D3.f64\\t%P0, %P2
    it\\t%d3\;vmov%d3.f64\\t%P0, %P1
@@ -1982,7 +2009,7 @@
     [(set (match_operand:BLK 0 "memory_operand" "=m")
 	  (unspec:BLK [(match_operand:DF 1 "vfp_register_operand" "")]
 		      UNSPEC_PUSH_MULT))])]
-  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "TARGET_32BIT && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)"
   "* return vfp_output_vstmd (operands);"
   [(set_attr "type" "f_stored")]
 )
@@ -2070,16 +2097,18 @@
 
 ;; Write Floating-point Status and Control Register.
 (define_insn "set_fpscr"
-  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] VUNSPEC_SET_FPSCR)]
-  "TARGET_HARD_FLOAT"
+  [(set (reg:SI VFPCC_REGNUM)
+	(unspec_volatile:SI
+	 [(match_operand:SI 0 "register_operand" "r")] VUNSPEC_SET_FPSCR))]
+  "TARGET_HARD_FLOAT || TARGET_HAVE_MVE"
   "mcr\\tp10, 7, %0, cr1, cr0, 0\\t @SET_FPSCR"
   [(set_attr "type" "mrs")])
 
 ;; Read Floating-point Status and Control Register.
 (define_insn "get_fpscr"
   [(set (match_operand:SI 0 "register_operand" "=r")
-        (unspec_volatile:SI [(const_int 0)] VUNSPEC_GET_FPSCR))]
-  "TARGET_HARD_FLOAT"
+	(unspec:SI [(reg:SI VFPCC_REGNUM)] UNSPEC_GET_FPSCR))]
+  "TARGET_HARD_FLOAT || TARGET_HAVE_MVE"
   "mrc\\tp10, 7, %0, cr1, cr0, 0\\t @GET_FPSCR"
   [(set_attr "type" "mrs")])

Comments

Kyrill Tkachov Dec. 19, 2019, 5:19 p.m. | #1
Hi Srinath,

On 11/14/19 7:12 PM, Srinath Parvathaneni wrote:
> Hello,

>

> This patch is part of MVE ACLE intrinsics framework.

> This patches add support to update (read/write) the APSR (Application 

> Program Status Register)

> register and FPSCR (Floating-point Status and Control Register) 

> register for MVE.

> This patch also enables thumb2 mov RTL patterns for MVE.

>

> Please refer to Arm reference manual [1] for more details.

> [1] 

> https://static.docs.arm.com/ddi0553/bh/DDI0553B_h_armv8m_arm.pdf?_ga=2.102521798.659307368.1572453718-1501600630.1548848914

>

> Regression tested on arm-none-eabi and found no regressions.

>

> Ok for trunk?

>

> Thanks,

> Srinath

>

> gcc/ChangeLog:

>

> 2019-11-11  Andre Vieira <andre.simoesdiasvieira@arm.com>

>             Mihail Ionescu  <mihail.ionescu@arm.com>

>             Srinath Parvathaneni <srinath.parvathaneni@arm.com>

>

>         * config/arm/thumb2.md (thumb2_movsfcc_soft_insn): Add check 

> to not allow

>         TARGET_HAVE_MVE for this pattern.

>         (thumb2_cmse_entry_return): Add TARGET_HAVE_MVE check to 

> update APSR register.

>         * config/arm/unspecs.md (UNSPEC_GET_FPSCR): Define.

>         (VUNSPEC_GET_FPSCR): Remove.

>         * config/arm/vfp.md (thumb2_movhi_vfp): Add TARGET_HAVE_MVE check.

>         (thumb2_movhi_fp16): Add TARGET_HAVE_MVE check.

>         (thumb2_movsi_vfp): Add TARGET_HAVE_MVE check.

>         (movdi_vfp): Add TARGET_HAVE_MVE check.

>         (thumb2_movdf_vfp): Add TARGET_HAVE_MVE check.

>         (thumb2_movsfcc_vfp): Add TARGET_HAVE_MVE check.

>         (thumb2_movdfcc_vfp): Add TARGET_HAVE_MVE check.

>         (push_multi_vfp): Add TARGET_HAVE_MVE check.

>         (set_fpscr): Add TARGET_HAVE_MVE check.

>         (get_fpscr): Add TARGET_HAVE_MVE check.



These pattern changes do more than add a TARGET_HAVE_MVE check. Some add 
new alternatives, some even change the RTL pattern.

I'd like to see them reflected in the ChangeLog so that I know they're 
deliberate.


>

>

> ###############     Attachment also inlined for ease of reply    

> ###############

>

>

> diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md

> index 

> 809461a25da5a8058a8afce972dea0d3131effc0..81afd8fcdc1b0a82493dc0758bce16fa9e5fde20 

> 100644

> --- a/gcc/config/arm/thumb2.md

> +++ b/gcc/config/arm/thumb2.md

> @@ -435,10 +435,10 @@

>  (define_insn "*cmovsi_insn"

>    [(set (match_operand:SI 0 "arm_general_register_operand" 

> "=r,r,r,r,r,r,r")

>          (if_then_else:SI

> -        (match_operator 1 "arm_comparison_operator"

> -         [(match_operand 2 "cc_register" "") (const_int 0)])

> -        (match_operand:SI 3 "arm_reg_or_m1_or_1" "r, r,UM, r,U1,UM,U1")

> -        (match_operand:SI 4 "arm_reg_or_m1_or_1" "r,UM, r,U1, 

> r,UM,U1")))]

> +       (match_operator 1 "arm_comparison_operator"

> +        [(match_operand 2 "cc_register" "") (const_int 0)])

> +       (match_operand:SI 3 "arm_reg_or_m1_or_1" "r, r,UM, r,U1,UM,U1")

> +       (match_operand:SI 4 "arm_reg_or_m1_or_1" "r,UM, r,U1, r,UM,U1")))]

>    "TARGET_THUMB2 && TARGET_COND_ARITH

>     && (!((operands[3] == const1_rtx && operands[4] == constm1_rtx)

>         || (operands[3] == constm1_rtx && operands[4] == const1_rtx)))"

> @@ -540,7 +540,7 @@

>                            [(match_operand 4 "cc_register" "") 

> (const_int 0)])

>                           (match_operand:SF 1 "s_register_operand" "0,r")

>                           (match_operand:SF 2 "s_register_operand" 

> "r,0")))]

> -  "TARGET_THUMB2 && TARGET_SOFT_FLOAT"

> +  "TARGET_THUMB2 && TARGET_SOFT_FLOAT && !TARGET_HAVE_MVE"

>    "@

>     it\\t%D3\;mov%D3\\t%0, %2

>     it\\t%d3\;mov%d3\\t%0, %1"

> @@ -1226,7 +1226,7 @@

>     ; added to clear the APSR and potentially the FPSCR if VFP is 

> available, so

>     ; we adapt the length accordingly.

>     (set (attr "length")

> -     (if_then_else (match_test "TARGET_HARD_FLOAT")

> +     (if_then_else (match_test "TARGET_HARD_FLOAT || TARGET_HAVE_MVE")

>        (const_int 34)

>        (const_int 8)))

>     ; We do not support predicate execution of returns from 

> cmse_nonsecure_entry

> diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md

> index 

> b3b4f8ee3e2d1bdad968a9dd8ccbc72ded274f48..ac7fe7d0af19f1965356d47d8327e24d410b99bd 

> 100644

> --- a/gcc/config/arm/unspecs.md

> +++ b/gcc/config/arm/unspecs.md

> @@ -170,6 +170,7 @@

>    UNSPEC_TORC          ; Used by the intrinsic form of the iWMMXt 

> TORC instruction.

>    UNSPEC_TORVSC                ; Used by the intrinsic form of the 

> iWMMXt TORVSC instruction.

>    UNSPEC_TEXTRC                ; Used by the intrinsic form of the 

> iWMMXt TEXTRC instruction.

> +  UNSPEC_GET_FPSCR     ; Represent fetch of FPSCR content.

>  ])

>

>

> @@ -216,7 +217,6 @@

>    VUNSPEC_SLX          ; Represent a store-register-release-exclusive.

>    VUNSPEC_LDA          ; Represent a store-register-acquire.

>    VUNSPEC_STL          ; Represent a store-register-release.

> -  VUNSPEC_GET_FPSCR    ; Represent fetch of FPSCR content.

>    VUNSPEC_SET_FPSCR    ; Represent assign of FPSCR content.

>    VUNSPEC_PROBE_STACK_RANGE ; Represent stack range probing.

>    VUNSPEC_CDP          ; Represent the coprocessor cdp instruction.

> diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md

> index 

> 6349c0570540ec25a599166f5d427fcbdbf2af68..461a5d71ca8548cfc61c83f9716249425633ad21 

> 100644

> --- a/gcc/config/arm/vfp.md

> +++ b/gcc/config/arm/vfp.md

> @@ -74,10 +74,10 @@

>  (define_insn "*thumb2_movhi_vfp"

>   [(set

>     (match_operand:HI 0 "nonimmediate_operand"

> -    "=rk, r, l, r, m, r, *t, r, *t")

> +    "=rk, r, l, r, m, r, *t, r, *t, Up, r")

>     (match_operand:HI 1 "general_operand"

> -    "rk, I, Py, n, r, m, r, *t, *t"))]

> - "TARGET_THUMB2 && TARGET_HARD_FLOAT

> +    "rk, I, Py, n, r, m, r, *t, *t, r, Up"))]

> + "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)

>    && !TARGET_VFP_FP16INST

>    && (register_operand (operands[0], HImode)

>         || register_operand (operands[1], HImode))"

> @@ -99,20 +99,24 @@

>        return "vmov%?\t%0, %1\t%@ int";

>      case 8:

>        return "vmov%?.f32\t%0, %1\t%@ int";

> +    case 9:

> +      return "vmsr%?\t P0, %1\t@ movhi";

> +    case 10:

> +      return "vmrs%?\t %0, P0\t@ movhi";

>      default:

>        gcc_unreachable ();

>      }

>  }

>   [(set_attr "predicable" "yes")

>    (set_attr "predicable_short_it"

> -   "yes, no, yes, no, no, no, no, no, no")

> +   "yes, no, yes, no, no, no, no, no, no, no, no")

>    (set_attr "type"

>     "mov_reg, mov_imm, mov_imm, mov_imm, store_4, load_4,\

> -    f_mcr, f_mrc, fmov")

> -  (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *")

> -  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *")

> -  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *")

> -  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4")]

> +    f_mcr, f_mrc, fmov, mve_move, mve_move")

> +  (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *, *, *")

> +  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *, *, *")

> +  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *, *, *")

> +  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4")]

>  )



Hmm, I think the alternatives that touch P0 and other MVE-specific 
registers should be disabled for non-MVE targets through the "arch" 
attribute.


>

>  ;; Patterns for HI moves which provide more data transfer 

> instructions when FP16

> @@ -170,10 +174,10 @@

>  (define_insn "*thumb2_movhi_fp16"

>   [(set

>     (match_operand:HI 0 "nonimmediate_operand"

> -    "=rk, r, l, r, m, r, *t, r, *t")

> +    "=rk, r, l, r, m, r, *t, r, *t, Up, r")

>     (match_operand:HI 1 "general_operand"

> -    "rk, I, Py, n, r, m, r, *t, *t"))]

> - "TARGET_THUMB2 && TARGET_VFP_FP16INST

> +    "rk, I, Py, n, r, m, r, *t, *t, r, Up"))]

> + "TARGET_THUMB2 && (TARGET_VFP_FP16INST || TARGET_HAVE_MVE)

>    && (register_operand (operands[0], HImode)

>         || register_operand (operands[1], HImode))"

>  {

> @@ -194,21 +198,25 @@

>        return "vmov.f16\t%0, %1\t%@ int";

>      case 8:

>        return "vmov%?.f32\t%0, %1\t%@ int";

> +    case 9:

> +      return "vmsr%?\tP0, %1\t%@ movhi";

> +    case 10:

> +      return "vmrs%?\t%0, P0\t%@ movhi";

>      default:

>        gcc_unreachable ();

>      }

>  }

>   [(set_attr "predicable"

> -   "yes, yes, yes, yes, yes, yes, no, no, yes")

> +   "yes, yes, yes, yes, yes, yes, no, no, yes, yes, yes")

>    (set_attr "predicable_short_it"

> -   "yes, no, yes, no, no, no, no, no, no")

> +   "yes, no, yes, no, no, no, no, no, no, no, no")

>    (set_attr "type"

>     "mov_reg, mov_imm, mov_imm, mov_imm, store_4, load_4,\

> -    f_mcr, f_mrc, fmov")

> -  (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *")

> -  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *")

> -  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *")

> -  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4")]

> +    f_mcr, f_mrc, fmov, mve_move, mve_move")

> +  (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *, *, *")

> +  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *, *, *")

> +  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *, *, *")

> +  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4")]

>  )

>

>  ;; SImode moves

> @@ -258,9 +266,11 @@

>  ;; is chosen with length 2 when the instruction is predicated for

>  ;; arm_restrict_it.

>  (define_insn "*thumb2_movsi_vfp"

> -  [(set (match_operand:SI 0 "nonimmediate_operand" 

> "=rk,r,l,r,r,lk*r,m,*t, r,*t,*t,  *Uv")

> -       (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,lk*r, 

> r,*t,*t,*UvTu,*t"))]

> -  "TARGET_THUMB2 && TARGET_HARD_FLOAT

> +  [(set (match_operand:SI 0 "nonimmediate_operand" 

> "=rk,r,l,r,r,l,*hk,m,*m,*t,\

> +                            r,*t,*t,*Uv, Up, r")

> +       (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk,\

> +                            r,*t,*t,*UvTu,*t, r, Up"))]

> +  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)

>     && (   s_register_operand (operands[0], SImode)

>         || s_register_operand (operands[1], SImode))"

>    "*

> @@ -275,43 +285,53 @@

>      case 4:

>        return \"movw%?\\t%0, %1\";

>      case 5:

> +    case 6:

>        /* Cannot load it directly, split to load it via MOV / MOVT.  */

>        if (!MEM_P (operands[1]) && arm_disable_literal_pool)

>          return \"#\";

>        return \"ldr%?\\t%0, %1\";

> -    case 6:

> -      return \"str%?\\t%1, %0\";

>      case 7:

> -      return \"vmov%?\\t%0, %1\\t%@ int\";

>      case 8:

> -      return \"vmov%?\\t%0, %1\\t%@ int\";

> +      return \"str%?\\t%1, %0\";

>      case 9:

> +      return \"vmov%?\\t%0, %1\\t%@ int\";

> +    case 10:

> +      return \"vmov%?\\t%0, %1\\t%@ int\";

> +    case 11:

>        return \"vmov%?.f32\\t%0, %1\\t%@ int\";

> -    case 10: case 11:

> +    case 12: case 13:

>        return output_move_vfp (operands);

> +    case 14:

> +      return \"vmsr\\t P0, %1\";

> +    case 15:

> +      return \"vmrs\\t %0, P0\";

>      default:

>        gcc_unreachable ();

>      }

>    "

>    [(set_attr "predicable" "yes")

> -   (set_attr "predicable_short_it" 

> "yes,no,yes,no,no,no,no,no,no,no,no,no")

> -   (set_attr "type" 

> "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load_4,store_4,f_mcr,f_mrc,fmov,f_loads,f_stores")

> -   (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4")

> -   (set_attr "pool_range" "*,*,*,*,*,1018,*,*,*,*,1018,*")

> -   (set_attr "neg_pool_range" "*,*,*,*,*, 0,*,*,*,*,1008,*")]

> +   (set_attr "predicable_short_it" 

> "yes,no,yes,no,no,no,no,no,no,no,no,no,no,\

> +            no,no,no")

> +   (set_attr "type" 

> "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load_4,load_4,\

> + store_4,store_4,f_mcr,f_mrc,fmov,f_loads,f_stores,mve_move,\

> +            mve_move")

> +   (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4,4,4")

> +   (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*,*,*")

> +   (set_attr "neg_pool_range" "*,*,*,*,*,   0, 0,*,*,*,*,*,1008,*,*,*")]

>  )

>

>

>  ;; DImode moves

>

>  (define_insn "*movdi_vfp"

> -  [(set (match_operand:DI 0 "nonimmediate_di_operand" 

> "=r,r,r,r,r,r,m,w,!r,w,w, Uv")

> -       (match_operand:DI 1 "di_operand" 

> "r,rDa,Db,Dc,mi,mi,r,r,w,w,UvTu,w"))]

> -  "TARGET_32BIT && TARGET_HARD_FLOAT

> -   && (   register_operand (operands[0], DImode)

> -       || register_operand (operands[1], DImode))

> -   && !(TARGET_NEON && CONST_INT_P (operands[1])

> -       && simd_immediate_valid_for_move (operands[1], DImode, NULL, 

> NULL))"

> +  [(set (match_operand:DI 0 "nonimmediate_di_operand" 

> "=r,r,r,r,r,r,m,w,!r,w,\

> +                           w, Uv")

> +       (match_operand:DI 1 "di_operand" 

> "r,rDa,Db,Dc,mi,mi,r,r,w,w,UvTu,w"))]

> +  "TARGET_32BIT && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)

> +    && (register_operand (operands[0], DImode)

> +       || register_operand (operands[1], DImode))

> +   && !((TARGET_NEON || TARGET_HAVE_MVE) && CONST_INT_P (operands[1])

> +       && simd_immediate_valid_for_move (operands[1], DImode, NULL, 

> NULL))"

>    "*

>    switch (which_alternative)

>      {

> @@ -390,9 +410,15 @@

>      case 6: /* S register from immediate.  */

>        return \"vmov.f16\\t%0, %1\t%@ __fp16\";

>      case 7: /* S register from memory.  */

> -      return \"vld1.16\\t{%z0}, %A1\";

> +      if (TARGET_HAVE_MVE)

> +       return \"vldr.16\\t%0, %A1\";

> +      else

> +       return \"vld1.16\\t{%z0}, %A1\";

>      case 8: /* Memory from S register.  */

> -      return \"vst1.16\\t{%z1}, %A0\";

> +      if (TARGET_HAVE_MVE)

> +       return \"vstr.16\\t%1, %A0\";

> +      else

> +       return \"vst1.16\\t{%z1}, %A0\";

>      case 9: /* ARM register from constant.  */

>        {

>          long bits;

> @@ -593,7 +619,7 @@

>  (define_insn "*thumb2_movsf_vfp"

>    [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t, t  ,Uv,r 

> ,m,t,r")

>          (match_operand:SF 1 "hard_sf_operand"      " ?r,t,Dv,UvHa,t, 

> mHa,r,t,r"))]

> -  "TARGET_THUMB2 && TARGET_HARD_FLOAT

> +  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)

>     && (   s_register_operand (operands[0], SFmode)

>         || s_register_operand (operands[1], SFmode))"

>    "*

> @@ -682,7 +708,7 @@

>  (define_insn "*thumb2_movdf_vfp"

>    [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w 

> ,w,w  ,Uv,r ,m,w,r")

>          (match_operand:DF 1 "hard_df_operand" " ?r,w,Dy,G,UvHa,w, 

> mHa,r, w,r"))]

> -  "TARGET_THUMB2 && TARGET_HARD_FLOAT

> +  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)

>     && (   register_operand (operands[0], DFmode)

>         || register_operand (operands[1], DFmode))"

>    "*

> @@ -760,7 +786,7 @@

>              [(match_operand 4 "cc_register" "") (const_int 0)])

>            (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t")

>            (match_operand:SF 2 "s_register_operand" 

> "t,0,t,?r,0,?r,t,0,t")))]

> -  "TARGET_THUMB2 && TARGET_HARD_FLOAT && !arm_restrict_it"

> +  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE) && 

> !arm_restrict_it"

>    "@

>     it\\t%D3\;vmov%D3.f32\\t%0, %2

>     it\\t%d3\;vmov%d3.f32\\t%0, %1

> @@ -806,7 +832,8 @@

>              [(match_operand 4 "cc_register" "") (const_int 0)])

>            (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w")

>            (match_operand:DF 2 "s_register_operand" 

> "w,0,w,?r,0,?r,w,0,w")))]

> -  "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && 

> !arm_restrict_it"

> +  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE) && 

> TARGET_VFP_DOUBLE

> +   && !arm_restrict_it"

>    "@

>     it\\t%D3\;vmov%D3.f64\\t%P0, %P2

>     it\\t%d3\;vmov%d3.f64\\t%P0, %P1

> @@ -1982,7 +2009,7 @@

>      [(set (match_operand:BLK 0 "memory_operand" "=m")

>            (unspec:BLK [(match_operand:DF 1 "vfp_register_operand" "")]

>                        UNSPEC_PUSH_MULT))])]

> -  "TARGET_32BIT && TARGET_HARD_FLOAT"

> +  "TARGET_32BIT && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)"

>    "* return vfp_output_vstmd (operands);"

>    [(set_attr "type" "f_stored")]

>  )

> @@ -2070,16 +2097,18 @@

>

>  ;; Write Floating-point Status and Control Register.

>  (define_insn "set_fpscr"

> -  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] 

> VUNSPEC_SET_FPSCR)]

> -  "TARGET_HARD_FLOAT"

> +  [(set (reg:SI VFPCC_REGNUM)

> +       (unspec_volatile:SI

> +        [(match_operand:SI 0 "register_operand" "r")] 

> VUNSPEC_SET_FPSCR))]

> +  "TARGET_HARD_FLOAT || TARGET_HAVE_MVE"

>    "mcr\\tp10, 7, %0, cr1, cr0, 0\\t @SET_FPSCR"

>    [(set_attr "type" "mrs")])



Why is the RTL pattern being changed here? I'm not, in principle, 
opposed, but it looks like an orthogonal change that can be done 
independently if necessary and I'd rather keep the churn to the codebase 
minimal at this point.

Thanks,

Kyrill


>

>  ;; Read Floating-point Status and Control Register.

>  (define_insn "get_fpscr"

>    [(set (match_operand:SI 0 "register_operand" "=r")

> -        (unspec_volatile:SI [(const_int 0)] VUNSPEC_GET_FPSCR))]

> -  "TARGET_HARD_FLOAT"

> +       (unspec:SI [(reg:SI VFPCC_REGNUM)] UNSPEC_GET_FPSCR))]

> +  "TARGET_HARD_FLOAT || TARGET_HAVE_MVE"

>    "mrc\\tp10, 7, %0, cr1, cr0, 0\\t @GET_FPSCR"

>    [(set_attr "type" "mrs")])

>

>

Patch

diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 809461a25da5a8058a8afce972dea0d3131effc0..81afd8fcdc1b0a82493dc0758bce16fa9e5fde20 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -435,10 +435,10 @@ 
 (define_insn "*cmovsi_insn"
   [(set (match_operand:SI 0 "arm_general_register_operand" "=r,r,r,r,r,r,r")
 	(if_then_else:SI
-	 (match_operator 1 "arm_comparison_operator"
-	  [(match_operand 2 "cc_register" "") (const_int 0)])
-	 (match_operand:SI 3 "arm_reg_or_m1_or_1" "r, r,UM, r,U1,UM,U1")
-	 (match_operand:SI 4 "arm_reg_or_m1_or_1" "r,UM, r,U1, r,UM,U1")))]
+	(match_operator 1 "arm_comparison_operator"
+	 [(match_operand 2 "cc_register" "") (const_int 0)])
+	(match_operand:SI 3 "arm_reg_or_m1_or_1" "r, r,UM, r,U1,UM,U1")
+	(match_operand:SI 4 "arm_reg_or_m1_or_1" "r,UM, r,U1, r,UM,U1")))]
   "TARGET_THUMB2 && TARGET_COND_ARITH
    && (!((operands[3] == const1_rtx && operands[4] == constm1_rtx)
        || (operands[3] == constm1_rtx && operands[4] == const1_rtx)))"
@@ -540,7 +540,7 @@ 
 			  [(match_operand 4 "cc_register" "") (const_int 0)])
 			 (match_operand:SF 1 "s_register_operand" "0,r")
 			 (match_operand:SF 2 "s_register_operand" "r,0")))]
-  "TARGET_THUMB2 && TARGET_SOFT_FLOAT"
+  "TARGET_THUMB2 && TARGET_SOFT_FLOAT && !TARGET_HAVE_MVE"
   "@
    it\\t%D3\;mov%D3\\t%0, %2
    it\\t%d3\;mov%d3\\t%0, %1"
@@ -1226,7 +1226,7 @@ 
    ; added to clear the APSR and potentially the FPSCR if VFP is available, so
    ; we adapt the length accordingly.
    (set (attr "length")
-     (if_then_else (match_test "TARGET_HARD_FLOAT")
+     (if_then_else (match_test "TARGET_HARD_FLOAT || TARGET_HAVE_MVE")
       (const_int 34)
       (const_int 8)))
    ; We do not support predicate execution of returns from cmse_nonsecure_entry
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index b3b4f8ee3e2d1bdad968a9dd8ccbc72ded274f48..ac7fe7d0af19f1965356d47d8327e24d410b99bd 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -170,6 +170,7 @@ 
   UNSPEC_TORC		; Used by the intrinsic form of the iWMMXt TORC instruction.
   UNSPEC_TORVSC		; Used by the intrinsic form of the iWMMXt TORVSC instruction.
   UNSPEC_TEXTRC		; Used by the intrinsic form of the iWMMXt TEXTRC instruction.
+  UNSPEC_GET_FPSCR	; Represent fetch of FPSCR content.
 ])
 
 
@@ -216,7 +217,6 @@ 
   VUNSPEC_SLX		; Represent a store-register-release-exclusive.
   VUNSPEC_LDA		; Represent a store-register-acquire.
   VUNSPEC_STL		; Represent a store-register-release.
-  VUNSPEC_GET_FPSCR	; Represent fetch of FPSCR content.
   VUNSPEC_SET_FPSCR	; Represent assign of FPSCR content.
   VUNSPEC_PROBE_STACK_RANGE ; Represent stack range probing.
   VUNSPEC_CDP		; Represent the coprocessor cdp instruction.
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 6349c0570540ec25a599166f5d427fcbdbf2af68..461a5d71ca8548cfc61c83f9716249425633ad21 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -74,10 +74,10 @@ 
 (define_insn "*thumb2_movhi_vfp"
  [(set
    (match_operand:HI 0 "nonimmediate_operand"
-    "=rk, r, l, r, m, r, *t, r, *t")
+    "=rk, r, l, r, m, r, *t, r, *t, Up, r")
    (match_operand:HI 1 "general_operand"
-    "rk, I, Py, n, r, m, r, *t, *t"))]
- "TARGET_THUMB2 && TARGET_HARD_FLOAT
+    "rk, I, Py, n, r, m, r, *t, *t, r, Up"))]
+ "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)
   && !TARGET_VFP_FP16INST
   && (register_operand (operands[0], HImode)
        || register_operand (operands[1], HImode))"
@@ -99,20 +99,24 @@ 
       return "vmov%?\t%0, %1\t%@ int";
     case 8:
       return "vmov%?.f32\t%0, %1\t%@ int";
+    case 9:
+      return "vmsr%?\t P0, %1\t@ movhi";
+    case 10:
+      return "vmrs%?\t %0, P0\t@ movhi";
     default:
       gcc_unreachable ();
     }
 }
  [(set_attr "predicable" "yes")
   (set_attr "predicable_short_it"
-   "yes, no, yes, no, no, no, no, no, no")
+   "yes, no, yes, no, no, no, no, no, no, no, no")
   (set_attr "type"
    "mov_reg, mov_imm, mov_imm, mov_imm, store_4, load_4,\
-    f_mcr, f_mrc, fmov")
-  (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *")
-  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *")
-  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *")
-  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4")]
+    f_mcr, f_mrc, fmov, mve_move, mve_move")
+  (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *, *, *")
+  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *, *, *")
+  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *, *, *")
+  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4")]
 )
 
 ;; Patterns for HI moves which provide more data transfer instructions when FP16
@@ -170,10 +174,10 @@ 
 (define_insn "*thumb2_movhi_fp16"
  [(set
    (match_operand:HI 0 "nonimmediate_operand"
-    "=rk, r, l, r, m, r, *t, r, *t")
+    "=rk, r, l, r, m, r, *t, r, *t, Up, r")
    (match_operand:HI 1 "general_operand"
-    "rk, I, Py, n, r, m, r, *t, *t"))]
- "TARGET_THUMB2 && TARGET_VFP_FP16INST
+    "rk, I, Py, n, r, m, r, *t, *t, r, Up"))]
+ "TARGET_THUMB2 && (TARGET_VFP_FP16INST || TARGET_HAVE_MVE)
   && (register_operand (operands[0], HImode)
        || register_operand (operands[1], HImode))"
 {
@@ -194,21 +198,25 @@ 
       return "vmov.f16\t%0, %1\t%@ int";
     case 8:
       return "vmov%?.f32\t%0, %1\t%@ int";
+    case 9:
+      return "vmsr%?\tP0, %1\t%@ movhi";
+    case 10:
+      return "vmrs%?\t%0, P0\t%@ movhi";
     default:
       gcc_unreachable ();
     }
 }
  [(set_attr "predicable"
-   "yes, yes, yes, yes, yes, yes, no, no, yes")
+   "yes, yes, yes, yes, yes, yes, no, no, yes, yes, yes")
   (set_attr "predicable_short_it"
-   "yes, no, yes, no, no, no, no, no, no")
+   "yes, no, yes, no, no, no, no, no, no, no, no")
   (set_attr "type"
    "mov_reg, mov_imm, mov_imm, mov_imm, store_4, load_4,\
-    f_mcr, f_mrc, fmov")
-  (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *")
-  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *")
-  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *")
-  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4")]
+    f_mcr, f_mrc, fmov, mve_move, mve_move")
+  (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *, *, *")
+  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *, *, *")
+  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *, *, *")
+  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4")]
 )
 
 ;; SImode moves
@@ -258,9 +266,11 @@ 
 ;; is chosen with length 2 when the instruction is predicated for
 ;; arm_restrict_it.
 (define_insn "*thumb2_movsi_vfp"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,lk*r,m,*t, r,*t,*t,  *Uv")
-	(match_operand:SI 1 "general_operand"	   "rk,I,Py,K,j,mi,lk*r, r,*t,*t,*UvTu,*t"))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l,*hk,m,*m,*t,\
+			     r,*t,*t,*Uv, Up, r")
+	(match_operand:SI 1 "general_operand"	   "rk,I,Py,K,j,mi,*mi,l,*hk,\
+			     r,*t,*t,*UvTu,*t, r, Up"))]
+  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)
    && (   s_register_operand (operands[0], SImode)
        || s_register_operand (operands[1], SImode))"
   "*
@@ -275,43 +285,53 @@ 
     case 4:
       return \"movw%?\\t%0, %1\";
     case 5:
+    case 6:
       /* Cannot load it directly, split to load it via MOV / MOVT.  */
       if (!MEM_P (operands[1]) && arm_disable_literal_pool)
 	return \"#\";
       return \"ldr%?\\t%0, %1\";
-    case 6:
-      return \"str%?\\t%1, %0\";
     case 7:
-      return \"vmov%?\\t%0, %1\\t%@ int\";
     case 8:
-      return \"vmov%?\\t%0, %1\\t%@ int\";
+      return \"str%?\\t%1, %0\";
     case 9:
+      return \"vmov%?\\t%0, %1\\t%@ int\";
+    case 10:
+      return \"vmov%?\\t%0, %1\\t%@ int\";
+    case 11:
       return \"vmov%?.f32\\t%0, %1\\t%@ int\";
-    case 10: case 11:
+    case 12: case 13:
       return output_move_vfp (operands);
+    case 14:
+      return \"vmsr\\t P0, %1\";
+    case 15:
+      return \"vmrs\\t %0, P0\";
     default:
       gcc_unreachable ();
     }
   "
   [(set_attr "predicable" "yes")
-   (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no")
-   (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load_4,store_4,f_mcr,f_mrc,fmov,f_loads,f_stores")
-   (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4")
-   (set_attr "pool_range"     "*,*,*,*,*,1018,*,*,*,*,1018,*")
-   (set_attr "neg_pool_range" "*,*,*,*,*,   0,*,*,*,*,1008,*")]
+   (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,\
+	     no,no,no")
+   (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load_4,load_4,\
+	     store_4,store_4,f_mcr,f_mrc,fmov,f_loads,f_stores,mve_move,\
+	     mve_move")
+   (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4,4,4")
+   (set_attr "pool_range"     "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*,*,*")
+   (set_attr "neg_pool_range" "*,*,*,*,*,   0,   0,*,*,*,*,*,1008,*,*,*")]
 )
 
 
 ;; DImode moves
 
 (define_insn "*movdi_vfp"
-  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,!r,w,w, Uv")
-	(match_operand:DI 1 "di_operand"	      "r,rDa,Db,Dc,mi,mi,r,r,w,w,UvTu,w"))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT
-   && (   register_operand (operands[0], DImode)
-       || register_operand (operands[1], DImode))
-   && !(TARGET_NEON && CONST_INT_P (operands[1])
-	&& simd_immediate_valid_for_move (operands[1], DImode, NULL, NULL))"
+  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,!r,w,\
+			    w, Uv")
+       (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,r,r,w,w,UvTu,w"))]
+  "TARGET_32BIT && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)
+    && (register_operand (operands[0], DImode)
+	|| register_operand (operands[1], DImode))
+   && !((TARGET_NEON || TARGET_HAVE_MVE) && CONST_INT_P (operands[1])
+       && simd_immediate_valid_for_move (operands[1], DImode, NULL, NULL))"
   "*
   switch (which_alternative)
     {
@@ -390,9 +410,15 @@ 
     case 6: /* S register from immediate.  */
       return \"vmov.f16\\t%0, %1\t%@ __fp16\";
     case 7: /* S register from memory.  */
-      return \"vld1.16\\t{%z0}, %A1\";
+      if (TARGET_HAVE_MVE)
+	return \"vldr.16\\t%0, %A1\";
+      else
+	return \"vld1.16\\t{%z0}, %A1\";
     case 8: /* Memory from S register.  */
-      return \"vst1.16\\t{%z1}, %A0\";
+      if (TARGET_HAVE_MVE)
+	return \"vstr.16\\t%1, %A0\";
+      else
+	return \"vst1.16\\t{%z1}, %A0\";
     case 9: /* ARM register from constant.  */
       {
 	long bits;
@@ -593,7 +619,7 @@ 
 (define_insn "*thumb2_movsf_vfp"
   [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t, t  ,Uv,r ,m,t,r")
 	(match_operand:SF 1 "hard_sf_operand"	   " ?r,t,Dv,UvHa,t, mHa,r,t,r"))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT
+  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)
    && (   s_register_operand (operands[0], SFmode)
        || s_register_operand (operands[1], SFmode))"
   "*
@@ -682,7 +708,7 @@ 
 (define_insn "*thumb2_movdf_vfp"
   [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w  ,Uv,r ,m,w,r")
 	(match_operand:DF 1 "hard_df_operand"		   " ?r,w,Dy,G,UvHa,w, mHa,r, w,r"))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT
+  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)
    && (   register_operand (operands[0], DFmode)
        || register_operand (operands[1], DFmode))"
   "*
@@ -760,7 +786,7 @@ 
 	    [(match_operand 4 "cc_register" "") (const_int 0)])
 	  (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t")
 	  (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT && !arm_restrict_it"
+  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE) && !arm_restrict_it"
   "@
    it\\t%D3\;vmov%D3.f32\\t%0, %2
    it\\t%d3\;vmov%d3.f32\\t%0, %1
@@ -806,7 +832,8 @@ 
 	    [(match_operand 4 "cc_register" "") (const_int 0)])
 	  (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w")
 	  (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && !arm_restrict_it"
+  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE) && TARGET_VFP_DOUBLE
+   && !arm_restrict_it"
   "@
    it\\t%D3\;vmov%D3.f64\\t%P0, %P2
    it\\t%d3\;vmov%d3.f64\\t%P0, %P1
@@ -1982,7 +2009,7 @@ 
     [(set (match_operand:BLK 0 "memory_operand" "=m")
 	  (unspec:BLK [(match_operand:DF 1 "vfp_register_operand" "")]
 		      UNSPEC_PUSH_MULT))])]
-  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "TARGET_32BIT && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)"
   "* return vfp_output_vstmd (operands);"
   [(set_attr "type" "f_stored")]
 )
@@ -2070,16 +2097,18 @@ 
 
 ;; Write Floating-point Status and Control Register.
 (define_insn "set_fpscr"
-  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] VUNSPEC_SET_FPSCR)]
-  "TARGET_HARD_FLOAT"
+  [(set (reg:SI VFPCC_REGNUM)
+	(unspec_volatile:SI
+	 [(match_operand:SI 0 "register_operand" "r")] VUNSPEC_SET_FPSCR))]
+  "TARGET_HARD_FLOAT || TARGET_HAVE_MVE"
   "mcr\\tp10, 7, %0, cr1, cr0, 0\\t @SET_FPSCR"
   [(set_attr "type" "mrs")])
 
 ;; Read Floating-point Status and Control Register.
 (define_insn "get_fpscr"
   [(set (match_operand:SI 0 "register_operand" "=r")
-        (unspec_volatile:SI [(const_int 0)] VUNSPEC_GET_FPSCR))]
-  "TARGET_HARD_FLOAT"
+	(unspec:SI [(reg:SI VFPCC_REGNUM)] UNSPEC_GET_FPSCR))]
+  "TARGET_HARD_FLOAT || TARGET_HAVE_MVE"
   "mrc\\tp10, 7, %0, cr1, cr0, 0\\t @GET_FPSCR"
   [(set_attr "type" "mrs")])