arm: Enable no-writeback vldr.16/vstr.16.

Message ID 0E8CD82A-FA40-400B-93C2-BAA8ACD5B812@arm.com
State Superseded
Headers show
Series
  • arm: Enable no-writeback vldr.16/vstr.16.
Related show

Commit Message

Joe Ramsay July 27, 2020, 2:08 p.m.
Hi,

There was previously no way to specify that a register operand cannot
have any writeback modifiers, and as a result the argument to vldr.16
and vstr.16 could be erroneously output with post-increment. This
change adds an operand specifier which forbids all writeback, and
selects it in the relevant case for vldr.16 and vstr.16

Bootstrapped on arm-linux, gcc and CMSIS-DSP testsuites are clean.
Is this patch OK for trunk? If yes, please commit on my behalf as I don't
have commit rights.

Thanks,
Joe

gcc/ChangeLog:

2020-05-20  Joe Ramsay  <joe.ramsay@arm.com>

        * config/arm/arm-protos.h (arm_coproc_mem_operand_no_writeback): Declare prototype.
        (arm_mve_mode_and_operands_type_check): Declare prototype.
        * config/arm/arm.c (arm_coproc_mem_operand): Refactor to use _arm_coproc_mem_operand.
        (arm_coproc_mem_operand_wb): New function to cover full, limited and no writeback.
        (arm_coproc_mem_operand_no_writeback): New constraint for memory operand with no writeback.
        (arm_print_operand): Implement 'j' specifier for memory operand that does not support
        writeback.
        (arm_mve_mode_and_operands_type_check): New constraint check for MVE memory operands.
        * config/arm/constraints.md: Add Uj constraint for VFP vldr.16 and vstr.16.
        * config/arm/vfp.md (*mov_load_vfp_hf16): New pattern for vldr.16.
        (*mov_store_vfp_hf16): New pattern for vstr.16.
        (*mov<mode>_vfp_<mode>16): Remove MVE moves.

gcc/testsuite/ChangeLog:

2020-05-20  Joe Ramsay  <joe.ramsay@arm.com>

        * gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c: New test.

---
 gcc/config/arm/arm-protos.h                        |   3 +
 gcc/config/arm/arm.c                               | 100 ++++++++++++++++++---
 gcc/config/arm/constraints.md                      |   7 ++
 gcc/config/arm/vfp.md                              |  28 ++++--
 .../arm/mve/intrinsics/mve-vldstr16-no-writeback.c |  17 ++++
 5 files changed, 135 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c

-- 
2.7.4

Comments

Kyrylo Tkachov July 28, 2020, 9:16 a.m. | #1
Hi Joe,

> -----Original Message-----

> From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of Joe

> Ramsay

> Sent: 27 July 2020 15:08

> To: Jakub Jelinek via Gcc-patches <gcc-patches@gcc.gnu.org>

> Subject: [PATCH] [PATCH][GCC] arm: Enable no-writeback vldr.16/vstr.16.

> 

> Hi,

> 

> There was previously no way to specify that a register operand cannot

> have any writeback modifiers, and as a result the argument to vldr.16

> and vstr.16 could be erroneously output with post-increment. This

> change adds an operand specifier which forbids all writeback, and

> selects it in the relevant case for vldr.16 and vstr.16

> 

> Bootstrapped on arm-linux, gcc and CMSIS-DSP testsuites are clean.

> Is this patch OK for trunk? If yes, please commit on my behalf as I don't

> have commit rights.

> 

> Thanks,

> Joe

> 

> gcc/ChangeLog:

> 

> 2020-05-20  Joe Ramsay  <joe.ramsay@arm.com>

> 

>         * config/arm/arm-protos.h (arm_coproc_mem_operand_no_writeback):

> Declare prototype.

>         (arm_mve_mode_and_operands_type_check): Declare prototype.

>         * config/arm/arm.c (arm_coproc_mem_operand): Refactor to use

> _arm_coproc_mem_operand.

>         (arm_coproc_mem_operand_wb): New function to cover full, limited

> and no writeback.

>         (arm_coproc_mem_operand_no_writeback): New constraint for

> memory operand with no writeback.

>         (arm_print_operand): Implement 'j' specifier for memory operand that

> does not support

>         writeback.

>         (arm_mve_mode_and_operands_type_check): New constraint check for

> MVE memory operands.

>         * config/arm/constraints.md: Add Uj constraint for VFP vldr.16 and

> vstr.16.

>         * config/arm/vfp.md (*mov_load_vfp_hf16): New pattern for vldr.16.

>         (*mov_store_vfp_hf16): New pattern for vstr.16.

>         (*mov<mode>_vfp_<mode>16): Remove MVE moves.

> 

> gcc/testsuite/ChangeLog:

> 

> 2020-05-20  Joe Ramsay  <joe.ramsay@arm.com>

> 

>         * gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c: New test.

> 

> ---

>  gcc/config/arm/arm-protos.h                        |   3 +

>  gcc/config/arm/arm.c                               | 100 ++++++++++++++++++---

>  gcc/config/arm/constraints.md                      |   7 ++

>  gcc/config/arm/vfp.md                              |  28 ++++--

>  .../arm/mve/intrinsics/mve-vldstr16-no-writeback.c |  17 ++++

>  5 files changed, 135 insertions(+), 20 deletions(-)

>  create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-

> vldstr16-no-writeback.c

> 

> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h

> index 33d162c..e811da4 100644

> --- a/gcc/config/arm/arm-protos.h

> +++ b/gcc/config/arm/arm-protos.h

> @@ -115,8 +115,11 @@ extern enum reg_class

> coproc_secondary_reload_class (machine_mode, rtx,

>  extern bool arm_tls_referenced_p (rtx);

> 

>  extern int arm_coproc_mem_operand (rtx, bool);

> +extern int arm_coproc_mem_operand_no_writeback (rtx);

> +extern int arm_coproc_mem_operand_wb (rtx, int);

>  extern int neon_vector_mem_operand (rtx, int, bool);

>  extern int mve_vector_mem_operand (machine_mode, rtx, bool);

> +bool arm_mve_mode_and_operands_type_check (machine_mode, rtx, rtx);

>  extern int neon_struct_mem_operand (rtx);

> 

>  extern rtx *neon_vcmla_lane_prepare_operands (rtx *);

> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c

> index 6b7ca82..ed080d2 100644

> --- a/gcc/config/arm/arm.c

> +++ b/gcc/config/arm/arm.c

> @@ -13217,13 +13217,14 @@ neon_element_bits (machine_mode mode)

>  /* Predicates for `match_operand' and `match_operator'.  */

> 

>  /* Return TRUE if OP is a valid coprocessor memory address pattern.

> -   WB is true if full writeback address modes are allowed and is false

> +   WB level is 2 if full writeback address modes are allowed, 1

>     if limited writeback address modes (POST_INC and PRE_DEC) are

> -   allowed.  */

> +   allowed and 0 if no writeback at all is supported.  */

> 

>  int

> -arm_coproc_mem_operand (rtx op, bool wb)

> +arm_coproc_mem_operand_wb (rtx op, int wb_level)

>  {

> +  gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);

>    rtx ind;

> 

>    /* Reject eliminable registers.  */

> @@ -13256,16 +13257,18 @@ arm_coproc_mem_operand (rtx op, bool wb)

> 

>    /* Autoincremment addressing modes.  POST_INC and PRE_DEC are

>       acceptable in any case (subject to verification by

> -     arm_address_register_rtx_p).  We need WB to be true to accept

> +     arm_address_register_rtx_p).  We need full writeback to accept

> +     PRE_INC and POST_DEC, and at least restricted writeback for

>       PRE_INC and POST_DEC.  */

> -  if (GET_CODE (ind) == POST_INC

> -      || GET_CODE (ind) == PRE_DEC

> -      || (wb

> -	  && (GET_CODE (ind) == PRE_INC

> -	      || GET_CODE (ind) == POST_DEC)))

> +  if (wb_level > 0

> +      && (GET_CODE (ind) == POST_INC

> +	  || GET_CODE (ind) == PRE_DEC

> +	  || (wb_level > 1

> +	      && (GET_CODE (ind) == PRE_INC

> +		  || GET_CODE (ind) == POST_DEC))))

>      return arm_address_register_rtx_p (XEXP (ind, 0), 0);

> 

> -  if (wb

> +  if (wb_level > 1

>        && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) ==

> PRE_MODIFY)

>        && arm_address_register_rtx_p (XEXP (ind, 0), 0)

>        && GET_CODE (XEXP (ind, 1)) == PLUS

> @@ -13287,6 +13290,25 @@ arm_coproc_mem_operand (rtx op, bool wb)

>    return FALSE;

>  }

> 

> +/* Return TRUE if OP is a valid coprocessor memory address pattern.

> +   WB is true if full writeback address modes are allowed and is false

> +   if limited writeback address modes (POST_INC and PRE_DEC) are

> +   allowed.  */

> +

> +int arm_coproc_mem_operand (rtx op, bool wb)

> +{

> +  return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);

> +}

> +

> +/* Return TRUE if OP is a valid coprocessor memory address pattern in a

> +   context in which no writeback address modes are allowed.  */

> +

> +int

> +arm_coproc_mem_operand_no_writeback (rtx op)

> +{

> +  return arm_coproc_mem_operand_wb (op, 0);

> +}

> +

>  /* This function returns TRUE on matching mode and op.

>  1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.

>  2. For other modes, check for [Rn], return TRUE for Rn < R15 (expect R13).

> */

> @@ -23549,8 +23571,8 @@ arm_print_condition (FILE *stream)

> 

>  /* Globally reserved letters: acln

>     Puncutation letters currently used: @_|?().!#

> -   Lower case letters currently used: bcdefhimpqtvwxyz

> -   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU

> +   Lower case letters currently used: bcdefhijmpqtvwxyz

> +   Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTU

>     Letters previously used, but now deprecated/obsolete: sVWXYZ.

> 

>     Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

> @@ -24163,6 +24185,41 @@ arm_print_operand (FILE *stream, rtx x, int

> code)

>        }

>        return;

> 

> +    /* To print memory operand in the case that the instruction does

> +       not support writeback.  i.e. the output will look like either of

> +       the following:

> +       1. [Rn]

> +       2. [Rn, #+/-<imm>] */


I'm unsure why we need a new output modifier here ('j').
Shouldn't the constraints/predicates on the patterns prevent the formation of writeback addresses that should be handled by the normal address printing code?

Thanks,
Kyrill

> +    case 'j':

> +      {

> +	gcc_assert (MEM_P (x));

> +	rtx addr = XEXP (x, 0);

> +

> +	switch (GET_CODE (addr))

> +	  {

> +	  case REG:

> +	    asm_fprintf (stream, "[%r]", REGNO (addr));

> +	    return;

> +

> +	  case PLUS:

> +	    {

> +	      rtx base = XEXP (addr, 0);

> +	      rtx index = XEXP (addr, 1);

> +

> +	      if (!REG_P (base) || !CONST_INT_P (index))

> +		gcc_unreachable ();

> +

> +	      HOST_WIDE_INT offset = INTVAL (index);

> +	      asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);

> +	      return;

> +	    }

> +

> +	  default:

> +	    gcc_unreachable ();

> +	  }

> +	return;

> +      }

> +

>      case 'C':

>        {

>  	rtx addr;

> @@ -33496,4 +33553,23 @@ arm_mode_base_reg_class (machine_mode

> mode)

> 

>  struct gcc_target targetm = TARGET_INITIALIZER;

> 

> +bool

> +arm_mve_mode_and_operands_type_check (machine_mode mode, rtx op0,

> rtx op1)

> +{

> +  if (!(TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT))

> +    {

> +       return true;

> +    }

> +  else if (mode == E_BFmode)

> +    {

> +      return false;

> +    }

> +  else if ((s_register_operand (op0, mode) && MEM_P (op1))

> +	   || (s_register_operand (op1, mode) && MEM_P (op0)))

> +    {

> +      return false;

> +    }

> +  return true;

> +}

> +

>  #include "gt-arm.h"

> diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md

> index 011badc..ff229aa 100644

> --- a/gcc/config/arm/constraints.md

> +++ b/gcc/config/arm/constraints.md

> @@ -452,6 +452,13 @@

>   (and (match_code "mem")

>        (match_test "TARGET_32BIT && arm_coproc_mem_operand (op,

> FALSE)")))

> 

> +(define_memory_constraint "Uj"

> + "@internal

> +  In ARM/Thumb-2 state an VFP load/store address which does not support

> +  writeback at all (eg vldr.16)."

> + (and (match_code "mem")

> +      (match_test "TARGET_32BIT &&

> arm_coproc_mem_operand_no_writeback (op)")))

> +

>  (define_memory_constraint "Uy"

>   "@internal

>    In ARM/Thumb-2 state a valid iWMMX load/store address."

> diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md

> index 3470679..13174bb 100644

> --- a/gcc/config/arm/vfp.md

> +++ b/gcc/config/arm/vfp.md

> @@ -387,6 +387,22 @@

>     (set_attr "arch"           "t2,any,any,any,a,t2,any,any,any,any,any,any")]

>  )

> 

> +(define_insn "*mov_load_vfp_hf16"

> +  [(set (match_operand:HF 0 "s_register_operand" "=t")

> +	(match_operand:HF 1 "memory_operand" "Uj"))]

> +  "TARGET_HAVE_MVE_FLOAT"

> +  "vldr.16\\t%0, %j1"

> +  [(set_attr "length" "4")]

> +)

> +

> +(define_insn "*mov_store_vfp_hf16"

> +  [(set (match_operand:HF 0 "memory_operand" "=Uj")

> +	(match_operand:HF 1 "s_register_operand"   "t"))]

> +  "TARGET_HAVE_MVE_FLOAT"

> +  "vstr.16\\t%1, %j0"

> +  [(set_attr "length" "4")]

> +)

> +

>  ;; HFmode and BFmode moves

> 

>  (define_insn "*mov<mode>_vfp_<mode>16"

> @@ -396,6 +412,8 @@

>  			  "  m,r,t,r,r,t,Dv,Um,t, F"))]

>    "TARGET_32BIT

>     && TARGET_VFP_FP16INST

> +   && arm_mve_mode_and_operands_type_check (<MODE>mode,

> operands[0],

> +					    operands[1])

>     && (s_register_operand (operands[0], <MODE>mode)

>         || s_register_operand (operands[1], <MODE>mode))"

>   {

> @@ -414,15 +432,9 @@

>      case 6: /* S register from immediate.  */

>        return \"vmov.f16\\t%0, %1\t%@ __<fporbf>\";

>      case 7: /* S register from memory.  */

> -      if (TARGET_HAVE_MVE)

> -	return \"vldr.16\\t%0, %A1\";

> -      else

> -	return \"vld1.16\\t{%z0}, %A1\";

> +      return \"vld1.16\\t{%z0}, %A1\";

>      case 8: /* Memory from S register.  */

> -      if (TARGET_HAVE_MVE)

> -	return \"vstr.16\\t%1, %A0\";

> -      else

> -	return \"vst1.16\\t{%z1}, %A0\";

> +      return \"vst1.16\\t{%z1}, %A0\";

>      case 9: /* ARM register from constant.  */

>        {

>  	long bits;

> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-

> writeback.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-

> writeback.c

> new file mode 100644

> index 0000000..0a69ace

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-

> writeback.c

> @@ -0,0 +1,17 @@

> +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */

> +/* { dg-add-options arm_v8_1m_mve_fp } */

> +/* { dg-additional-options "-O2" } */

> +

> +void

> +fn1 (__fp16 *pSrc)

> +{

> +  __fp16 high;

> +  __fp16 *pDst = 0;

> +  unsigned i;

> +  for (i = 0;; i++)

> +    if (pSrc[i])

> +      pDst[i] = high;

> +}

> +

> +/* { dg-final { scan-assembler {vldr\.16\ts[0-9]+, \[r[0-9]+\]\n} } } */

> +/* { dg-final { scan-assembler {vstr\.16\ts[0-9]+, \[r[0-9]+\]\n} } } */

> --

> 2.7.4

>
Joe Ramsay July 28, 2020, 10:23 a.m. | #2
Thanks for the feedback Kyrill.

On 28/07/2020, 10:16, "Kyrylo Tkachov" <Kyrylo.Tkachov@arm.com> wrote:

    Hi Joe,

    > -----Original Message-----

    > From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of Joe

    > Ramsay

    > Sent: 27 July 2020 15:08

    > To: Jakub Jelinek via Gcc-patches <gcc-patches@gcc.gnu.org>

    > Subject: [PATCH] [PATCH][GCC] arm: Enable no-writeback vldr.16/vstr.16.

    > 

    > Hi,

    > 

    > There was previously no way to specify that a register operand cannot

    > have any writeback modifiers, and as a result the argument to vldr.16

    > and vstr.16 could be erroneously output with post-increment. This

    > change adds an operand specifier which forbids all writeback, and

    > selects it in the relevant case for vldr.16 and vstr.16

    > 

    > Bootstrapped on arm-linux, gcc and CMSIS-DSP testsuites are clean.

    > Is this patch OK for trunk? If yes, please commit on my behalf as I don't

    > have commit rights.

    > 

    > Thanks,

    > Joe

    > 

    > gcc/ChangeLog:

    > 

    > 2020-05-20  Joe Ramsay  <joe.ramsay@arm.com>

    > 

    >         * config/arm/arm-protos.h (arm_coproc_mem_operand_no_writeback):

    > Declare prototype.

    >         (arm_mve_mode_and_operands_type_check): Declare prototype.

    >         * config/arm/arm.c (arm_coproc_mem_operand): Refactor to use

    > _arm_coproc_mem_operand.

    >         (arm_coproc_mem_operand_wb): New function to cover full, limited

    > and no writeback.

    >         (arm_coproc_mem_operand_no_writeback): New constraint for

    > memory operand with no writeback.

    >         (arm_print_operand): Implement 'j' specifier for memory operand that

    > does not support

    >         writeback.

    >         (arm_mve_mode_and_operands_type_check): New constraint check for

    > MVE memory operands.

    >         * config/arm/constraints.md: Add Uj constraint for VFP vldr.16 and

    > vstr.16.

    >         * config/arm/vfp.md (*mov_load_vfp_hf16): New pattern for vldr.16.

    >         (*mov_store_vfp_hf16): New pattern for vstr.16.

    >         (*mov<mode>_vfp_<mode>16): Remove MVE moves.

    > 

    > gcc/testsuite/ChangeLog:

    > 

    > 2020-05-20  Joe Ramsay  <joe.ramsay@arm.com>

    > 

    >         * gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c: New test.

    > 

    > ---

    >  gcc/config/arm/arm-protos.h                        |   3 +

    >  gcc/config/arm/arm.c                               | 100 ++++++++++++++++++---

    >  gcc/config/arm/constraints.md                      |   7 ++

    >  gcc/config/arm/vfp.md                              |  28 ++++--

    >  .../arm/mve/intrinsics/mve-vldstr16-no-writeback.c |  17 ++++

    >  5 files changed, 135 insertions(+), 20 deletions(-)

    >  create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-

    > vldstr16-no-writeback.c

    > 

    > diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h

    > index 33d162c..e811da4 100644

    > --- a/gcc/config/arm/arm-protos.h

    > +++ b/gcc/config/arm/arm-protos.h

    > @@ -115,8 +115,11 @@ extern enum reg_class

    > coproc_secondary_reload_class (machine_mode, rtx,

    >  extern bool arm_tls_referenced_p (rtx);

    > 

    >  extern int arm_coproc_mem_operand (rtx, bool);

    > +extern int arm_coproc_mem_operand_no_writeback (rtx);

    > +extern int arm_coproc_mem_operand_wb (rtx, int);

    >  extern int neon_vector_mem_operand (rtx, int, bool);

    >  extern int mve_vector_mem_operand (machine_mode, rtx, bool);

    > +bool arm_mve_mode_and_operands_type_check (machine_mode, rtx, rtx);

    >  extern int neon_struct_mem_operand (rtx);

    > 

    >  extern rtx *neon_vcmla_lane_prepare_operands (rtx *);

    > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c

    > index 6b7ca82..ed080d2 100644

    > --- a/gcc/config/arm/arm.c

    > +++ b/gcc/config/arm/arm.c

    > @@ -13217,13 +13217,14 @@ neon_element_bits (machine_mode mode)

    >  /* Predicates for `match_operand' and `match_operator'.  */

    > 

    >  /* Return TRUE if OP is a valid coprocessor memory address pattern.

    > -   WB is true if full writeback address modes are allowed and is false

    > +   WB level is 2 if full writeback address modes are allowed, 1

    >     if limited writeback address modes (POST_INC and PRE_DEC) are

    > -   allowed.  */

    > +   allowed and 0 if no writeback at all is supported.  */

    > 

    >  int

    > -arm_coproc_mem_operand (rtx op, bool wb)

    > +arm_coproc_mem_operand_wb (rtx op, int wb_level)

    >  {

    > +  gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);

    >    rtx ind;

    > 

    >    /* Reject eliminable registers.  */

    > @@ -13256,16 +13257,18 @@ arm_coproc_mem_operand (rtx op, bool wb)

    > 

    >    /* Autoincremment addressing modes.  POST_INC and PRE_DEC are

    >       acceptable in any case (subject to verification by

    > -     arm_address_register_rtx_p).  We need WB to be true to accept

    > +     arm_address_register_rtx_p).  We need full writeback to accept

    > +     PRE_INC and POST_DEC, and at least restricted writeback for

    >       PRE_INC and POST_DEC.  */

    > -  if (GET_CODE (ind) == POST_INC

    > -      || GET_CODE (ind) == PRE_DEC

    > -      || (wb

    > -	  && (GET_CODE (ind) == PRE_INC

    > -	      || GET_CODE (ind) == POST_DEC)))

    > +  if (wb_level > 0

    > +      && (GET_CODE (ind) == POST_INC

    > +	  || GET_CODE (ind) == PRE_DEC

    > +	  || (wb_level > 1

    > +	      && (GET_CODE (ind) == PRE_INC

    > +		  || GET_CODE (ind) == POST_DEC))))

    >      return arm_address_register_rtx_p (XEXP (ind, 0), 0);

    > 

    > -  if (wb

    > +  if (wb_level > 1

    >        && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) ==

    > PRE_MODIFY)

    >        && arm_address_register_rtx_p (XEXP (ind, 0), 0)

    >        && GET_CODE (XEXP (ind, 1)) == PLUS

    > @@ -13287,6 +13290,25 @@ arm_coproc_mem_operand (rtx op, bool wb)

    >    return FALSE;

    >  }

    > 

    > +/* Return TRUE if OP is a valid coprocessor memory address pattern.

    > +   WB is true if full writeback address modes are allowed and is false

    > +   if limited writeback address modes (POST_INC and PRE_DEC) are

    > +   allowed.  */

    > +

    > +int arm_coproc_mem_operand (rtx op, bool wb)

    > +{

    > +  return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);

    > +}

    > +

    > +/* Return TRUE if OP is a valid coprocessor memory address pattern in a

    > +   context in which no writeback address modes are allowed.  */

    > +

    > +int

    > +arm_coproc_mem_operand_no_writeback (rtx op)

    > +{

    > +  return arm_coproc_mem_operand_wb (op, 0);

    > +}

    > +

    >  /* This function returns TRUE on matching mode and op.

    >  1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.

    >  2. For other modes, check for [Rn], return TRUE for Rn < R15 (expect R13).

    > */

    > @@ -23549,8 +23571,8 @@ arm_print_condition (FILE *stream)

    > 

    >  /* Globally reserved letters: acln

    >     Puncutation letters currently used: @_|?().!#

    > -   Lower case letters currently used: bcdefhimpqtvwxyz

    > -   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU

    > +   Lower case letters currently used: bcdefhijmpqtvwxyz

    > +   Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTU

    >     Letters previously used, but now deprecated/obsolete: sVWXYZ.

    > 

    >     Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

    > @@ -24163,6 +24185,41 @@ arm_print_operand (FILE *stream, rtx x, int

    > code)

    >        }

    >        return;

    > 

    > +    /* To print memory operand in the case that the instruction does

    > +       not support writeback.  i.e. the output will look like either of

    > +       the following:

    > +       1. [Rn]

    > +       2. [Rn, #+/-<imm>] */


    I'm unsure why we need a new output modifier here ('j').
    Shouldn't the constraints/predicates on the patterns prevent the formation of writeback addresses that should be handled by the normal address printing code?

    Thanks,
    Kyrill

I don't think the existing specifiers are sufficient for this address mode. Previously we were using A specifier here, which has no provision for const int offset. We also considered E specifier, but E only supports offset with writeback (not without), so as far as I can tell we need a new specifier to handle the new addressing mode.

    > +    case 'j':

    > +      {

    > +	gcc_assert (MEM_P (x));

    > +	rtx addr = XEXP (x, 0);

    > +

    > +	switch (GET_CODE (addr))

    > +	  {

    > +	  case REG:

    > +	    asm_fprintf (stream, "[%r]", REGNO (addr));

    > +	    return;

    > +

    > +	  case PLUS:

    > +	    {

    > +	      rtx base = XEXP (addr, 0);

    > +	      rtx index = XEXP (addr, 1);

    > +

    > +	      if (!REG_P (base) || !CONST_INT_P (index))

    > +		gcc_unreachable ();

    > +

    > +	      HOST_WIDE_INT offset = INTVAL (index);

    > +	      asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);

    > +	      return;

    > +	    }

    > +

    > +	  default:

    > +	    gcc_unreachable ();

    > +	  }

    > +	return;

    > +      }

    > +

    >      case 'C':

    >        {

    >  	rtx addr;

    > @@ -33496,4 +33553,23 @@ arm_mode_base_reg_class (machine_mode

    > mode)

    > 

    >  struct gcc_target targetm = TARGET_INITIALIZER;

    > 

    > +bool

    > +arm_mve_mode_and_operands_type_check (machine_mode mode, rtx op0,

    > rtx op1)

    > +{

    > +  if (!(TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT))

    > +    {

    > +       return true;

    > +    }

    > +  else if (mode == E_BFmode)

    > +    {

    > +      return false;

    > +    }

    > +  else if ((s_register_operand (op0, mode) && MEM_P (op1))

    > +	   || (s_register_operand (op1, mode) && MEM_P (op0)))

    > +    {

    > +      return false;

    > +    }

    > +  return true;

    > +}

    > +

    >  #include "gt-arm.h"

    > diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md

    > index 011badc..ff229aa 100644

    > --- a/gcc/config/arm/constraints.md

    > +++ b/gcc/config/arm/constraints.md

    > @@ -452,6 +452,13 @@

    >   (and (match_code "mem")

    >        (match_test "TARGET_32BIT && arm_coproc_mem_operand (op,

    > FALSE)")))

    > 

    > +(define_memory_constraint "Uj"

    > + "@internal

    > +  In ARM/Thumb-2 state an VFP load/store address which does not support

    > +  writeback at all (eg vldr.16)."

    > + (and (match_code "mem")

    > +      (match_test "TARGET_32BIT &&

    > arm_coproc_mem_operand_no_writeback (op)")))

    > +

    >  (define_memory_constraint "Uy"

    >   "@internal

    >    In ARM/Thumb-2 state a valid iWMMX load/store address."

    > diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md

    > index 3470679..13174bb 100644

    > --- a/gcc/config/arm/vfp.md

    > +++ b/gcc/config/arm/vfp.md

    > @@ -387,6 +387,22 @@

    >     (set_attr "arch"           "t2,any,any,any,a,t2,any,any,any,any,any,any")]

    >  )

    > 

    > +(define_insn "*mov_load_vfp_hf16"

    > +  [(set (match_operand:HF 0 "s_register_operand" "=t")

    > +	(match_operand:HF 1 "memory_operand" "Uj"))]

    > +  "TARGET_HAVE_MVE_FLOAT"

    > +  "vldr.16\\t%0, %j1"

    > +  [(set_attr "length" "4")]

    > +)

    > +

    > +(define_insn "*mov_store_vfp_hf16"

    > +  [(set (match_operand:HF 0 "memory_operand" "=Uj")

    > +	(match_operand:HF 1 "s_register_operand"   "t"))]

    > +  "TARGET_HAVE_MVE_FLOAT"

    > +  "vstr.16\\t%1, %j0"

    > +  [(set_attr "length" "4")]

    > +)

    > +

    >  ;; HFmode and BFmode moves

    > 

    >  (define_insn "*mov<mode>_vfp_<mode>16"

    > @@ -396,6 +412,8 @@

    >  			  "  m,r,t,r,r,t,Dv,Um,t, F"))]

    >    "TARGET_32BIT

    >     && TARGET_VFP_FP16INST

    > +   && arm_mve_mode_and_operands_type_check (<MODE>mode,

    > operands[0],

    > +					    operands[1])

    >     && (s_register_operand (operands[0], <MODE>mode)

    >         || s_register_operand (operands[1], <MODE>mode))"

    >   {

    > @@ -414,15 +432,9 @@

    >      case 6: /* S register from immediate.  */

    >        return \"vmov.f16\\t%0, %1\t%@ __<fporbf>\";

    >      case 7: /* S register from memory.  */

    > -      if (TARGET_HAVE_MVE)

    > -	return \"vldr.16\\t%0, %A1\";

    > -      else

    > -	return \"vld1.16\\t{%z0}, %A1\";

    > +      return \"vld1.16\\t{%z0}, %A1\";

    >      case 8: /* Memory from S register.  */

    > -      if (TARGET_HAVE_MVE)

    > -	return \"vstr.16\\t%1, %A0\";

    > -      else

    > -	return \"vst1.16\\t{%z1}, %A0\";

    > +      return \"vst1.16\\t{%z1}, %A0\";

    >      case 9: /* ARM register from constant.  */

    >        {

    >  	long bits;

    > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-

    > writeback.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-

    > writeback.c

    > new file mode 100644

    > index 0000000..0a69ace

    > --- /dev/null

    > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-

    > writeback.c

    > @@ -0,0 +1,17 @@

    > +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */

    > +/* { dg-add-options arm_v8_1m_mve_fp } */

    > +/* { dg-additional-options "-O2" } */

    > +

    > +void

    > +fn1 (__fp16 *pSrc)

    > +{

    > +  __fp16 high;

    > +  __fp16 *pDst = 0;

    > +  unsigned i;

    > +  for (i = 0;; i++)

    > +    if (pSrc[i])

    > +      pDst[i] = high;

    > +}

    > +

    > +/* { dg-final { scan-assembler {vldr\.16\ts[0-9]+, \[r[0-9]+\]\n} } } */

    > +/* { dg-final { scan-assembler {vstr\.16\ts[0-9]+, \[r[0-9]+\]\n} } } */

    > --

    > 2.7.4

    >
Kyrylo Tkachov July 28, 2020, 10:53 a.m. | #3
> -----Original Message-----

> From: Joe Ramsay <Joe.Ramsay@arm.com>

> Sent: 28 July 2020 11:24

> To: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; Jakub Jelinek via Gcc-

> patches <gcc-patches@gcc.gnu.org>

> Subject: Re: [PATCH] [PATCH][GCC] arm: Enable no-writeback vldr.16/vstr.16.

> 

> Thanks for the feedback Kyrill.

> 

> On 28/07/2020, 10:16, "Kyrylo Tkachov" <Kyrylo.Tkachov@arm.com> wrote:

> 

>     Hi Joe,

> 

>     > -----Original Message-----

>     > From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of

> Joe

>     > Ramsay

>     > Sent: 27 July 2020 15:08

>     > To: Jakub Jelinek via Gcc-patches <gcc-patches@gcc.gnu.org>

>     > Subject: [PATCH] [PATCH][GCC] arm: Enable no-writeback vldr.16/vstr.16.

>     >

>     > Hi,

>     >

>     > There was previously no way to specify that a register operand cannot

>     > have any writeback modifiers, and as a result the argument to vldr.16

>     > and vstr.16 could be erroneously output with post-increment. This

>     > change adds an operand specifier which forbids all writeback, and

>     > selects it in the relevant case for vldr.16 and vstr.16

>     >

>     > Bootstrapped on arm-linux, gcc and CMSIS-DSP testsuites are clean.

>     > Is this patch OK for trunk? If yes, please commit on my behalf as I don't

>     > have commit rights.

>     >

>     > Thanks,

>     > Joe

>     >

>     > gcc/ChangeLog:

>     >

>     > 2020-05-20  Joe Ramsay  <joe.ramsay@arm.com>

>     >

>     >         * config/arm/arm-protos.h

> (arm_coproc_mem_operand_no_writeback):

>     > Declare prototype.

>     >         (arm_mve_mode_and_operands_type_check): Declare prototype.

>     >         * config/arm/arm.c (arm_coproc_mem_operand): Refactor to use

>     > _arm_coproc_mem_operand.

>     >         (arm_coproc_mem_operand_wb): New function to cover full,

> limited

>     > and no writeback.

>     >         (arm_coproc_mem_operand_no_writeback): New constraint for

>     > memory operand with no writeback.

>     >         (arm_print_operand): Implement 'j' specifier for memory operand

> that

>     > does not support

>     >         writeback.

>     >         (arm_mve_mode_and_operands_type_check): New constraint check

> for

>     > MVE memory operands.

>     >         * config/arm/constraints.md: Add Uj constraint for VFP vldr.16 and

>     > vstr.16.

>     >         * config/arm/vfp.md (*mov_load_vfp_hf16): New pattern for

> vldr.16.

>     >         (*mov_store_vfp_hf16): New pattern for vstr.16.

>     >         (*mov<mode>_vfp_<mode>16): Remove MVE moves.

>     >

>     > gcc/testsuite/ChangeLog:

>     >

>     > 2020-05-20  Joe Ramsay  <joe.ramsay@arm.com>

>     >

>     >         * gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c: New

> test.

>     >

>     > ---

>     >  gcc/config/arm/arm-protos.h                        |   3 +

>     >  gcc/config/arm/arm.c                               | 100 ++++++++++++++++++---

>     >  gcc/config/arm/constraints.md                      |   7 ++

>     >  gcc/config/arm/vfp.md                              |  28 ++++--

>     >  .../arm/mve/intrinsics/mve-vldstr16-no-writeback.c |  17 ++++

>     >  5 files changed, 135 insertions(+), 20 deletions(-)

>     >  create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-

>     > vldstr16-no-writeback.c

>     >

>     > diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-

> protos.h

>     > index 33d162c..e811da4 100644

>     > --- a/gcc/config/arm/arm-protos.h

>     > +++ b/gcc/config/arm/arm-protos.h

>     > @@ -115,8 +115,11 @@ extern enum reg_class

>     > coproc_secondary_reload_class (machine_mode, rtx,

>     >  extern bool arm_tls_referenced_p (rtx);

>     >

>     >  extern int arm_coproc_mem_operand (rtx, bool);

>     > +extern int arm_coproc_mem_operand_no_writeback (rtx);

>     > +extern int arm_coproc_mem_operand_wb (rtx, int);

>     >  extern int neon_vector_mem_operand (rtx, int, bool);

>     >  extern int mve_vector_mem_operand (machine_mode, rtx, bool);

>     > +bool arm_mve_mode_and_operands_type_check (machine_mode, rtx,

> rtx);

>     >  extern int neon_struct_mem_operand (rtx);

>     >

>     >  extern rtx *neon_vcmla_lane_prepare_operands (rtx *);

>     > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c

>     > index 6b7ca82..ed080d2 100644

>     > --- a/gcc/config/arm/arm.c

>     > +++ b/gcc/config/arm/arm.c

>     > @@ -13217,13 +13217,14 @@ neon_element_bits (machine_mode

> mode)

>     >  /* Predicates for `match_operand' and `match_operator'.  */

>     >

>     >  /* Return TRUE if OP is a valid coprocessor memory address pattern.

>     > -   WB is true if full writeback address modes are allowed and is false

>     > +   WB level is 2 if full writeback address modes are allowed, 1

>     >     if limited writeback address modes (POST_INC and PRE_DEC) are

>     > -   allowed.  */

>     > +   allowed and 0 if no writeback at all is supported.  */

>     >

>     >  int

>     > -arm_coproc_mem_operand (rtx op, bool wb)

>     > +arm_coproc_mem_operand_wb (rtx op, int wb_level)

>     >  {

>     > +  gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);

>     >    rtx ind;

>     >

>     >    /* Reject eliminable registers.  */

>     > @@ -13256,16 +13257,18 @@ arm_coproc_mem_operand (rtx op, bool

> wb)

>     >

>     >    /* Autoincremment addressing modes.  POST_INC and PRE_DEC are

>     >       acceptable in any case (subject to verification by

>     > -     arm_address_register_rtx_p).  We need WB to be true to accept

>     > +     arm_address_register_rtx_p).  We need full writeback to accept

>     > +     PRE_INC and POST_DEC, and at least restricted writeback for

>     >       PRE_INC and POST_DEC.  */

>     > -  if (GET_CODE (ind) == POST_INC

>     > -      || GET_CODE (ind) == PRE_DEC

>     > -      || (wb

>     > -  && (GET_CODE (ind) == PRE_INC

>     > -      || GET_CODE (ind) == POST_DEC)))

>     > +  if (wb_level > 0

>     > +      && (GET_CODE (ind) == POST_INC

>     > +  || GET_CODE (ind) == PRE_DEC

>     > +  || (wb_level > 1

>     > +      && (GET_CODE (ind) == PRE_INC

>     > +  || GET_CODE (ind) == POST_DEC))))

>     >      return arm_address_register_rtx_p (XEXP (ind, 0), 0);

>     >

>     > -  if (wb

>     > +  if (wb_level > 1

>     >        && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) ==

>     > PRE_MODIFY)

>     >        && arm_address_register_rtx_p (XEXP (ind, 0), 0)

>     >        && GET_CODE (XEXP (ind, 1)) == PLUS

>     > @@ -13287,6 +13290,25 @@ arm_coproc_mem_operand (rtx op, bool

> wb)

>     >    return FALSE;

>     >  }

>     >

>     > +/* Return TRUE if OP is a valid coprocessor memory address pattern.

>     > +   WB is true if full writeback address modes are allowed and is false

>     > +   if limited writeback address modes (POST_INC and PRE_DEC) are

>     > +   allowed.  */

>     > +

>     > +int arm_coproc_mem_operand (rtx op, bool wb)

>     > +{

>     > +  return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);

>     > +}

>     > +

>     > +/* Return TRUE if OP is a valid coprocessor memory address pattern in a

>     > +   context in which no writeback address modes are allowed.  */

>     > +

>     > +int

>     > +arm_coproc_mem_operand_no_writeback (rtx op)

>     > +{

>     > +  return arm_coproc_mem_operand_wb (op, 0);

>     > +}

>     > +

>     >  /* This function returns TRUE on matching mode and op.

>     >  1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.

>     >  2. For other modes, check for [Rn], return TRUE for Rn < R15 (expect

> R13).

>     > */

>     > @@ -23549,8 +23571,8 @@ arm_print_condition (FILE *stream)

>     >

>     >  /* Globally reserved letters: acln

>     >     Puncutation letters currently used: @_|?().!#

>     > -   Lower case letters currently used: bcdefhimpqtvwxyz

>     > -   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU

>     > +   Lower case letters currently used: bcdefhijmpqtvwxyz

>     > +   Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTU

>     >     Letters previously used, but now deprecated/obsolete: sVWXYZ.

>     >

>     >     Note that the global reservation for 'c' is only for

> CONSTANT_ADDRESS_P.

>     > @@ -24163,6 +24185,41 @@ arm_print_operand (FILE *stream, rtx x,

> int

>     > code)

>     >        }

>     >        return;

>     >

>     > +    /* To print memory operand in the case that the instruction does

>     > +       not support writeback.  i.e. the output will look like either of

>     > +       the following:

>     > +       1. [Rn]

>     > +       2. [Rn, #+/-<imm>] */

> 

>     I'm unsure why we need a new output modifier here ('j').

>     Shouldn't the constraints/predicates on the patterns prevent the formation

> of writeback addresses that should be handled by the normal address printing

> code?

> 

>     Thanks,

>     Kyrill

> 

> I don't think the existing specifiers are sufficient for this address mode.

> Previously we were using A specifier here, which has no provision for const

> int offset. We also considered E specifier, but E only supports offset with

> writeback (not without), so as far as I can tell we need a new specifier to

> handle the new addressing mode.


I see... how about we extend the 'E' specifier to handle this case rather than create a new one?
Thanks,
Kyrill

> 

>     > +    case 'j':

>     > +      {

>     > +gcc_assert (MEM_P (x));

>     > +rtx addr = XEXP (x, 0);

>     > +

>     > +switch (GET_CODE (addr))

>     > +  {

>     > +  case REG:

>     > +    asm_fprintf (stream, "[%r]", REGNO (addr));

>     > +    return;

>     > +

>     > +  case PLUS:

>     > +    {

>     > +      rtx base = XEXP (addr, 0);

>     > +      rtx index = XEXP (addr, 1);

>     > +

>     > +      if (!REG_P (base) || !CONST_INT_P (index))

>     > +gcc_unreachable ();

>     > +

>     > +      HOST_WIDE_INT offset = INTVAL (index);

>     > +      asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);

>     > +      return;

>     > +    }

>     > +

>     > +  default:

>     > +    gcc_unreachable ();

>     > +  }

>     > +return;

>     > +      }

>     > +

>     >      case 'C':

>     >        {

>     >  rtx addr;

>     > @@ -33496,4 +33553,23 @@ arm_mode_base_reg_class

> (machine_mode

>     > mode)

>     >

>     >  struct gcc_target targetm = TARGET_INITIALIZER;

>     >

>     > +bool

>     > +arm_mve_mode_and_operands_type_check (machine_mode mode, rtx

> op0,

>     > rtx op1)

>     > +{

>     > +  if (!(TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT))

>     > +    {

>     > +       return true;

>     > +    }

>     > +  else if (mode == E_BFmode)

>     > +    {

>     > +      return false;

>     > +    }

>     > +  else if ((s_register_operand (op0, mode) && MEM_P (op1))

>     > +   || (s_register_operand (op1, mode) && MEM_P (op0)))

>     > +    {

>     > +      return false;

>     > +    }

>     > +  return true;

>     > +}

>     > +

>     >  #include "gt-arm.h"

>     > diff --git a/gcc/config/arm/constraints.md

> b/gcc/config/arm/constraints.md

>     > index 011badc..ff229aa 100644

>     > --- a/gcc/config/arm/constraints.md

>     > +++ b/gcc/config/arm/constraints.md

>     > @@ -452,6 +452,13 @@

>     >   (and (match_code "mem")

>     >        (match_test "TARGET_32BIT && arm_coproc_mem_operand (op,

>     > FALSE)")))

>     >

>     > +(define_memory_constraint "Uj"

>     > + "@internal

>     > +  In ARM/Thumb-2 state an VFP load/store address which does not

> support

>     > +  writeback at all (eg vldr.16)."

>     > + (and (match_code "mem")

>     > +      (match_test "TARGET_32BIT &&

>     > arm_coproc_mem_operand_no_writeback (op)")))

>     > +

>     >  (define_memory_constraint "Uy"

>     >   "@internal

>     >    In ARM/Thumb-2 state a valid iWMMX load/store address."

>     > diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md

>     > index 3470679..13174bb 100644

>     > --- a/gcc/config/arm/vfp.md

>     > +++ b/gcc/config/arm/vfp.md

>     > @@ -387,6 +387,22 @@

>     >     (set_attr "arch"           "t2,any,any,any,a,t2,any,any,any,any,any,any")]

>     >  )

>     >

>     > +(define_insn "*mov_load_vfp_hf16"

>     > +  [(set (match_operand:HF 0 "s_register_operand" "=t")

>     > +(match_operand:HF 1 "memory_operand" "Uj"))]

>     > +  "TARGET_HAVE_MVE_FLOAT"

>     > +  "vldr.16\\t%0, %j1"

>     > +  [(set_attr "length" "4")]

>     > +)

>     > +

>     > +(define_insn "*mov_store_vfp_hf16"

>     > +  [(set (match_operand:HF 0 "memory_operand" "=Uj")

>     > +(match_operand:HF 1 "s_register_operand"   "t"))]

>     > +  "TARGET_HAVE_MVE_FLOAT"

>     > +  "vstr.16\\t%1, %j0"

>     > +  [(set_attr "length" "4")]

>     > +)

>     > +

>     >  ;; HFmode and BFmode moves

>     >

>     >  (define_insn "*mov<mode>_vfp_<mode>16"

>     > @@ -396,6 +412,8 @@

>     >    "  m,r,t,r,r,t,Dv,Um,t, F"))]

>     >    "TARGET_32BIT

>     >     && TARGET_VFP_FP16INST

>     > +   && arm_mve_mode_and_operands_type_check (<MODE>mode,

>     > operands[0],

>     > +    operands[1])

>     >     && (s_register_operand (operands[0], <MODE>mode)

>     >         || s_register_operand (operands[1], <MODE>mode))"

>     >   {

>     > @@ -414,15 +432,9 @@

>     >      case 6: /* S register from immediate.  */

>     >        return \"vmov.f16\\t%0, %1\t%@ __<fporbf>\";

>     >      case 7: /* S register from memory.  */

>     > -      if (TARGET_HAVE_MVE)

>     > -return \"vldr.16\\t%0, %A1\";

>     > -      else

>     > -return \"vld1.16\\t{%z0}, %A1\";

>     > +      return \"vld1.16\\t{%z0}, %A1\";

>     >      case 8: /* Memory from S register.  */

>     > -      if (TARGET_HAVE_MVE)

>     > -return \"vstr.16\\t%1, %A0\";

>     > -      else

>     > -return \"vst1.16\\t{%z1}, %A0\";

>     > +      return \"vst1.16\\t{%z1}, %A0\";

>     >      case 9: /* ARM register from constant.  */

>     >        {

>     >  long bits;

>     > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-

> no-

>     > writeback.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-

> vldstr16-no-

>     > writeback.c

>     > new file mode 100644

>     > index 0000000..0a69ace

>     > --- /dev/null

>     > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-

>     > writeback.c

>     > @@ -0,0 +1,17 @@

>     > +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */

>     > +/* { dg-add-options arm_v8_1m_mve_fp } */

>     > +/* { dg-additional-options "-O2" } */

>     > +

>     > +void

>     > +fn1 (__fp16 *pSrc)

>     > +{

>     > +  __fp16 high;

>     > +  __fp16 *pDst = 0;

>     > +  unsigned i;

>     > +  for (i = 0;; i++)

>     > +    if (pSrc[i])

>     > +      pDst[i] = high;

>     > +}

>     > +

>     > +/* { dg-final { scan-assembler {vldr\.16\ts[0-9]+, \[r[0-9]+\]\n} } } */

>     > +/* { dg-final { scan-assembler {vstr\.16\ts[0-9]+, \[r[0-9]+\]\n} } } */

>     > --

>     > 2.7.4

>     >

> 

>

Patch

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 33d162c..e811da4 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -115,8 +115,11 @@  extern enum reg_class coproc_secondary_reload_class (machine_mode, rtx,
 extern bool arm_tls_referenced_p (rtx);
 
 extern int arm_coproc_mem_operand (rtx, bool);
+extern int arm_coproc_mem_operand_no_writeback (rtx);
+extern int arm_coproc_mem_operand_wb (rtx, int);
 extern int neon_vector_mem_operand (rtx, int, bool);
 extern int mve_vector_mem_operand (machine_mode, rtx, bool);
+bool arm_mve_mode_and_operands_type_check (machine_mode, rtx, rtx);
 extern int neon_struct_mem_operand (rtx);
 
 extern rtx *neon_vcmla_lane_prepare_operands (rtx *);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 6b7ca82..ed080d2 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -13217,13 +13217,14 @@  neon_element_bits (machine_mode mode)
 /* Predicates for `match_operand' and `match_operator'.  */
 
 /* Return TRUE if OP is a valid coprocessor memory address pattern.
-   WB is true if full writeback address modes are allowed and is false
+   WB level is 2 if full writeback address modes are allowed, 1
    if limited writeback address modes (POST_INC and PRE_DEC) are
-   allowed.  */
+   allowed and 0 if no writeback at all is supported.  */
 
 int
-arm_coproc_mem_operand (rtx op, bool wb)
+arm_coproc_mem_operand_wb (rtx op, int wb_level)
 {
+  gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
   rtx ind;
 
   /* Reject eliminable registers.  */
@@ -13256,16 +13257,18 @@  arm_coproc_mem_operand (rtx op, bool wb)
 
   /* Autoincremment addressing modes.  POST_INC and PRE_DEC are
      acceptable in any case (subject to verification by
-     arm_address_register_rtx_p).  We need WB to be true to accept
+     arm_address_register_rtx_p).  We need full writeback to accept
+     PRE_INC and POST_DEC, and at least restricted writeback for
      PRE_INC and POST_DEC.  */
-  if (GET_CODE (ind) == POST_INC
-      || GET_CODE (ind) == PRE_DEC
-      || (wb
-	  && (GET_CODE (ind) == PRE_INC
-	      || GET_CODE (ind) == POST_DEC)))
+  if (wb_level > 0
+      && (GET_CODE (ind) == POST_INC
+	  || GET_CODE (ind) == PRE_DEC
+	  || (wb_level > 1
+	      && (GET_CODE (ind) == PRE_INC
+		  || GET_CODE (ind) == POST_DEC))))
     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
 
-  if (wb
+  if (wb_level > 1
       && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
       && arm_address_register_rtx_p (XEXP (ind, 0), 0)
       && GET_CODE (XEXP (ind, 1)) == PLUS
@@ -13287,6 +13290,25 @@  arm_coproc_mem_operand (rtx op, bool wb)
   return FALSE;
 }
 
+/* Return TRUE if OP is a valid coprocessor memory address pattern.
+   WB is true if full writeback address modes are allowed and is false
+   if limited writeback address modes (POST_INC and PRE_DEC) are
+   allowed.  */
+
+int arm_coproc_mem_operand (rtx op, bool wb)
+{
+  return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
+}
+
+/* Return TRUE if OP is a valid coprocessor memory address pattern in a
+   context in which no writeback address modes are allowed.  */
+
+int
+arm_coproc_mem_operand_no_writeback (rtx op)
+{
+  return arm_coproc_mem_operand_wb (op, 0);
+}
+
 /* This function returns TRUE on matching mode and op.
 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
 2. For other modes, check for [Rn], return TRUE for Rn < R15 (expect R13).  */
@@ -23549,8 +23571,8 @@  arm_print_condition (FILE *stream)
 
 /* Globally reserved letters: acln
    Puncutation letters currently used: @_|?().!#
-   Lower case letters currently used: bcdefhimpqtvwxyz
-   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
+   Lower case letters currently used: bcdefhijmpqtvwxyz
+   Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTU
    Letters previously used, but now deprecated/obsolete: sVWXYZ.
 
    Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
@@ -24163,6 +24185,41 @@  arm_print_operand (FILE *stream, rtx x, int code)
       }
       return;
 
+    /* To print memory operand in the case that the instruction does
+       not support writeback.  i.e. the output will look like either of
+       the following:
+       1. [Rn]
+       2. [Rn, #+/-<imm>] */
+    case 'j':
+      {
+	gcc_assert (MEM_P (x));
+	rtx addr = XEXP (x, 0);
+
+	switch (GET_CODE (addr))
+	  {
+	  case REG:
+	    asm_fprintf (stream, "[%r]", REGNO (addr));
+	    return;
+
+	  case PLUS:
+	    {
+	      rtx base = XEXP (addr, 0);
+	      rtx index = XEXP (addr, 1);
+
+	      if (!REG_P (base) || !CONST_INT_P (index))
+		gcc_unreachable ();
+
+	      HOST_WIDE_INT offset = INTVAL (index);
+	      asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
+	      return;
+	    }
+
+	  default:
+	    gcc_unreachable ();
+	  }
+	return;
+      }
+
     case 'C':
       {
 	rtx addr;
@@ -33496,4 +33553,23 @@  arm_mode_base_reg_class (machine_mode mode)
 
 struct gcc_target targetm = TARGET_INITIALIZER;
 
+bool
+arm_mve_mode_and_operands_type_check (machine_mode mode, rtx op0, rtx op1)
+{
+  if (!(TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT))
+    {
+       return true;
+    }
+  else if (mode == E_BFmode)
+    {
+      return false;
+    }
+  else if ((s_register_operand (op0, mode) && MEM_P (op1))
+	   || (s_register_operand (op1, mode) && MEM_P (op0)))
+    {
+      return false;
+    }
+  return true;
+}
+
 #include "gt-arm.h"
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 011badc..ff229aa 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -452,6 +452,13 @@ 
  (and (match_code "mem")
       (match_test "TARGET_32BIT && arm_coproc_mem_operand (op, FALSE)")))
 
+(define_memory_constraint "Uj"
+ "@internal
+  In ARM/Thumb-2 state an VFP load/store address which does not support
+  writeback at all (eg vldr.16)."
+ (and (match_code "mem")
+      (match_test "TARGET_32BIT && arm_coproc_mem_operand_no_writeback (op)")))
+
 (define_memory_constraint "Uy"
  "@internal
   In ARM/Thumb-2 state a valid iWMMX load/store address."
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 3470679..13174bb 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -387,6 +387,22 @@ 
    (set_attr "arch"           "t2,any,any,any,a,t2,any,any,any,any,any,any")]
 )
 
+(define_insn "*mov_load_vfp_hf16"
+  [(set (match_operand:HF 0 "s_register_operand" "=t")
+	(match_operand:HF 1 "memory_operand" "Uj"))]
+  "TARGET_HAVE_MVE_FLOAT"
+  "vldr.16\\t%0, %j1"
+  [(set_attr "length" "4")]
+)
+
+(define_insn "*mov_store_vfp_hf16"
+  [(set (match_operand:HF 0 "memory_operand" "=Uj")
+	(match_operand:HF 1 "s_register_operand"   "t"))]
+  "TARGET_HAVE_MVE_FLOAT"
+  "vstr.16\\t%1, %j0"
+  [(set_attr "length" "4")]
+)
+
 ;; HFmode and BFmode moves
 
 (define_insn "*mov<mode>_vfp_<mode>16"
@@ -396,6 +412,8 @@ 
 			  "  m,r,t,r,r,t,Dv,Um,t, F"))]
   "TARGET_32BIT
    && TARGET_VFP_FP16INST
+   && arm_mve_mode_and_operands_type_check (<MODE>mode, operands[0],
+					    operands[1])
    && (s_register_operand (operands[0], <MODE>mode)
        || s_register_operand (operands[1], <MODE>mode))"
  {
@@ -414,15 +432,9 @@ 
     case 6: /* S register from immediate.  */
       return \"vmov.f16\\t%0, %1\t%@ __<fporbf>\";
     case 7: /* S register from memory.  */
-      if (TARGET_HAVE_MVE)
-	return \"vldr.16\\t%0, %A1\";
-      else
-	return \"vld1.16\\t{%z0}, %A1\";
+      return \"vld1.16\\t{%z0}, %A1\";
     case 8: /* Memory from S register.  */
-      if (TARGET_HAVE_MVE)
-	return \"vstr.16\\t%1, %A0\";
-      else
-	return \"vst1.16\\t{%z1}, %A0\";
+      return \"vst1.16\\t{%z1}, %A0\";
     case 9: /* ARM register from constant.  */
       {
 	long bits;
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c
new file mode 100644
index 0000000..0a69ace
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c
@@ -0,0 +1,17 @@ 
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+void
+fn1 (__fp16 *pSrc)
+{
+  __fp16 high;
+  __fp16 *pDst = 0;
+  unsigned i;
+  for (i = 0;; i++)
+    if (pSrc[i])
+      pDst[i] = high;
+}
+
+/* { dg-final { scan-assembler {vldr\.16\ts[0-9]+, \[r[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {vstr\.16\ts[0-9]+, \[r[0-9]+\]\n} } } */