[4/6,ARC] Add peephole rules to combine store/loads into double store/loads

Message ID 20181010080016.12317-5-claziss@gmail.com
State New
Headers show
Series
  • ARC updates
Related show

Commit Message

Claudiu Zissulescu Oct. 10, 2018, 8 a.m.
Simple peephole rules which combines multiple ld/st instructions into
64-bit load/store instructions. It only works for architectures which
are having double load/store option on.

gcc/
	Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc-protos.h (gen_operands_ldd_std): Add.
	* config/arc/arc.c (operands_ok_ldd_std): New function.
	(mem_ok_for_ldd_std): Likewise.
	(gen_operands_ldd_std): Likewise.
	* config/arc/arc.md: Add peephole2 rules for std/ldd.
---
 gcc/config/arc/arc-protos.h |   1 +
 gcc/config/arc/arc.c        | 163 ++++++++++++++++++++++++++++++++++++
 gcc/config/arc/arc.md       |  67 +++++++++++++++
 3 files changed, 231 insertions(+)

-- 
2.17.1

Comments

Andrew Burgess Oct. 22, 2018, 5:49 p.m. | #1
* Claudiu Zissulescu <claziss@gmail.com> [2018-10-10 11:00:14 +0300]:

> Simple peephole rules which combines multiple ld/st instructions into

> 64-bit load/store instructions. It only works for architectures which

> are having double load/store option on.

> 

> gcc/

> 	Claudiu Zissulescu  <claziss@synopsys.com>

> 

> 	* config/arc/arc-protos.h (gen_operands_ldd_std): Add.

> 	* config/arc/arc.c (operands_ok_ldd_std): New function.

> 	(mem_ok_for_ldd_std): Likewise.

> 	(gen_operands_ldd_std): Likewise.

> 	* config/arc/arc.md: Add peephole2 rules for std/ldd.

> ---

>  gcc/config/arc/arc-protos.h |   1 +

>  gcc/config/arc/arc.c        | 163 ++++++++++++++++++++++++++++++++++++

>  gcc/config/arc/arc.md       |  67 +++++++++++++++

>  3 files changed, 231 insertions(+)

> 

> diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h

> index 24bea6e1efb..55f8ed4c643 100644

> --- a/gcc/config/arc/arc-protos.h

> +++ b/gcc/config/arc/arc-protos.h

> @@ -46,6 +46,7 @@ extern int arc_return_address_register (unsigned int);

>  extern unsigned int arc_compute_function_type (struct function *);

>  extern bool arc_is_uncached_mem_p (rtx);

>  extern bool arc_lra_p (void);

> +extern bool gen_operands_ldd_std (rtx *operands, bool load, bool commute);

>  #endif /* RTX_CODE */

>  

>  extern unsigned int arc_compute_frame_size (int);

> diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c

> index 18dd0de6af7..9bc69e9fbc9 100644

> --- a/gcc/config/arc/arc.c

> +++ b/gcc/config/arc/arc.c

> @@ -10803,6 +10803,169 @@ arc_cannot_substitute_mem_equiv_p (rtx)

>    return true;

>  }

>  

> +/* Checks whether the operands are valid for use in an LDD/STD

> +   instruction.	 Assumes that RT, RT2, and RN are REG.	This is

> +   guaranteed by the patterns.	Assumes that the address in the base

> +   register RN is word aligned.	 Pattern guarantees that both memory

> +   accesses use the same base register, the offsets are constants

> +   within the range, and the gap between the offsets is 4.  If preload

> +   complete then check that registers are legal.  WBACK indicates

> +   whether address is updated.	*/


You've got tabs instead of whitespace inside both this comment block,
and others within this patch.  It should be period and two whitespace
at the end of each sentence.

> +

> +static bool

> +operands_ok_ldd_std (rtx rt, rtx rt2, rtx rn ATTRIBUTE_UNUSED,

> +		    HOST_WIDE_INT offset)


Why have the RN parameter at all?  I took a quick look through patches
5/6 and don't see any additional changes to this function, so we should
probably just drop the parameter at this point.

> +{

> +  unsigned int t, t2;

> +

> +  if (!reload_completed)

> +    return true;

> +

> +  if (!(SMALL_INT_RANGE (offset, (GET_MODE_SIZE (DImode) - 1) & -4,


Couldn't we use (~0x3) instead of -4?  Maybe I'm just feeling slow
today, but the bit patterns for negative numbers don't just pop into my
head like those for positive numbers.

> +			 (offset & (GET_MODE_SIZE (DImode) - 1) & 3

> +			  ? 0 : -(-GET_MODE_SIZE (DImode) | -4) >> 1))))

> +    return false;

> +

> +  t = REGNO (rt);

> +  t2 = REGNO (rt2);

> +

> +  if ((t2 == 63)


Can we use PROGRAM_COUNTER_REGNO here?

> +      || (t % 2 != 0)	/* First destination register is not even.  */

> +      || (t2 != t + 1))

> +      return false;

> +

> +  return true;

> +}

> +

> +/* Helper for gen_operands_ldd_std.  Returns true iff the memory

> +   operand MEM's address contains an immediate offset from the base

> +   register and has no side effects, in which case it sets BASE and

> +   OFFSET accordingly.	*/

> +

> +static bool

> +mem_ok_for_ldd_std (rtx mem, rtx *base, rtx *offset)

> +{

> +  rtx addr;

> +

> +  gcc_assert (base != NULL && offset != NULL);

> +

> +  /* TODO: Handle more general memory operand patterns, such as

> +     PRE_DEC and PRE_INC.  */

> +

> +  if (side_effects_p (mem))

> +    return false;

> +

> +  /* Can't deal with subregs.  */

> +  if (GET_CODE (mem) == SUBREG)

> +    return false;

> +

> +  gcc_assert (MEM_P (mem));

> +

> +  *offset = const0_rtx;

> +

> +  addr = XEXP (mem, 0);

> +

> +  /* If addr isn't valid for DImode, then we can't handle it.  */

> +  if (!arc_legitimate_address_p (DImode, addr,

> +				reload_in_progress || reload_completed))

> +    return false;

> +

> +  if (REG_P (addr))

> +    {

> +      *base = addr;

> +      return true;

> +    }

> +  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)

> +    {

> +      *base = XEXP (addr, 0);

> +      *offset = XEXP (addr, 1);

> +      return (REG_P (*base) && CONST_INT_P (*offset));

> +    }

> +

> +  return false;

> +}

> +

> +/* Called from peephole2 to replace two word-size accesses with a

> +   single LDD/STD instruction.	Returns true iff we can generate a new

> +   instruction sequence.  That is, both accesses use the same base

> +   register and the gap between constant offsets is 4.	OPERANDS are

> +   the operands found by the peephole matcher; OPERANDS[0,1] are

> +   register operands, and OPERANDS[2,3] are the corresponding memory

> +   operands.  LOAD indicates whether the access is load or store.  */

> +

> +bool

> +gen_operands_ldd_std (rtx *operands, bool load, bool commute)

> +{

> +  int i, gap;

> +  HOST_WIDE_INT offsets[2], offset;

> +  int nops = 2;

> +  rtx cur_base, cur_offset, tmp;

> +  rtx base = NULL_RTX;

> +

> +  /* Check that the memory references are immediate offsets from the

> +     same base register.  Extract the base register, the destination

> +     registers, and the corresponding memory offsets.  */

> +  for (i = 0; i < nops; i++)

> +    {

> +      if (!mem_ok_for_ldd_std (operands[nops+i], &cur_base, &cur_offset))

> +	return false;

> +

> +      if (i == 0)

> +	base = cur_base;

> +      else if (REGNO (base) != REGNO (cur_base))

> +	return false;

> +

> +      offsets[i] = INTVAL (cur_offset);

> +      if (GET_CODE (operands[i]) == SUBREG)

> +	{

> +	  tmp = SUBREG_REG (operands[i]);

> +	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));

> +	  operands[i] = tmp;

> +	}

> +    }

> +

> +  /* Make sure there is no dependency between the individual loads.  */

> +  if (load && REGNO (operands[0]) == REGNO (base))

> +    return false; /* RAW */

> +

> +  if (load && REGNO (operands[0]) == REGNO (operands[1]))

> +    return false; /* WAW */

> +

> +  /* Make sure the instructions are ordered with lower memory access first.  */

> +  if (offsets[0] > offsets[1])

> +    {

> +      gap = offsets[0] - offsets[1];

> +      offset = offsets[1];

> +

> +      /* Swap the instructions such that lower memory is accessed first.  */

> +      std::swap (operands[0], operands[1]);

> +      std::swap (operands[2], operands[3]);

> +    }

> +  else

> +    {

> +      gap = offsets[1] - offsets[0];

> +      offset = offsets[0];

> +    }

> +

> +  /* Make sure accesses are to consecutive memory locations.  */

> +  if (gap != 4)

> +    return false;

> +

> +  /* Make sure we generate legal instructions.	*/

> +  if (operands_ok_ldd_std (operands[0], operands[1], base, offset))

> +    return true;

> +

> +  if (load && commute)

> +    {

> +      /* Try reordering registers.  */

> +      std::swap (operands [0], operands[1]);

> +      if (operands_ok_ldd_std (operands[0], operands[1], base, offset))

> +	return true;

> +    }

> +

> +  return false;

> +}

> +

>  #undef TARGET_USE_ANCHORS_FOR_SYMBOL_P

>  #define TARGET_USE_ANCHORS_FOR_SYMBOL_P arc_use_anchors_for_symbol_p

>  

> diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md

> index 1ed230fa5f0..b968022e64a 100644

> --- a/gcc/config/arc/arc.md

> +++ b/gcc/config/arc/arc.md

> @@ -6363,6 +6363,73 @@ archs4x, archs4xd, archs4xd_slow"

>    [(set (reg:CC CC_REG) (compare:CC (match_dup 3)

>  				    (ashift:SI (match_dup 1) (match_dup 2))))])

>  

> +(define_peephole2 ; std

> +[(set (match_operand:SI 2 "memory_operand" "")

> +      (match_operand:SI 0 "register_operand" ""))

> + (set (match_operand:SI 3 "memory_operand" "")

> +      (match_operand:SI 1 "register_operand" ""))]

> + "TARGET_LL64"

> + [(const_int 0)]

> +{

> + if (!gen_operands_ldd_std (operands, false, false))

> +   FAIL;

> + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));

> + operands[2] = adjust_address (operands[2], DImode, 0);

> + emit_insn (gen_rtx_SET (operands[2], operands[0]));

> + DONE;

> + })

> +

> +(define_peephole2 ; ldd

> +  [(set (match_operand:SI 0 "register_operand" "")

> +        (match_operand:SI 2 "memory_operand" ""))

> +   (set (match_operand:SI 1 "register_operand" "")

> +        (match_operand:SI 3 "memory_operand" ""))]

> +  "TARGET_LL64"

> +  [(const_int 0)]

> +{

> +  if (!gen_operands_ldd_std (operands, true, false))

> +    FAIL;

> +  operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));

> +  operands[2] = adjust_address (operands[2], DImode, 0);

> +  emit_insn (gen_rtx_SET (operands[0], operands[2]));

> +  DONE;

> +})

> +

> +;; We require consecutive registers for LDD instruction.  Check if we

> +;; can reorder them and use an LDD.

> +

> +(define_peephole2 ; swap the destination registers of two loads

> +		  ; before a commutative operation.

> +  [(set (match_operand:SI 0 "register_operand" "")

> +        (match_operand:SI 2 "memory_operand" ""))

> +   (set (match_operand:SI 1 "register_operand" "")

> +        (match_operand:SI 3 "memory_operand" ""))

> +   (set (match_operand:SI 4 "register_operand" "")

> +        (match_operator:SI 5 "commutative_operator"

> +			   [(match_operand 6 "register_operand" "")

> +			    (match_operand 7 "register_operand" "") ]))]

> +  "TARGET_LL64

> +   && (((rtx_equal_p(operands[0], operands[6]))

> +         && (rtx_equal_p(operands[1], operands[7])))

> +        || ((rtx_equal_p(operands[0], operands[7]))

> +             && (rtx_equal_p(operands[1], operands[6]))))

> +   && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))

> +   && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"

> +  [(set (match_dup 0) (match_dup 2))

> +   (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]

> +  {

> +    if (!gen_operands_ldd_std (operands, true, true))

> +     {

> +        FAIL;

> +     }

> +    else

> +     {

> +        operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));

> +        operands[2] = adjust_address (operands[2], DImode, 0);

> +     }

> +   }

> +)

> +

>  ;; include the arc-FPX instructions

>  (include "fpx.md")

>  

> -- 

> 2.17.1

>
Bernhard Reutner-Fischer Oct. 22, 2018, 10:06 p.m. | #2
On 22 October 2018 19:49:35 CEST, Andrew Burgess <andrew.burgess@embecosm.com> wrote:
>* Claudiu Zissulescu <claziss@gmail.com> [2018-10-10 11:00:14 +0300]:


>> --- a/gcc/config/arc/arc.c

>> +++ b/gcc/config/arc/arc.c

>> @@ -10803,6 +10803,169 @@ arc_cannot_substitute_mem_equiv_p (rtx)

>>    return true;

>>  }

>>  

>> +/* Checks whether the operands are valid for use in an LDD/STD

>> +   instruction.	 Assumes that RT, RT2, and RN are REG.	This is

>> +   guaranteed by the patterns.	Assumes that the address in the base

>> +   register RN is word aligned.	 Pattern guarantees that both memory

>> +   accesses use the same base register, the offsets are constants

>> +   within the range, and the gap between the offsets is 4.  If

>preload

>> +   complete then check that registers are legal.  WBACK indicates

>> +   whether address is updated.	*/

>

>You've got tabs instead of whitespace inside both this comment block,

>and others within this patch.  It should be period and two whitespace

>at the end of each sentence.


See contrib/check_GNU_style.py

Also:

s/If preload/If reload/

thanks,
Claudiu Zissulescu Oct. 31, 2018, 8:33 a.m. | #3
Thank you for your review. Please find attached a new respin patch with
your feedback in.

Please let me know if it is ok,
Claudiu
From 4ff7d8419783eceeffbaf27df017d0a93c3af942 Mon Sep 17 00:00:00 2001
From: Claudiu Zissulescu <claziss@gmail.com>
Date: Thu, 9 Aug 2018 14:29:05 +0300
Subject: [PATCH] [ARC] Add peephole rules to combine store/loads into double
 store/loads

Simple peephole rules which combines multiple ld/st instructions into
64-bit load/store instructions. It only works for architectures which
are having double load/store option on.

gcc/
	Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc-protos.h (gen_operands_ldd_std): Add.
	* config/arc/arc.c (operands_ok_ldd_std): New function.
	(mem_ok_for_ldd_std): Likewise.
	(gen_operands_ldd_std): Likewise.
	* config/arc/arc.md: Add peephole2 rules for std/ldd.
---
 gcc/config/arc/arc-protos.h |   1 +
 gcc/config/arc/arc.c        | 161 ++++++++++++++++++++++++++++++++++++
 gcc/config/arc/arc.md       |  69 ++++++++++++++++
 3 files changed, 231 insertions(+)

diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index 24bea6e1efb..55f8ed4c643 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -46,6 +46,7 @@ extern int arc_return_address_register (unsigned int);
 extern unsigned int arc_compute_function_type (struct function *);
 extern bool arc_is_uncached_mem_p (rtx);
 extern bool arc_lra_p (void);
+extern bool gen_operands_ldd_std (rtx *operands, bool load, bool commute);
 #endif /* RTX_CODE */
 
 extern unsigned int arc_compute_frame_size (int);
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 18dd0de6af7..daf785dbdb8 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -10803,6 +10803,167 @@ arc_cannot_substitute_mem_equiv_p (rtx)
   return true;
 }
 
+/* Checks whether the operands are valid for use in an LDD/STD
+   instruction.  Assumes that RT, and RT2 are REG.  This is guaranteed
+   by the patterns.  Assumes that the address in the base register RN
+   is word aligned.  Pattern guarantees that both memory accesses use
+   the same base register, the offsets are constants within the range,
+   and the gap between the offsets is 4.  If reload complete then
+   check that registers are legal.  */
+
+static bool
+operands_ok_ldd_std (rtx rt, rtx rt2, HOST_WIDE_INT offset)
+{
+  unsigned int t, t2;
+
+  if (!reload_completed)
+    return true;
+
+  if (!(SMALL_INT_RANGE (offset, (GET_MODE_SIZE (DImode) - 1) & (~0x03),
+			 (offset & (GET_MODE_SIZE (DImode) - 1) & 3
+			  ? 0 : -(-GET_MODE_SIZE (DImode) | (~0x03)) >> 1))))
+    return false;
+
+  t = REGNO (rt);
+  t2 = REGNO (rt2);
+
+  if ((t2 == PROGRAM_COUNTER_REGNO)
+      || (t % 2 != 0)	/* First destination register is not even.  */
+      || (t2 != t + 1))
+      return false;
+
+  return true;
+}
+
+/* Helper for gen_operands_ldd_std.  Returns true iff the memory
+   operand MEM's address contains an immediate offset from the base
+   register and has no side effects, in which case it sets BASE and
+   OFFSET accordingly.  */
+
+static bool
+mem_ok_for_ldd_std (rtx mem, rtx *base, rtx *offset)
+{
+  rtx addr;
+
+  gcc_assert (base != NULL && offset != NULL);
+
+  /* TODO: Handle more general memory operand patterns, such as
+     PRE_DEC and PRE_INC.  */
+
+  if (side_effects_p (mem))
+    return false;
+
+  /* Can't deal with subregs.  */
+  if (GET_CODE (mem) == SUBREG)
+    return false;
+
+  gcc_assert (MEM_P (mem));
+
+  *offset = const0_rtx;
+
+  addr = XEXP (mem, 0);
+
+  /* If addr isn't valid for DImode, then we can't handle it.  */
+  if (!arc_legitimate_address_p (DImode, addr,
+				reload_in_progress || reload_completed))
+    return false;
+
+  if (REG_P (addr))
+    {
+      *base = addr;
+      return true;
+    }
+  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
+    {
+      *base = XEXP (addr, 0);
+      *offset = XEXP (addr, 1);
+      return (REG_P (*base) && CONST_INT_P (*offset));
+    }
+
+  return false;
+}
+
+/* Called from peephole2 to replace two word-size accesses with a
+   single LDD/STD instruction.  Returns true iff we can generate a new
+   instruction sequence.  That is, both accesses use the same base
+   register and the gap between constant offsets is 4.  OPERANDS are
+   the operands found by the peephole matcher; OPERANDS[0,1] are
+   register operands, and OPERANDS[2,3] are the corresponding memory
+   operands.  LOAD indicates whether the access is load or store.  */
+
+bool
+gen_operands_ldd_std (rtx *operands, bool load, bool commute)
+{
+  int i, gap;
+  HOST_WIDE_INT offsets[2], offset;
+  int nops = 2;
+  rtx cur_base, cur_offset, tmp;
+  rtx base = NULL_RTX;
+
+  /* Check that the memory references are immediate offsets from the
+     same base register.  Extract the base register, the destination
+     registers, and the corresponding memory offsets.  */
+  for (i = 0; i < nops; i++)
+    {
+      if (!mem_ok_for_ldd_std (operands[nops+i], &cur_base, &cur_offset))
+	return false;
+
+      if (i == 0)
+	base = cur_base;
+      else if (REGNO (base) != REGNO (cur_base))
+	return false;
+
+      offsets[i] = INTVAL (cur_offset);
+      if (GET_CODE (operands[i]) == SUBREG)
+	{
+	  tmp = SUBREG_REG (operands[i]);
+	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
+	  operands[i] = tmp;
+	}
+    }
+
+  /* Make sure there is no dependency between the individual loads.  */
+  if (load && REGNO (operands[0]) == REGNO (base))
+    return false; /* RAW.  */
+
+  if (load && REGNO (operands[0]) == REGNO (operands[1]))
+    return false; /* WAW.  */
+
+  /* Make sure the instructions are ordered with lower memory access first.  */
+  if (offsets[0] > offsets[1])
+    {
+      gap = offsets[0] - offsets[1];
+      offset = offsets[1];
+
+      /* Swap the instructions such that lower memory is accessed first.  */
+      std::swap (operands[0], operands[1]);
+      std::swap (operands[2], operands[3]);
+    }
+  else
+    {
+      gap = offsets[1] - offsets[0];
+      offset = offsets[0];
+    }
+
+  /* Make sure accesses are to consecutive memory locations.  */
+  if (gap != 4)
+    return false;
+
+  /* Make sure we generate legal instructions.  */
+  if (operands_ok_ldd_std (operands[0], operands[1], offset))
+    return true;
+
+  if (load && commute)
+    {
+      /* Try reordering registers.  */
+      std::swap (operands[0], operands[1]);
+      if (operands_ok_ldd_std (operands[0], operands[1], offset))
+	return true;
+    }
+
+  return false;
+}
+
 #undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
 #define TARGET_USE_ANCHORS_FOR_SYMBOL_P arc_use_anchors_for_symbol_p
 
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 1ed230fa5f0..526fd17a0cf 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -6363,6 +6363,75 @@ archs4x, archs4xd, archs4xd_slow"
   [(set (reg:CC CC_REG) (compare:CC (match_dup 3)
 				    (ashift:SI (match_dup 1) (match_dup 2))))])
 
+(define_peephole2 ; std
+  [(set (match_operand:SI 2 "memory_operand" "")
+	(match_operand:SI 0 "register_operand" ""))
+   (set (match_operand:SI 3 "memory_operand" "")
+	(match_operand:SI 1 "register_operand" ""))]
+  "TARGET_LL64"
+  [(const_int 0)]
+{
+  if (!gen_operands_ldd_std (operands, false, false))
+    FAIL;
+  operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
+  operands[2] = adjust_address (operands[2], DImode, 0);
+  emit_insn (gen_rtx_SET (operands[2], operands[0]));
+  DONE;
+})
+
+(define_peephole2 ; ldd
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operand:SI 2 "memory_operand" ""))
+   (set (match_operand:SI 1 "register_operand" "")
+	(match_operand:SI 3 "memory_operand" ""))]
+  "TARGET_LL64"
+  [(const_int 0)]
+{
+  if (!gen_operands_ldd_std (operands, true, false))
+    FAIL;
+  operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
+  operands[2] = adjust_address (operands[2], DImode, 0);
+  emit_insn (gen_rtx_SET (operands[0], operands[2]));
+  DONE;
+})
+
+;; We require consecutive registers for LDD instruction.  Check if we
+;; can reorder them and use an LDD.
+
+(define_peephole2 ; swap the destination registers of two loads
+		  ; before a commutative operation.
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operand:SI 2 "memory_operand" ""))
+   (set (match_operand:SI 1 "register_operand" "")
+	(match_operand:SI 3 "memory_operand" ""))
+   (set (match_operand:SI 4 "register_operand" "")
+	(match_operator:SI 5 "commutative_operator"
+			   [(match_operand 6 "register_operand" "")
+			    (match_operand 7 "register_operand" "") ]))]
+  "TARGET_LL64
+   && (((rtx_equal_p (operands[0], operands[6]))
+	 && (rtx_equal_p (operands[1], operands[7])))
+	|| ((rtx_equal_p (operands[0], operands[7]))
+	     && (rtx_equal_p (operands[1], operands[6]))))
+   && (peep2_reg_dead_p (3, operands[0])
+       || rtx_equal_p (operands[0], operands[4]))
+   && (peep2_reg_dead_p (3, operands[1])
+       || rtx_equal_p (operands[1], operands[4]))"
+  [(set (match_dup 0) (match_dup 2))
+   (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
+  {
+    if (!gen_operands_ldd_std (operands, true, true))
+     {
+	FAIL;
+     }
+    else
+     {
+	operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
+	operands[2] = adjust_address (operands[2], DImode, 0);
+     }
+   }
+)
+
 ;; include the arc-FPX instructions
 (include "fpx.md")
Claudiu Zissulescu Nov. 12, 2018, 11:33 a.m. | #4
PING.

On Wed, 2018-10-31 at 10:33 +0200, claziss@gmail.com wrote:
> Thank you for your review. Please find attached a new respin patch

> with

> your feedback in.

> 

> Please let me know if it is ok,

> Claudiu
Andrew Burgess Nov. 13, 2018, 10 a.m. | #5
* claziss@gmail.com <claziss@gmail.com> [2018-10-31 10:33:33 +0200]:

> Thank you for your review. Please find attached a new respin patch with

> your feedback in.

> 

> Please let me know if it is ok,

> Claudiu 


> From 4ff7d8419783eceeffbaf27df017d0a93c3af942 Mon Sep 17 00:00:00 2001

> From: Claudiu Zissulescu <claziss@gmail.com>

> Date: Thu, 9 Aug 2018 14:29:05 +0300

> Subject: [PATCH] [ARC] Add peephole rules to combine store/loads into double

>  store/loads

> 

> Simple peephole rules which combines multiple ld/st instructions into

> 64-bit load/store instructions. It only works for architectures which

> are having double load/store option on.

> 

> gcc/

> 	Claudiu Zissulescu  <claziss@synopsys.com>

> 

> 	* config/arc/arc-protos.h (gen_operands_ldd_std): Add.

> 	* config/arc/arc.c (operands_ok_ldd_std): New function.

> 	(mem_ok_for_ldd_std): Likewise.

> 	(gen_operands_ldd_std): Likewise.

> 	* config/arc/arc.md: Add peephole2 rules for std/ldd.


Looks good.

Thanks,
Andrew


> ---

>  gcc/config/arc/arc-protos.h |   1 +

>  gcc/config/arc/arc.c        | 161 ++++++++++++++++++++++++++++++++++++

>  gcc/config/arc/arc.md       |  69 ++++++++++++++++

>  3 files changed, 231 insertions(+)

> 

> diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h

> index 24bea6e1efb..55f8ed4c643 100644

> --- a/gcc/config/arc/arc-protos.h

> +++ b/gcc/config/arc/arc-protos.h

> @@ -46,6 +46,7 @@ extern int arc_return_address_register (unsigned int);

>  extern unsigned int arc_compute_function_type (struct function *);

>  extern bool arc_is_uncached_mem_p (rtx);

>  extern bool arc_lra_p (void);

> +extern bool gen_operands_ldd_std (rtx *operands, bool load, bool commute);

>  #endif /* RTX_CODE */

>  

>  extern unsigned int arc_compute_frame_size (int);

> diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c

> index 18dd0de6af7..daf785dbdb8 100644

> --- a/gcc/config/arc/arc.c

> +++ b/gcc/config/arc/arc.c

> @@ -10803,6 +10803,167 @@ arc_cannot_substitute_mem_equiv_p (rtx)

>    return true;

>  }

>  

> +/* Checks whether the operands are valid for use in an LDD/STD

> +   instruction.  Assumes that RT, and RT2 are REG.  This is guaranteed

> +   by the patterns.  Assumes that the address in the base register RN

> +   is word aligned.  Pattern guarantees that both memory accesses use

> +   the same base register, the offsets are constants within the range,

> +   and the gap between the offsets is 4.  If reload complete then

> +   check that registers are legal.  */

> +

> +static bool

> +operands_ok_ldd_std (rtx rt, rtx rt2, HOST_WIDE_INT offset)

> +{

> +  unsigned int t, t2;

> +

> +  if (!reload_completed)

> +    return true;

> +

> +  if (!(SMALL_INT_RANGE (offset, (GET_MODE_SIZE (DImode) - 1) & (~0x03),

> +			 (offset & (GET_MODE_SIZE (DImode) - 1) & 3

> +			  ? 0 : -(-GET_MODE_SIZE (DImode) | (~0x03)) >> 1))))

> +    return false;

> +

> +  t = REGNO (rt);

> +  t2 = REGNO (rt2);

> +

> +  if ((t2 == PROGRAM_COUNTER_REGNO)

> +      || (t % 2 != 0)	/* First destination register is not even.  */

> +      || (t2 != t + 1))

> +      return false;

> +

> +  return true;

> +}

> +

> +/* Helper for gen_operands_ldd_std.  Returns true iff the memory

> +   operand MEM's address contains an immediate offset from the base

> +   register and has no side effects, in which case it sets BASE and

> +   OFFSET accordingly.  */

> +

> +static bool

> +mem_ok_for_ldd_std (rtx mem, rtx *base, rtx *offset)

> +{

> +  rtx addr;

> +

> +  gcc_assert (base != NULL && offset != NULL);

> +

> +  /* TODO: Handle more general memory operand patterns, such as

> +     PRE_DEC and PRE_INC.  */

> +

> +  if (side_effects_p (mem))

> +    return false;

> +

> +  /* Can't deal with subregs.  */

> +  if (GET_CODE (mem) == SUBREG)

> +    return false;

> +

> +  gcc_assert (MEM_P (mem));

> +

> +  *offset = const0_rtx;

> +

> +  addr = XEXP (mem, 0);

> +

> +  /* If addr isn't valid for DImode, then we can't handle it.  */

> +  if (!arc_legitimate_address_p (DImode, addr,

> +				reload_in_progress || reload_completed))

> +    return false;

> +

> +  if (REG_P (addr))

> +    {

> +      *base = addr;

> +      return true;

> +    }

> +  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)

> +    {

> +      *base = XEXP (addr, 0);

> +      *offset = XEXP (addr, 1);

> +      return (REG_P (*base) && CONST_INT_P (*offset));

> +    }

> +

> +  return false;

> +}

> +

> +/* Called from peephole2 to replace two word-size accesses with a

> +   single LDD/STD instruction.  Returns true iff we can generate a new

> +   instruction sequence.  That is, both accesses use the same base

> +   register and the gap between constant offsets is 4.  OPERANDS are

> +   the operands found by the peephole matcher; OPERANDS[0,1] are

> +   register operands, and OPERANDS[2,3] are the corresponding memory

> +   operands.  LOAD indicates whether the access is load or store.  */

> +

> +bool

> +gen_operands_ldd_std (rtx *operands, bool load, bool commute)

> +{

> +  int i, gap;

> +  HOST_WIDE_INT offsets[2], offset;

> +  int nops = 2;

> +  rtx cur_base, cur_offset, tmp;

> +  rtx base = NULL_RTX;

> +

> +  /* Check that the memory references are immediate offsets from the

> +     same base register.  Extract the base register, the destination

> +     registers, and the corresponding memory offsets.  */

> +  for (i = 0; i < nops; i++)

> +    {

> +      if (!mem_ok_for_ldd_std (operands[nops+i], &cur_base, &cur_offset))

> +	return false;

> +

> +      if (i == 0)

> +	base = cur_base;

> +      else if (REGNO (base) != REGNO (cur_base))

> +	return false;

> +

> +      offsets[i] = INTVAL (cur_offset);

> +      if (GET_CODE (operands[i]) == SUBREG)

> +	{

> +	  tmp = SUBREG_REG (operands[i]);

> +	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));

> +	  operands[i] = tmp;

> +	}

> +    }

> +

> +  /* Make sure there is no dependency between the individual loads.  */

> +  if (load && REGNO (operands[0]) == REGNO (base))

> +    return false; /* RAW.  */

> +

> +  if (load && REGNO (operands[0]) == REGNO (operands[1]))

> +    return false; /* WAW.  */

> +

> +  /* Make sure the instructions are ordered with lower memory access first.  */

> +  if (offsets[0] > offsets[1])

> +    {

> +      gap = offsets[0] - offsets[1];

> +      offset = offsets[1];

> +

> +      /* Swap the instructions such that lower memory is accessed first.  */

> +      std::swap (operands[0], operands[1]);

> +      std::swap (operands[2], operands[3]);

> +    }

> +  else

> +    {

> +      gap = offsets[1] - offsets[0];

> +      offset = offsets[0];

> +    }

> +

> +  /* Make sure accesses are to consecutive memory locations.  */

> +  if (gap != 4)

> +    return false;

> +

> +  /* Make sure we generate legal instructions.  */

> +  if (operands_ok_ldd_std (operands[0], operands[1], offset))

> +    return true;

> +

> +  if (load && commute)

> +    {

> +      /* Try reordering registers.  */

> +      std::swap (operands[0], operands[1]);

> +      if (operands_ok_ldd_std (operands[0], operands[1], offset))

> +	return true;

> +    }

> +

> +  return false;

> +}

> +

>  #undef TARGET_USE_ANCHORS_FOR_SYMBOL_P

>  #define TARGET_USE_ANCHORS_FOR_SYMBOL_P arc_use_anchors_for_symbol_p

>  

> diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md

> index 1ed230fa5f0..526fd17a0cf 100644

> --- a/gcc/config/arc/arc.md

> +++ b/gcc/config/arc/arc.md

> @@ -6363,6 +6363,75 @@ archs4x, archs4xd, archs4xd_slow"

>    [(set (reg:CC CC_REG) (compare:CC (match_dup 3)

>  				    (ashift:SI (match_dup 1) (match_dup 2))))])

>  

> +(define_peephole2 ; std

> +  [(set (match_operand:SI 2 "memory_operand" "")

> +	(match_operand:SI 0 "register_operand" ""))

> +   (set (match_operand:SI 3 "memory_operand" "")

> +	(match_operand:SI 1 "register_operand" ""))]

> +  "TARGET_LL64"

> +  [(const_int 0)]

> +{

> +  if (!gen_operands_ldd_std (operands, false, false))

> +    FAIL;

> +  operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));

> +  operands[2] = adjust_address (operands[2], DImode, 0);

> +  emit_insn (gen_rtx_SET (operands[2], operands[0]));

> +  DONE;

> +})

> +

> +(define_peephole2 ; ldd

> +  [(set (match_operand:SI 0 "register_operand" "")

> +	(match_operand:SI 2 "memory_operand" ""))

> +   (set (match_operand:SI 1 "register_operand" "")

> +	(match_operand:SI 3 "memory_operand" ""))]

> +  "TARGET_LL64"

> +  [(const_int 0)]

> +{

> +  if (!gen_operands_ldd_std (operands, true, false))

> +    FAIL;

> +  operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));

> +  operands[2] = adjust_address (operands[2], DImode, 0);

> +  emit_insn (gen_rtx_SET (operands[0], operands[2]));

> +  DONE;

> +})

> +

> +;; We require consecutive registers for LDD instruction.  Check if we

> +;; can reorder them and use an LDD.

> +

> +(define_peephole2 ; swap the destination registers of two loads

> +		  ; before a commutative operation.

> +  [(set (match_operand:SI 0 "register_operand" "")

> +	(match_operand:SI 2 "memory_operand" ""))

> +   (set (match_operand:SI 1 "register_operand" "")

> +	(match_operand:SI 3 "memory_operand" ""))

> +   (set (match_operand:SI 4 "register_operand" "")

> +	(match_operator:SI 5 "commutative_operator"

> +			   [(match_operand 6 "register_operand" "")

> +			    (match_operand 7 "register_operand" "") ]))]

> +  "TARGET_LL64

> +   && (((rtx_equal_p (operands[0], operands[6]))

> +	 && (rtx_equal_p (operands[1], operands[7])))

> +	|| ((rtx_equal_p (operands[0], operands[7]))

> +	     && (rtx_equal_p (operands[1], operands[6]))))

> +   && (peep2_reg_dead_p (3, operands[0])

> +       || rtx_equal_p (operands[0], operands[4]))

> +   && (peep2_reg_dead_p (3, operands[1])

> +       || rtx_equal_p (operands[1], operands[4]))"

> +  [(set (match_dup 0) (match_dup 2))

> +   (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]

> +  {

> +    if (!gen_operands_ldd_std (operands, true, true))

> +     {

> +	FAIL;

> +     }

> +    else

> +     {

> +	operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));

> +	operands[2] = adjust_address (operands[2], DImode, 0);

> +     }

> +   }

> +)

> +

>  ;; include the arc-FPX instructions

>  (include "fpx.md")

>  

> -- 

> 2.17.1

>

Patch

diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index 24bea6e1efb..55f8ed4c643 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -46,6 +46,7 @@  extern int arc_return_address_register (unsigned int);
 extern unsigned int arc_compute_function_type (struct function *);
 extern bool arc_is_uncached_mem_p (rtx);
 extern bool arc_lra_p (void);
+extern bool gen_operands_ldd_std (rtx *operands, bool load, bool commute);
 #endif /* RTX_CODE */
 
 extern unsigned int arc_compute_frame_size (int);
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 18dd0de6af7..9bc69e9fbc9 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -10803,6 +10803,169 @@  arc_cannot_substitute_mem_equiv_p (rtx)
   return true;
 }
 
+/* Checks whether the operands are valid for use in an LDD/STD
+   instruction.  RT and RT2 are the data registers, RN is the base
+   register (currently unused) and OFFSET is the constant offset of
+   the lower-addressed access.  Assumes that RT, RT2 and RN are REG
+   and that the caller has checked that both accesses use the same
+   base register with a gap of 4 between the offsets.  Once reload
+   has completed, also checks that OFFSET passes SMALL_INT_RANGE and
+   that RT/RT2 form an even/odd pair of consecutive registers.  */
+
+static bool
+operands_ok_ldd_std (rtx rt, rtx rt2, rtx rn ATTRIBUTE_UNUSED,
+		    HOST_WIDE_INT offset)
+{
+  unsigned int t, t2;
+
+  if (!reload_completed)
+    return true;  /* Before reload is complete, accept unconditionally.  */
+
+  if (!(SMALL_INT_RANGE (offset, (GET_MODE_SIZE (DImode) - 1) & -4,
+			 (offset & (GET_MODE_SIZE (DImode) - 1) & 3
+			  ? 0 : -(-GET_MODE_SIZE (DImode) | -4) >> 1))))
+    return false;
+
+  t = REGNO (rt);
+  t2 = REGNO (rt2);
+
+  if ((t2 == 63)	/* NOTE(review): presumably a reserved register -- confirm.  */
+      || (t % 2 != 0)	/* First destination register is not even.  */
+      || (t2 != t + 1))	/* Registers are not consecutive.  */
+      return false;
+
+  return true;
+}
+
+/* Helper for gen_operands_ldd_std.  Returns true iff MEM has no side
+   effects and its address, which must be legitimate for DImode, is a
+   base register optionally combined with a constant offset.  On
+   success BASE and OFFSET are set (OFFSET is const0_rtx if absent).  */
+
+static bool
+mem_ok_for_ldd_std (rtx mem, rtx *base, rtx *offset)
+{
+  rtx addr;
+
+  gcc_assert (base != NULL && offset != NULL);
+
+  /* TODO: Handle more general memory operand patterns, such as
+     PRE_DEC and PRE_INC.  */
+
+  if (side_effects_p (mem))
+    return false;
+
+  /* Can't deal with subregs.  */
+  if (GET_CODE (mem) == SUBREG)
+    return false;
+
+  gcc_assert (MEM_P (mem));
+
+  *offset = const0_rtx;  /* Default for a plain register address.  */
+
+  addr = XEXP (mem, 0);
+
+  /* If addr isn't valid for DImode, then we can't handle it.  */
+  if (!arc_legitimate_address_p (DImode, addr,
+				reload_in_progress || reload_completed))
+    return false;
+
+  if (REG_P (addr))
+    {
+      *base = addr;
+      return true;
+    }
+  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
+    {
+      *base = XEXP (addr, 0);
+      *offset = XEXP (addr, 1);  /* NOTE(review): not negated for MINUS.  */
+      return (REG_P (*base) && CONST_INT_P (*offset));
+    }
+
+  return false;
+}
+
+/* Called from peephole2 to replace two word-size accesses with a
+   single LDD/STD.  OPERANDS[0,1] are the register operands and
+   OPERANDS[2,3] the corresponding memory operands; LOAD is true for
+   loads, false for stores.  Returns true iff both accesses use the
+   same base register, the constant offsets differ by 4 and
+   operands_ok_ldd_std accepts the registers; for loads with COMMUTE
+   the swapped pair is tried too.  OPERANDS is updated in place.  */
+
+bool
+gen_operands_ldd_std (rtx *operands, bool load, bool commute)
+{
+  int i, gap;
+  HOST_WIDE_INT offsets[2], offset;
+  int nops = 2;
+  rtx cur_base, cur_offset, tmp;
+  rtx base = NULL_RTX;
+
+  /* Check that the memory references are immediate offsets from the
+     same base register.  Extract the base register, the destination
+     registers, and the corresponding memory offsets.  */
+  for (i = 0; i < nops; i++)
+    {
+      if (!mem_ok_for_ldd_std (operands[nops+i], &cur_base, &cur_offset))
+	return false;
+
+      if (i == 0)
+	base = cur_base;
+      else if (REGNO (base) != REGNO (cur_base))
+	return false;
+
+      offsets[i] = INTVAL (cur_offset);
+      if (GET_CODE (operands[i]) == SUBREG)
+	{
+	  tmp = SUBREG_REG (operands[i]);
+	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
+	  operands[i] = tmp;  /* Use the SUBREG's inner rtx from here on.  */
+	}
+    }
+
+  /* Make sure there is no dependency between the individual loads.  */
+  if (load && REGNO (operands[0]) == REGNO (base))
+    return false; /* RAW */
+
+  if (load && REGNO (operands[0]) == REGNO (operands[1]))
+    return false; /* WAW */
+
+  /* Make sure the instructions are ordered with lower memory access first.  */
+  if (offsets[0] > offsets[1])
+    {
+      gap = offsets[0] - offsets[1];
+      offset = offsets[1];
+
+      /* Swap the instructions such that lower memory is accessed first.  */
+      std::swap (operands[0], operands[1]);
+      std::swap (operands[2], operands[3]);
+    }
+  else
+    {
+      gap = offsets[1] - offsets[0];
+      offset = offsets[0];
+    }
+
+  /* Make sure accesses are to consecutive memory locations.  */
+  if (gap != 4)
+    return false;
+
+  /* Make sure we generate legal instructions.	*/
+  if (operands_ok_ldd_std (operands[0], operands[1], base, offset))
+    return true;
+
+  if (load && commute)
+    {
+      /* Try reordering registers.  */
+      std::swap (operands [0], operands[1]);
+      if (operands_ok_ldd_std (operands[0], operands[1], base, offset))
+	return true;
+    }
+
+  return false;
+}
+
 #undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
 #define TARGET_USE_ANCHORS_FOR_SYMBOL_P arc_use_anchors_for_symbol_p
 
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 1ed230fa5f0..b968022e64a 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -6363,6 +6363,73 @@  archs4x, archs4xd, archs4xd_slow"
   [(set (reg:CC CC_REG) (compare:CC (match_dup 3)
 				    (ashift:SI (match_dup 1) (match_dup 2))))])
 
+(define_peephole2 ; std: combine two SImode stores into one DImode store.
+[(set (match_operand:SI 2 "memory_operand" "")
+      (match_operand:SI 0 "register_operand" ""))
+ (set (match_operand:SI 3 "memory_operand" "")
+      (match_operand:SI 1 "register_operand" ""))]
+ "TARGET_LL64"
+ [(const_int 0)]
+{
+ if (!gen_operands_ldd_std (operands, false, false))
+   FAIL;
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
+ operands[2] = adjust_address (operands[2], DImode, 0);
+ emit_insn (gen_rtx_SET (operands[2], operands[0]));
+ DONE;
+ })
+
+(define_peephole2 ; ldd: combine two SImode loads into one DImode load.
+  [(set (match_operand:SI 0 "register_operand" "")
+        (match_operand:SI 2 "memory_operand" ""))
+   (set (match_operand:SI 1 "register_operand" "")
+        (match_operand:SI 3 "memory_operand" ""))]
+  "TARGET_LL64"
+  [(const_int 0)]
+{
+  if (!gen_operands_ldd_std (operands, true, false))
+    FAIL;
+  operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
+  operands[2] = adjust_address (operands[2], DImode, 0);
+  emit_insn (gen_rtx_SET (operands[0], operands[2]));
+  DONE;
+})
+
+;; LDD needs an even/odd pair of consecutive registers.  When two loads feed
+;; a commutative operation, check if swapping their destinations allows an LDD.
+
+(define_peephole2 ; swap the destination registers of two loads
+		  ; before a commutative operation.
+  [(set (match_operand:SI 0 "register_operand" "")
+        (match_operand:SI 2 "memory_operand" ""))
+   (set (match_operand:SI 1 "register_operand" "")
+        (match_operand:SI 3 "memory_operand" ""))
+   (set (match_operand:SI 4 "register_operand" "")
+        (match_operator:SI 5 "commutative_operator"
+			   [(match_operand 6 "register_operand" "")
+			    (match_operand 7 "register_operand" "") ]))]
+  "TARGET_LL64
+   && (((rtx_equal_p(operands[0], operands[6]))
+         && (rtx_equal_p(operands[1], operands[7])))
+        || ((rtx_equal_p(operands[0], operands[7]))
+             && (rtx_equal_p(operands[1], operands[6]))))
+   && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
+   && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
+  [(set (match_dup 0) (match_dup 2))
+   (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
+  {
+    if (!gen_operands_ldd_std (operands, true, true))
+     {
+        FAIL;
+     }
+    else
+     {
+        operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
+        operands[2] = adjust_address (operands[2], DImode, 0);
+     }
+   }
+)
+
 ;; include the arc-FPX instructions
 (include "fpx.md")