Implement more rtx vector folds on variable-length vectors

Message ID mptwogpkndf.fsf@arm.com
State New
Headers show
Series
  • Implement more rtx vector folds on variable-length vectors
Related show

Commit Message

Richard Sandiford July 11, 2019, 8:05 a.m.
This patch extends the tree-level folding of variable-length vectors
so that it can also be used on rtxes.  The first step is to move
the tree_vector_builder new_unary/binary_operator routines to the
parent vector_builder class (which in turn means adding a new
template parameter).  The second step is to make simplify-rtx.c
use a direct rtx analogue of the VECTOR_CST handling in fold-const.c.

Tested on aarch64-linux-gnu, aarch64_be-elf and x86_64-linux-gnu.
OK to install?

Richard


2019-07-11  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* vector-builder.h (vector_builder): Add a shape template parameter.
	(vector_builder::new_unary_operation): New function, generalizing
	the old tree_vector_builder function.
	(vector_builder::new_binary_operation): Likewise.
	(vector_builder::binary_encoded_nelts): Likewise.
	* int-vector-builder.h (int_vector_builder): Update template
	parameters to vector_builder.
	(int_vector_builder::shape_nelts): New function.
	* rtx-vector-builder.h (rtx_vector_builder): Update template
	parameters to vector_builder.
	(rtx_vector_builder::shape_nelts): New function.
	(rtx_vector_builder::nelts_of): Likewise.
	(rtx_vector_builder::npatterns_of): Likewise.
	(rtx_vector_builder::nelts_per_pattern_of): Likewise.
	* tree-vector-builder.h (tree_vector_builder): Update template
	parameters to vector_builder.
	(tree_vector_builder::shape_nelts): New function.
	(tree_vector_builder::nelts_of): Likewise.
	(tree_vector_builder::npatterns_of): Likewise.
	(tree_vector_builder::nelts_per_pattern_of): Likewise.
	* tree-vector-builder.c (tree_vector_builder::new_unary_operation)
	(tree_vector_builder::new_binary_operation): Delete.
	(tree_vector_builder::binary_encoded_nelts): Likewise.
	* simplify-rtx.c (distributes_over_addition_p): New function.
	(simplify_const_unary_operation)
	(simplify_const_binary_operation): Generalize handling of vector
	constants to include variable-length vectors.
	(test_vector_ops_series): Add more tests.

Comments

Richard Sandiford July 15, 2019, 3:30 p.m. | #1
Richard Sandiford <richard.sandiford@arm.com> writes:
> This patch extends the tree-level folding of variable-length vectors

> so that it can also be used on rtxes.  The first step is to move

> the tree_vector_builder new_unary/binary_operator routines to the

> parent vector_builder class (which in turn means adding a new

> template parameter).  The second step is to make simplify-rtx.c

> use a direct rtx analogue of the VECTOR_CST handling in fold-const.c.

>

> Tested on aarch64-linux-gnu, aarch64_be-elf and x86_64-linux-gnu.

> OK to install?

>

> Richard


Here's a version updated for the earlier patch, so that we take
both HONOR_NANS and HONOR_SNANS into account.  Tested on
aarch64-linux-gnu to far.

Thanks,
Richard


2019-07-15  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* rtl.h (bit_and_conditions, bit_ior_conditions): Declare.
	* jump.c (flags_to_condition): Add an honor_nans_p argument.
	(bit_ior_conditions, bit_and_conditions): New functions.
	* simplify-rtx.c (simplify_binary_operation_1): Try to fold an
	AND or IOR of two comparisons into a single comparison.
	(simplify_ternary_operation): Try to fold an if_then_else involving
	two conditions into an AND of two conditions.
	(test_merged_comparisons): New function.
	(simplify_rtx_c_tests): Call it.

Index: gcc/rtl.h
===================================================================
--- gcc/rtl.h	2019-07-12 09:14:06.000000000 +0100
+++ gcc/rtl.h	2019-07-15 16:24:30.685937855 +0100
@@ -3315,6 +3315,8 @@ extern enum rtx_code reverse_condition_m
 extern enum rtx_code swap_condition (enum rtx_code);
 extern enum rtx_code unsigned_condition (enum rtx_code);
 extern enum rtx_code signed_condition (enum rtx_code);
+extern rtx_code bit_and_conditions (rtx_code, rtx_code, machine_mode);
+extern rtx_code bit_ior_conditions (rtx_code, rtx_code, machine_mode);
 extern void mark_jump_label (rtx, rtx_insn *, int);
 
 /* Return true if integer comparison operator CODE interprets its operands
Index: gcc/jump.c
===================================================================
--- gcc/jump.c	2019-07-15 16:22:55.342699887 +0100
+++ gcc/jump.c	2019-07-15 16:24:30.685937855 +0100
@@ -138,13 +138,28 @@ #define CASE(CODE, ORDER, SIGNEDNESS, TR
 }
 
 /* Return the comparison code that implements FLAGS_* bitmask FLAGS.
+   If MODE is not VOIDmode, it gives the mode of the values being compared.
+
    Assert on failure if FORCE, otherwise return UNKNOWN.  */
 
 static rtx_code
-flags_to_condition (unsigned int flags, bool force)
+flags_to_condition (unsigned int flags, bool force,
+		    machine_mode mode = VOIDmode)
 {
+  unsigned int order_mask = FLAGS_ORDER;
+  if (mode != VOIDmode)
+    {
+      if (!HONOR_NANS (mode))
+	{
+	  flags |= FLAGS_TRAP_NANS;
+	  order_mask &= ~FLAGS_UNORDERED;
+	}
+      else if (!HONOR_SNANS (mode))
+	flags |= FLAGS_TRAP_SNANS;
+    }
+
 #define TEST(CODE, ORDER, SIGNEDNESS, TRAPS)				\
-  if (((flags ^ (ORDER)) & FLAGS_ORDER) == 0				\
+  if (((flags ^ (ORDER)) & order_mask) == 0				\
       && (FLAGS_##SIGNEDNESS == 0					\
 	  || ((FLAGS_##SIGNEDNESS ^ flags) & FLAGS_SIGNEDNESS) == 0)	\
       && (FLAGS_##TRAPS & ~flags & FLAGS_TRAPS) == 0)			\
@@ -722,6 +737,33 @@ comparison_dominates_p (enum rtx_code co
   return (((flags1 | flags2) & FLAGS_SIGNEDNESS) != FLAGS_SIGNEDNESS
 	  && (flags1 & ~flags2 & FLAGS_ORDER) == 0);
 }
+
+/* Return the comparison code that tests whether CODE1 | CODE2 is
+   true for mode MODE.  Return UNKNOWN if no such comparison exists.
+   The result can trap whenever either CODE1 or CODE2 traps.  */
+
+rtx_code
+bit_ior_conditions (rtx_code code1, rtx_code code2, machine_mode mode)
+{
+  unsigned int flags1 = condition_to_flags (code1);
+  unsigned int flags2 = condition_to_flags (code2);
+  unsigned int flags = flags1 | flags2;
+  return flags_to_condition (flags, false, mode);
+}
+
+/* Return the comparison code that tests whether CODE1 & CODE2 is
+   true for mode MODE.  Return UNKNOWN if no such comparison exists.
+   The result can trap whenever either CODE1 or CODE2 traps.  */
+
+rtx_code
+bit_and_conditions (rtx_code code1, rtx_code code2, machine_mode mode)
+{
+  unsigned int flags1 = condition_to_flags (code1);
+  unsigned int flags2 = condition_to_flags (code2);
+  unsigned int flags = ((flags1 & flags2 & FLAGS_ORDER)
+			| ((flags1 | flags2) & ~FLAGS_ORDER));
+  return flags_to_condition (flags, false, mode);
+}
 
 /* Return 1 if INSN is an unconditional jump and nothing else.  */
 
Index: gcc/simplify-rtx.c
===================================================================
--- gcc/simplify-rtx.c	2019-07-12 09:14:06.000000000 +0100
+++ gcc/simplify-rtx.c	2019-07-15 16:24:30.689937823 +0100
@@ -2889,6 +2889,20 @@ simplify_binary_operation_1 (enum rtx_co
 	    }
 	}
 
+      /* (ior (cmp1 x y) (cmp2 x y)) -> (cmp3 x y).  */
+      if (COMPARISON_P (op0)
+	  && COMPARISON_P (op1)
+	  && rtx_equal_p (XEXP (op0, 0), XEXP (op1, 0))
+	  && rtx_equal_p (XEXP (op0, 1), XEXP (op1, 1)))
+	{
+	  machine_mode cmp_mode = GET_MODE (XEXP (op0, 0));
+	  rtx_code cond = bit_ior_conditions (GET_CODE (op0),
+					      GET_CODE (op1), cmp_mode);
+	  if (cond != UNKNOWN)
+	    return simplify_gen_relational (cond, mode, cmp_mode,
+					    XEXP (op0, 0), XEXP (op0, 1));
+	}
+
       tem = simplify_byte_swapping_operation (code, mode, op0, op1);
       if (tem)
 	return tem;
@@ -3321,6 +3335,20 @@ simplify_binary_operation_1 (enum rtx_co
 	  && rtx_equal_p (op1, XEXP (XEXP (op0, 1), 0)))
 	return simplify_gen_binary (AND, mode, op1, XEXP (op0, 0));
 
+      /* (and (cmp1 x y) (cmp2 x y)) -> (cmp3 x y).  */
+      if (COMPARISON_P (op0)
+	  && COMPARISON_P (op1)
+	  && rtx_equal_p (XEXP (op0, 0), XEXP (op1, 0))
+	  && rtx_equal_p (XEXP (op0, 1), XEXP (op1, 1)))
+	{
+	  machine_mode cmp_mode = GET_MODE (XEXP (op0, 0));
+	  rtx_code cond = bit_and_conditions (GET_CODE (op0),
+					      GET_CODE (op1), cmp_mode);
+	  if (cond != UNKNOWN)
+	    return simplify_gen_relational (cond, mode, cmp_mode,
+					    XEXP (op0, 0), XEXP (op0, 1));
+	}
+
       tem = simplify_byte_swapping_operation (code, mode, op0, op1);
       if (tem)
 	return tem;
@@ -5832,6 +5860,14 @@ simplify_ternary_operation (enum rtx_cod
 	    return simplified;
 	}
 
+      /* (if_then_else (cmp1 X1 Y1) (cmp X2 Y2) (const_int 0))
+	 -> (and (cmp1 X1 Y1) (cmp2 X2 Y2)).  */
+      if (COMPARISON_P (op0)
+	  && COMPARISON_P (op1)
+	  && op2 == const0_rtx
+	  && GET_MODE (op0) == GET_MODE (op1))
+	return simplify_gen_binary (AND, mode, op0, op1);
+
       if (COMPARISON_P (op0) && ! side_effects_p (op0))
 	{
 	  machine_mode cmp_mode = (GET_MODE (XEXP (op0, 0)) == VOIDmode
@@ -6858,6 +6894,75 @@ make_test_reg (machine_mode mode)
   return gen_rtx_REG (mode, test_reg_num++);
 }
 
+/* Test ANDs and IORs of two comparisons.  */
+
+static void
+test_merged_comparisons (machine_mode mode)
+{
+  rtx reg1 = make_test_reg (mode);
+  rtx reg2 = make_test_reg (mode);
+
+  rtx eq = gen_rtx_EQ (mode, reg1, reg2);
+  rtx ne = gen_rtx_NE (mode, reg1, reg2);
+  rtx le = gen_rtx_LE (mode, reg1, reg2);
+  rtx leu = gen_rtx_LEU (mode, reg1, reg2);
+  rtx lt = gen_rtx_LT (mode, reg1, reg2);
+  rtx ltu = gen_rtx_LTU (mode, reg1, reg2);
+  rtx ge = gen_rtx_GE (mode, reg1, reg2);
+  rtx geu = gen_rtx_GEU (mode, reg1, reg2);
+  rtx gt = gen_rtx_GT (mode, reg1, reg2);
+  rtx gtu = gen_rtx_GTU (mode, reg1, reg2);
+
+  ASSERT_FALSE (simplify_binary_operation (AND, mode, le, leu));
+  ASSERT_FALSE (simplify_binary_operation (AND, mode, lt, ltu));
+  ASSERT_FALSE (simplify_binary_operation (AND, mode, gt, gtu));
+  ASSERT_FALSE (simplify_binary_operation (AND, mode, ge, geu));
+
+  ASSERT_RTX_EQ (eq, simplify_binary_operation (AND, mode, eq, leu));
+  ASSERT_RTX_EQ (eq, simplify_binary_operation (AND, mode, eq, geu));
+  ASSERT_RTX_EQ (eq, simplify_binary_operation (AND, mode, eq, le));
+  ASSERT_RTX_EQ (eq, simplify_binary_operation (AND, mode, eq, ge));
+
+  ASSERT_RTX_EQ (eq, simplify_binary_operation (AND, mode, geu, leu));
+  ASSERT_RTX_EQ (eq, simplify_binary_operation (AND, mode, ge, le));
+
+  ASSERT_RTX_EQ (ltu, simplify_binary_operation (AND, mode, leu, ltu));
+  ASSERT_RTX_EQ (gtu, simplify_binary_operation (AND, mode, geu, gtu));
+  ASSERT_RTX_EQ (lt, simplify_binary_operation (AND, mode, le, lt));
+  ASSERT_RTX_EQ (gt, simplify_binary_operation (AND, mode, ge, gt));
+
+  ASSERT_RTX_EQ (ltu, simplify_binary_operation (AND, mode, ne, leu));
+  ASSERT_RTX_EQ (gtu, simplify_binary_operation (AND, mode, ne, geu));
+  ASSERT_RTX_EQ (lt, simplify_binary_operation (AND, mode, ne, le));
+  ASSERT_RTX_EQ (gt, simplify_binary_operation (AND, mode, ne, ge));
+
+  ASSERT_FALSE (simplify_binary_operation (IOR, mode, le, leu));
+  ASSERT_FALSE (simplify_binary_operation (IOR, mode, lt, ltu));
+  ASSERT_FALSE (simplify_binary_operation (IOR, mode, gt, gtu));
+  ASSERT_FALSE (simplify_binary_operation (IOR, mode, ge, geu));
+
+  ASSERT_RTX_EQ (leu, simplify_binary_operation (IOR, mode, eq, leu));
+  ASSERT_RTX_EQ (geu, simplify_binary_operation (IOR, mode, eq, geu));
+  ASSERT_RTX_EQ (le, simplify_binary_operation (IOR, mode, eq, le));
+  ASSERT_RTX_EQ (ge, simplify_binary_operation (IOR, mode, eq, ge));
+
+  ASSERT_RTX_EQ (ne, simplify_binary_operation (IOR, mode, gtu, ltu));
+  ASSERT_RTX_EQ (ne, simplify_binary_operation (IOR, mode, gt, lt));
+
+  ASSERT_RTX_EQ (leu, simplify_binary_operation (IOR, mode, eq, ltu));
+  ASSERT_RTX_EQ (geu, simplify_binary_operation (IOR, mode, eq, gtu));
+  ASSERT_RTX_EQ (le, simplify_binary_operation (IOR, mode, eq, lt));
+  ASSERT_RTX_EQ (ge, simplify_binary_operation (IOR, mode, eq, gt));
+
+  ASSERT_RTX_EQ (ne, simplify_binary_operation (IOR, mode, ne, ltu));
+  ASSERT_RTX_EQ (ne, simplify_binary_operation (IOR, mode, ne, gtu));
+  ASSERT_RTX_EQ (ne, simplify_binary_operation (IOR, mode, ne, lt));
+  ASSERT_RTX_EQ (ne, simplify_binary_operation (IOR, mode, ne, gt));
+
+  ASSERT_RTX_EQ (eq, simplify_ternary_operation (IF_THEN_ELSE, mode, mode,
+						 geu, leu, const0_rtx));
+}
+
 /* Test vector simplifications involving VEC_DUPLICATE in which the
    operands and result have vector mode MODE.  SCALAR_REG is a pseudo
    register that holds one element of MODE.  */
@@ -7149,6 +7254,7 @@ simplify_const_poly_int_tests<N>::run ()
 void
 simplify_rtx_c_tests ()
 {
+  test_merged_comparisons (HImode);
   test_vector_ops ();
   simplify_const_poly_int_tests<NUM_POLY_INT_COEFFS>::run ();
 }

Patch

Index: gcc/vector-builder.h
===================================================================
--- gcc/vector-builder.h	2019-06-07 08:39:43.126344672 +0100
+++ gcc/vector-builder.h	2019-07-11 08:55:03.187049079 +0100
@@ -45,8 +45,11 @@  #define GCC_VECTOR_BUILDER_H
       variable-length vectors.  finalize () then canonicalizes the encoding
       to a simpler form if possible.
 
-   The derived class Derived provides this functionality for specific Ts.
-   Derived needs to provide the following interface:
+   Shape is the type that specifies the number of elements in the vector
+   and (where relevant) the type of each element.
+
+   The derived class Derived provides the functionality of this class
+   for specific Ts.  Derived needs to provide the following interface:
 
       bool equal_p (T elt1, T elt2) const;
 
@@ -82,9 +85,30 @@  #define GCC_VECTOR_BUILDER_H
 
 	  Record that ELT2 is being elided, given that ELT1_PTR points to
 	  the last encoded element for the containing pattern.  This is
-	  again provided for TREE_OVERFLOW handling.  */
+	  again provided for TREE_OVERFLOW handling.
+
+      static poly_uint64 shape_nelts (Shape shape);
+
+	  Return the number of elements in SHAPE.
+
+    The class provides additional functionality for the case in which
+    T can describe a vector constant as well as an individual element.
+    This functionality requires:
+
+      static poly_uint64 nelts_of (T x);
+
+	  Return the number of elements in vector constant X.
+
+      static unsigned int npatterns_of (T x);
+
+	  Return the number of patterns used to encode vector constant X.
+
+      static unsigned int nelts_per_pattern_of (T x);
 
-template<typename T, typename Derived>
+	  Return the number of elements used to encode each pattern
+	  in vector constant X.  */
+
+template<typename T, typename Shape, typename Derived>
 class vector_builder : public auto_vec<T, 32>
 {
 public:
@@ -101,8 +125,13 @@  #define GCC_VECTOR_BUILDER_H
   bool operator == (const Derived &) const;
   bool operator != (const Derived &x) const { return !operator == (x); }
 
+  bool new_unary_operation (Shape, T, bool);
+  bool new_binary_operation (Shape, T, T, bool);
+
   void finalize ();
 
+  static unsigned int binary_encoded_nelts (T, T);
+
 protected:
   void new_vector (poly_uint64, unsigned int, unsigned int);
   void reshape (unsigned int, unsigned int);
@@ -121,16 +150,16 @@  #define GCC_VECTOR_BUILDER_H
   unsigned int m_nelts_per_pattern;
 };
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 inline const Derived *
-vector_builder<T, Derived>::derived () const
+vector_builder<T, Shape, Derived>::derived () const
 {
   return static_cast<const Derived *> (this);
 }
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 inline
-vector_builder<T, Derived>::vector_builder ()
+vector_builder<T, Shape, Derived>::vector_builder ()
   : m_full_nelts (0),
     m_npatterns (0),
     m_nelts_per_pattern (0)
@@ -140,18 +169,18 @@  vector_builder<T, Derived>::vector_build
    starts with these explicitly-encoded elements and may contain additional
    elided elements.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 inline unsigned int
-vector_builder<T, Derived>::encoded_nelts () const
+vector_builder<T, Shape, Derived>::encoded_nelts () const
 {
   return m_npatterns * m_nelts_per_pattern;
 }
 
 /* Return true if every element of the vector is explicitly encoded.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 inline bool
-vector_builder<T, Derived>::encoded_full_vector_p () const
+vector_builder<T, Shape, Derived>::encoded_full_vector_p () const
 {
   return known_eq (m_npatterns * m_nelts_per_pattern, m_full_nelts);
 }
@@ -159,11 +188,11 @@  vector_builder<T, Derived>::encoded_full
 /* Start building a vector that has FULL_NELTS elements.  Initially
    encode it using NPATTERNS patterns with NELTS_PER_PATTERN each.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 void
-vector_builder<T, Derived>::new_vector (poly_uint64 full_nelts,
-					unsigned int npatterns,
-					unsigned int nelts_per_pattern)
+vector_builder<T, Shape, Derived>::new_vector (poly_uint64 full_nelts,
+					       unsigned int npatterns,
+					       unsigned int nelts_per_pattern)
 {
   m_full_nelts = full_nelts;
   m_npatterns = npatterns;
@@ -175,9 +204,9 @@  vector_builder<T, Derived>::new_vector (
 /* Return true if this vector and OTHER have the same elements and
    are encoded in the same way.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 bool
-vector_builder<T, Derived>::operator == (const Derived &other) const
+vector_builder<T, Shape, Derived>::operator == (const Derived &other) const
 {
   if (maybe_ne (m_full_nelts, other.m_full_nelts)
       || m_npatterns != other.m_npatterns
@@ -195,9 +224,9 @@  vector_builder<T, Derived>::operator ==
 /* Return the value of vector element I, which might or might not be
    encoded explicitly.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 T
-vector_builder<T, Derived>::elt (unsigned int i) const
+vector_builder<T, Shape, Derived>::elt (unsigned int i) const
 {
   /* This only makes sense if the encoding has been fully populated.  */
   gcc_checking_assert (encoded_nelts () <= this->length ());
@@ -224,12 +253,118 @@  vector_builder<T, Derived>::elt (unsigne
 				 derived ()->step (prev, final));
 }
 
+/* Try to start building a new vector of shape SHAPE that holds the result of
+   a unary operation on vector constant VEC.  ALLOW_STEPPED_P is true if the
+   operation can handle stepped encodings directly, without having to expand
+   the full sequence.
+
+   Return true if the operation is possible, which it always is when
+   ALLOW_STEPPED_P is true.  Leave the builder unchanged otherwise.  */
+
+template<typename T, typename Shape, typename Derived>
+bool
+vector_builder<T, Shape, Derived>::new_unary_operation (Shape shape, T vec,
+							bool allow_stepped_p)
+{
+  poly_uint64 full_nelts = Derived::shape_nelts (shape);
+  gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec)));
+  unsigned int npatterns = Derived::npatterns_of (vec);
+  unsigned int nelts_per_pattern = Derived::nelts_per_pattern_of (vec);
+  if (!allow_stepped_p && nelts_per_pattern > 2)
+    {
+      if (!full_nelts.is_constant ())
+	return false;
+      npatterns = full_nelts.to_constant ();
+      nelts_per_pattern = 1;
+    }
+  derived ()->new_vector (shape, npatterns, nelts_per_pattern);
+  return true;
+}
+
+/* Try to start building a new vector of shape SHAPE that holds the result of
+   a binary operation on vector constants VEC1 and VEC2.  ALLOW_STEPPED_P is
+   true if the operation can handle stepped encodings directly, without
+   having to expand the full sequence.
+
+   Return true if the operation is possible.  Leave the builder unchanged
+   otherwise.  */
+
+template<typename T, typename Shape, typename Derived>
+bool
+vector_builder<T, Shape, Derived>::new_binary_operation (Shape shape,
+							 T vec1, T vec2,
+							 bool allow_stepped_p)
+{
+  poly_uint64 full_nelts = Derived::shape_nelts (shape);
+  gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec1))
+	      && known_eq (full_nelts, Derived::nelts_of (vec2)));
+  /* Conceptually we split the patterns in VEC1 and VEC2 until we have
+     an equal number for both.  Each split pattern requires the same
+     number of elements per pattern as the original.  E.g. splitting:
+
+       { 1, 2, 3, ... }
+
+     into two gives:
+
+       { 1, 3, 5, ... }
+       { 2, 4, 6, ... }
+
+     while splitting:
+
+       { 1, 0, ... }
+
+     into two gives:
+
+       { 1, 0, ... }
+       { 0, 0, ... }.  */
+  unsigned int npatterns
+    = least_common_multiple (Derived::npatterns_of (vec1),
+			     Derived::npatterns_of (vec2));
+  unsigned int nelts_per_pattern
+    = MAX (Derived::nelts_per_pattern_of (vec1),
+	   Derived::nelts_per_pattern_of (vec2));
+  if (!allow_stepped_p && nelts_per_pattern > 2)
+    {
+      if (!full_nelts.is_constant ())
+	return false;
+      npatterns = full_nelts.to_constant ();
+      nelts_per_pattern = 1;
+    }
+  derived ()->new_vector (shape, npatterns, nelts_per_pattern);
+  return true;
+}
+
+/* Return the number of elements that the caller needs to operate on in
+   order to handle a binary operation on vector constants VEC1 and VEC2.
+   This static function is used instead of new_binary_operation if the
+   result of the operation is not a constant vector.  */
+
+template<typename T, typename Shape, typename Derived>
+unsigned int
+vector_builder<T, Shape, Derived>::binary_encoded_nelts (T vec1, T vec2)
+{
+  poly_uint64 nelts = Derived::nelts_of (vec1);
+  gcc_assert (known_eq (nelts, Derived::nelts_of (vec2)));
+  /* See new_binary_operation for details.  */
+  unsigned int npatterns
+    = least_common_multiple (Derived::npatterns_of (vec1),
+			     Derived::npatterns_of (vec2));
+  unsigned int nelts_per_pattern
+    = MAX (Derived::nelts_per_pattern_of (vec1),
+	   Derived::nelts_per_pattern_of (vec2));
+  unsigned HOST_WIDE_INT const_nelts;
+  if (nelts.is_constant (&const_nelts))
+    return MIN (npatterns * nelts_per_pattern, const_nelts);
+  return npatterns * nelts_per_pattern;
+}
+
 /* Return the number of leading duplicate elements in the range
    [START:END:STEP].  The value is always at least 1.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 unsigned int
-vector_builder<T, Derived>::count_dups (int start, int end, int step) const
+vector_builder<T, Shape, Derived>::count_dups (int start, int end,
+					       int step) const
 {
   gcc_assert ((end - start) % step == 0);
 
@@ -244,10 +379,10 @@  vector_builder<T, Derived>::count_dups (
 /* Change the encoding to NPATTERNS patterns of NELTS_PER_PATTERN each,
    but without changing the underlying vector.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 void
-vector_builder<T, Derived>::reshape (unsigned int npatterns,
-				     unsigned int nelts_per_pattern)
+vector_builder<T, Shape, Derived>::reshape (unsigned int npatterns,
+					    unsigned int nelts_per_pattern)
 {
   unsigned int old_encoded_nelts = encoded_nelts ();
   unsigned int new_encoded_nelts = npatterns * nelts_per_pattern;
@@ -267,11 +402,11 @@  vector_builder<T, Derived>::reshape (uns
 /* Return true if elements [START, END) contain a repeating sequence of
    STEP elements.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 bool
-vector_builder<T, Derived>::repeating_sequence_p (unsigned int start,
-						  unsigned int end,
-						  unsigned int step)
+vector_builder<T, Shape, Derived>::repeating_sequence_p (unsigned int start,
+							 unsigned int end,
+							 unsigned int step)
 {
   for (unsigned int i = start; i < end - step; ++i)
     if (!derived ()->equal_p ((*this)[i], (*this)[i + step]))
@@ -282,11 +417,11 @@  vector_builder<T, Derived>::repeating_se
 /* Return true if elements [START, END) contain STEP interleaved linear
    series.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 bool
-vector_builder<T, Derived>::stepped_sequence_p (unsigned int start,
-						unsigned int end,
-						unsigned int step)
+vector_builder<T, Shape, Derived>::stepped_sequence_p (unsigned int start,
+						       unsigned int end,
+						       unsigned int step)
 {
   if (!derived ()->allow_steps_p ())
     return false;
@@ -315,9 +450,9 @@  vector_builder<T, Derived>::stepped_sequ
 /* Try to change the number of encoded patterns to NPATTERNS, returning
    true on success.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 bool
-vector_builder<T, Derived>::try_npatterns (unsigned int npatterns)
+vector_builder<T, Shape, Derived>::try_npatterns (unsigned int npatterns)
 {
   if (m_nelts_per_pattern == 1)
     {
@@ -368,9 +503,9 @@  vector_builder<T, Derived>::try_npattern
 
 /* Replace the current encoding with the canonical form.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 void
-vector_builder<T, Derived>::finalize ()
+vector_builder<T, Shape, Derived>::finalize ()
 {
   /* The encoding requires the same number of elements to come from each
      pattern.  */
Index: gcc/int-vector-builder.h
===================================================================
--- gcc/int-vector-builder.h	2019-03-08 18:14:26.497007220 +0000
+++ gcc/int-vector-builder.h	2019-07-11 08:55:03.183049114 +0100
@@ -26,10 +26,11 @@  #define GCC_INT_VECTOR_BUILDER_H 1
    encoding as tree and rtx constants.  See vector_builder for more
    details.  */
 template<typename T>
-class int_vector_builder : public vector_builder<T, int_vector_builder<T> >
+class int_vector_builder : public vector_builder<T, poly_uint64,
+						 int_vector_builder<T> >
 {
-  typedef vector_builder<T, int_vector_builder> parent;
-  friend class vector_builder<T, int_vector_builder>;
+  typedef vector_builder<T, poly_uint64, int_vector_builder> parent;
+  friend class vector_builder<T, poly_uint64, int_vector_builder>;
 
 public:
   int_vector_builder () {}
@@ -45,6 +46,8 @@  #define GCC_INT_VECTOR_BUILDER_H 1
   T apply_step (T, unsigned int, T) const;
   bool can_elide_p (T) const { return true; }
   void note_representative (T *, T) {}
+
+  static poly_uint64 shape_nelts (poly_uint64 x) { return x; }
 };
 
 /* Create a new builder for a vector with FULL_NELTS elements.
Index: gcc/rtx-vector-builder.h
===================================================================
--- gcc/rtx-vector-builder.h	2019-03-08 18:15:39.324730378 +0000
+++ gcc/rtx-vector-builder.h	2019-07-11 08:55:03.183049114 +0100
@@ -24,10 +24,11 @@  #define GCC_RTX_VECTOR_BUILDER_H
 
 /* This class is used to build VECTOR_CSTs from a sequence of elements.
    See vector_builder for more details.  */
-class rtx_vector_builder : public vector_builder<rtx, rtx_vector_builder>
+class rtx_vector_builder : public vector_builder<rtx, machine_mode,
+						 rtx_vector_builder>
 {
-  typedef vector_builder<rtx, rtx_vector_builder> parent;
-  friend class vector_builder<rtx, rtx_vector_builder>;
+  typedef vector_builder<rtx, machine_mode, rtx_vector_builder> parent;
+  friend class vector_builder<rtx, machine_mode, rtx_vector_builder>;
 
 public:
   rtx_vector_builder () : m_mode (VOIDmode) {}
@@ -48,6 +49,15 @@  #define GCC_RTX_VECTOR_BUILDER_H
   bool can_elide_p (rtx) const { return true; }
   void note_representative (rtx *, rtx) {}
 
+  static poly_uint64 shape_nelts (machine_mode mode)
+    { return GET_MODE_NUNITS (mode); }
+  static poly_uint64 nelts_of (const_rtx x)
+    { return CONST_VECTOR_NUNITS (x); }
+  static unsigned int npatterns_of (const_rtx x)
+    { return CONST_VECTOR_NPATTERNS (x); }
+  static unsigned int nelts_per_pattern_of (const_rtx x)
+    { return CONST_VECTOR_NELTS_PER_PATTERN (x); }
+
   rtx find_cached_value ();
 
   machine_mode m_mode;
Index: gcc/tree-vector-builder.h
===================================================================
--- gcc/tree-vector-builder.h	2019-03-08 18:14:25.845009699 +0000
+++ gcc/tree-vector-builder.h	2019-07-11 08:55:03.187049079 +0100
@@ -24,10 +24,11 @@  #define GCC_TREE_VECTOR_BUILDER_H
 
 /* This class is used to build VECTOR_CSTs from a sequence of elements.
    See vector_builder for more details.  */
-class tree_vector_builder : public vector_builder<tree, tree_vector_builder>
+class tree_vector_builder : public vector_builder<tree, tree,
+						  tree_vector_builder>
 {
-  typedef vector_builder<tree, tree_vector_builder> parent;
-  friend class vector_builder<tree, tree_vector_builder>;
+  typedef vector_builder<tree, tree, tree_vector_builder> parent;
+  friend class vector_builder<tree, tree, tree_vector_builder>;
 
 public:
   tree_vector_builder () : m_type (0) {}
@@ -37,10 +38,6 @@  #define GCC_TREE_VECTOR_BUILDER_H
   tree type () const { return m_type; }
 
   void new_vector (tree, unsigned int, unsigned int);
-  bool new_unary_operation (tree, tree, bool);
-  bool new_binary_operation (tree, tree, tree, bool);
-
-  static unsigned int binary_encoded_nelts (tree, tree);
 
 private:
   bool equal_p (const_tree, const_tree) const;
@@ -51,6 +48,15 @@  #define GCC_TREE_VECTOR_BUILDER_H
   bool can_elide_p (const_tree) const;
   void note_representative (tree *, tree);
 
+  static poly_uint64 shape_nelts (const_tree t)
+    { return TYPE_VECTOR_SUBPARTS (t); }
+  static poly_uint64 nelts_of (const_tree t)
+    { return VECTOR_CST_NELTS (t); }
+  static unsigned int npatterns_of (const_tree t)
+    { return VECTOR_CST_NPATTERNS (t); }
+  static unsigned int nelts_per_pattern_of (const_tree t)
+    { return VECTOR_CST_NELTS_PER_PATTERN (t); }
+
   tree m_type;
 };
 
Index: gcc/tree-vector-builder.c
===================================================================
--- gcc/tree-vector-builder.c	2019-03-08 18:14:25.333011645 +0000
+++ gcc/tree-vector-builder.c	2019-07-11 08:55:03.187049079 +0100
@@ -24,103 +24,6 @@  Software Foundation; either version 3, o
 #include "fold-const.h"
 #include "tree-vector-builder.h"
 
-/* Try to start building a new vector of type TYPE that holds the result of
-   a unary operation on VECTOR_CST T.  ALLOW_STEPPED_P is true if the
-   operation can handle stepped encodings directly, without having to
-   expand the full sequence.
-
-   Return true if the operation is possible, which it always is when
-   ALLOW_STEPPED_P is true.  Leave the builder unchanged otherwise.  */
-
-bool
-tree_vector_builder::new_unary_operation (tree type, tree t,
-					  bool allow_stepped_p)
-{
-  poly_uint64 full_nelts = TYPE_VECTOR_SUBPARTS (type);
-  gcc_assert (known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t))));
-  unsigned int npatterns = VECTOR_CST_NPATTERNS (t);
-  unsigned int nelts_per_pattern = VECTOR_CST_NELTS_PER_PATTERN (t);
-  if (!allow_stepped_p && nelts_per_pattern > 2)
-    {
-      if (!full_nelts.is_constant ())
-	return false;
-      npatterns = full_nelts.to_constant ();
-      nelts_per_pattern = 1;
-    }
-  new_vector (type, npatterns, nelts_per_pattern);
-  return true;
-}
-
-/* Try to start building a new vector of type TYPE that holds the result of
-   a binary operation on VECTOR_CSTs T1 and T2.  ALLOW_STEPPED_P is true if
-   the operation can handle stepped encodings directly, without having to
-   expand the full sequence.
-
-   Return true if the operation is possible.  Leave the builder unchanged
-   otherwise.  */
-
-bool
-tree_vector_builder::new_binary_operation (tree type, tree t1, tree t2,
-					   bool allow_stepped_p)
-{
-  poly_uint64 full_nelts = TYPE_VECTOR_SUBPARTS (type);
-  gcc_assert (known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1)))
-	      && known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2))));
-  /* Conceptually we split the patterns in T1 and T2 until we have
-     an equal number for both.  Each split pattern requires the same
-     number of elements per pattern as the original.  E.g. splitting:
-
-       { 1, 2, 3, ... }
-
-     into two gives:
-
-       { 1, 3, 5, ... }
-       { 2, 4, 6, ... }
-
-     while splitting:
-
-       { 1, 0, ... }
-
-     into two gives:
-
-       { 1, 0, ... }
-       { 0, 0, ... }.  */
-  unsigned int npatterns = least_common_multiple (VECTOR_CST_NPATTERNS (t1),
-						  VECTOR_CST_NPATTERNS (t2));
-  unsigned int nelts_per_pattern = MAX (VECTOR_CST_NELTS_PER_PATTERN (t1),
-					VECTOR_CST_NELTS_PER_PATTERN (t2));
-  if (!allow_stepped_p && nelts_per_pattern > 2)
-    {
-      if (!full_nelts.is_constant ())
-	return false;
-      npatterns = full_nelts.to_constant ();
-      nelts_per_pattern = 1;
-    }
-  new_vector (type, npatterns, nelts_per_pattern);
-  return true;
-}
-
-/* Return the number of elements that the caller needs to operate on in
-   order to handle a binary operation on VECTOR_CSTs T1 and T2.  This static
-   function is used instead of new_binary_operation if the result of the
-   operation is not a VECTOR_CST.  */
-
-unsigned int
-tree_vector_builder::binary_encoded_nelts (tree t1, tree t2)
-{
-  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1));
-  gcc_assert (known_eq (nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2))));
-  /* See new_binary_operation for details.  */
-  unsigned int npatterns = least_common_multiple (VECTOR_CST_NPATTERNS (t1),
-						  VECTOR_CST_NPATTERNS (t2));
-  unsigned int nelts_per_pattern = MAX (VECTOR_CST_NELTS_PER_PATTERN (t1),
-					VECTOR_CST_NELTS_PER_PATTERN (t2));
-  unsigned HOST_WIDE_INT const_nelts;
-  if (nelts.is_constant (&const_nelts))
-    return MIN (npatterns * nelts_per_pattern, const_nelts);
-  return npatterns * nelts_per_pattern;
-}
-
 /* Return a vector element with the value BASE + FACTOR * STEP.  */
 
 tree
Index: gcc/simplify-rtx.c
===================================================================
--- gcc/simplify-rtx.c	2019-07-11 08:33:58.073250143 +0100
+++ gcc/simplify-rtx.c	2019-07-11 08:55:03.187049079 +0100
@@ -1754,27 +1754,23 @@  simplify_const_unary_operation (enum rtx
 
   if (VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
     {
-      unsigned int n_elts;
-      if (!CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
-	return NULL_RTX;
-
-      machine_mode opmode = GET_MODE (op);
-      gcc_assert (known_eq (GET_MODE_NUNITS (mode), n_elts));
-      gcc_assert (known_eq (GET_MODE_NUNITS (opmode), n_elts));
+      gcc_assert (GET_MODE (op) == op_mode);
 
-      rtvec v = rtvec_alloc (n_elts);
-      unsigned int i;
+      rtx_vector_builder builder;
+      if (!builder.new_unary_operation (mode, op, false))
+	return 0;
 
-      for (i = 0; i < n_elts; i++)
+      unsigned int count = builder.encoded_nelts ();
+      for (unsigned int i = 0; i < count; i++)
 	{
 	  rtx x = simplify_unary_operation (code, GET_MODE_INNER (mode),
 					    CONST_VECTOR_ELT (op, i),
-					    GET_MODE_INNER (opmode));
+					    GET_MODE_INNER (op_mode));
 	  if (!x || !valid_for_const_vector_p (mode, x))
 	    return 0;
-	  RTVEC_ELT (v, i) = x;
+	  builder.quick_push (x);
 	}
-      return gen_rtx_CONST_VECTOR (mode, v);
+      return builder.build ();
     }
 
   /* The order of these tests is critical so that, for example, we don't
@@ -4060,6 +4056,27 @@  simplify_binary_operation_1 (enum rtx_co
   return 0;
 }
 
+/* Return true if binary operation OP distributes over addition in operand
+   OPNO, with the other operand being held constant.  OPNO counts from 1.  */
+
+static bool
+distributes_over_addition_p (rtx_code op, int opno)
+{
+  switch (op)
+    {
+    case PLUS:
+    case MINUS:
+    case MULT:
+      return true;
+
+    case ASHIFT:
+      return opno == 1;
+
+    default:
+      return false;
+    }
+}
+
 rtx
 simplify_const_binary_operation (enum rtx_code code, machine_mode mode,
 				 rtx op0, rtx op1)
@@ -4069,26 +4086,45 @@  simplify_const_binary_operation (enum rt
       && GET_CODE (op0) == CONST_VECTOR
       && GET_CODE (op1) == CONST_VECTOR)
     {
-      unsigned int n_elts;
-      if (!CONST_VECTOR_NUNITS (op0).is_constant (&n_elts))
-	return NULL_RTX;
+      bool step_ok_p;
+      if (CONST_VECTOR_STEPPED_P (op0)
+	  && CONST_VECTOR_STEPPED_P (op1))
+	/* We can operate directly on the encoding if:
+
+	      a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1
+	    implies
+	      (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1)
+
+	   Addition and subtraction are the supported operators
+	   for which this is true.  */
+	step_ok_p = (code == PLUS || code == MINUS);
+      else if (CONST_VECTOR_STEPPED_P (op0))
+	/* We can operate directly on stepped encodings if:
+
+	     a3 - a2 == a2 - a1
+	   implies:
+	     (a3 op c) - (a2 op c) == (a2 op c) - (a1 op c)
 
-      gcc_assert (known_eq (n_elts, CONST_VECTOR_NUNITS (op1)));
-      gcc_assert (known_eq (n_elts, GET_MODE_NUNITS (mode)));
-      rtvec v = rtvec_alloc (n_elts);
-      unsigned int i;
+	   which is true if (x -> x op c) distributes over addition.  */
+	step_ok_p = distributes_over_addition_p (code, 1);
+      else
+	/* Similarly in reverse.  */
+	step_ok_p = distributes_over_addition_p (code, 2);
+      rtx_vector_builder builder;
+      if (!builder.new_binary_operation (mode, op0, op1, step_ok_p))
+	return 0;
 
-      for (i = 0; i < n_elts; i++)
+      unsigned int count = builder.encoded_nelts ();
+      for (unsigned int i = 0; i < count; i++)
 	{
 	  rtx x = simplify_binary_operation (code, GET_MODE_INNER (mode),
 					     CONST_VECTOR_ELT (op0, i),
 					     CONST_VECTOR_ELT (op1, i));
 	  if (!x || !valid_for_const_vector_p (mode, x))
 	    return 0;
-	  RTVEC_ELT (v, i) = x;
+	  builder.quick_push (x);
 	}
-
-      return gen_rtx_CONST_VECTOR (mode, v);
+      return builder.build ();
     }
 
   if (VECTOR_MODE_P (mode)
@@ -7107,6 +7143,58 @@  test_vector_ops_series (machine_mode mod
   ASSERT_RTX_EQ (series_0_m1,
 		 simplify_binary_operation (VEC_SERIES, mode, const0_rtx,
 					    constm1_rtx));
+
+  /* Test NEG on constant vector series.  */
+  ASSERT_RTX_EQ (series_0_m1,
+		 simplify_unary_operation (NEG, mode, series_0_1, mode));
+  ASSERT_RTX_EQ (series_0_1,
+		 simplify_unary_operation (NEG, mode, series_0_m1, mode));
+
+  /* Test PLUS and MINUS on constant vector series.  */
+  rtx scalar2 = gen_int_mode (2, inner_mode);
+  rtx scalar3 = gen_int_mode (3, inner_mode);
+  rtx series_1_1 = gen_const_vec_series (mode, const1_rtx, const1_rtx);
+  rtx series_0_2 = gen_const_vec_series (mode, const0_rtx, scalar2);
+  rtx series_1_3 = gen_const_vec_series (mode, const1_rtx, scalar3);
+  ASSERT_RTX_EQ (series_1_1,
+		 simplify_binary_operation (PLUS, mode, series_0_1,
+					    CONST1_RTX (mode)));
+  ASSERT_RTX_EQ (series_0_m1,
+		 simplify_binary_operation (PLUS, mode, CONST0_RTX (mode),
+					    series_0_m1));
+  ASSERT_RTX_EQ (series_1_3,
+		 simplify_binary_operation (PLUS, mode, series_1_1,
+					    series_0_2));
+  ASSERT_RTX_EQ (series_0_1,
+		 simplify_binary_operation (MINUS, mode, series_1_1,
+					    CONST1_RTX (mode)));
+  ASSERT_RTX_EQ (series_1_1,
+		 simplify_binary_operation (MINUS, mode, CONST1_RTX (mode),
+					    series_0_m1));
+  ASSERT_RTX_EQ (series_1_1,
+		 simplify_binary_operation (MINUS, mode, series_1_3,
+					    series_0_2));
+
+  /* Test MULT between constant vectors.  */
+  rtx vec2 = gen_const_vec_duplicate (mode, scalar2);
+  rtx vec3 = gen_const_vec_duplicate (mode, scalar3);
+  rtx scalar9 = gen_int_mode (9, inner_mode);
+  rtx series_3_9 = gen_const_vec_series (mode, scalar3, scalar9);
+  ASSERT_RTX_EQ (series_0_2,
+		 simplify_binary_operation (MULT, mode, series_0_1, vec2));
+  ASSERT_RTX_EQ (series_3_9,
+		 simplify_binary_operation (MULT, mode, vec3, series_1_3));
+  if (!GET_MODE_NUNITS (mode).is_constant ())
+    ASSERT_FALSE (simplify_binary_operation (MULT, mode, series_0_1,
+					     series_0_1));
+
+  /* Test ASHIFT between constant vectors.  */
+  ASSERT_RTX_EQ (series_0_2,
+		 simplify_binary_operation (ASHIFT, mode, series_0_1,
+					    CONST1_RTX (mode)));
+  if (!GET_MODE_NUNITS (mode).is_constant ())
+    ASSERT_FALSE (simplify_binary_operation (ASHIFT, mode, CONST1_RTX (mode),
+					     series_0_1));
 }
 
 /* Verify simplify_merge_mask works correctly.  */