Support folding from array ctor spanning multiple elements

Message ID alpine.LSU.2.20.1907111454290.2976@zhemvz.fhfr.qr
State New
Headers show
Series
  • Support folding from array ctor spanning multiple elements
Related show

Commit Message

Richard Biener July 11, 2019, 12:59 p.m.
The following exends fold_array_ctor_reference to constant-fold
reads that cross multiple array elements which is needed to fix
folding for targets like aarch64 in PR83518 where we end up with

  arr = *.LC0;
  arr[0] = 5;
  vect__56.15_75 = MEM <vector(2) int> [(int *)&arr];

now it would be nice to have an iterator-style interface for
accessing ctor elements of an array ctor, the code I wrote
is somewhat ugly.  It also seems we have to support unordered
ctors in get_array_ctor_element_at_index when called from
(some) frontend(s) context.  When in GIMPLE form we could use
a binary search which conveniently the C++ frontend already
implements for constexpr folding.

Test coverage is also weak (writing more testcases now...),
esp. for the cases with missing initializers.

Anywhow, boostrap / regtest running on x86_64-unknown-linux-gnu.

The folding code is unfortunately structured in a way that doesn't
easily allow to deal with sub-ctors here.

Any comments?

Thanks,
Richard.

2019-07-11  Richard Biener  <rguenther@suse.de>

	* fold-const.h (get_array_ctor_element_at_index): Adjust.
	* fold-const.c (get_array_ctor_element_at_index): Add
	ctor_idx output parameter informing the caller where in
	the constructor the element was (not) found.  Add early exit
	for when the ctor is sorted.
	* gimple-fold.c (fold_array_ctor_reference): Support constant
	folding across multiple array elements.

	* gcc.dg/tree-ssa/vector-7.c: New testcase.

Patch

Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	(revision 273355)
+++ gcc/fold-const.c	(working copy)
@@ -11839,10 +11839,15 @@  fold_ternary_loc (location_t loc, enum t
 }
 
 /* Gets the element ACCESS_INDEX from CTOR, which must be a CONSTRUCTOR
-   of an array (or vector).  */
+   of an array (or vector).  *CTOR_IDX if non-NULL is updated with the
+   constructor element index of the value returned.  If the element is
+   not found NULL_TREE is returned and *CTOR_IDX is updated to
+   the index of the element after the ACCESS_INDEX position (which
+   may be outside of the CTOR array).  */
 
 tree
-get_array_ctor_element_at_index (tree ctor, offset_int access_index)
+get_array_ctor_element_at_index (tree ctor, offset_int access_index,
+				 unsigned *ctor_idx)
 {
   tree index_type = NULL_TREE;
   offset_int low_bound = 0;
@@ -11869,7 +11874,7 @@  get_array_ctor_element_at_index (tree ct
 		     TYPE_SIGN (index_type));
 
   offset_int max_index;
-  unsigned HOST_WIDE_INT cnt;
+  unsigned cnt;
   tree cfield, cval;
 
   FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), cnt, cfield, cval)
@@ -11897,11 +11902,26 @@  get_array_ctor_element_at_index (tree ct
 	  max_index = index;
 	}
 
-    /* Do we have match?  */
-    if (wi::cmpu (access_index, index) >= 0
-	&& wi::cmpu (access_index, max_index) <= 0)
-      return cval;
-  }
+      /* Do we have match?  */
+      if (wi::cmpu (access_index, index) >= 0)
+	{
+	  if (wi::cmpu (access_index, max_index) <= 0)
+	    {
+	      if (ctor_idx)
+		*ctor_idx = cnt;
+	      return cval;
+	    }
+	}
+      else if (in_gimple_form)
+	/* We're past the element we search for.  Note during parsing
+	   the elements might not be sorted.
+	   ???  We should use a binary search and a flag on the
+	   CONSTRUCTOR as to whether elements are sorted in declaration
+	   order.  */
+	break;
+    }
+  if (ctor_idx)
+    *ctor_idx = cnt;
   return NULL_TREE;
 }
 
Index: gcc/fold-const.h
===================================================================
--- gcc/fold-const.h	(revision 273355)
+++ gcc/fold-const.h	(working copy)
@@ -67,7 +67,8 @@  extern tree fold_build_call_array_loc (l
 #define fold_build_call_array_initializer(T1,T2,N,T4)\
    fold_build_call_array_initializer_loc (UNKNOWN_LOCATION, T1, T2, N, T4)
 extern tree fold_build_call_array_initializer_loc (location_t, tree, tree, int, tree *);
-extern tree get_array_ctor_element_at_index (tree, offset_int);
+extern tree get_array_ctor_element_at_index (tree, offset_int,
+					     unsigned * = NULL);
 extern bool fold_convertible_p (const_tree, const_tree);
 #define fold_convert(T1,T2)\
    fold_convert_loc (UNKNOWN_LOCATION, T1, T2)
Index: gcc/gimple-fold.c
===================================================================
--- gcc/gimple-fold.c	(revision 273355)
+++ gcc/gimple-fold.c	(working copy)
@@ -6716,14 +6716,13 @@  fold_array_ctor_reference (tree type, tr
   elt_size = wi::to_offset (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (ctor))));
 
   /* When TYPE is non-null, verify that it specifies a constant-sized
-     accessed not larger than size of array element.  Avoid division
+     access of a multiple of the array element size.  Avoid division
      by zero below when ELT_SIZE is zero, such as with the result of
      an initializer for a zero-length array or an empty struct.  */
   if (elt_size == 0
       || (type
 	  && (!TYPE_SIZE_UNIT (type)
-	      || TREE_CODE (TYPE_SIZE_UNIT (type)) != INTEGER_CST
-	      || elt_size < wi::to_offset (TYPE_SIZE_UNIT (type)))))
+	      || TREE_CODE (TYPE_SIZE_UNIT (type)) != INTEGER_CST)))
     return NULL_TREE;
 
   /* Compute the array index we look for.  */
@@ -6734,10 +6733,88 @@  fold_array_ctor_reference (tree type, tr
   /* And offset within the access.  */
   inner_offset = offset % (elt_size.to_uhwi () * BITS_PER_UNIT);
 
-  /* See if the array field is large enough to span whole access.  We do not
-     care to fold accesses spanning multiple array indexes.  */
-  if (inner_offset + size > elt_size.to_uhwi () * BITS_PER_UNIT)
-    return NULL_TREE;
+  if (size > elt_size.to_uhwi () * BITS_PER_UNIT)
+    {
+      /* native_encode_expr constraints.  */
+      if (size > MAX_BITSIZE_MODE_ANY_MODE
+	  || size % BITS_PER_UNIT != 0
+	  || inner_offset % BITS_PER_UNIT != 0)
+	return NULL_TREE;
+
+      unsigned ctor_idx;
+      tree val = get_array_ctor_element_at_index (ctor, access_index,
+						  &ctor_idx);
+      if (!val && ctor_idx >= CONSTRUCTOR_NELTS  (ctor))
+	return build_zero_cst (type);
+
+      /* native-encode adjacent ctor elements.  */
+      unsigned char buf[MAX_BITSIZE_MODE_ANY_MODE / BITS_PER_UNIT];
+      unsigned bufoff = 0;
+      offset_int index = 0;
+      offset_int max_index = access_index;
+      constructor_elt *elt = CONSTRUCTOR_ELT (ctor, ctor_idx);
+      if (!val)
+	val = build_zero_cst (TREE_TYPE (TREE_TYPE (ctor)));
+      else if (!CONSTANT_CLASS_P (val))
+	return NULL_TREE;
+      if (!elt->index)
+	;
+      else if (TREE_CODE (elt->index) == RANGE_EXPR)
+	{
+	  index = wi::to_offset (TREE_OPERAND (elt->index, 0));
+	  max_index = wi::to_offset (TREE_OPERAND (elt->index, 1));
+	}
+      else
+	index = max_index = wi::to_offset (elt->index);
+      index = wi::umax (index, access_index);
+      do
+	{
+	  int len = native_encode_expr (val, buf + bufoff,
+					elt_size.to_uhwi (),
+					inner_offset / BITS_PER_UNIT);
+	  if (len != elt_size - inner_offset / BITS_PER_UNIT)
+	    return NULL_TREE;
+	  inner_offset = 0;
+	  bufoff += len;
+
+	  access_index += 1;
+	  if (wi::cmpu (access_index, index) == 0)
+	    val = elt->value;
+	  else if (wi::cmpu (access_index, max_index) > 0)
+	    {
+	      ctor_idx++;
+	      if (ctor_idx >= CONSTRUCTOR_NELTS (ctor))
+		{
+		  val = build_zero_cst (TREE_TYPE (TREE_TYPE (ctor)));
+		  ++max_index;
+		}
+	      else
+		{
+		  elt = CONSTRUCTOR_ELT (ctor, ctor_idx);
+		  index = 0;
+		  max_index = access_index;
+		  if (!elt->index)
+		    ;
+		  else if (TREE_CODE (elt->index) == RANGE_EXPR)
+		    {
+		      index = wi::to_offset (TREE_OPERAND (elt->index, 0));
+		      max_index = wi::to_offset (TREE_OPERAND (elt->index, 1));
+		    }
+		  else
+		    index = max_index = wi::to_offset (elt->index);
+		  index = wi::umax (index, access_index);
+		  if (wi::cmpu (access_index, index) == 0)
+		    val = elt->value;
+		  else
+		    val = build_zero_cst (TREE_TYPE (TREE_TYPE (ctor)));
+		}
+	    }
+	}
+      while (bufoff < size / BITS_PER_UNIT);
+      *suboff += size;
+      return native_interpret_expr (type, buf, size / BITS_PER_UNIT);
+    }
+
   if (tree val = get_array_ctor_element_at_index (ctor, access_index))
     {
       if (!size && TREE_CODE (val) != CONSTRUCTOR)
Index: gcc/testsuite/gcc.dg/tree-ssa/vector-7.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/vector-7.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/tree-ssa/vector-7.c	(working copy)
@@ -0,0 +1,34 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+typedef __INT8_TYPE__ v16qi __attribute__((vector_size(16),may_alias,aligned(1)));
+typedef __INT32_TYPE__ v4si __attribute__((vector_size(16),may_alias,aligned(1)));
+
+const __INT32_TYPE__ x[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+const __INT8_TYPE__ y[32]
+  = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
+
+int
+main()
+{
+  v4si v1 = *(v4si *)(x + 1);
+  for (unsigned i = 0; i < 4; ++i)
+    if (v1[i] != (v4si) { 2, 3, 4, 5 }[i])
+      __builtin_abort ();
+  v4si v3 = *(v4si *)(y + 1);
+  for (unsigned i = 0; i < 4; ++i)
+    if (v3[i] !=
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+	(v4si) { 0x01020304, 0x05060708, 0x090a0b0c, 0x0d0e0f01 }[i]
+#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	(v4si) { 0x04030201, 0x08070605, 0x0c0b0a09, 0x100f0e0d }[i]
+#else
+	v3[i]
+#endif
+       )
+      __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "abort" "optimized" } } */