extend cselim to check non-trapping for more references (PR tree-optimization/89430)

Message ID DM6PR01MB465200A87E79015C66E760D6E1B10@DM6PR01MB4652.prod.exchangelabs.com
State New
Headers show
Series
  • extend cselim to check non-trapping for more references (PR tree-optimization/89430)
Related show

Commit Message

Jakub Jelinek via Gcc-patches May 27, 2020, 4:12 a.m.
Hi all,

Previously, the fix for PR89430<https://gcc.gnu.org/bugzilla//show_bug.cgi?id=89430> was reverted by PR94734<https://gcc.gnu.org/bugzilla//show_bug.cgi?id=94734> due to a bug. The root cause is missing non-trapping check with dominating LOAD/STORE.

This patch extends the cselim non-trapping check to support ARRAY_REF and COMPONENT_REF (previously only support MEM_REF) by get_inner_reference and hashing according to the comments from Jakub.

To support cases in PR89430, if there is a dominating LOAD of local variable without address escape,  as local stack is always writable, the STORE is not trapped and can be optimized.

Review, please.

--------------------


Thanks,
-Hao

Comments

Richard Biener May 28, 2020, 1:06 p.m. | #1
On Wed, 27 May 2020, Hao Liu OS wrote:

> Hi all,

> 

> Previously, the fix for 

> PR89430<https://gcc.gnu.org/bugzilla//show_bug.cgi?id=89430> was 

> reverted by PR94734<https://gcc.gnu.org/bugzilla//show_bug.cgi?id=94734> 

> due to a bug. The root cause is missing non-trapping check with 

> dominating LOAD/STORE.

> 

> This patch extends the cselim non-trapping check to support ARRAY_REF 

> and COMPONENT_REF (previously only support MEM_REF) by 

> get_inner_reference and hashing according to the comments from Jakub.

> 

> To support cases in PR89430, if there is a dominating LOAD of local 

> variable without address escape, as local stack is always writable, the 

> STORE is not trapped and can be optimized.

> 

> Review, please.


How did you test the patch?  There's a ChangeLog missing as well as
a testcase or testcase adjustments in case we XFAILed the old ones.
It helps to post patches created by git format-patch.

First comments inline...

> --------------------

> diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c

> index b1e0dce93d8..3733780a0bc 100644

> --- a/gcc/tree-ssa-phiopt.c

> +++ b/gcc/tree-ssa-phiopt.c

> @@ -1986,26 +1986,31 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,

> 

>     ??? We currently are very conservative and assume that a load might

>     trap even if a store doesn't (write-only memory).  This probably is

> -   overly conservative.  */

> +   overly conservative.

> 

> -/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF

> -   through it was seen, which would constitute a no-trap region for

> -   same accesses.  */

> -struct name_to_bb

> +   We currently support a special case that for !TREE_ADDRESSABLE automatic

> +   variables, it could ignore whether something is a load or store because the

> +   local stack should be always writable.  */

> +

> +/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which

> +   basic block an *_REF through it was seen, which would constitute a

> +   no-trap region for same accesses.  */

> +struct ref_to_bb

>  {

> -  unsigned int ssa_name_ver;

> +  tree base;

> +  poly_int64 bitsize, bitpos;

> +  tree offset;

>    unsigned int phase;

> -  bool store;

> -  HOST_WIDE_INT offset, size;

> +  bool writable;

>    basic_block bb;

>  };

> 

>  /* Hashtable helpers.  */

> 

> -struct ssa_names_hasher : free_ptr_hash <name_to_bb>

> +struct refs_hasher : free_ptr_hash<ref_to_bb>

>  {

> -  static inline hashval_t hash (const name_to_bb *);

> -  static inline bool equal (const name_to_bb *, const name_to_bb *);

> +  static inline hashval_t hash (const ref_to_bb *);

> +  static inline bool equal (const ref_to_bb *, const ref_to_bb *);

>  };

> 

>  /* Used for quick clearing of the hash-table when we see calls.

> @@ -2015,28 +2020,44 @@ static unsigned int nt_call_phase;

>  /* The hash function.  */

> 

>  inline hashval_t

> -ssa_names_hasher::hash (const name_to_bb *n)

> +refs_hasher::hash (const ref_to_bb *n)

>  {

> -  return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)

> -         ^ (n->offset << 6) ^ (n->size << 3);

> +  inchash::hash hstate;

> +  inchash::add_expr (n->base, hstate);

> +  hstate.add_poly_int (n->bitsize);

> +  hstate.add_poly_int (n->bitpos);

> +  hstate.add_int (n->writable);

> +  if (n->offset != NULL_TREE)

> +    {

> +      inchash::add_expr (n->offset, hstate);

> +    }


extra {}

> +  return hstate.end ();

>  }

> 

>  /* The equality function of *P1 and *P2.  */

> 

>  inline bool

> -ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)

> +refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2)

>  {

> -  return n1->ssa_name_ver == n2->ssa_name_ver

> -         && n1->store == n2->store

> -         && n1->offset == n2->offset

> -         && n1->size == n2->size;

> +  if (operand_equal_p (n1->base, n2->base, 0)

> +      && known_eq (n1->bitsize, n2->bitsize)

> +      && known_eq (n1->bitpos, n2->bitpos) && n1->writable == n2->writable)

> +    {

> +      /* Should not call operand_equal_p with NULL_TREE.  */

> +      if (n1->offset == NULL_TREE || n2->offset == NULL_TREE)

> +          return n1->offset == n2->offset;


indents are off.  You should be using a tab-stop of 8 characters.

> +      else

> +          return operand_equal_p (n1->offset, n2->offset, 0);

> +    }

> +  return false;

>  }

> 

>  class nontrapping_dom_walker : public dom_walker

>  {

>  public:

>    nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)

> -    : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}

> +    : dom_walker (direction), m_nontrapping (ps), m_seen_refs (256)

> +  {}

> 

>    virtual edge before_dom_children (basic_block);

>    virtual void after_dom_children (basic_block);

> @@ -2053,7 +2074,7 @@ private:

>    hash_set<tree> *m_nontrapping;

> 

>    /* The hash table for remembering what we've seen.  */

> -  hash_table<ssa_names_hasher> m_seen_ssa_names;

> +  hash_table<refs_hasher> m_seen_refs;

>  };

> 

>  /* Called by walk_dominator_tree, when entering the block BB.  */

> @@ -2102,65 +2123,76 @@ nontrapping_dom_walker::after_dom_children (basic_block bb)

>  }

> 

>  /* We see the expression EXP in basic block BB.  If it's an interesting

> -   expression (an MEM_REF through an SSA_NAME) possibly insert the

> -   expression into the set NONTRAP or the hash table of seen expressions.

> -   STORE is true if this expression is on the LHS, otherwise it's on

> -   the RHS.  */

> +   expression of:

> +     1) MEM_REF

> +     2) ARRAY_REF

> +     3) COMPONENT_REF

> +   possibly insert the expression into the set NONTRAP or the hash table

> +   of seen expressions.  STORE is true if this expression is on the LHS,

> +   otherwise it's on the RHS.  */

>  void

>  nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)

>  {

> -  HOST_WIDE_INT size;

> -

> -  if (TREE_CODE (exp) == MEM_REF

> -      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME

> -      && tree_fits_shwi_p (TREE_OPERAND (exp, 1))

> -      && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)

> +  if (TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF

> +      || TREE_CODE (exp) == COMPONENT_REF)

>      {

> -      tree name = TREE_OPERAND (exp, 0);

> -      struct name_to_bb map;

> -      name_to_bb **slot;

> -      struct name_to_bb *n2bb;

> +      struct ref_to_bb map;

> +      ref_to_bb **slot;

> +      struct ref_to_bb *r2bb;

>        basic_block found_bb = 0;

> 

> -      /* Try to find the last seen MEM_REF through the same

> -         SSA_NAME, which can trap.  */

> -      map.ssa_name_ver = SSA_NAME_VERSION (name);

> -      map.phase = 0;

> -      map.bb = 0;

> -      map.store = store;

> -      map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));

> -      map.size = size;

> -

> -      slot = m_seen_ssa_names.find_slot (&map, INSERT);

> -      n2bb = *slot;

> -      if (n2bb && n2bb->phase >= nt_call_phase)

> -        found_bb = n2bb->bb;

> -

> -      /* If we've found a trapping MEM_REF, _and_ it dominates EXP

> -         (it's in a basic block on the path from us to the dominator root)

> +      /* Try to find the last seen *_REF (through the same base expression

> +         + bitsize + bitpos + offset expression), which can trap.  */

> +      machine_mode nmode;

> +      poly_int64 bitsize, bitpos;

> +      tree offset;

> +      int nunsignedp, nreversep, nvolatilep = 0;

> +      tree base = get_inner_reference (exp, &bitsize, &bitpos, &offset, &nmode,

> +                                        &nunsignedp, &nreversep, &nvolatilep);

> +      gcc_assert (base != NULL_TREE);


So my main concern is that get_inner_reference is quite expensive
since it builds the variable offset part as new GENERIC trees.

I think it would be easier to fully rely on operand_equal_p here
and thus record the original full expression.  For the writable
flag you can use the way cheaper get_base_address () which gets
you 'base' without the overhead of building the offset tree.

That means you merely exchange ssa_name_ver in the struct with 'exp'.

Thanks,
Richard.

> +      map.base = base;

> +      map.offset = offset;

> +      map.bitsize = bitsize;

> +      map.bitpos = bitpos;

> +

> +      /* The reference is writable if EXP is a:

> +         1) STORE, or

> +         2) LOAD of a local variable without address-taken (as the local stack

> +         is always writable).  This allows cselim on a STORE with a dominating

> +         LOAD.  */

> +      map.writable = store || (auto_var_p (base) && !TREE_ADDRESSABLE (base));

> +

> +      slot = m_seen_refs.find_slot (&map, INSERT);

> +      r2bb = *slot;

> +      if (r2bb && r2bb->phase >= nt_call_phase)

> +        found_bb = r2bb->bb;

> +

> +      /* If we've found a trapping *_REF, _and_ it dominates EXP

> +         (it's in a basic block on the path from us to the dominator root)

>           then we can't trap.  */

> -      if (found_bb && (((size_t)found_bb->aux) & 1) == 1)

> +      if (found_bb && (((size_t) found_bb->aux) & 1) == 1)

>          {

>            m_nontrapping->add (exp);

>          }

>        else

> -        {

> +        {

>            /* EXP might trap, so insert it into the hash table.  */

> -          if (n2bb)

> +          if (r2bb)

>              {

> -              n2bb->phase = nt_call_phase;

> -              n2bb->bb = bb;

> +              r2bb->phase = nt_call_phase;

> +              r2bb->bb = bb;

>              }

>            else

>              {

> -              n2bb = XNEW (struct name_to_bb);

> -              n2bb->ssa_name_ver = SSA_NAME_VERSION (name);

> -              n2bb->phase = nt_call_phase;

> -              n2bb->bb = bb;

> -              n2bb->store = store;

> -              n2bb->offset = map.offset;

> -              n2bb->size = size;

> -              *slot = n2bb;

> +              r2bb = XNEW (struct ref_to_bb);

> +              r2bb->phase = nt_call_phase;

> +              r2bb->bb = bb;

> +              r2bb->base = base;

> +              r2bb->offset = map.offset;

> +              r2bb->bitsize = map.bitsize;

> +              r2bb->bitpos = map.bitpos;

> +              r2bb->writable = map.writable;

> +              *slot = r2bb;

>              }

>          }

>      }

> 

> 

> Thanks,

> -Hao

> 


-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
Jakub Jelinek via Gcc-patches May 29, 2020, 9:45 a.m. | #2
Hi Richard,

Thanks for your comments. It's a good idea to simplify the code and remove get_inner_reference. I've updated the patch accordingly. I also simplified the code to ignore other loads, which cannot help to check if a store can be trapped.

About tests:
    1.  All previously XFAIL tests (gcc.dg/tree-ssa/pr89430-*) for pr89430 are passed with this patch. 
    2.  ssa-pre-17.c is failed as cselim optimizes the conditional store, so "-fno-tree-cselim" is added. That case is added as a new test case for pr89430.
    3.  Other test cases (including the regression test for pr94734) in gcc testsuit are not affected by this patch, according to gcc "make check".
    4.  Some other benchmarks are also tested for correctness and performance. The performance regression mentioned in pr89430 can be fixed. 
 
Review, please.

gcc/ChangeLog:

    PR tree-optimization/89430
    * tree-ssa-phiopt.c (cond_store_replacement): Extend non-trap checking to
    support ARRAY_REFs and COMPONENT_REFs.  Support a special case: if there is
    a dominating load of local variable without address escape, a store is not
    trapped (as local stack is always writable).  Other loads are ignored for
    simplicity, as they don't help to check if a store can be trapped.

gcc/testsuite/ChangeLog:

    PR tree-optimization/89430
    * gcc.dg/tree-ssa/pr89430-1.c: Remove xfail.
    * gcc.dg/tree-ssa/pr89430-2.c: Remove xfail.
    * gcc.dg/tree-ssa/pr89430-5.c: Remove xfail.
    * gcc.dg/tree-ssa/pr89430-6.c: Remove xfail.
    * gcc.dg/tree-ssa/pr89430-7-comp-ref.c: New test.
    * gcc.dg/tree-ssa/ssa-pre-17.c: Add -fno-tree-cselim.

---
 gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c     |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c     |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c     |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c     |   2 +-
 .../gcc.dg/tree-ssa/pr89430-7-comp-ref.c      |  17 +++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c    |   2 +-
 gcc/tree-ssa-phiopt.c                         | 140 +++++++++---------
 7 files changed, 91 insertions(+), 76 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
index ce242ba569b..8ee1850ac63 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
@@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) {
         return a[0]+a[1];
 }
 
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
index 90ae36bfce2..9b96875ac7a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
@@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) {
         return a[0]+a[1];
 }
 
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
index c633cbe947d..b2d04119381 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
@@ -13,4 +13,4 @@ int test(int b, int k) {
     return a.data[0] + a.data[1];
 }
 
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
index 7cad563128d..8d3c4f7cc6a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
@@ -16,4 +16,4 @@ int test(int b, int k) {
     return a.data[0].x + a.data[1].x;
 }
 
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
new file mode 100644
index 00000000000..c35a2afc70b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
+
+typedef union {
+  int i;
+  float f;
+} U;
+
+int foo(U *u, int b, int i)
+{
+  u->i = 0;
+  if (b)
+    u->i = i;
+  return u->i;
+}
+
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
index 09313716598..a06f339f0bb 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-pre-stats" } */
+/* { dg-options "-O2 -fdump-tree-pre-stats -fno-tree-cselim" } */
 
 typedef union {
   int i;
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index b1e0dce93d8..7c67b75486c 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -1969,43 +1969,44 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
   return true;
 }
 
-/* Auxiliary functions to determine the set of memory accesses which
+/* Auxiliary functions to determine the set of memory write accesses which
    can't trap because they are preceded by accesses to the same memory
-   portion.  We do that for MEM_REFs, so we only need to track
-   the SSA_NAME of the pointer indirectly referenced.  The algorithm
+   portion.  We do that for MEM_REFs/ARRAY_REFs/COMPONENT_REFs.  The algorithm
    simply is a walk over all instructions in dominator order.  When
-   we see an MEM_REF we determine if we've already seen a same
+   we see an *_REF we determine if we've already seen a same
    ref anywhere up to the root of the dominator tree.  If we do the
    current access can't trap.  If we don't see any dominating access
    the current access might trap, but might also make later accesses
    non-trapping, so we remember it.  We need to be careful with loads
    or stores, for instance a load might not trap, while a store would,
    so if we see a dominating read access this doesn't mean that a later
-   write access would not trap.  Hence we also need to differentiate the
-   type of access(es) seen.
+   write access would not trap.
 
-   ??? We currently are very conservative and assume that a load might
-   trap even if a store doesn't (write-only memory).  This probably is
-   overly conservative.  */
+   We care about following memory accesses:
+     1) a store.
+     2) a load of local variable without address-taken (as local stack is
+	always writable).  It helps to optimize a conditional store with
+	a dominating load.
 
-/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF
-   through it was seen, which would constitute a no-trap region for
-   same accesses.  */
-struct name_to_bb
+   other loads are ignored as we don't know if the accessed memory is writable,
+   so they don't help conditional store replacement.  */
+
+/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which
+   basic block an *_REF through it was seen, which would constitute a
+   no-trap region for same accesses.  */
+struct ref_to_bb
 {
-  unsigned int ssa_name_ver;
+  tree exp;
   unsigned int phase;
-  bool store;
-  HOST_WIDE_INT offset, size;
   basic_block bb;
 };
 
 /* Hashtable helpers.  */
 
-struct ssa_names_hasher : free_ptr_hash <name_to_bb>
+struct refs_hasher : free_ptr_hash<ref_to_bb>
 {
-  static inline hashval_t hash (const name_to_bb *);
-  static inline bool equal (const name_to_bb *, const name_to_bb *);
+  static inline hashval_t hash (const ref_to_bb *);
+  static inline bool equal (const ref_to_bb *, const ref_to_bb *);
 };
 
 /* Used for quick clearing of the hash-table when we see calls.
@@ -2015,28 +2016,27 @@ static unsigned int nt_call_phase;
 /* The hash function.  */
 
 inline hashval_t
-ssa_names_hasher::hash (const name_to_bb *n)
+refs_hasher::hash (const ref_to_bb *n)
 {
-  return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
-         ^ (n->offset << 6) ^ (n->size << 3);
+  inchash::hash hstate;
+  inchash::add_expr (n->exp, hstate);
+  return hstate.end ();
 }
 
 /* The equality function of *P1 and *P2.  */
 
 inline bool
-ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)
+refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2)
 {
-  return n1->ssa_name_ver == n2->ssa_name_ver
-         && n1->store == n2->store
-         && n1->offset == n2->offset
-         && n1->size == n2->size;
+  return operand_equal_p (n1->exp, n2->exp, 0);
 }
 
 class nontrapping_dom_walker : public dom_walker
 {
 public:
   nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)
-    : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}
+    : dom_walker (direction), m_nontrapping (ps), m_seen_refs (128)
+  {}
 
   virtual edge before_dom_children (basic_block);
   virtual void after_dom_children (basic_block);
@@ -2053,7 +2053,7 @@ private:
   hash_set<tree> *m_nontrapping;
 
   /* The hash table for remembering what we've seen.  */
-  hash_table<ssa_names_hasher> m_seen_ssa_names;
+  hash_table<refs_hasher> m_seen_refs;
 };
 
 /* Called by walk_dominator_tree, when entering the block BB.  */
@@ -2102,65 +2102,63 @@ nontrapping_dom_walker::after_dom_children (basic_block bb)
 }
 
 /* We see the expression EXP in basic block BB.  If it's an interesting
-   expression (an MEM_REF through an SSA_NAME) possibly insert the
-   expression into the set NONTRAP or the hash table of seen expressions.
-   STORE is true if this expression is on the LHS, otherwise it's on
-   the RHS.  */
+   expression of:
+     1) MEM_REF
+     2) ARRAY_REF
+     3) COMPONENT_REF
+   possibly insert the expression into the set NONTRAP or the hash table
+   of seen expressions.  STORE is true if this expression is on the LHS,
+   otherwise it's on the RHS.  */
 void
 nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)
 {
-  HOST_WIDE_INT size;
-
-  if (TREE_CODE (exp) == MEM_REF
-      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
-      && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
-      && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
+  if (TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF
+      || TREE_CODE (exp) == COMPONENT_REF)
     {
-      tree name = TREE_OPERAND (exp, 0);
-      struct name_to_bb map;
-      name_to_bb **slot;
-      struct name_to_bb *n2bb;
+      struct ref_to_bb map;
+      ref_to_bb **slot;
+      struct ref_to_bb *r2bb;
       basic_block found_bb = 0;
 
-      /* Try to find the last seen MEM_REF through the same
-         SSA_NAME, which can trap.  */
-      map.ssa_name_ver = SSA_NAME_VERSION (name);
-      map.phase = 0;
-      map.bb = 0;
-      map.store = store;
-      map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
-      map.size = size;
-
-      slot = m_seen_ssa_names.find_slot (&map, INSERT);
-      n2bb = *slot;
-      if (n2bb && n2bb->phase >= nt_call_phase)
-        found_bb = n2bb->bb;
-
-      /* If we've found a trapping MEM_REF, _and_ it dominates EXP
-         (it's in a basic block on the path from us to the dominator root)
+      if (!store)
+	{
+	  tree base = get_base_address (exp);
+	  /* Only record a LOAD of local variable without address-taken, as
+	     the local stack is always writable.  This allows cselim on a STORE
+	     with a dominating LOAD.  */
+	  if (!auto_var_p (base) || TREE_ADDRESSABLE (base))
+	    return;
+	}
+
+      /* Try to find the last seen *_REF, which can trap.  */
+      map.exp = exp;
+      slot = m_seen_refs.find_slot (&map, INSERT);
+      r2bb = *slot;
+      if (r2bb && r2bb->phase >= nt_call_phase)
+	found_bb = r2bb->bb;
+
+      /* If we've found a trapping *_REF, _and_ it dominates EXP
+	 (it's in a basic block on the path from us to the dominator root)
 	 then we can't trap.  */
       if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
 	{
 	  m_nontrapping->add (exp);
 	}
       else
-        {
+	{
 	  /* EXP might trap, so insert it into the hash table.  */
-	  if (n2bb)
+	  if (r2bb)
 	    {
-	      n2bb->phase = nt_call_phase;
-	      n2bb->bb = bb;
+	      r2bb->phase = nt_call_phase;
+	      r2bb->bb = bb;
 	    }
 	  else
 	    {
-	      n2bb = XNEW (struct name_to_bb);
-	      n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
-	      n2bb->phase = nt_call_phase;
-	      n2bb->bb = bb;
-	      n2bb->store = store;
-	      n2bb->offset = map.offset;
-	      n2bb->size = size;
-	      *slot = n2bb;
+	      r2bb = XNEW (struct ref_to_bb);
+	      r2bb->phase = nt_call_phase;
+	      r2bb->bb = bb;
+	      r2bb->exp = exp;
+	      *slot = r2bb;
 	    }
 	}
     }
-- 
2.17.1


> -----Original Message-----

> From: Richard Biener <rguenther@suse.de>

> Sent: Thursday, May 28, 2020 9:06 PM

> To: Hao Liu OS <hliu@os.amperecomputing.com>

> Cc: gcc-patches@gcc.gnu.org; Jakub Jelinek <jakub@redhat.com>

> Subject: Re: [PATCH] extend cselim to check non-trapping for more

> references (PR tree-optimization/89430)

> 

> On Wed, 27 May 2020, Hao Liu OS wrote:

> 

> > Hi all,

> >

> > Previously, the fix for

> > PR89430<https://gcc.gnu.org/bugzilla//show_bug.cgi?id=89430> was

> > reverted by

> > PR94734<https://gcc.gnu.org/bugzilla//show_bug.cgi?id=94734>

> > due to a bug. The root cause is missing non-trapping check with

> > dominating LOAD/STORE.

> >

> > This patch extends the cselim non-trapping check to support ARRAY_REF

> > and COMPONENT_REF (previously only support MEM_REF) by

> > get_inner_reference and hashing according to the comments from Jakub.

> >

> > To support cases in PR89430, if there is a dominating LOAD of local

> > variable without address escape, as local stack is always writable,

> > the STORE is not trapped and can be optimized.

> >

> > Review, please.

> 

> How did you test the patch?  There's a ChangeLog missing as well as a

> testcase or testcase adjustments in case we XFAILed the old ones.

> It helps to post patches created by git format-patch.

> 

> First comments inline...

> 

> > --------------------

> > diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c index

> > b1e0dce93d8..3733780a0bc 100644

> > --- a/gcc/tree-ssa-phiopt.c

> > +++ b/gcc/tree-ssa-phiopt.c

> > @@ -1986,26 +1986,31 @@ abs_replacement (basic_block cond_bb,

> > basic_block middle_bb,

> >

> >     ??? We currently are very conservative and assume that a load might

> >     trap even if a store doesn't (write-only memory).  This probably is

> > -   overly conservative.  */

> > +   overly conservative.

> >

> > -/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF

> > -   through it was seen, which would constitute a no-trap region for

> > -   same accesses.  */

> > -struct name_to_bb

> > +   We currently support a special case that for !TREE_ADDRESSABLE

> automatic

> > +   variables, it could ignore whether something is a load or store because

> the

> > +   local stack should be always writable.  */

> > +

> > +/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF),

> and in which

> > +   basic block an *_REF through it was seen, which would constitute a

> > +   no-trap region for same accesses.  */ struct ref_to_bb

> >  {

> > -  unsigned int ssa_name_ver;

> > +  tree base;

> > +  poly_int64 bitsize, bitpos;

> > +  tree offset;

> >    unsigned int phase;

> > -  bool store;

> > -  HOST_WIDE_INT offset, size;

> > +  bool writable;

> >    basic_block bb;

> >  };

> >

> >  /* Hashtable helpers.  */

> >

> > -struct ssa_names_hasher : free_ptr_hash <name_to_bb>

> > +struct refs_hasher : free_ptr_hash<ref_to_bb>

> >  {

> > -  static inline hashval_t hash (const name_to_bb *);

> > -  static inline bool equal (const name_to_bb *, const name_to_bb *);

> > +  static inline hashval_t hash (const ref_to_bb *);  static inline

> > + bool equal (const ref_to_bb *, const ref_to_bb *);

> >  };

> >

> >  /* Used for quick clearing of the hash-table when we see calls.

> > @@ -2015,28 +2020,44 @@ static unsigned int nt_call_phase;

> >  /* The hash function.  */

> >

> >  inline hashval_t

> > -ssa_names_hasher::hash (const name_to_bb *n)

> > +refs_hasher::hash (const ref_to_bb *n)

> >  {

> > -  return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)

> > -         ^ (n->offset << 6) ^ (n->size << 3);

> > +  inchash::hash hstate;

> > +  inchash::add_expr (n->base, hstate);  hstate.add_poly_int

> > + (n->bitsize);  hstate.add_poly_int (n->bitpos);  hstate.add_int

> > + (n->writable);  if (n->offset != NULL_TREE)

> > +    {

> > +      inchash::add_expr (n->offset, hstate);

> > +    }

> 

> extra {}

> 

> > +  return hstate.end ();

> >  }

> >

> >  /* The equality function of *P1 and *P2.  */

> >

> >  inline bool

> > -ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)

> > +refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2)

> >  {

> > -  return n1->ssa_name_ver == n2->ssa_name_ver

> > -         && n1->store == n2->store

> > -         && n1->offset == n2->offset

> > -         && n1->size == n2->size;

> > +  if (operand_equal_p (n1->base, n2->base, 0)

> > +      && known_eq (n1->bitsize, n2->bitsize)

> > +      && known_eq (n1->bitpos, n2->bitpos) && n1->writable == n2-

> >writable)

> > +    {

> > +      /* Should not call operand_equal_p with NULL_TREE.  */

> > +      if (n1->offset == NULL_TREE || n2->offset == NULL_TREE)

> > +          return n1->offset == n2->offset;

> 

> indents are off.  You should be using a tab-stop of 8 characters.

> 

> > +      else

> > +          return operand_equal_p (n1->offset, n2->offset, 0);

> > +    }

> > +  return false;

> >  }

> >

> >  class nontrapping_dom_walker : public dom_walker  {

> >  public:

> >    nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)

> > -    : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128)

> {}

> > +    : dom_walker (direction), m_nontrapping (ps), m_seen_refs (256)

> > + {}

> >

> >    virtual edge before_dom_children (basic_block);

> >    virtual void after_dom_children (basic_block); @@ -2053,7 +2074,7

> > @@ private:

> >    hash_set<tree> *m_nontrapping;

> >

> >    /* The hash table for remembering what we've seen.  */

> > -  hash_table<ssa_names_hasher> m_seen_ssa_names;

> > +  hash_table<refs_hasher> m_seen_refs;

> >  };

> >

> >  /* Called by walk_dominator_tree, when entering the block BB.  */ @@

> > -2102,65 +2123,76 @@ nontrapping_dom_walker::after_dom_children

> > (basic_block bb)  }

> >

> >  /* We see the expression EXP in basic block BB.  If it's an interesting

> > -   expression (an MEM_REF through an SSA_NAME) possibly insert the

> > -   expression into the set NONTRAP or the hash table of seen expressions.

> > -   STORE is true if this expression is on the LHS, otherwise it's on

> > -   the RHS.  */

> > +   expression of:

> > +     1) MEM_REF

> > +     2) ARRAY_REF

> > +     3) COMPONENT_REF

> > +   possibly insert the expression into the set NONTRAP or the hash table

> > +   of seen expressions.  STORE is true if this expression is on the LHS,

> > +   otherwise it's on the RHS.  */

> >  void

> >  nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp,

> > bool store)  {

> > -  HOST_WIDE_INT size;

> > -

> > -  if (TREE_CODE (exp) == MEM_REF

> > -      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME

> > -      && tree_fits_shwi_p (TREE_OPERAND (exp, 1))

> > -      && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)

> > +  if (TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF

> > +      || TREE_CODE (exp) == COMPONENT_REF)

> >      {

> > -      tree name = TREE_OPERAND (exp, 0);

> > -      struct name_to_bb map;

> > -      name_to_bb **slot;

> > -      struct name_to_bb *n2bb;

> > +      struct ref_to_bb map;

> > +      ref_to_bb **slot;

> > +      struct ref_to_bb *r2bb;

> >        basic_block found_bb = 0;

> >

> > -      /* Try to find the last seen MEM_REF through the same

> > -         SSA_NAME, which can trap.  */

> > -      map.ssa_name_ver = SSA_NAME_VERSION (name);

> > -      map.phase = 0;

> > -      map.bb = 0;

> > -      map.store = store;

> > -      map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));

> > -      map.size = size;

> > -

> > -      slot = m_seen_ssa_names.find_slot (&map, INSERT);

> > -      n2bb = *slot;

> > -      if (n2bb && n2bb->phase >= nt_call_phase)

> > -        found_bb = n2bb->bb;

> > -

> > -      /* If we've found a trapping MEM_REF, _and_ it dominates EXP

> > -         (it's in a basic block on the path from us to the dominator root)

> > +      /* Try to find the last seen *_REF (through the same base expression

> > +         + bitsize + bitpos + offset expression), which can trap.  */

> > +      machine_mode nmode;

> > +      poly_int64 bitsize, bitpos;

> > +      tree offset;

> > +      int nunsignedp, nreversep, nvolatilep = 0;

> > +      tree base = get_inner_reference (exp, &bitsize, &bitpos, &offset,

> &nmode,

> > +                                        &nunsignedp, &nreversep, &nvolatilep);

> > +      gcc_assert (base != NULL_TREE);

> 

> So my main concern is that get_inner_reference is quite expensive since it

> builds the variable offset part as new GENERIC trees.

> 

> I think it would be easier to fully rely on operand_equal_p here and thus

> record the original full expression.  For the writable flag you can use the way

> cheaper get_base_address () which gets you 'base' without the overhead of

> building the offset tree.

> 

> That means you merely exchange ssa_name_ver in the struct with 'exp'.

> 

> Thanks,

> Richard.

> 

> > +      map.base = base;

> > +      map.offset = offset;

> > +      map.bitsize = bitsize;

> > +      map.bitpos = bitpos;

> > +

> > +      /* The reference is writable if EXP is a:

> > +         1) STORE, or

> > +         2) LOAD of a local variable without address-taken (as the local stack

> > +         is always writable).  This allows cselim on a STORE with a dominating

> > +         LOAD.  */

> > +      map.writable = store || (auto_var_p (base) && !TREE_ADDRESSABLE

> > + (base));

> > +

> > +      slot = m_seen_refs.find_slot (&map, INSERT);

> > +      r2bb = *slot;

> > +      if (r2bb && r2bb->phase >= nt_call_phase)

> > +        found_bb = r2bb->bb;

> > +

> > +      /* If we've found a trapping *_REF, _and_ it dominates EXP

> > +         (it's in a basic block on the path from us to the dominator

> > + root)

> >           then we can't trap.  */

> > -      if (found_bb && (((size_t)found_bb->aux) & 1) == 1)

> > +      if (found_bb && (((size_t) found_bb->aux) & 1) == 1)

> >          {

> >            m_nontrapping->add (exp);

> >          }

> >        else

> > -        {

> > +        {

> >            /* EXP might trap, so insert it into the hash table.  */

> > -          if (n2bb)

> > +          if (r2bb)

> >              {

> > -              n2bb->phase = nt_call_phase;

> > -              n2bb->bb = bb;

> > +              r2bb->phase = nt_call_phase;

> > +              r2bb->bb = bb;

> >              }

> >            else

> >              {

> > -              n2bb = XNEW (struct name_to_bb);

> > -              n2bb->ssa_name_ver = SSA_NAME_VERSION (name);

> > -              n2bb->phase = nt_call_phase;

> > -              n2bb->bb = bb;

> > -              n2bb->store = store;

> > -              n2bb->offset = map.offset;

> > -              n2bb->size = size;

> > -              *slot = n2bb;

> > +              r2bb = XNEW (struct ref_to_bb);

> > +              r2bb->phase = nt_call_phase;

> > +              r2bb->bb = bb;

> > +              r2bb->base = base;

> > +              r2bb->offset = map.offset;

> > +              r2bb->bitsize = map.bitsize;

> > +              r2bb->bitpos = map.bitpos;

> > +              r2bb->writable = map.writable;

> > +              *slot = r2bb;

> >              }

> >          }

> >      }

> >

> >

> > Thanks,

> > -Hao

> >

> 

> --

> Richard Biener <rguenther@suse.de>

> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409

> Nuernberg, Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
Richard Biener June 2, 2020, 11:29 a.m. | #3
On Fri, 29 May 2020, Hao Liu OS wrote:

> Hi Richard,

> 

> Thanks for your comments. It's a good idea to simplify the code and remove get_inner_reference. I've updated the patch accordingly. I also simplified the code to ignore other loads, which cannot help to check if a store can be trapped. 

> 

> About tests:

>     1.  All previously XFAIL tests (gcc.dg/tree-ssa/pr89430-*) for pr89430 are passed with this patch. 

>     2.  ssa-pre-17.c fails as cselim optimizes the conditional store, so "-fno-tree-cselim" is added. That case is added as a new test case for pr89430.

>     3.  Other test cases (including the regression test for pr94734) in the gcc testsuite are not affected by this patch, according to gcc "make check".

>     4.  Some other benchmarks are also tested for correctness and performance. The performance regression mentioned in pr89430 can be fixed. 

>  

> Review, please.


Looks mostly good now, some more comments inline below

> gcc/ChangeLog:

> 

>     PR tree-optimization/89430

>     * tree-ssa-phiopt.c (cond_store_replacement): Extend non-trap checking to

>     support ARRAY_REFs and COMPONENT_REFs.  Support a special case: if there is

>     a dominating load of local variable without address escape, a store is not

>     trapped (as local stack is always writable).  Other loads are ignored for

>     simplicity, as they don't help to check if a store can be trapped.

> 

> gcc/testsuite/ChangeLog:

> 

>     PR tree-optimization/89430

>     * gcc.dg/tree-ssa/pr89430-1.c: Remove xfail.

>     * gcc.dg/tree-ssa/pr89430-2.c: Remove xfail.

>     * gcc.dg/tree-ssa/pr89430-5.c: Remove xfail.

>     * gcc.dg/tree-ssa/pr89430-6.c: Remove xfail.

>     * gcc.dg/tree-ssa/pr89430-7-comp-ref.c: New test.

>     * gcc.dg/tree-ssa/ssa-pre-17.c: Add -fno-tree-cselim.

> 

> ---

>  gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c     |   2 +-

>  gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c     |   2 +-

>  gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c     |   2 +-

>  gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c     |   2 +-

>  .../gcc.dg/tree-ssa/pr89430-7-comp-ref.c      |  17 +++

>  gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c    |   2 +-

>  gcc/tree-ssa-phiopt.c                         | 140 +++++++++---------

>  7 files changed, 91 insertions(+), 76 deletions(-)

>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c

> 

> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c

> index ce242ba569b..8ee1850ac63 100644

> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c

> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c

> @@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) {

>          return a[0]+a[1];

>  }

>  

> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */

> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */

> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c

> index 90ae36bfce2..9b96875ac7a 100644

> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c

> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c

> @@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) {

>          return a[0]+a[1];

>  }

>  

> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */

> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */

> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c

> index c633cbe947d..b2d04119381 100644

> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c

> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c

> @@ -13,4 +13,4 @@ int test(int b, int k) {

>      return a.data[0] + a.data[1];

>  }

>  

> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */

> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */

> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c

> index 7cad563128d..8d3c4f7cc6a 100644

> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c

> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c

> @@ -16,4 +16,4 @@ int test(int b, int k) {

>      return a.data[0].x + a.data[1].x;

>  }

>  

> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */

> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */

> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c

> new file mode 100644

> index 00000000000..c35a2afc70b

> --- /dev/null

> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c

> @@ -0,0 +1,17 @@

> +/* { dg-do compile } */

> +/* { dg-options "-O2 -fdump-tree-cselim-details" } */

> +

> +typedef union {

> +  int i;

> +  float f;

> +} U;

> +

> +int foo(U *u, int b, int i)

> +{

> +  u->i = 0;

> +  if (b)

> +    u->i = i;

> +  return u->i;

> +}

> +

> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */

> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c

> index 09313716598..a06f339f0bb 100644

> --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c

> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c

> @@ -1,5 +1,5 @@

>  /* { dg-do compile } */

> -/* { dg-options "-O2 -fdump-tree-pre-stats" } */

> +/* { dg-options "-O2 -fdump-tree-pre-stats -fno-tree-cselim" } */

>  

>  typedef union {

>    int i;

> diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c

> index b1e0dce93d8..7c67b75486c 100644

> --- a/gcc/tree-ssa-phiopt.c

> +++ b/gcc/tree-ssa-phiopt.c

> @@ -1969,43 +1969,44 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,

>    return true;

>  }

>  

> -/* Auxiliary functions to determine the set of memory accesses which

> +/* Auxiliary functions to determine the set of memory write accesses which

>     can't trap because they are preceded by accesses to the same memory

> -   portion.  We do that for MEM_REFs, so we only need to track

> -   the SSA_NAME of the pointer indirectly referenced.  The algorithm

> +   portion.  We do that for MEM_REFs/ARRAY_REFs/COMPONENT_REFs.  The algorithm

>     simply is a walk over all instructions in dominator order.  When

> -   we see an MEM_REF we determine if we've already seen a same

> +   we see an *_REF we determine if we've already seen a same

>     ref anywhere up to the root of the dominator tree.  If we do the

>     current access can't trap.  If we don't see any dominating access

>     the current access might trap, but might also make later accesses

>     non-trapping, so we remember it.  We need to be careful with loads

>     or stores, for instance a load might not trap, while a store would,

>     so if we see a dominating read access this doesn't mean that a later

> -   write access would not trap.  Hence we also need to differentiate the

> -   type of access(es) seen.

> +   write access would not trap.

>  

> -   ??? We currently are very conservative and assume that a load might

> -   trap even if a store doesn't (write-only memory).  This probably is

> -   overly conservative.  */

> +   We care about following memory accesses:

> +     1) a store.

> +     2) a load of local variable without address-taken (as local stack is

> +	always writable).  It helps to optimize a conditoinal store with

> +	a dominating load.

>  

> -/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF

> -   through it was seen, which would constitute a no-trap region for

> -   same accesses.  */

> -struct name_to_bb

> +   other loads are ignored as we don't know if the accessed memory is writable,

> +   so they don't help conditional store replacement.  */

> +

> +/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which

> +   basic block an *_REF through it was seen, which would constitute a

> +   no-trap region for same accesses.  */

> +struct ref_to_bb

>  {

> -  unsigned int ssa_name_ver;

> +  tree exp;

>    unsigned int phase;

> -  bool store;

> -  HOST_WIDE_INT offset, size;

>    basic_block bb;

>  };

>  

>  /* Hashtable helpers.  */

>  

> -struct ssa_names_hasher : free_ptr_hash <name_to_bb>

> +struct refs_hasher : free_ptr_hash<ref_to_bb>

>  {

> -  static inline hashval_t hash (const name_to_bb *);

> -  static inline bool equal (const name_to_bb *, const name_to_bb *);

> +  static inline hashval_t hash (const ref_to_bb *);

> +  static inline bool equal (const ref_to_bb *, const ref_to_bb *);

>  };

>  

>  /* Used for quick clearing of the hash-table when we see calls.

> @@ -2015,28 +2016,27 @@ static unsigned int nt_call_phase;

>  /* The hash function.  */

>  

>  inline hashval_t

> -ssa_names_hasher::hash (const name_to_bb *n)

> +refs_hasher::hash (const ref_to_bb *n)

>  {

> -  return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)

> -         ^ (n->offset << 6) ^ (n->size << 3);

> +  inchash::hash hstate;

> +  inchash::add_expr (n->exp, hstate);

> +  return hstate.end ();

>  }

>  

>  /* The equality function of *P1 and *P2.  */

>  

>  inline bool

> -ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)

> +refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2)

>  {

> -  return n1->ssa_name_ver == n2->ssa_name_ver

> -         && n1->store == n2->store

> -         && n1->offset == n2->offset

> -         && n1->size == n2->size;

> +  return operand_equal_p (n1->exp, n2->exp, 0);


So I figured we're going to regress some cases here that do not
have semantically agreeing MEM_REFs, like MEM<double>(s_1)
and MEM<long>(s_1) they would not compare equal but did before,
just considering offset and size.  So we'd like to pass
OEP_ADDRESS_OF as flag argument to operand_equal_p here
and to add_expr above.  This part would then just check whether
the accesses are to the same address.

This means you have to preserve the ->size member (and check it).

The rest of the patch looks good to me.

Thanks,
Richard.

>  }

>  

>  class nontrapping_dom_walker : public dom_walker

>  {

>  public:

>    nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)

> -    : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}

> +    : dom_walker (direction), m_nontrapping (ps), m_seen_refs (128)

> +  {}

>  

>    virtual edge before_dom_children (basic_block);

>    virtual void after_dom_children (basic_block);

> @@ -2053,7 +2053,7 @@ private:

>    hash_set<tree> *m_nontrapping;

>  

>    /* The hash table for remembering what we've seen.  */

> -  hash_table<ssa_names_hasher> m_seen_ssa_names;

> +  hash_table<refs_hasher> m_seen_refs;

>  };

>  

>  /* Called by walk_dominator_tree, when entering the block BB.  */

> @@ -2102,65 +2102,63 @@ nontrapping_dom_walker::after_dom_children (basic_block bb)

>  }

>  

>  /* We see the expression EXP in basic block BB.  If it's an interesting

> -   expression (an MEM_REF through an SSA_NAME) possibly insert the

> -   expression into the set NONTRAP or the hash table of seen expressions.

> -   STORE is true if this expression is on the LHS, otherwise it's on

> -   the RHS.  */

> +   expression of:

> +     1) MEM_REF

> +     2) ARRAY_REF

> +     3) COMPONENT_REF

> +   possibly insert the expression into the set NONTRAP or the hash table

> +   of seen expressions.  STORE is true if this expression is on the LHS,

> +   otherwise it's on the RHS.  */

>  void

>  nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)

>  {

> -  HOST_WIDE_INT size;

> -

> -  if (TREE_CODE (exp) == MEM_REF

> -      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME

> -      && tree_fits_shwi_p (TREE_OPERAND (exp, 1))

> -      && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)

> +  if (TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF

> +      || TREE_CODE (exp) == COMPONENT_REF)

>      {

> -      tree name = TREE_OPERAND (exp, 0);

> -      struct name_to_bb map;

> -      name_to_bb **slot;

> -      struct name_to_bb *n2bb;

> +      struct ref_to_bb map;

> +      ref_to_bb **slot;

> +      struct ref_to_bb *r2bb;

>        basic_block found_bb = 0;

>  

> -      /* Try to find the last seen MEM_REF through the same

> -         SSA_NAME, which can trap.  */

> -      map.ssa_name_ver = SSA_NAME_VERSION (name);

> -      map.phase = 0;

> -      map.bb = 0;

> -      map.store = store;

> -      map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));

> -      map.size = size;

> -

> -      slot = m_seen_ssa_names.find_slot (&map, INSERT);

> -      n2bb = *slot;

> -      if (n2bb && n2bb->phase >= nt_call_phase)

> -        found_bb = n2bb->bb;

> -

> -      /* If we've found a trapping MEM_REF, _and_ it dominates EXP

> -         (it's in a basic block on the path from us to the dominator root)

> +      if (!store)

> +	{

> +	  tree base = get_base_address (exp);

> +	  /* Only record a LOAD of local variable without address-taken, as

> +	     the local stack is always writable.  This allows cselim on a STORE

> +	     with a dominating LOAD.  */

> +	  if (!auto_var_p (base) || TREE_ADDRESSABLE (base))

> +	    return;

> +	}

> +

> +      /* Try to find the last seen *_REF, which can trap.  */

> +      map.exp = exp;

> +      slot = m_seen_refs.find_slot (&map, INSERT);

> +      r2bb = *slot;

> +      if (r2bb && r2bb->phase >= nt_call_phase)

> +	found_bb = r2bb->bb;

> +

> +      /* If we've found a trapping *_REF, _and_ it dominates EXP

> +	 (it's in a basic block on the path from us to the dominator root)

>  	 then we can't trap.  */

>        if (found_bb && (((size_t)found_bb->aux) & 1) == 1)

>  	{

>  	  m_nontrapping->add (exp);

>  	}

>        else

> -        {

> +	{

>  	  /* EXP might trap, so insert it into the hash table.  */

> -	  if (n2bb)

> +	  if (r2bb)

>  	    {

> -	      n2bb->phase = nt_call_phase;

> -	      n2bb->bb = bb;

> +	      r2bb->phase = nt_call_phase;

> +	      r2bb->bb = bb;

>  	    }

>  	  else

>  	    {

> -	      n2bb = XNEW (struct name_to_bb);

> -	      n2bb->ssa_name_ver = SSA_NAME_VERSION (name);

> -	      n2bb->phase = nt_call_phase;

> -	      n2bb->bb = bb;

> -	      n2bb->store = store;

> -	      n2bb->offset = map.offset;

> -	      n2bb->size = size;

> -	      *slot = n2bb;

> +	      r2bb = XNEW (struct ref_to_bb);

> +	      r2bb->phase = nt_call_phase;

> +	      r2bb->bb = bb;

> +	      r2bb->exp = exp;

> +	      *slot = r2bb;

>  	    }

>  	}

>      }

> 


-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
Jakub Jelinek via Gcc-patches June 4, 2020, 4:47 a.m. | #4
Hi All,

The patch is refactored a little according to the last comment. Do you have more comments? If not, I will commit it later.

Tested on X86_64 and AArch64.

gcc/:

        PR tree-optimization/89430
        * tree-ssa-phiopt.c (cond_store_replacement): Extend non-trap checking
        to support ARRAY_REFs and COMPONENT_REFs.  Support a special case: if
        there is a dominating load of a local variable whose address does not
        escape, a store is not trapped (as the local stack is always writable).
        a store is not trapped (as local stack is always writable).
        The logic is also simplified to ignore other loads, as they don't
        help to check if a store is trapped (may be read-only).

gcc/testsuite/:

        PR tree-optimization/89430
        * gcc.dg/tree-ssa/pr89430-1.c: Remove xfail.
        * gcc.dg/tree-ssa/pr89430-2.c: Remove xfail.
        * gcc.dg/tree-ssa/pr89430-5.c: Remove xfail.
        * gcc.dg/tree-ssa/pr89430-6.c: Remove xfail.
        * gcc.dg/tree-ssa/pr89430-7-comp-ref.c: New test.
        * gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c: New test.
        * gcc.dg/tree-ssa/ssa-pre-17.c: Add -fno-tree-cselim.

---
 gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c     |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c     |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c     |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c     |   2 +-
 .../gcc.dg/tree-ssa/pr89430-7-comp-ref.c      |  17 +++
 .../gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c  |  15 ++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c    |   2 +-
 gcc/tree-ssa-phiopt.c                         | 129 ++++++++++--------
 8 files changed, 107 insertions(+), 64 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
index ce242ba569b..8ee1850ac63 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
@@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) {
         return a[0]+a[1];
 }

-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
index 90ae36bfce2..9b96875ac7a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
@@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) {
         return a[0]+a[1];
 }

-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
index c633cbe947d..b2d04119381 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
@@ -13,4 +13,4 @@ int test(int b, int k) {
     return a.data[0] + a.data[1];
 }

-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
index 7cad563128d..8d3c4f7cc6a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
@@ -16,4 +16,4 @@ int test(int b, int k) {
     return a.data[0].x + a.data[1].x;
 }

-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
new file mode 100644
index 00000000000..c35a2afc70b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
+
+typedef union {
+  int i;
+  float f;
+} U;
+
+int foo(U *u, int b, int i)
+{
+  u->i = 0;
+  if (b)
+    u->i = i;
+  return u->i;
+}
+
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c
new file mode 100644
index 00000000000..f9e66aefb13
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
+
+int *t;
+
+int f1 (int tt)
+{
+  int *t1 = t;
+  *t1 = -5;
+  if (*t1 < tt)
+    *((unsigned *) t1) = 5;
+  return *t1;
+}
+
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
index 09313716598..a06f339f0bb 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-pre-stats" } */
+/* { dg-options "-O2 -fdump-tree-pre-stats -fno-tree-cselim" } */

 typedef union {
   int i;
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index bb97dcf63b4..9d69a9565b4 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -1987,26 +1987,33 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,

    ??? We currently are very conservative and assume that a load might
    trap even if a store doesn't (write-only memory).  This probably is
-   overly conservative.  */
+   overly conservative.

-/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF
-   through it was seen, which would constitute a no-trap region for
-   same accesses.  */
-struct name_to_bb
+   We currently support a special case that for !TREE_ADDRESSABLE automatic
+   variables, it could ignore whether something is a load or store because the
+   local stack should be always writable.  */
+
+/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which
+   basic block an *_REF through it was seen, which would constitute a
+   no-trap region for same accesses.
+
+   Size is needed to support 2 MEM_REFs of different types, like
+   MEM<double>(s_1) and MEM<long>(s_1), which would compare equal with
+   OEP_ADDRESS_OF.  */
+struct ref_to_bb
 {
-  unsigned int ssa_name_ver;
+  tree exp;
+  HOST_WIDE_INT size;
   unsigned int phase;
-  bool store;
-  HOST_WIDE_INT offset, size;
   basic_block bb;
 };

 /* Hashtable helpers.  */

-struct ssa_names_hasher : free_ptr_hash <name_to_bb>
+struct refs_hasher : free_ptr_hash<ref_to_bb>
 {
-  static inline hashval_t hash (const name_to_bb *);
-  static inline bool equal (const name_to_bb *, const name_to_bb *);
+  static inline hashval_t hash (const ref_to_bb *);
+  static inline bool equal (const ref_to_bb *, const ref_to_bb *);
 };

 /* Used for quick clearing of the hash-table when we see calls.
@@ -2016,28 +2023,29 @@ static unsigned int nt_call_phase;
 /* The hash function.  */

 inline hashval_t
-ssa_names_hasher::hash (const name_to_bb *n)
+refs_hasher::hash (const ref_to_bb *n)
 {
-  return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
-         ^ (n->offset << 6) ^ (n->size << 3);
+  inchash::hash hstate;
+  inchash::add_expr (n->exp, hstate, OEP_ADDRESS_OF);
+  hstate.add_hwi (n->size);
+  return hstate.end ();
 }

 /* The equality function of *P1 and *P2.  */

 inline bool
-ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)
+refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2)
 {
-  return n1->ssa_name_ver == n2->ssa_name_ver
-         && n1->store == n2->store
-         && n1->offset == n2->offset
-         && n1->size == n2->size;
+  return operand_equal_p (n1->exp, n2->exp, OEP_ADDRESS_OF)
+        && n1->size == n2->size;
 }

 class nontrapping_dom_walker : public dom_walker
 {
 public:
   nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)
-    : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}
+    : dom_walker (direction), m_nontrapping (ps), m_seen_refs (128)
+  {}

   virtual edge before_dom_children (basic_block);
   virtual void after_dom_children (basic_block);
@@ -2054,7 +2062,7 @@ private:
   hash_set<tree> *m_nontrapping;

   /* The hash table for remembering what we've seen.  */
-  hash_table<ssa_names_hasher> m_seen_ssa_names;
+  hash_table<refs_hasher> m_seen_refs;
 };

 /* Called by walk_dominator_tree, when entering the block BB.  */
@@ -2103,65 +2111,68 @@ nontrapping_dom_walker::after_dom_children (basic_block bb)
 }

 /* We see the expression EXP in basic block BB.  If it's an interesting
-   expression (an MEM_REF through an SSA_NAME) possibly insert the
-   expression into the set NONTRAP or the hash table of seen expressions.
-   STORE is true if this expression is on the LHS, otherwise it's on
-   the RHS.  */
+   expression of:
+     1) MEM_REF
+     2) ARRAY_REF
+     3) COMPONENT_REF
+   possibly insert the expression into the set NONTRAP or the hash table
+   of seen expressions.  STORE is true if this expression is on the LHS,
+   otherwise it's on the RHS.  */
 void
 nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)
 {
   HOST_WIDE_INT size;

-  if (TREE_CODE (exp) == MEM_REF
-      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
-      && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
+  if ((TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF
+       || TREE_CODE (exp) == COMPONENT_REF)
       && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
     {
-      tree name = TREE_OPERAND (exp, 0);
-      struct name_to_bb map;
-      name_to_bb **slot;
-      struct name_to_bb *n2bb;
+      struct ref_to_bb map;
+      ref_to_bb **slot;
+      struct ref_to_bb *r2bb;
       basic_block found_bb = 0;

-      /* Try to find the last seen MEM_REF through the same
-         SSA_NAME, which can trap.  */
-      map.ssa_name_ver = SSA_NAME_VERSION (name);
-      map.phase = 0;
-      map.bb = 0;
-      map.store = store;
-      map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
-      map.size = size;
+      if (!store)
+       {
+         tree base = get_base_address (exp);
+         /* Only record a LOAD of a local variable without address-taken, as
+            the local stack is always writable.  This allows cselim on a STORE
+            with a dominating LOAD.  */
+         if (!auto_var_p (base) || TREE_ADDRESSABLE (base))
+           return;
+       }

-      slot = m_seen_ssa_names.find_slot (&map, INSERT);
-      n2bb = *slot;
-      if (n2bb && n2bb->phase >= nt_call_phase)
-        found_bb = n2bb->bb;
+      /* Try to find the last seen *_REF, which can trap.  */
+      map.exp = exp;
+      map.size = size;
+      slot = m_seen_refs.find_slot (&map, INSERT);
+      r2bb = *slot;
+      if (r2bb && r2bb->phase >= nt_call_phase)
+       found_bb = r2bb->bb;

-      /* If we've found a trapping MEM_REF, _and_ it dominates EXP
-         (it's in a basic block on the path from us to the dominator root)
+      /* If we've found a trapping *_REF, _and_ it dominates EXP
+        (it's in a basic block on the path from us to the dominator root)
         then we can't trap.  */
-      if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
+      if (found_bb && (((size_t) found_bb->aux) & 1) == 1)
        {
          m_nontrapping->add (exp);
        }
       else
-        {
+       {
          /* EXP might trap, so insert it into the hash table.  */
-         if (n2bb)
+         if (r2bb)
            {
-             n2bb->phase = nt_call_phase;
-             n2bb->bb = bb;
+             r2bb->phase = nt_call_phase;
+             r2bb->bb = bb;
            }
          else
            {
-             n2bb = XNEW (struct name_to_bb);
-             n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
-             n2bb->phase = nt_call_phase;
-             n2bb->bb = bb;
-             n2bb->store = store;
-             n2bb->offset = map.offset;
-             n2bb->size = size;
-             *slot = n2bb;
+             r2bb = XNEW (struct ref_to_bb);
+             r2bb->phase = nt_call_phase;
+             r2bb->bb = bb;
+             r2bb->exp = exp;
+             r2bb->size = size;
+             *slot = r2bb;
            }
        }
     }
--
2.17.1

________________________________________
From: Richard Biener <rguenther@suse.de>

Sent: Tuesday, June 2, 2020 19:29
To: Hao Liu OS
Cc: gcc-patches@gcc.gnu.org; Jakub Jelinek
Subject: RE: [PATCH] extend cselim to check non-trapping for more references (PR tree-optimizaton/89430)

On Fri, 29 May 2020, Hao Liu OS wrote:

> Hi Richard,

>

> Thanks for your comments. It's a good idea to simplify the code and remove get_inner_reference. I've updated the patch accordingly. I also simplified the code to ignore other loads, which can not help to check if a store can be trapped.

>

> About tests:

>     1.  All previously XFAIL tests (gcc.dg/tree-ssa/pr89430-*) for pr89430 are passed with this patch.

>     2.  ssa-pre-17.c is failed as cselim optimizes the conditional store, so "-fno-tree-cselim" is added. That case is added as a new test case for pr89430.

>     3.  Other test cases (including the regression test for pr94734) in gcc testsuit are not affected by this patch, according to gcc "make check".

>     4.  Some other benchmarks are also tested for correctness and performance. The performance regression mentioned in pr89430 can be fixed.

>

> Review, please.


Looks mostly good now, some more comments inline below

> gcc/ChangeLog:

>

>     PR tree-optimization/89430

>     * tree-ssa-phiopt.c (cond_store_replacement): Extend non-trap checking to

>     support ARRAY_REFs and COMPONENT_REFs.  Support a special case: if there is

>     a dominating load of local variable without address escape, a store is not

>     trapped (as local stack is always writable).  Other loads are ignored for

>     simplicity, as they don't help to check if a store can be trapped.

>

> gcc/testsuite/ChangeLog:

>

>     PR tree-optimization/89430

>     * gcc.dg/tree-ssa/pr89430-1.c: Remove xfail.

>     * gcc.dg/tree-ssa/pr89430-2.c: Remove xfail.

>     * gcc.dg/tree-ssa/pr89430-5.c: Remove xfail.

>     * gcc.dg/tree-ssa/pr89430-6.c: Remove xfail.

>     * gcc.dg/tree-ssa/pr89430-7-comp-ref.c: New test.

>     * gcc.dg/tree-ssa/ssa-pre-17.c: Add -fno-tree-cselim.

>

> ---

>  gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c     |   2 +-

>  gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c     |   2 +-

>  gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c     |   2 +-

>  gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c     |   2 +-

>  .../gcc.dg/tree-ssa/pr89430-7-comp-ref.c      |  17 +++

>  gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c    |   2 +-

>  gcc/tree-ssa-phiopt.c                         | 140 +++++++++---------

>  7 files changed, 91 insertions(+), 76 deletions(-)

>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c

>

> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c

> index ce242ba569b..8ee1850ac63 100644

> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c

> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c

> @@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) {

>          return a[0]+a[1];

>  }

>

> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */

> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */

> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c

> index 90ae36bfce2..9b96875ac7a 100644

> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c

> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c

> @@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) {

>          return a[0]+a[1];

>  }

>

> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */

> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */

> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c

> index c633cbe947d..b2d04119381 100644

> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c

> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c

> @@ -13,4 +13,4 @@ int test(int b, int k) {

>      return a.data[0] + a.data[1];

>  }

>

> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */

> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */

> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c

> index 7cad563128d..8d3c4f7cc6a 100644

> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c

> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c

> @@ -16,4 +16,4 @@ int test(int b, int k) {

>      return a.data[0].x + a.data[1].x;

>  }

>

> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */

> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */

> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c

> new file mode 100644

> index 00000000000..c35a2afc70b

> --- /dev/null

> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c

> @@ -0,0 +1,17 @@

> +/* { dg-do compile } */

> +/* { dg-options "-O2 -fdump-tree-cselim-details" } */

> +

> +typedef union {

> +  int i;

> +  float f;

> +} U;

> +

> +int foo(U *u, int b, int i)

> +{

> +  u->i = 0;

> +  if (b)

> +    u->i = i;

> +  return u->i;

> +}

> +

> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */

> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c

> index 09313716598..a06f339f0bb 100644

> --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c

> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c

> @@ -1,5 +1,5 @@

>  /* { dg-do compile } */

> -/* { dg-options "-O2 -fdump-tree-pre-stats" } */

> +/* { dg-options "-O2 -fdump-tree-pre-stats -fno-tree-cselim" } */

>

>  typedef union {

>    int i;

> diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c

> index b1e0dce93d8..7c67b75486c 100644

> --- a/gcc/tree-ssa-phiopt.c

> +++ b/gcc/tree-ssa-phiopt.c

> @@ -1969,43 +1969,44 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,

>    return true;

>  }

>

> -/* Auxiliary functions to determine the set of memory accesses which

> +/* Auxiliary functions to determine the set of memory write accesses which

>     can't trap because they are preceded by accesses to the same memory

> -   portion.  We do that for MEM_REFs, so we only need to track

> -   the SSA_NAME of the pointer indirectly referenced.  The algorithm

> +   portion.  We do that for MEM_REFs/ARRAY_REFs/COMPONENT_REFs.  The algorithm

>     simply is a walk over all instructions in dominator order.  When

> -   we see an MEM_REF we determine if we've already seen a same

> +   we see an *_REF we determine if we've already seen a same

>     ref anywhere up to the root of the dominator tree.  If we do the

>     current access can't trap.  If we don't see any dominating access

>     the current access might trap, but might also make later accesses

>     non-trapping, so we remember it.  We need to be careful with loads

>     or stores, for instance a load might not trap, while a store would,

>     so if we see a dominating read access this doesn't mean that a later

> -   write access would not trap.  Hence we also need to differentiate the

> -   type of access(es) seen.

> +   write access would not trap.

>

> -   ??? We currently are very conservative and assume that a load might

> -   trap even if a store doesn't (write-only memory).  This probably is

> -   overly conservative.  */

> +   We care about following memory accesses:

> +     1) a store.

> +     2) a load of local variable without address-taken (as local stack is

> +          always writable).  It helps to optimize a conditional store with

> +     a dominating load.

>

> -/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF

> -   through it was seen, which would constitute a no-trap region for

> -   same accesses.  */

> -struct name_to_bb

> +   other loads are ignored as we don't know if the accessed memory is writable,

> +   so they don't help conditional store replacement.  */

> +

> +/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which

> +   basic block an *_REF through it was seen, which would constitute a

> +   no-trap region for same accesses.  */

> +struct ref_to_bb

>  {

> -  unsigned int ssa_name_ver;

> +  tree exp;

>    unsigned int phase;

> -  bool store;

> -  HOST_WIDE_INT offset, size;

>    basic_block bb;

>  };

>

>  /* Hashtable helpers.  */

>

> -struct ssa_names_hasher : free_ptr_hash <name_to_bb>

> +struct refs_hasher : free_ptr_hash<ref_to_bb>

>  {

> -  static inline hashval_t hash (const name_to_bb *);

> -  static inline bool equal (const name_to_bb *, const name_to_bb *);

> +  static inline hashval_t hash (const ref_to_bb *);

> +  static inline bool equal (const ref_to_bb *, const ref_to_bb *);

>  };

>

>  /* Used for quick clearing of the hash-table when we see calls.

> @@ -2015,28 +2016,27 @@ static unsigned int nt_call_phase;

>  /* The hash function.  */

>

>  inline hashval_t

> -ssa_names_hasher::hash (const name_to_bb *n)

> +refs_hasher::hash (const ref_to_bb *n)

>  {

> -  return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)

> -         ^ (n->offset << 6) ^ (n->size << 3);

> +  inchash::hash hstate;

> +  inchash::add_expr (n->exp, hstate);

> +  return hstate.end ();

>  }

>

>  /* The equality function of *P1 and *P2.  */

>

>  inline bool

> -ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)

> +refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2)

>  {

> -  return n1->ssa_name_ver == n2->ssa_name_ver

> -         && n1->store == n2->store

> -         && n1->offset == n2->offset

> -         && n1->size == n2->size;

> +  return operand_equal_p (n1->exp, n2->exp, 0);


So I figured we're going to regress some cases here that do not
have semantically agreeing MEM_REFs, like MEM<double>(s_1)
and MEM<long>(s_1) they would not compare equal but did before,
just considering offset and size.  So we'd like to pass
OEP_ADDRESS_OF as flag argument to operand_equal_p here
and to add_expr above.  This part would then just check whether
the accesses are to the same address.

This means you have to preserve the ->size member (and check it).

The rest of the patch looks good to me.

Thanks,
Richard.

>  }

>

>  class nontrapping_dom_walker : public dom_walker

>  {

>  public:

>    nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)

> -    : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}

> +    : dom_walker (direction), m_nontrapping (ps), m_seen_refs (128)

> +  {}

>

>    virtual edge before_dom_children (basic_block);

>    virtual void after_dom_children (basic_block);

> @@ -2053,7 +2053,7 @@ private:

>    hash_set<tree> *m_nontrapping;

>

>    /* The hash table for remembering what we've seen.  */

> -  hash_table<ssa_names_hasher> m_seen_ssa_names;

> +  hash_table<refs_hasher> m_seen_refs;

>  };

>

>  /* Called by walk_dominator_tree, when entering the block BB.  */

> @@ -2102,65 +2102,63 @@ nontrapping_dom_walker::after_dom_children (basic_block bb)

>  }

>

>  /* We see the expression EXP in basic block BB.  If it's an interesting

> -   expression (an MEM_REF through an SSA_NAME) possibly insert the

> -   expression into the set NONTRAP or the hash table of seen expressions.

> -   STORE is true if this expression is on the LHS, otherwise it's on

> -   the RHS.  */

> +   expression of:

> +     1) MEM_REF

> +     2) ARRAY_REF

> +     3) COMPONENT_REF

> +   possibly insert the expression into the set NONTRAP or the hash table

> +   of seen expressions.  STORE is true if this expression is on the LHS,

> +   otherwise it's on the RHS.  */

>  void

>  nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)

>  {

> -  HOST_WIDE_INT size;

> -

> -  if (TREE_CODE (exp) == MEM_REF

> -      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME

> -      && tree_fits_shwi_p (TREE_OPERAND (exp, 1))

> -      && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)

> +  if (TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF

> +      || TREE_CODE (exp) == COMPONENT_REF)

>      {

> -      tree name = TREE_OPERAND (exp, 0);

> -      struct name_to_bb map;

> -      name_to_bb **slot;

> -      struct name_to_bb *n2bb;

> +      struct ref_to_bb map;

> +      ref_to_bb **slot;

> +      struct ref_to_bb *r2bb;

>        basic_block found_bb = 0;

>

> -      /* Try to find the last seen MEM_REF through the same

> -         SSA_NAME, which can trap.  */

> -      map.ssa_name_ver = SSA_NAME_VERSION (name);

> -      map.phase = 0;

> -      map.bb = 0;

> -      map.store = store;

> -      map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));

> -      map.size = size;

> -

> -      slot = m_seen_ssa_names.find_slot (&map, INSERT);

> -      n2bb = *slot;

> -      if (n2bb && n2bb->phase >= nt_call_phase)

> -        found_bb = n2bb->bb;

> -

> -      /* If we've found a trapping MEM_REF, _and_ it dominates EXP

> -         (it's in a basic block on the path from us to the dominator root)

> +      if (!store)

> +     {

> +       tree base = get_base_address (exp);

> +       /* Only record a LOAD of local variable without address-taken, as

> +          the local stack is always writable.  This allows cselim on a STORE

> +          with a dominating LOAD.  */

> +       if (!auto_var_p (base) || TREE_ADDRESSABLE (base))

> +         return;

> +     }

> +

> +      /* Try to find the last seen *_REF, which can trap.  */

> +      map.exp = exp;

> +      slot = m_seen_refs.find_slot (&map, INSERT);

> +      r2bb = *slot;

> +      if (r2bb && r2bb->phase >= nt_call_phase)

> +     found_bb = r2bb->bb;

> +

> +      /* If we've found a trapping *_REF, _and_ it dominates EXP

> +      (it's in a basic block on the path from us to the dominator root)

>        then we can't trap.  */

>        if (found_bb && (((size_t)found_bb->aux) & 1) == 1)

>       {

>         m_nontrapping->add (exp);

>       }

>        else

> -        {

> +     {

>         /* EXP might trap, so insert it into the hash table.  */

> -       if (n2bb)

> +       if (r2bb)

>           {

> -           n2bb->phase = nt_call_phase;

> -           n2bb->bb = bb;

> +           r2bb->phase = nt_call_phase;

> +           r2bb->bb = bb;

>           }

>         else

>           {

> -           n2bb = XNEW (struct name_to_bb);

> -           n2bb->ssa_name_ver = SSA_NAME_VERSION (name);

> -           n2bb->phase = nt_call_phase;

> -           n2bb->bb = bb;

> -           n2bb->store = store;

> -           n2bb->offset = map.offset;

> -           n2bb->size = size;

> -           *slot = n2bb;

> +           r2bb = XNEW (struct ref_to_bb);

> +           r2bb->phase = nt_call_phase;

> +           r2bb->bb = bb;

> +           r2bb->exp = exp;

> +           *slot = r2bb;

>           }

>       }

>      }

>


--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
Jakub Jelinek via Gcc-patches June 4, 2020, 4:55 a.m. | #5
On Thu, Jun 04, 2020 at 04:47:43AM +0000, Hao Liu OS wrote:
> The patch is refactored a little according to the last comment. Do you have more comments? If no, I will commit it later.

> 

> Tested on X86_64 and AArch64.

> 

> gcc/:

> 

>         PR tree-optimization/89430

>         * tree-ssa-phiopt.c (cond_store_replacement): Extend non-trap checking

>         to support ARRAY_REFs and COMPONENT_REFs.  Support a special case: if

>         there is a dominating load of local variable without address escape,

>         a store is not trapped (as local stack is always writable).

>         The logic is also simplified to ignore other loads, as they don't

>         help to check if a store is trapped (may be read-only).


The ChangeLog entry is certainly incorrect, it doesn't mention all the
classes and methods you've actually changed, but mentions a routine you
haven't changed at all.  And it describes the intent of the changes rather
than the details on what actually changed.  This struct got renamed and this
and this member has been added, etc.

	Jakub
Jakub Jelinek via Gcc-patches June 4, 2020, 6:17 a.m. | #6
Hi Jakub,

I've updated the incorrect  ChangLog.

gcc/:

        PR tree-optimization/89430
        * tree-ssa-phiopt.c
        (struct name_to_bb): Rename to ref_to_bb; add a new field exp;
        remove ssa_name_ver, store, offset fields.
        (struct ssa_names_hasher): Rename to refs_hasher; update functions.
        (class nontrapping_dom_walker): Rename m_seen_ssa_names to m_seen_refs.
        (nontrapping_dom_walker::add_or_mark_expr): Extend to support ARRAY_REFs
        and COMPONENT_REFs.

 Thanks a lot.
-Hao

________________________________________
From: Jakub Jelinek <jakub@redhat.com>

Sent: Thursday, June 4, 2020 12:55
To: Hao Liu OS
Cc: Richard Biener; gcc-patches@gcc.gnu.org
Subject: Re: [PATCH] extend cselim to check non-trapping for more references (PR tree-optimizaton/89430)

On Thu, Jun 04, 2020 at 04:47:43AM +0000, Hao Liu OS wrote:
> The patch is refactored a little according to the last comment. Do you have more comments? If no, I will commit it later.

>

> Tested on X86_64 and AArch64.

>

> gcc/:

>

>         PR tree-optimization/89430

>         * tree-ssa-phiopt.c (cond_store_replacement): Extend non-trap checking

>         to support ARRAY_REFs and COMPONENT_REFs.  Support a special case: if

>         there is a dominating load of local variable without address escape,

>         a store is not trapped (as local stack is always writable).

>         The logic is also simplified to ignore other loads, as they don't

>         help to check if a store is trapped (may be read-only).


The ChangeLog entry is certainly incorrect, it doesn't mention all the
classes and methods you've actually changed, but mentions a routine you
haven't changed at all.  And it describes the intent of the changes rather
than the details on what actually changed.  This struct got renamed and this
and this member has been added, etc.

        Jakub
Jakub Jelinek via Gcc-patches June 4, 2020, 6:44 a.m. | #7
Hi!

The usual way to write it would be:
On Thu, Jun 04, 2020 at 06:17:13AM +0000, Hao Liu OS wrote:
> gcc/:

> 

>         PR tree-optimization/89430

>         * tree-ssa-phiopt.c

>         (struct name_to_bb): Rename to ref_to_bb; add a new field exp;

>         remove ssa_name_ver, store, offset fields.

>         (struct ssa_names_hasher): Rename to refs_hasher; update functions.

>         (class nontrapping_dom_walker): Rename m_seen_ssa_names to m_seen_refs.

>         (nontrapping_dom_walker::add_or_mark_expr): Extend to support ARRAY_REFs

>         and COMPONENT_REFs.


	PR tree-optimization/89430
	* tree-ssa-phiopt.c (struct name_to_bb): Rename to ...
	(ref_to_bb): ... this.  Add a new field exp.  Remove ssa_name_ver,
	store, offset fields.
	(struct ssa_names_hasher): Rename to ...
	(refs_hasher): ... this.  Update methods.
	(class nontrapping_dom_walker): Rename m_seen_ssa_names to m_seen_refs.
	(nontrapping_dom_walker::add_or_mark_expr): Extend to support ARRAY_REFs
	and COMPONENT_REFs.
or so.

Will defer review to Richard.

	Jakub
Richard Biener June 4, 2020, 6:44 a.m. | #8
On Thu, 4 Jun 2020, Hao Liu OS wrote:

> Hi Jakub,

> 

> I've updated the incorrect  ChangLog.

> 

> gcc/:

> 

>         PR tree-optimization/89430

>         * tree-ssa-phiopt.c

>         (struct name_to_bb): Rename to ref_to_bb; add a new field exp;

>         remove ssa_name_ver, store, offset fields.

>         (struct ssa_names_hasher): Rename to refs_hasher; update functions.

>         (class nontrapping_dom_walker): Rename m_seen_ssa_names to m_seen_refs.

>         (nontrapping_dom_walker::add_or_mark_expr): Extend to support ARRAY_REFs

>         and COMPONENT_REFs.

> 

>  Thanks a lot.


Looks good to me.

Thanks,
Richard.

> -Hao

> 

> ________________________________________

> From: Jakub Jelinek <jakub@redhat.com>

> Sent: Thursday, June 4, 2020 12:55

> To: Hao Liu OS

> Cc: Richard Biener; gcc-patches@gcc.gnu.org

> Subject: Re: [PATCH] extend cselim to check non-trapping for more references (PR tree-optimizaton/89430)

> 

> On Thu, Jun 04, 2020 at 04:47:43AM +0000, Hao Liu OS wrote:

> > The patch is refactored a little according to the last comment. Do you have more comments? If no, I will commit it later.

> >

> > Tested on X86_64 and AArch64.

> >

> > gcc/:

> >

> >         PR tree-optimization/89430

> >         * tree-ssa-phiopt.c (cond_store_replacement): Extend non-trap checking

> >         to support ARRAY_REFs and COMPONENT_REFs.  Support a special case: if

> >         there is a dominating load of local variable without address escape,

> >         a store is not trapped (as local stack is always writable).

> >         The logic is also simplified to ignore other loads, as they don't

> >         help to check if a store is trapped (may be read-only).

> 

> The ChangeLog entry is certainly incorrect, it doesn't mention all the

> classes and methods you've actually changed, but mentions a routine you

> haven't changed at all.  And it describes the intent of the changes rather

> than the details on what actually changed.  This struct got renamed and this

> and this member has been added, etc.

> 

>         Jakub

> 

> 


-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a96dedc8838..eae03a10948 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@ 
+2020-05-27  Hao Liu  <hliu@os.amperecomputing.com>
+
+	PR tree-optimization/89430
+	* tree-ssa-phiopt.c (cond_store_replacement): Extend add_or_mark_expr to
+    support ARRAY_REF and COMPONENT_REF by get_inner_reference, so we can check
+    if there is a dominating unconditional load/store.  A special case is also
+    supported: if there is a dominating load of local variable without address
+    escape, a store to the same location can not be trapped (as local stack is
+    always writable).
+
 2020-05-25  Eric Botcazou  <ebotcazou@adacore.com>
 
 	* gimple-ssa-store-merging.c (merged_store_group::can_be_merged_into):
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index aec3a219953..5ffa8127f43 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,13 @@ 
+2020-05-27  Hao Liu  <hliu@os.amperecomputing.com>
+
+	PR tree-optimization/89430
+	* gcc.dg/tree-ssa/pr89430-1.c: Remove xfail.
+	* gcc.dg/tree-ssa/pr89430-2.c: Remove xfail.
+	* gcc.dg/tree-ssa/pr89430-5.c: Remove xfail.
+	* gcc.dg/tree-ssa/pr89430-6.c: Remove xfail.
+	* gcc.dg/tree-ssa/pr89430-7-comp-ref.c.c: New test.
+	* gcc.dg/tree-ssa/ssa-pre-17.c: Add -fno-tree-cselim.
+
 2020-05-25  Eric Botcazou  <ebotcazou@adacore.com>
 
 	* gnat.dg/opt84.adb: New test.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
index ce242ba569b..8ee1850ac63 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
@@ -9,4 +9,4 @@  unsigned test(unsigned k, unsigned b) {
         return a[0]+a[1];
 }
 
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
index 90ae36bfce2..9b96875ac7a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
@@ -11,4 +11,4 @@  unsigned test(unsigned k, unsigned b) {
         return a[0]+a[1];
 }
 
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
index c633cbe947d..b2d04119381 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
@@ -13,4 +13,4 @@  int test(int b, int k) {
     return a.data[0] + a.data[1];
 }
 
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
index 7cad563128d..8d3c4f7cc6a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
@@ -16,4 +16,4 @@  int test(int b, int k) {
     return a.data[0].x + a.data[1].x;
 }
 
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
new file mode 100644
index 00000000000..c35a2afc70b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
+
+typedef union {
+  int i;
+  float f;
+} U;
+
+int foo(U *u, int b, int i)
+{
+  u->i = 0;
+  if (b)
+    u->i = i;
+  return u->i;
+}
+
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
index 09313716598..a06f339f0bb 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-pre-stats" } */
+/* { dg-options "-O2 -fdump-tree-pre-stats -fno-tree-cselim" } */
 
 typedef union {
   int i;
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index b1e0dce93d8..3733780a0bc 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -1986,26 +1986,31 @@  abs_replacement (basic_block cond_bb, basic_block middle_bb,
 
    ??? We currently are very conservative and assume that a load might
    trap even if a store doesn't (write-only memory).  This probably is
-   overly conservative.  */
+   overly conservative.
 
-/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF
-   through it was seen, which would constitute a no-trap region for
-   same accesses.  */
-struct name_to_bb
+   We currently support a special case that for !TREE_ADDRESSABLE automatic
+   variables, it could ignore whether something is a load or store because the
+   local stack should be always writable.  */
+
+/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which
+   basic block an *_REF through it was seen, which would constitute a
+   no-trap region for same accesses.  */
+struct ref_to_bb
 {
-  unsigned int ssa_name_ver;
+  tree base;
+  poly_int64 bitsize, bitpos;
+  tree offset;
   unsigned int phase;
-  bool store;
-  HOST_WIDE_INT offset, size;
+  bool writable;
   basic_block bb;
 };
 
 /* Hashtable helpers.  */
 
-struct ssa_names_hasher : free_ptr_hash <name_to_bb>
+struct refs_hasher : free_ptr_hash<ref_to_bb>
 {
-  static inline hashval_t hash (const name_to_bb *);
-  static inline bool equal (const name_to_bb *, const name_to_bb *);
+  static inline hashval_t hash (const ref_to_bb *);
+  static inline bool equal (const ref_to_bb *, const ref_to_bb *);
 };
 
 /* Used for quick clearing of the hash-table when we see calls.
@@ -2015,28 +2020,44 @@  static unsigned int nt_call_phase;
 /* The hash function.  */
 
 inline hashval_t
-ssa_names_hasher::hash (const name_to_bb *n)
+refs_hasher::hash (const ref_to_bb *n)
 {
-  return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
-         ^ (n->offset << 6) ^ (n->size << 3);
+  inchash::hash hstate;
+  inchash::add_expr (n->base, hstate);
+  hstate.add_poly_int (n->bitsize);
+  hstate.add_poly_int (n->bitpos);
+  hstate.add_int (n->writable);
+  if (n->offset != NULL_TREE)
+    {
+      inchash::add_expr (n->offset, hstate);
+    }
+  return hstate.end ();
 }
 
 /* The equality function of *P1 and *P2.  */
 
 inline bool
-ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)
+refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2)
 {
-  return n1->ssa_name_ver == n2->ssa_name_ver
-         && n1->store == n2->store
-         && n1->offset == n2->offset
-         && n1->size == n2->size;
+  if (operand_equal_p (n1->base, n2->base, 0)
+      && known_eq (n1->bitsize, n2->bitsize)
+      && known_eq (n1->bitpos, n2->bitpos) && n1->writable == n2->writable)
+    {
+      /* operand_equal_p must not be called with NULL_TREE operands.  */
+      if (n1->offset == NULL_TREE || n2->offset == NULL_TREE)
+	  return n1->offset == n2->offset;
+      else
+	  return operand_equal_p (n1->offset, n2->offset, 0);
+    }
+  return false;
 }
 
 class nontrapping_dom_walker : public dom_walker
 {
 public:
   nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)
-    : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}
+    : dom_walker (direction), m_nontrapping (ps), m_seen_refs (256)
+  {}
 
   virtual edge before_dom_children (basic_block);
   virtual void after_dom_children (basic_block);
@@ -2053,7 +2074,7 @@  private:
   hash_set<tree> *m_nontrapping;
 
   /* The hash table for remembering what we've seen.  */
-  hash_table<ssa_names_hasher> m_seen_ssa_names;
+  hash_table<refs_hasher> m_seen_refs;
 };
 
 /* Called by walk_dominator_tree, when entering the block BB.  */
@@ -2102,65 +2123,76 @@  nontrapping_dom_walker::after_dom_children (basic_block bb)
 }
 
 /* We see the expression EXP in basic block BB.  If it's an interesting
-   expression (an MEM_REF through an SSA_NAME) possibly insert the
-   expression into the set NONTRAP or the hash table of seen expressions.
-   STORE is true if this expression is on the LHS, otherwise it's on
-   the RHS.  */
+   expression of:
+     1) MEM_REF
+     2) ARRAY_REF
+     3) COMPONENT_REF
+   possibly insert the expression into the set NONTRAP or the hash table
+   of seen expressions.  STORE is true if this expression is on the LHS,
+   otherwise it's on the RHS.  */
 void
 nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)
 {
-  HOST_WIDE_INT size;
-
-  if (TREE_CODE (exp) == MEM_REF
-      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
-      && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
-      && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
+  if (TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF
+      || TREE_CODE (exp) == COMPONENT_REF)
     {
-      tree name = TREE_OPERAND (exp, 0);
-      struct name_to_bb map;
-      name_to_bb **slot;
-      struct name_to_bb *n2bb;
+      struct ref_to_bb map;
+      ref_to_bb **slot;
+      struct ref_to_bb *r2bb;
       basic_block found_bb = 0;
 
-      /* Try to find the last seen MEM_REF through the same
-         SSA_NAME, which can trap.  */
-      map.ssa_name_ver = SSA_NAME_VERSION (name);
-      map.phase = 0;
-      map.bb = 0;
-      map.store = store;
-      map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
-      map.size = size;
-
-      slot = m_seen_ssa_names.find_slot (&map, INSERT);
-      n2bb = *slot;
-      if (n2bb && n2bb->phase >= nt_call_phase)
-        found_bb = n2bb->bb;
-
-      /* If we've found a trapping MEM_REF, _and_ it dominates EXP
-         (it's in a basic block on the path from us to the dominator root)
+      /* Try to find the last seen *_REF (through the same base expression
+	 + bitsize + bitpos + offset expression), which can trap.  */
+      machine_mode nmode;
+      poly_int64 bitsize, bitpos;
+      tree offset;
+      int nunsignedp, nreversep, nvolatilep = 0;
+      tree base = get_inner_reference (exp, &bitsize, &bitpos, &offset, &nmode,
+					&nunsignedp, &nreversep, &nvolatilep);
+      gcc_assert (base != NULL_TREE);
+      map.base = base;
+      map.offset = offset;
+      map.bitsize = bitsize;
+      map.bitpos = bitpos;
+
+      /* The reference is writable if EXP is a:
+	 1) STORE, or
+	 2) LOAD of a local variable whose address is not taken (as the local
+	 stack is always writable).  This allows cselim on a STORE with a
+	 dominating LOAD.  */
+      map.writable = store || (auto_var_p (base) && !TREE_ADDRESSABLE (base));
+
+      slot = m_seen_refs.find_slot (&map, INSERT);
+      r2bb = *slot;
+      if (r2bb && r2bb->phase >= nt_call_phase)
+	found_bb = r2bb->bb;
+
+      /* If we've found a trapping *_REF, _and_ it dominates EXP
+	 (it's in a basic block on the path from us to the dominator root)
 	 then we can't trap.  */
-      if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
+      if (found_bb && (((size_t) found_bb->aux) & 1) == 1)
 	{
 	  m_nontrapping->add (exp);
 	}
       else
-        {
+	{
 	  /* EXP might trap, so insert it into the hash table.  */
-	  if (n2bb)
+	  if (r2bb)
 	    {
-	      n2bb->phase = nt_call_phase;
-	      n2bb->bb = bb;
+	      r2bb->phase = nt_call_phase;
+	      r2bb->bb = bb;
 	    }
 	  else
 	    {
-	      n2bb = XNEW (struct name_to_bb);
-	      n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
-	      n2bb->phase = nt_call_phase;
-	      n2bb->bb = bb;
-	      n2bb->store = store;
-	      n2bb->offset = map.offset;
-	      n2bb->size = size;
-	      *slot = n2bb;
+	      r2bb = XNEW (struct ref_to_bb);
+	      r2bb->phase = nt_call_phase;
+	      r2bb->bb = bb;
+	      r2bb->base = base;
+	      r2bb->offset = map.offset;
+	      r2bb->bitsize = map.bitsize;
+	      r2bb->bitpos = map.bitpos;
+	      r2bb->writable = map.writable;
+	      *slot = r2bb;
 	    }
 	}
     }