[V3] Remove empty loop with assumed finiteness (PR tree-optimization/89713)

Message ID BYAPR01MB48699D78B2C44C438176B6C5F7020@BYAPR01MB4869.prod.exchangelabs.com
State New
Headers show
Series
  • [V3] Remove empty loop with assumed finiteness (PR tree-optimization/89713)
Related show

Commit Message

Feng Xue OS May 24, 2019, 4:02 p.m.
Doing aggressive loop removal at early CD-DCE might break OpenACC transformation. To the issue, this version 3 gave a somewhat simpler solution different from previous version 2. It uses a flag to postpone loop removal to late CD-DCE pass.

Feng
---------

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9f0f889..f18abdc 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@ 
+2019-05-23  Feng Xue  <fxue@os.amperecomputing.com>
+
+	PR tree-optimization/89713
+	* doc/invoke.texi (-ffinite-loop): Document new option.
+	* common.opt (-ffinite-loop): New option.
+	* tree-ssa-dce.c (loop_has_true_exits): New function.
+	(find_obviously_necessary_stmts): Use flag to control
+	removal of a loop with assumed finiteness. 
+	* tree-ssa-loop.c (tree_ssa_loop_done): Update
+	flag_finite_loop.
+
 2019-05-22  David Malcolm  <dmalcolm@redhat.com>
 
 	PR c++/90462
diff --git a/gcc/common.opt b/gcc/common.opt
index d342c4f..e98a34d 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1437,6 +1437,10 @@  ffinite-math-only
 Common Report Var(flag_finite_math_only) Optimization SetByCombined
 Assume no NaNs or infinities are generated.
 
+ffinite-loop
+Common Report Var(flag_finite_loop) Optimization
+Assume loops are finite if can not be analytically determined.
+
 ffixed-
 Common Joined RejectNegative Var(common_deferred_options) Defer
 -ffixed-<register>	Mark <register> as being unavailable to the compiler.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 6c89843..caa0852 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -412,6 +412,7 @@  Objective-C and Objective-C++ Dialects}.
 -fdevirtualize-at-ltrans  -fdse @gol
 -fearly-inlining  -fipa-sra  -fexpensive-optimizations  -ffat-lto-objects @gol
 -ffast-math  -ffinite-math-only  -ffloat-store  -fexcess-precision=@var{style} @gol
+-ffinite-loop @gol
 -fforward-propagate  -ffp-contract=@var{style}  -ffunction-sections @gol
 -fgcse  -fgcse-after-reload  -fgcse-las  -fgcse-lm  -fgraphite-identity @gol
 -fgcse-sm  -fhoist-adjacent-loads  -fif-conversion @gol
@@ -9501,6 +9502,20 @@  that may set @code{errno} but are otherwise free of side effects.  This flag is
 enabled by default at @option{-O2} and higher if @option{-Os} is not also
 specified.
 
+@item -ffinite-loop
+@opindex ffinite-loop
+@opindex fno-finite-loop
+Allow the compiler to assume that if finiteness of a loop can not be
+analytically determined, the loop must be finite. With the assumption, some
+aggressive transformation could be possible, such as removal of this kind
+of empty loop by dead code elimination (DCE).
+
+This option is not turned on by any @option{-O} option since it might result
+in incorrect behaviour for programs that contain seemly finite, but actually
+infinite loop.
+
+The default is @option{-fno-finite-loop}.
+
 @item -ftree-dominator-opts
 @opindex ftree-dominator-opts
 Perform a variety of simple scalar cleanups (constant/copy
diff --git a/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C
new file mode 100644
index 0000000..f6564a2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C
@@ -0,0 +1,33 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cddce3 -ffinite-loop" } */
+
+#include <string>
+#include <vector>
+#include <list>
+#include <set>
+#include <map>
+
+using namespace std;
+
+int foo (vector<string> &v, list<string> &l, set<string> &s, map<int, string> &m)
+{
+  for (vector<string>::iterator it = v.begin (); it != v.end (); ++it)
+    it->length();
+
+  for (list<string>::iterator it = l.begin (); it != l.end (); ++it)
+    it->length();
+
+  for (map<int, string>::iterator it = m.begin (); it != m.end (); ++it)
+    it->first + it->second.length();
+
+  for (set<string>::iterator it0 = s.begin (); it0 != s.end(); ++it0)
+    for (vector<string>::reverse_iterator it1 = v.rbegin(); it1 != v.rend(); ++it1)
+      {
+        it0->length();
+        it1->length();
+      }  
+
+  return 0;
+}
+/* { dg-final { scan-tree-dump-not "if" "cddce3"} } */
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/dce-2.c b/gcc/testsuite/gcc.dg/tree-ssa/dce-2.c
new file mode 100644
index 0000000..d9b9fd2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/dce-2.c
@@ -0,0 +1,37 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cddce3 -ffinite-loop" } */
+
+typedef struct list {
+    char pad[15];
+    struct list *next;
+} list;
+
+int data;
+
+list *head, *tail;
+
+int __attribute__((pure)) pfn (int);
+
+int foo (unsigned u, int s)
+{
+  unsigned i;
+  list *p;
+  int j;
+
+  for (i = 0; i < u; i += 2)
+    ;
+
+  for (p = head; p; p = p->next)
+    ;
+
+  for (j = data; j & s; j = pfn (j + 3))
+    ;
+
+  for (p = head; p != tail; p = p->next)
+    for (j = data + 1; j > s; j = pfn (j + 2))
+      ;
+
+  return 0;
+}
+/* { dg-final { scan-tree-dump-not "if" "cddce3"} } */
+
diff --git a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c
index 2478219..25f722f 100644
--- a/gcc/tree-ssa-dce.c
+++ b/gcc/tree-ssa-dce.c
@@ -357,6 +357,28 @@  mark_control_dependent_edges_necessary (basic_block bb, bool ignore_self)
 }
 
 
+/* Check whether a loop has any non-EH exit. */
+
+static bool
+loop_has_true_exits (const struct loop *loop)
+{
+  vec<edge> exits = get_loop_exit_edges (loop);
+  bool found = false;
+  edge e;
+  unsigned i;
+
+  FOR_EACH_VEC_ELT (exits, i, e)
+    if (!(e->flags & EDGE_EH))
+      {
+        found = true;
+        break;
+      }
+
+  exits.release ();
+  return found;
+}
+
+
 /* Find obviously necessary statements.  These are things like most function
    calls, and stores to file level variables.
 
@@ -419,6 +441,25 @@  find_obviously_necessary_stmts (bool aggressive)
       FOR_EACH_LOOP (loop, 0)
 	if (!finite_loop_p (loop))
 	  {
+	    /* If done at early CD-DCE pass, aggressive loop removal using
+	       assumed finiteness could break OpenACC transformation. It might
+	       incorrectly change or even remove IFN_GOACC_LOOP calls, which
+	       represent parameters (i.e. step, bound) of an abstract OpenACC
+	       partitioned loop, therefore are required by OpenACC loop
+	       offload lowering. Here we need a way to postpone aggressive
+	       loop removal to late pass.
+
+	       When flag_finite_loop is 1, it means the optimization is
+	       enabled, but time is not ready. Once ready, flag_finite_loop
+	       will be updated with larger integer somewhere, and loop
+	       removal is truely allowed. */
+	    if (flag_finite_loop > 1 && loop_has_true_exits (loop))
+	      {
+		if (dump_file)
+		  fprintf (dump_file, "assume loop %i to be finite\n", loop->num);
+		continue;
+	      }
+
 	    if (dump_file)
 	      fprintf (dump_file, "cannot prove finiteness of loop %i\n", loop->num);
 	    mark_control_dependent_edges_necessary (loop->latch, false);
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index 1ac6cee..4175f9e 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -527,6 +527,11 @@  make_pass_iv_optimize (gcc::context *ctxt)
 static unsigned int
 tree_ssa_loop_done (void)
 {
+  /* After all loop optimizations are done, increase flag_finite_loop to
+     truely allow aggressive loop removal in CD-DCE pass. */
+  if (flag_finite_loop)
+    flag_finite_loop++;
+
   free_numbers_of_iterations_estimates (cfun);
   scev_finalize ();
   loop_optimizer_finalize ();
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr84955-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr84955-1.c
new file mode 100644
index 0000000..65a34d8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr84955-1.c
@@ -0,0 +1,31 @@ 
+/* { dg-do compile }  */
+/* { dg-options "-O2 -fdump-tree-cddce3 -ffinite-loop" } */
+
+int
+f1 (void)
+{
+  int i, j;
+
+#pragma acc parallel loop tile(2,3)
+  for (i = 1; i < 10; i++)
+    for (j = 1; j < 10; j++)
+      for (;;)
+	;
+
+  return i + j;
+}
+
+int
+f2 (void)
+{
+  int i, j, k;
+
+#pragma acc parallel loop tile(2,3)
+  for (i = 1; i < 10; i++)
+    for (j = 1; j < 10; j++)
+      for (k = 1; k < 10; k++)
+	;
+
+  return i + j;
+}
+/* { dg-final { scan-tree-dump-not "if" "cddce3"} } */