Fix PR87074: miscompilation with unroll-and-jam

Message ID alpine.LSU.2.21.1808311400020.7867@wotan.suse.de
State New
Headers show
Series
  • Fix PR87074: miscompilation with unroll-and-jam
Related show

Commit Message

Michael Matz Aug. 31, 2018, 2:11 p.m.
Hi,

as Jakub correctly identified, uses of values produced by the inner loop 
that occur inside the outer loop prevent fusion as well.  We are 
in LCSSA so the check is easy.  (Uses outside the outer loop are okay, see 
the comment)

Regstrapping on x86-64-linux in progress.  Okay for trunk?


Ciao,
Michael.

	* gimple-loop-jam.c (unroll_jam_possible_p): Check loop exit
	PHIs for outer-loop uses.

testsuite/
	* gcc.dg/pr87074.c: New test.

Comments

Richard Biener Aug. 31, 2018, 2:31 p.m. | #1
On August 31, 2018 4:11:26 PM GMT+02:00, Michael Matz <matz@suse.de> wrote:
>Hi,

>

>as Jakub correctly identified, uses of values produced by the inner

>loop 

>that occur inside the outer loop prevent fusion as well.  We are 

>in LCSSA so the check is easy.  (Uses outside the outer loop are okay,

>see 

>the comment)

>

>Regstrapping on x86-64-linux in progress.  Okay for trunk?


OK. 

Richard. 

>

>Ciao,

>Michael.

>

>	* gimple-loop-jam.c (unroll_jam_possible_p): Check loop exit

>	PHIs for outer-loop uses.

>

>testsuite/

>	* gcc.dg/pr87074.c: New test.

>

>diff --git a/gcc/gimple-loop-jam.c b/gcc/gimple-loop-jam.c

>index 2ecd1bb5a7c..c6bd0428684 100644

>--- a/gcc/gimple-loop-jam.c

>+++ b/gcc/gimple-loop-jam.c

>@@ -161,7 +161,7 @@ bb_prevents_fusion_p (basic_block bb)

>   gimple_stmt_iterator gsi;

>   /* BB is duplicated by outer unrolling and then all N-1 first copies

>move into the body of the fused inner loop.  If BB exits the outer loop

>-     the last copy still doess so, and the first N-1 copies are

>cancelled

>+     the last copy still does so, and the first N-1 copies are

>cancelled

>      by loop unrolling, so also after fusion it's the exit block.

>      But there might be other reasons that prevent fusion:

>        * stores or unknown side-effects prevent fusion

>@@ -227,6 +227,33 @@ unroll_jam_possible_p (struct loop *outer, struct

>loop *loop)

>       || !expr_invariant_in_loop_p (outer, niter.niter))

>     return false;

> 

>+  /* If the inner loop produces any values that are used inside the

>+     outer loop (except the virtual op) then it can flow

>+     back (perhaps indirectly) into the inner loop.  This prevents

>+     fusion: without fusion the value at the last iteration is used,

>+     with fusion the value after the initial iteration is used.

>+

>+     If all uses are outside the outer loop this doesn't prevent

>fusion;

>+     the value of the last iteration is still used (and the values

>from

>+     all intermediate iterations are dead).  */

>+  gphi_iterator psi;

>+  for (psi = gsi_start_phis (single_exit (loop)->dest);

>+       !gsi_end_p (psi); gsi_next (&psi))

>+    {

>+      imm_use_iterator imm_iter;

>+      use_operand_p use_p;

>+      tree op = gimple_phi_result (psi.phi ());

>+      if (virtual_operand_p (op))

>+	continue;

>+      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, op)

>+	{

>+	  gimple *use_stmt = USE_STMT (use_p);

>+	  if (!is_gimple_debug (use_stmt)

>+	      && flow_bb_inside_loop_p (outer, gimple_bb (use_stmt)))

>+	    return false;

>+	}

>+    }

>+

>   /* And check blocks belonging to just outer loop.  */

>   bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));

>n = get_loop_body_with_size (outer, bbs, n_basic_blocks_for_fn (cfun));

>@@ -245,7 +272,6 @@ unroll_jam_possible_p (struct loop *outer, struct

>loop *loop)

>     body would be the after-iter value of the first body) if it's over

>      an associative and commutative operation.  We wouldn't

>      be able to handle unknown cycles.  */

>-  gphi_iterator psi;

>for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next

>(&psi))

>     {

>       affine_iv iv;

>diff --git a/gcc/testsuite/gcc.dg/pr87074.c

>b/gcc/testsuite/gcc.dg/pr87074.c

>new file mode 100644

>index 00000000000..d838fcd8fc5

>--- /dev/null

>+++ b/gcc/testsuite/gcc.dg/pr87074.c

>@@ -0,0 +1,25 @@

>+/* { dg-do run } */

>+/* { dg-options "-O3 -floop-unroll-and-jam --param

>unroll-jam-min-percent=0" } */

>+long b;

>+unsigned c[5];

>+unsigned long long d = 3;

>+int e, f, g;

>+

>+void h() {

>+  for (; f < 11; f++) {

>+    b = g;

>+    for (e = 0; e < 5; e++) {

>+      c[e] = e - b - (c[e] >> 5);

>+      g = c[e];

>+    }

>+  }

>+  if (c[0])

>+    d = 0;

>+}

>+

>+extern void abort(void);

>+int main() {

>+  h();

>+  if (d != 0)

>+    abort ();

>+}
Jakub Jelinek Aug. 31, 2018, 2:33 p.m. | #2
On Fri, Aug 31, 2018 at 04:31:09PM +0200, Richard Biener wrote:
> On August 31, 2018 4:11:26 PM GMT+02:00, Michael Matz <matz@suse.de> wrote:

> >Hi,

> >

> >as Jakub correctly identified, uses of values produced by the inner

> >loop 

> >that occur inside the outer loop prevent fusion as well.  We are 

> >in LCSSA so the check is easy.  (Uses outside the outer loop are okay,

> >see 

> >the comment)

> >

> >Regstrapping on x86-64-linux in progress.  Okay for trunk?

> 

> OK. 


And please backport it for 8.x too ;)

	Jakub

Patch

diff --git a/gcc/gimple-loop-jam.c b/gcc/gimple-loop-jam.c
index 2ecd1bb5a7c..c6bd0428684 100644
--- a/gcc/gimple-loop-jam.c
+++ b/gcc/gimple-loop-jam.c
@@ -161,7 +161,7 @@  bb_prevents_fusion_p (basic_block bb)
   gimple_stmt_iterator gsi;
   /* BB is duplicated by outer unrolling and then all N-1 first copies
      move into the body of the fused inner loop.  If BB exits the outer loop
-     the last copy still doess so, and the first N-1 copies are cancelled
+     the last copy still does so, and the first N-1 copies are cancelled
      by loop unrolling, so also after fusion it's the exit block.
      But there might be other reasons that prevent fusion:
        * stores or unknown side-effects prevent fusion
@@ -227,6 +227,33 @@  unroll_jam_possible_p (struct loop *outer, struct loop *loop)
       || !expr_invariant_in_loop_p (outer, niter.niter))
     return false;
 
+  /* If the inner loop produces any values that are used inside the
+     outer loop (except the virtual op) then it can flow
+     back (perhaps indirectly) into the inner loop.  This prevents
+     fusion: without fusion the value at the last iteration is used,
+     with fusion the value after the initial iteration is used.
+
+     If all uses are outside the outer loop this doesn't prevent fusion;
+     the value of the last iteration is still used (and the values from
+     all intermediate iterations are dead).  */
+  gphi_iterator psi;
+  for (psi = gsi_start_phis (single_exit (loop)->dest);
+       !gsi_end_p (psi); gsi_next (&psi))
+    {
+      imm_use_iterator imm_iter;
+      use_operand_p use_p;
+      tree op = gimple_phi_result (psi.phi ());
+      if (virtual_operand_p (op))
+	continue;
+      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, op)
+	{
+	  gimple *use_stmt = USE_STMT (use_p);
+	  if (!is_gimple_debug (use_stmt)
+	      && flow_bb_inside_loop_p (outer, gimple_bb (use_stmt)))
+	    return false;
+	}
+    }
+
   /* And check blocks belonging to just outer loop.  */
   bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
   n = get_loop_body_with_size (outer, bbs, n_basic_blocks_for_fn (cfun));
@@ -245,7 +272,6 @@  unroll_jam_possible_p (struct loop *outer, struct loop *loop)
      body would be the after-iter value of the first body) if it's over
      an associative and commutative operation.  We wouldn't
      be able to handle unknown cycles.  */
-  gphi_iterator psi;
   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
     {
       affine_iv iv;
diff --git a/gcc/testsuite/gcc.dg/pr87074.c b/gcc/testsuite/gcc.dg/pr87074.c
new file mode 100644
index 00000000000..d838fcd8fc5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr87074.c
@@ -0,0 +1,25 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -floop-unroll-and-jam --param unroll-jam-min-percent=0" } */
+long b;
+unsigned c[5];
+unsigned long long d = 3;
+int e, f, g;
+
+void h() {
+  for (; f < 11; f++) {
+    b = g;
+    for (e = 0; e < 5; e++) {
+      c[e] = e - b - (c[e] >> 5);
+      g = c[e];
+    }
+  }
+  if (c[0])
+    d = 0;
+}
+
+extern void abort(void);
+int main() {
+  h();
+  if (d != 0)
+    abort ();
+}