[og7] Enable firstprivate OpenACC reductions

Message ID 93786c8e-047c-e526-4913-d7f360dd1fe0@codesourcery.com
State Superseded
Headers show
Series
  • [og7] Enable firstprivate OpenACC reductions
Related show

Commit Message

Cesar Philippidis Jan. 31, 2018, 3:29 p.m.
I've applied this patch to openacc-gcc-7-branch which teaches the
gimplifier how to pass certain OpenACC reduction variables as
firstprivate, and not with an implicit copy directive. This is matches
the default behavior for the implicit data mappings of scalar variables
inside OpenACC parallel regions. It should be noted that the gimplifier
will still implicitly map reduction variables on loops immediately
enclosed inside a parallel regions, like so

  #pragma acc parallel
  #pragma acc loop reduction(+:sum)

as copy. This change only impacts reductions variables inside nested acc
loops like

  #pragma acc parallel
  #pragma acc loop
  for (...)
  {
    #pragma acc loop reduction(+:s2)

Here s2 will be transferred into the accelerator as firstprivate instead
of copy.

Chung-Lin, I think one of us should go back and clean up the reduction
variable logic inside gimplify.c:omp_add_variable. I know that it's been
a while since you last worked on this. Let me know if you have any state
on that code, otherwise I'll handle the cleanup.

Cesar

Patch

Enable firstprivate OpenACC reductions

2018-01-31  Cesar Philippidis  <cesar@codesourcery.com>

	gcc/
	* gimplify.c (omp_add_variable): Allow certain OpenACC reduction
	variables to remain firstprivate.

	gcc/testsuite/
	* c-c++-common/goacc/reduction-8.c: New test.


diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 72ed8f1a249..44c03ab8310 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -6740,9 +6740,16 @@  omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
   else
     splay_tree_insert (ctx->variables, (splay_tree_key)decl, flags);
 
-  /* For reductions clauses in OpenACC loop directives, by default create a
-     copy clause on the enclosing parallel construct for carrying back the
-     results.  */
+  /* For OpenACC loop directives, when a reduction is immediately
+     enclosed within an acc parallel or kernels construct, it must
+     have an implied copy data mapping. E.g.
+
+       #pragma acc parallel
+	 {
+	   #pragma acc loop reduction (+:sum)
+
+     a copy clause for sum should be added on the enclosing parallel
+     construct for carrying back the results.  */
   if (ctx->region_type == ORT_ACC && (flags & GOVD_REDUCTION))
     {
       struct gimplify_omp_ctx *outer_ctx = ctx->outer_context;
@@ -6758,8 +6765,11 @@  omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
 	    vector = true;
 	}
 
-      /* Set new copy map as 'private' if sure we're not gang-partitioning.  */
-      bool map_private;
+      /* Reduction data maps need to be marked as private for worker
+	 and vector loops, in order to ensure that value of the
+	 reduction carried back to the host.  Set new copy map as
+	 'private' if sure we're not gang-partitioning.  */
+      bool map_private, update_data_map = false;
 
       if (gang)
 	map_private = false;
@@ -6768,6 +6778,10 @@  omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
       else
 	map_private = oacc_privatize_reduction (ctx->outer_context);
 
+      if (ctx->outer_context
+	  && ctx->outer_context->region_type == ORT_ACC_PARALLEL)
+	update_data_map = true;
+
       while (outer_ctx)
 	{
 	  n = splay_tree_lookup (outer_ctx->variables, (splay_tree_key)decl);
@@ -6784,7 +6798,8 @@  omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
 		  gcc_assert (!(n->value & GOVD_FIRSTPRIVATE)
 			      && (n->value & GOVD_MAP));
 		}
-	      else if (outer_ctx->region_type == ORT_ACC_PARALLEL)
+	      else if (update_data_map
+		       && outer_ctx->region_type == ORT_ACC_PARALLEL)
 		{
 		  /* Remove firstprivate and make it a copy map.  */
 		  n->value &= ~GOVD_FIRSTPRIVATE;
@@ -6796,7 +6811,8 @@  omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
 		    n->value |= GOVD_MAP_PRIVATE;
 		}
 	    }
-	  else if (outer_ctx->region_type == ORT_ACC_PARALLEL)
+	  else if (update_data_map
+		   && outer_ctx->region_type == ORT_ACC_PARALLEL)
 	    {
 	      unsigned f = GOVD_MAP | GOVD_SEEN;
 
diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-8.c b/gcc/testsuite/c-c++-common/goacc/reduction-8.c
new file mode 100644
index 00000000000..8a0283f4ac3
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/reduction-8.c
@@ -0,0 +1,94 @@ 
+/* { dg-additional-options "-fdump-tree-gimple" } */
+
+#define n 1000
+
+int
+main(void)
+{
+  int i, j;
+  int result, array[n];
+
+#pragma acc parallel loop reduction (+:result)
+  for (i = 0; i < n; i++)
+    result ++;
+
+#pragma acc parallel
+#pragma acc loop reduction (+:result)
+  for (i = 0; i < n; i++)
+    result ++;
+
+#pragma acc parallel
+#pragma acc loop
+  for (i = 0; i < n; i++)
+    {
+      result = i;
+
+#pragma acc loop reduction(+:result)
+      for (j = 0; j < n; j++)
+	result ++;
+
+      array[i] = result;
+    }
+
+#pragma acc parallel
+#pragma acc loop
+  for (i = 0; i < n; i++)
+    {
+      result = i;
+
+#pragma acc loop worker vector reduction(+:result)
+      for (j = 0; j < n; j++)
+	result ++;
+
+      array[i] = result;
+    }
+
+#pragma acc parallel
+#pragma acc loop // { dg-warning "insufficient partitioning" }
+  for (i = 0; i < n; i++)
+    {
+      result = i;
+
+#pragma acc loop gang reduction(+:result)
+      for (j = 0; j < n; j++)
+	result ++;
+
+      array[i] = result;
+    }
+
+#pragma acc parallel copy(result)
+#pragma acc loop // { dg-warning "insufficient partitioning" }
+  for (i = 0; i < n; i++)
+    {
+      result = i;
+
+#pragma acc loop gang reduction(+:result)
+      for (j = 0; j < n; j++)
+	result ++;
+
+      array[i] = result;
+    }
+  
+#pragma acc kernels
+#pragma acc loop
+  for (i = 0; i < n; i++)
+    {
+      result = i;
+
+#pragma acc loop reduction(+:result)
+      for (j = 0; j < n; j++)
+	result ++;
+
+      array[i] = result;
+    }
+
+  return 0;
+}
+
+/* Check that default copy maps are generated for loop reductions.  */
+/* { dg-final { scan-tree-dump-times "reduction..:result. map.tofrom:result .len: 4.." 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "oacc_parallel map.tofrom:result .len: 4.." 2 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map.tofrom:array .len: 4000.. firstprivate.result." 3 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map.tofrom:result .len: 4.. map.tofrom:array .len: 4000.." 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map.tofrom:array .len: 4000.. map.force_tofrom:result .len: 4.." 1 "gimple" } } */
+