Add support for making maps 'private' inside OpenACC offloaded regions

Message ID a53db4e3-3c0c-1ed8-3b98-38c74ab8849d@codesourcery.com
State New
Headers show
Series
  • Add support for making maps 'private' inside OpenACC offloaded regions
Related show

Commit Message

Cesar Philippidis July 20, 2018, 9:48 p.m.
Due to the different levels of parallelism available in OpenACC, it is
useful to mark certain variables as GOMP_MAP_PRIVATE so that they can be
used in reductions. This patch was introduced in openacc-gcc-7-branch
here <https://gcc.gnu.org/ml/gcc-patches/2017-09/msg00274.html>.


I bootstrapped and regtested on x86_64/nvptx. Is it OK for trunk?

Thanks,
Cesar

Patch

From b0e7fb09bf3a3f853e77c2712b6f85ad21472e72 Mon Sep 17 00:00:00 2001
From: Chung-Lin Tang <cltang@codesourcery.com>
Date: Tue, 5 Sep 2017 22:09:34 +0800
Subject: [PATCH 2/5] [OpenACC] Add support for making maps 'private' inside
 offloaded regions

2018-XX-YY Chung-Lin Tang  <cltang@codesourcery.com>
	   Cesar Philippidis  <cesar@codesourcery.com>

	gcc/
	* tree.h (OMP_CLAUSE_MAP_PRIVATE): Define macro.
	* gimplify.c (enum gimplify_omp_var_data): Add GOVD_MAP_PRIVATE enum value.
	(omp_add_variable): Add GOVD_MAP_PRIVATE to reduction clause flags if
	not a gang-partitioned loop directive.
	(gimplify_adjust_omp_clauses_1): Set OMP_CLAUSE_MAP_PRIVATE of new map
	clause to 1 if GOVD_MAP_PRIVATE flag is present.
	* omp-low.c (lower_oacc_reductions): Handle map clauses with
	OMP_CLAUSE_MAP_PRIVATE set in same matter as firstprivate/private.
	(lower_omp_target): Likewise. Add copy back code for map clauses with
	OMP_CLAUSE_MAP_PRIVATE set.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/reduction-9.c: New test.

(cherry picked from openacc-gcc-7-branch commit
2dc21f336368889c1ebf031801a7613f65899ef1, e17bb2068f9)
---
 gcc/gimplify.c                                | 34 ++++++++++++++-
 gcc/omp-low.c                                 | 28 +++++++++++--
 gcc/tree.h                                    |  3 ++
 .../libgomp.oacc-c-c++-common/reduction-9.c   | 41 +++++++++++++++++++
 4 files changed, 101 insertions(+), 5 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-9.c

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index cf8977c8508..7dadf69b758 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -105,6 +105,9 @@  enum gimplify_omp_var_data
   /* Flag for GOVD_MAP: must be present already.  */
   GOVD_MAP_FORCE_PRESENT = 524288,
 
+  /* Flag for GOVD_MAP, copy to/from private storage inside offloaded region.  */
+  GOVD_MAP_PRIVATE = 1048576,
+
   GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE
 			   | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR
 			   | GOVD_LOCAL)
@@ -6835,6 +6838,21 @@  omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
   if (ctx->region_type == ORT_ACC && (flags & GOVD_REDUCTION))
     {
       struct gimplify_omp_ctx *outer_ctx = ctx->outer_context;
+
+      bool gang = false, worker = false, vector = false;
+      for (tree c = ctx->clauses; c; c = OMP_CLAUSE_CHAIN (c))
+	{
+	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_GANG)
+	    gang = true;
+	  else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WORKER)
+	    worker = true;
+	  else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_VECTOR)
+	    vector = true;
+	}
+
+      /* Set new copy map as 'private' if sure we're not gang-partitioning.  */
+      bool map_private = !gang && (worker || vector);
+
       while (outer_ctx)
 	{
 	  n = splay_tree_lookup (outer_ctx->variables, (splay_tree_key)decl);
@@ -6856,12 +6874,21 @@  omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
 		  /* Remove firstprivate and make it a copy map.  */
 		  n->value &= ~GOVD_FIRSTPRIVATE;
 		  n->value |= GOVD_MAP;
+
+		  /* If not gang-partitioned, add MAP_PRIVATE on the map
+		     clause.  */
+		  if (map_private)
+		    n->value |= GOVD_MAP_PRIVATE;
 		}
 	    }
 	  else if (outer_ctx->region_type == ORT_ACC_PARALLEL)
 	    {
-	      splay_tree_insert (outer_ctx->variables, (splay_tree_key)decl,
-				 GOVD_MAP | GOVD_SEEN);
+	      unsigned f = GOVD_MAP | GOVD_SEEN;
+
+	      /* If not gang-partitioned, add MAP_PRIVATE on the map clause.  */
+	      if (map_private)
+		f |= GOVD_MAP_PRIVATE;
+	      splay_tree_insert (outer_ctx->variables, (splay_tree_key)decl, f);
 	      break;
 	    }
 	  outer_ctx = outer_ctx->outer_context;
@@ -8904,6 +8931,9 @@  gimplify_adjust_omp_clauses_1 (splay_tree_node n, void *data)
 	  gcc_unreachable ();
 	}
       OMP_CLAUSE_SET_MAP_KIND (clause, kind);
+      if ((flags & GOVD_MAP_PRIVATE)
+	  && TREE_CODE (OMP_CLAUSE_DECL (clause)) == VAR_DECL)
+	OMP_CLAUSE_MAP_PRIVATE (clause) = 1;
       tree c2 = gomp_needs_data_present (decl);
       /* Handle OpenACC pointers that were declared inside acc data
 	 regions.  */
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 714490d6921..ef3c7651c74 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -4907,7 +4907,9 @@  lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
 		      goto has_outer_reduction;
 		    }
 		  else if ((OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_FIRSTPRIVATE
-			    || OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_PRIVATE)
+			    || OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_PRIVATE
+			    || (OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_MAP
+				&& OMP_CLAUSE_MAP_PRIVATE (cls)))
 			   && orig == OMP_CLAUSE_DECL (cls))
 		    {
 		      is_private = true;
@@ -7637,7 +7639,10 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 		&& !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)
 		&& TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
 	      x = build_simple_mem_ref (x);
-	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
+	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE
+		|| (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
+		    && (OMP_CLAUSE_MAP_KIND (c) & GOMP_MAP_TO)
+		    && OMP_CLAUSE_MAP_PRIVATE (c)))
 	      {
 		gcc_assert (is_gimple_omp_oacc (ctx->stmt));
 		if (omp_is_reference (new_var)
@@ -8505,7 +8510,24 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
       gimple_seq_add_seq (&new_body, join_seq);
 
       if (offloaded)
-	new_body = maybe_catch_exception (new_body);
+	{
+	  /* For OMP_CLAUSE_MAP_PRIVATE maps, add a copy back from private
+	     storage to receiver ref, for copying back to host.  */
+	  for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
+	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
+		&& (OMP_CLAUSE_MAP_KIND (c) & GOMP_MAP_FROM)
+		&& OMP_CLAUSE_MAP_PRIVATE (c))
+	      {
+		tree var = OMP_CLAUSE_DECL (c);
+		tree new_var = lookup_decl (var, ctx);
+		tree x = build_receiver_ref (var, true, ctx);
+		gimple_seq seq = NULL;
+		gimplify_assign (x, new_var, &seq);
+		gimple_seq_add_seq (&new_body, seq);
+	      }
+
+	  new_body = maybe_catch_exception (new_body);
+	}
 
       gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false));
       gimple_omp_set_body (stmt, new_body);
diff --git a/gcc/tree.h b/gcc/tree.h
index 79b675025d9..8bdbe3341bb 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1564,6 +1564,9 @@  extern tree maybe_wrap_with_location (tree, location_t);
 /* Nonzero if this map clause is for an ACC parallel reduction variable.  */
 #define OMP_CLAUSE_MAP_IN_REDUCTION(NODE) \
   TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP))
+/* Nozero if this map is loaded to private storage inside offloaded region.  */
+#define OMP_CLAUSE_MAP_PRIVATE(NODE) \
+  TREE_NOTHROW (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP))
 
 #define OMP_CLAUSE_PROC_BIND_KIND(NODE) \
   (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_PROC_BIND)->omp_clause.subcode.proc_bind_kind)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-9.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-9.c
new file mode 100644
index 00000000000..d6e02fc6d7e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-9.c
@@ -0,0 +1,41 @@ 
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (int argc, char *argv[])
+{
+#define N 100
+  int n = N;
+  int i, j, tmp;
+  int input[N*N], output[N], houtput[N];
+
+  for (i = 0; i < n * n; i++)
+    input[i] = i;
+
+  for (i = 0; i < n; i++)
+    {
+      tmp = 0;
+      for (j = 0; j < n; j++)
+	tmp += input[i * n + j];
+      houtput[i] = tmp;
+    }
+  
+  #pragma acc parallel loop gang
+  for (i = 0; i < n; i++)
+    {
+      tmp = 0;
+
+      #pragma acc loop worker reduction(+:tmp)
+      for (j = 0; j < n; j++)
+	tmp += input[i * n + j];
+
+      output[i] = tmp;
+    }
+
+  /* Test if every worker-level reduction had correct private result.  */
+  for (i = 0; i < n; i++)
+    if (houtput[i] != output[i])
+      abort ();
+
+  return 0;
+}
-- 
2.17.1