[1/6,og9] Target-dependent gang-private variable decl rewriting

Message ID 2c432092fae99930879687f88f2e8e97d29c786d.1567644180.git.julian@codesourcery.com
State New
Headers show
Series
  • OpenACC worker partitioning in middle end (AMD GCN)
Related show

Commit Message

Julian Brown Sept. 5, 2019, 1:45 a.m.
This patch adds support for rewriting variables marked up with the "oacc
gangprivate" attribute in a target-dependent way in the oaccdevlow pass
of the offload compiler.

This behaviour is controlled by a new target hook,
TARGET_GOACC_ADJUST_GANGPRIVATE_DECL. This is conceptually similar to
the existing TARGET_GOACC_EXPAND_ACCEL_VAR hook, but that one works too
late in the compilation process for AMD GCN.

The patch to set the "oacc gangprivate" attribute was posted upstream here:

https://gcc.gnu.org/ml/gcc-patches/2018-08/msg00749.html

A version of that is already present on the og9 branch.

Julian

ChangeLog

	gcc/
	* omp-offload.c (convert.h): Include.
	(struct addr_expr_rewrite_info): Add struct.
	(rewrite_addr_expr): New function.
	(is_sync_builtin_call): New function.
	(execute_oacc_device_lower): Support rewriting gang-private variables
	using target hook, and fix up addr_expr nodes afterwards.
	* target.def (adjust_gangprivate_decl): New target hook.
	* doc/tm.texi.in (TARGET_GOACC_ADJUST_GANGPRIVATE_DECL): Document new
	target hook.
	* doc/tm.texi: Regenerate.
---
 gcc/ChangeLog.openacc |  13 +++++
 gcc/doc/tm.texi       |   4 ++
 gcc/doc/tm.texi.in    |   2 +
 gcc/omp-offload.c     | 133 ++++++++++++++++++++++++++++++++++++++++++
 gcc/target.def        |   6 ++
 5 files changed, 158 insertions(+)

-- 
2.22.0

Patch

diff --git a/gcc/ChangeLog.openacc b/gcc/ChangeLog.openacc
index a22f07c817c..b1c627b394c 100644
--- a/gcc/ChangeLog.openacc
+++ b/gcc/ChangeLog.openacc
@@ -1,3 +1,16 @@ 
+2019-09-05  Julian Brown  <julian@codesourcery.com>
+
+	* omp-offload.c (convert.h): Include.
+	(struct addr_expr_rewrite_info): Add struct.
+	(rewrite_addr_expr): New function.
+	(is_sync_builtin_call): New function.
+	(execute_oacc_device_lower): Support rewriting gang-private variables
+	using target hook, and fix up addr_expr nodes afterwards.
+	* target.def (adjust_gangprivate_decl): New target hook.
+	* doc/tm.texi.in (TARGET_GOACC_ADJUST_GANGPRIVATE_DECL): Document new
+	target hook.
+	* doc/tm.texi: Regenerate.
+
 2019-08-13  Julian Brown  <julian@codesourcery.com>
 
 	* omp-oacc-kernels.c (add_wait): New function, split out of...
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 9b88498eb95..f3707c6abe3 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6162,6 +6162,10 @@  memories.  A return value of NULL indicates that the target does not
 handle this VAR_DECL, and normal RTL expanding is resumed.
 @end deftypefn
 
+@deftypefn {Target Hook} void TARGET_GOACC_ADJUST_GANGPRIVATE_DECL (tree @var{var})
+Tweak variable declaration for a gang-private variable.
+@end deftypefn
+
 @deftypefn {Target Hook} bool TARGET_GOACC_EXPLODE_ARGS (void)
 Define this hook to TRUE if arguments to offload regions should be
 exploded, i.e. passed as true arguments rather than in an argument array.
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index c9c4341a35f..cebadf4a502 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4210,6 +4210,8 @@  address;  but often a machine-dependent strategy can generate better code.
 
 @hook TARGET_GOACC_EXPAND_ACCEL_VAR
 
+@hook TARGET_GOACC_ADJUST_GANGPRIVATE_DECL
+
 @hook TARGET_GOACC_EXPLODE_ARGS
 
 @node Anchored Addresses
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index 1129b00511e..c94dc956d7e 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -52,6 +52,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "stringpool.h"
 #include "attribs.h"
 #include "cfgloop.h"
+#include "convert.h"
 
 /* Describe the OpenACC looping structure of a function.  The entire
    function is held in a 'NULL' loop.  */
@@ -1570,6 +1571,78 @@  maybe_discard_oacc_function (tree decl)
   return false;
 }
 
+struct addr_expr_rewrite_info  /* State handed to rewrite_addr_expr via wi->info.  */
+{
+  gimple *stmt;  /* Statement being walked; new statements go before it.  */
+  hash_set<tree> *adjusted_vars;  /* Decls whose ADDR_EXPRs get rewritten.  */
+  bool avoid_pointer_conversion;  /* If set, rebuild the ADDR_EXPR in place.  */
+  bool modified;  /* Set when an operand was replaced; caller then updates STMT.  */
+};
+
+static tree  /* Operand callback for walk_gimple_op; DATA is a walk_stmt_info.  */
+rewrite_addr_expr (tree *tp, int *walk_subtrees, void *data)
+{
+  walk_stmt_info *wi = (walk_stmt_info *) data;
+  addr_expr_rewrite_info *info = (addr_expr_rewrite_info *) wi->info;
+
+  if (TREE_CODE (*tp) == ADDR_EXPR)
+    {
+      tree arg = TREE_OPERAND (*tp, 0);  /* NOTE(review): &var.f / &var[i] not looked through -- confirm.  */
+
+      if (info->adjusted_vars->contains (arg))  /* ARG itself must be an adjusted decl.  */
+	{
+	  if (info->avoid_pointer_conversion)
+	    {
+	      *tp = build_fold_addr_expr (arg);  /* Re-take ARG's address; no conversion emitted.  */
+	      info->modified = true;
+	      *walk_subtrees = 0;  /* Do not walk into the replacement.  */
+	    }
+	  else  /* Take the address, then convert it back to *TP's original type.  */
+	    {
+	      gimple_stmt_iterator gsi = gsi_for_stmt (info->stmt);
+	      tree repl = build_fold_addr_expr (arg);
+	      gimple *stmt1
+		= gimple_build_assign (make_ssa_name (TREE_TYPE (repl)), repl);  /* New SSA name = &ARG.  */
+	      tree conv = convert_to_pointer (TREE_TYPE (*tp),
+					      gimple_assign_lhs (stmt1));
+	      gimple *stmt2
+		= gimple_build_assign (make_ssa_name (TREE_TYPE (*tp)), conv);  /* New SSA name of *TP's type.  */
+	      gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
+	      gsi_insert_before (&gsi, stmt2, GSI_SAME_STMT);  /* Order is now stmt1, stmt2, info->stmt.  */
+	      *tp = gimple_assign_lhs (stmt2);  /* Operand now uses stmt2's result.  */
+	      info->modified = true;
+	      *walk_subtrees = 0;  /* Do not walk into the replacement.  */
+	    }
+	}
+    }
+
+  return NULL_TREE;  /* Never ends the walk early.  */
+}
+
+/* Return TRUE if CALL calls a builtin atomic/sync operation (sync-builtins.def).  */
+
+static bool
+is_sync_builtin_call (gcall *call)
+{
+  tree callee = gimple_call_fndecl (call);
+
+  if (callee != NULL_TREE  /* Without a callee decl there is no function code to test.  */
+      && gimple_call_builtin_p (call, BUILT_IN_NORMAL))
+    switch (DECL_FUNCTION_CODE (callee))
+      {
+#undef DEF_SYNC_BUILTIN  /* Drop any earlier definition before redefining.  */
+#define DEF_SYNC_BUILTIN(ENUM, NAME, TYPE, ATTRS) case ENUM:
+#include "sync-builtins.def"  /* Each DEF_SYNC_BUILTIN entry becomes "case ENUM:".  */
+#undef DEF_SYNC_BUILTIN
+	return true;  /* Reached from any of the generated case labels.  */
+
+      default:
+	;  /* Any other function code: drop out to "return false".  */
+      }
+
+  return false;
+}
+
 /* Main entry point for oacc transformations which run on the device
    compiler after LTO, so we know what the target device is at this
    point (including the host fallback).  */
@@ -1815,6 +1888,66 @@  execute_oacc_device_lower ()
 	  gsi_next (&gsi);
       }
 
+  /* Make adjustments to gang-private local variables if required by the
+     target, e.g. forcing them into a particular address space.  Afterwards,
+     ADDR_EXPR nodes which have adjusted variables as their argument need to
+     be modified in one of two ways:
+
+       1. They can be recreated, making a pointer to the variable in the new
+	  address space, or
+
+       2. The address of the variable in the new address space can be taken,
+	  converted to the default (original) address space, and the result of
+	  that conversion substituted in place of the original ADDR_EXPR node.
+
+     Which of these is done depends on the gimple statement being processed.
+     At present atomic operations and inline asms use (1), and everything else
+     uses (2).  At least on AMD GCN, there are atomic operations that work
+     directly in the LDS address space.  */
+
+  if (targetm.goacc.adjust_gangprivate_decl)
+    {
+      tree var;
+      unsigned i;
+      hash_set<tree> adjusted_vars;
+
+      FOR_EACH_LOCAL_DECL (cfun, i, var)
+	{
+	  if (!VAR_P (var)
+	      || !lookup_attribute ("oacc gangprivate", DECL_ATTRIBUTES (var)))
+	    continue;
+
+	  targetm.goacc.adjust_gangprivate_decl (var);
+	  adjusted_vars.add (var);
+	}
+
+      FOR_ALL_BB_FN (bb, cfun)
+	for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+	     !gsi_end_p (gsi);
+	     gsi_next (&gsi))
+	  {
+	    gimple *stmt = gsi_stmt (gsi);
+	    walk_stmt_info wi;
+	    addr_expr_rewrite_info info;
+
+	    info.avoid_pointer_conversion
+	      = (is_gimple_call (stmt)
+		 && is_sync_builtin_call (as_a <gcall *> (stmt)))
+		|| gimple_code (stmt) == GIMPLE_ASM;
+	    info.stmt = stmt;
+	    info.modified = false;
+	    info.adjusted_vars = &adjusted_vars;
+
+	    memset (&wi, 0, sizeof (wi));
+	    wi.info = &info;
+
+	    walk_gimple_op (stmt, rewrite_addr_expr, &wi);
+
+	    if (info.modified)
+	      update_stmt (stmt);
+	  }
+    }
+
   free_oacc_loop (loops);
 
   return 0;
diff --git a/gcc/target.def b/gcc/target.def
index d26b888a485..d82db232e40 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1729,6 +1729,12 @@  handle this VAR_DECL, and normal RTL expanding is resumed.",
 rtx, (tree var),
 NULL)
 
+DEFHOOK
+(adjust_gangprivate_decl,
+"Tweak variable declaration for a gang-private variable.",
+void, (tree var),
+NULL)  /* NULL default: execute_oacc_device_lower tests the hook before calling it.  */
+
 DEFHOOK
 (explode_args,
 "Define this hook to TRUE if arguments to offload regions should be\n\