[og8] More goacc_parlevel enhancements

Message ID fa326873-8ca7-62bb-529c-d7b4d406a1f4@codesourcery.com
State New
Headers show
Series
  • [og8] More goacc_parlevel enhancements
Related show

Commit Message

Cesar Philippidis July 31, 2018, 5:13 p.m.
I've committed this patch which contains all of the remaining
goacc_parlevel bug fixes present in trunk to og8.

The goal of the goacc parlevel changes is replace the use of inline ptx
code with builtin functions so that the certain OpenACC execution tests
that exercise the execution model can be target independent. For the
most part, these patches applied cleanly to og8, however, as I noted in
PR86757, there were a couple of og8-specific regressions involving tests
that started to fail when built -O0. I believe that problem is caused by
the ganglocal memory changes.

Chung-Lin, we'll need to fix PR86757 before we push the gangprivate
changes upstream.

Julian, I'm not sure if the GCN port supports gangprivate memory. If it
does, you might be hit by this failure at -O0. But those tests have
already been xfailed, so you should be OK.

Cesar

Patch

[og8] More goacc_parlevel enhancements

2018-07-31  Cesar Philippidis  <cesar@codesourcery.com>

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c: Adjust test.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-w-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-g-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-w-1.c: Likewise.

	Backport from mainline:
	2018-05-02  Tom de Vries  <tom@codesourcery.com>

	PR libgomp/85411
	libgomp/
	* plugin/plugin-nvptx.c (nvptx_exec): Move parsing of
	GOMP_OPENACC_DIM ...
	* env.c (parse_gomp_openacc_dim): ... here.  New function.
	(initialize_env): Call parse_gomp_openacc_dim.
	(goacc_default_dims): Define.
	* libgomp.h (goacc_default_dims): Declare.
	* oacc-plugin.c (GOMP_PLUGIN_acc_default_dim): New function.
	* oacc-plugin.h (GOMP_PLUGIN_acc_default_dim): Declare.
	* libgomp.map: New version "GOMP_PLUGIN_1.2". Add
	GOMP_PLUGIN_acc_default_dim.
	* testsuite/libgomp.oacc-c-c++-common/loop-default-runtime.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/loop-default.h: New test.

	2018-05-04  Tom de Vries  <tom@codesourcery.com>
	PR libgomp/85639
	gcc/
	* builtins.c (expand_builtin_goacc_parlevel_id_size): Handle null target
	if ignore == 0.

	2018-05-07  Tom de Vries  <tom@codesourcery.com>
	PR testsuite/85677
	libgomp/
	* testsuite/lib/libgomp.exp (libgomp_init): Move inclusion of top-level
	include directory in ALWAYS_CFLAGS out of $blddir != "" condition.

[openacc] Move GOMP_OPENACC_DIM parsing out of nvptx plugin

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@259852
138bc75d-0d04-0410-961f-82ee72b054a4

[expand] Handle null target in expand_builtin_goacc_parlevel_id_size

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@259927
138bc75d-0d04-0410-961f-82ee72b054a4

[openacc, testsuite] Allow installed testing of libgomp to find gomp-constants.h

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@259992
138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 300e13c..0097d5b 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -6682,6 +6682,9 @@  expand_builtin_goacc_parlevel_id_size (tree exp, rtx target, int ignore)
   if (ignore)
     return target;
 
+  if (target == NULL_RTX)
+    target = gen_reg_rtx (TYPE_MODE (TREE_TYPE (exp)));
+
   if (!targetm.have_oacc_dim_size ())
     {
       emit_move_insn (target, fallback_retval);
diff --git a/libgomp/env.c b/libgomp/env.c
index c99ba85..fab35b7 100644
--- a/libgomp/env.c
+++ b/libgomp/env.c
@@ -90,6 +90,7 @@  int gomp_debug_var;
 unsigned int gomp_num_teams_var;
 char *goacc_device_type;
 int goacc_device_num;
+int goacc_default_dims[GOMP_DIM_MAX];
 
 #ifndef LIBGOMP_OFFLOADED_ONLY
 
@@ -1066,6 +1067,36 @@  parse_acc_device_type (void)
 }
 
 static void
+parse_gomp_openacc_dim (void)
+{
+  /* The syntax is the same as for the -fopenacc-dim compilation option.  */
+  const char *var_name = "GOMP_OPENACC_DIM";
+  const char *env_var = getenv (var_name);
+  if (!env_var)
+    return;
+
+  const char *pos = env_var;
+  int i;
+  for (i = 0; *pos && i != GOMP_DIM_MAX; i++)
+    {
+      if (i && *pos++ != ':')
+	break;
+
+      if (*pos == ':')
+	continue;
+
+      const char *eptr;
+      errno = 0;
+      long val = strtol (pos, (char **)&eptr, 10);
+      if (errno || val < 0 || (unsigned)val != val)
+	break;
+
+      goacc_default_dims[i] = (int)val;
+      pos = eptr;
+    }
+}
+
+static void
 handle_omp_display_env (unsigned long stacksize, int wait_policy)
 {
   const char *env;
@@ -1336,6 +1367,7 @@  initialize_env (void)
     goacc_device_num = 0;
 
   parse_acc_device_type ();
+  parse_gomp_openacc_dim ();
 
   goacc_runtime_initialize ();
 
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index a9aca74..607f4c2 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -44,6 +44,7 @@ 
 #include "config.h"
 #include "gstdint.h"
 #include "libgomp-plugin.h"
+#include "gomp-constants.h"
 
 #ifdef HAVE_PTHREAD_H
 #include <pthread.h>
@@ -367,6 +368,7 @@  extern unsigned int gomp_num_teams_var;
 extern int gomp_debug_var;
 extern int goacc_device_num;
 extern char *goacc_device_type;
+extern int goacc_default_dims[GOMP_DIM_MAX];
 
 enum gomp_task_kind
 {
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 7a49acc..595b988 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -485,6 +485,7 @@  GOMP_PLUGIN_1.1 {
 GOMP_PLUGIN_1.2 {
   global:
 	GOMP_PLUGIN_acc_thread_default_async;
+	GOMP_PLUGIN_acc_default_dim;
 } GOMP_PLUGIN_1.1;
 
 # TODO
diff --git a/libgomp/oacc-plugin.c b/libgomp/oacc-plugin.c
index 10a1169..01fe354 100644
--- a/libgomp/oacc-plugin.c
+++ b/libgomp/oacc-plugin.c
@@ -60,3 +60,14 @@  GOMP_PLUGIN_acc_thread_default_async (void)
   struct goacc_thread *thr = goacc_thread ();
   return thr ? thr->default_async : acc_async_default;
 }
+
+int
+GOMP_PLUGIN_acc_default_dim (unsigned int i)
+{
+  if (i >= GOMP_DIM_MAX)
+    {
+      gomp_fatal ("invalid dimension argument: %d", i);
+      return -1;
+    }
+  return goacc_default_dims[i];
+}
diff --git a/libgomp/oacc-plugin.h b/libgomp/oacc-plugin.h
index 52949ca..dc60530 100644
--- a/libgomp/oacc-plugin.h
+++ b/libgomp/oacc-plugin.h
@@ -31,6 +31,7 @@ 
 
 extern void GOMP_PLUGIN_async_unmap_vars (void *, int);
 extern void *GOMP_PLUGIN_acc_thread (void);
+extern int GOMP_PLUGIN_acc_default_dim (unsigned int);
 extern struct goacc_thread *GOMP_PLUGIN_goacc_thread (void);
 extern int GOMP_PLUGIN_acc_thread_default_async (void);
 
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 43be4cf..a1c12bf 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -757,26 +757,8 @@  nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
       const char *env_var = getenv (var_name);
       notify_var (var_name, env_var);
       if (env_var)
-	{
-	  const char *pos = env_var;
-
-	  for (i = 0; *pos && i != GOMP_DIM_MAX; i++)
-	    {
-	      if (i && *pos++ != ':')
-		break;
-	      if (*pos != ':')
-		{
-		  const char *eptr;
-
-		  errno = 0;
-		  long val = strtol (pos, (char **)&eptr, 10);
-		  if (errno || val < 0 || (unsigned)val != val)
-		    break;
-		  default_dims[i] = (int)val;
-		  pos = eptr;
-		}
-	    }
-	}
+	for (int i = 0; i < GOMP_DIM_MAX; ++i)
+	  default_dims[i] = GOMP_PLUGIN_acc_default_dim (i);
 
       /* 32 is the default for known hardware.  */
       int gang = 0, worker = 32, vector = 32;
diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp
index 88af438..e5b5308 100644
--- a/libgomp/testsuite/lib/libgomp.exp
+++ b/libgomp/testsuite/lib/libgomp.exp
@@ -184,9 +184,9 @@  proc libgomp_init { args } {
         lappend ALWAYS_CFLAGS "additional_flags=-B${blddir}/.libs"
         lappend ALWAYS_CFLAGS "additional_flags=-I${blddir}"
         lappend ALWAYS_CFLAGS "ldflags=-L${blddir}/.libs"
-	# The top-level include directory, for gomp-constants.h.
-	lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/../../include"
     }
+    # The top-level include directory, for gomp-constants.h.
+    lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/../../include"
     lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/.."
 
     # For build-tree testing, also consider the library paths used for builing.
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default-runtime.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default-runtime.c
index eb00d32..c6110a1 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default-runtime.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default-runtime.c
@@ -1,12 +1,13 @@ 
-/* { dg-additional-options "-fopenacc-dim=-:-" } */
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-/* { dg-set-target-env-var "GOMP_OPENACC_DIM" "8:16" } */
+/* { dg-set-target-env-var GOMP_OPENACC_DIM "8::" } */
 
 #include "loop-default.h"
+#include <stdlib.h>
 
-int main ()
+int
+main ()
 {
-  return test_1 (8, 16, 32);
+  if (check_gang (8) != 0)
+    abort ();
+
+  return 0;
 }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default.h
index 162c1d9..a9e2693 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default.h
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default.h
@@ -1,5 +1,3 @@ 
-/* { dg-additional-options "-fopenacc-dim=16:16" } */
-
 #include <openacc.h>
 #include <alloca.h>
 #include <string.h>
@@ -7,24 +5,26 @@ 
 #include <gomp-constants.h>
 
 #pragma acc routine seq
-static int __attribute__ ((noinline)) coord ()
+static int __attribute__ ((noinline))
+coord (void)
 {
   int res = 0;
 
-  if (acc_on_device (acc_device_not_host))
+  if (acc_on_device (acc_device_nvidia))
     {
-      int g, w, v;
-
+      int g = 0, w = 0, v = 0;
       g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
       w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
       v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
+
       res = (1 << 24) | (g << 16) | (w << 8) | v;
     }
+
   return res;
 }
 
-
-int check (const int *ary, int size, int gp, int wp, int vp)
+static int
+check (const int *ary, int size, int gp, int wp, int vp)
 {
   int exit = 0;
   int ix;
@@ -32,11 +32,11 @@  int check (const int *ary, int size, int gp, int wp, int vp)
   int *workers = (int *)alloca (wp * sizeof (int));
   int *vectors = (int *)alloca (vp * sizeof (int));
   int offloaded = 0;
-  
+
   memset (gangs, 0, gp * sizeof (int));
   memset (workers, 0, wp * sizeof (int));
   memset (vectors, 0, vp * sizeof (int));
-  
+
   for (ix = 0; ix < size; ix++)
     {
       int g = (ary[ix] >> 16) & 0xff;
@@ -72,31 +72,30 @@  int check (const int *ary, int size, int gp, int wp, int vp)
 	printf ("gang %d not used %d times\n", ix, gangs[0]);
 	exit = 1;
       }
-  
+
   for (ix = 0; ix < wp; ix++)
     if (workers[ix] != workers[0])
       {
 	printf ("worker %d not used %d times\n", ix, workers[0]);
 	exit = 1;
       }
-  
+
   for (ix = 0; ix < vp; ix++)
     if (vectors[ix] != vectors[0])
       {
 	printf ("vector %d not used %d times\n", ix, vectors[0]);
 	exit = 1;
       }
-  
+
   return exit;
 }
 
-#define N (32 *32*32)
+#define N (32 * 32 * 32)
+int ary[N];
 
-int test_1 (int gp, int wp, int vp)
+static int
+check_gang (int gp)
 {
-  int ary[N];
-  int exit = 0;
-  
 #pragma acc parallel copyout (ary)
   {
 #pragma acc loop gang (static:1)
@@ -104,8 +103,12 @@  int test_1 (int gp, int wp, int vp)
       ary[ix] = coord ();
   }
 
-  exit |= check (ary, N, gp, 1, 1);
+  return check (ary, N, gp, 1, 1);
+}
 
+static int
+check_worker (int wp)
+{
 #pragma  acc parallel copyout (ary)
   {
 #pragma acc loop worker
@@ -113,8 +116,12 @@  int test_1 (int gp, int wp, int vp)
       ary[ix] = coord ();
   }
 
-  exit |= check (ary, N, 1, wp, 1);
+  return check (ary, N, 1, wp, 1);
+}
 
+static int
+check_vector (int vp)
+{
 #pragma  acc parallel copyout (ary)
   {
 #pragma acc loop vector
@@ -122,7 +129,17 @@  int test_1 (int gp, int wp, int vp)
       ary[ix] = coord ();
   }
 
-  exit |= check (ary, N, 1, 1, vp);
+  return check (ary, N, 1, 1, vp);
+}
+
+static int
+test_1 (int gp, int wp, int vp)
+{
+  int exit = 0;
+
+  exit |= check_gang (gp);
+  exit |= check_worker (wp);
+  exit |= check_vector (vp);
 
   return exit;
 }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
index 766e578..2fecac0 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
@@ -1,3 +1,5 @@ 
+/* { dg-xfail-run-if "gangprivate failure" { openacc_nvidia_accel_selected } { "-O0" } { "" } } */
+
 #include <stdio.h>
 #include <openacc.h>
 #include <gomp-constants.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
index 0bec6e1..384f2ac 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
@@ -1,3 +1,5 @@ 
+/* { dg-xfail-run-if "gangprivate failure" { openacc_nvidia_accel_selected } { "-O0" } { "" } } */
+
 #include <stdio.h>
 #include <openacc.h>
 #include <gomp-constants.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
index 15e2bc2..f919117 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
@@ -1,3 +1,5 @@ 
+/* { dg-xfail-run-if "n/a" { openacc_nvidia_accel_selected } { "-O0" "-O1" } { "" } } */
+
 #include <stdio.h>
 #include <openacc.h>
 #include <gomp-constants.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
index 6bbd04f..fcfa7ab 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
@@ -1,3 +1,5 @@ 
+/* { dg-additional-options "-w" } */
+
 #include <stdio.h>
 #include <openacc.h>
 #include <gomp-constants.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
index c63a5d4..23d288c 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
@@ -1,3 +1,6 @@ 
+/* { dg-additional-options "-w" } */
+/* { dg-xfail-run-if "gangprivate failure" { openacc_nvidia_accel_selected } { "-O0" } { "" } } */
+
 #include <stdio.h>
 #include <openacc.h>
 #include <gomp-constants.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
index 05e5d67..10b80f1 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
@@ -14,7 +14,7 @@  int main ()
     ary[ix] = -1;
   
 #pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
-  /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } 18 } */
+  /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } 16 } */
   {
 #pragma acc loop worker
     for (unsigned ix = 0; ix < N; ix++)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
index f223afa..26bb9fe 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -157,7 +157,7 @@  int main ()
     gangs_min = workers_min = vectors_min = INT_MAX;
     gangs_max = workers_max = vectors_max = INT_MIN;
 #pragma acc parallel copy (vectors_actual) /* { dg-warning "region contains vector partitoned code but is not vector partitioned" } */ \
-  /* { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } 170 } */ \
+  /* { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } 159 } */ \
   vector_length (VECTORS) /* { dg-warning "'vector_length' value must be positive" "" { target c++ } } */
     {
       /* We're actually executing with vector_length (1), just the GCC nvptx
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c
index d211782..8c3b938 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c
@@ -6,8 +6,8 @@ 
 
 #pragma acc routine gang
 void __attribute__ ((noinline)) gang (int ary[N])
-/* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "worker" { target *-*-* } 10 } */
-/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } 10 } */
+/* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "worker" { target *-*-* } 8 } */
+/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } 8 } */
 {
 #pragma acc loop gang
     for (unsigned ix = 0; ix < N; ix++)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
index 77d1d00..e14947c 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
@@ -6,7 +6,7 @@ 
 
 #pragma acc routine worker
 void __attribute__ ((noinline)) worker (int ary[N])
-/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } 10 } */
+/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } 8 } */
 {
 #pragma acc loop worker
   for (unsigned ix = 0; ix < N; ix++)
-- 
2.7.4