[6/7,amdgcn] Use a single worker for OpenACC on AMD GCN

Message ID 4a85b8bd8b8acbd62c50120b72abbc5458e0715e.1573560401.git.ams@codesourcery.com
State New
Headers show
Series
  • AMD GCN Offloading Support
Related show

Commit Message

Andrew Stubbs Nov. 12, 2019, 1:29 p.m.
This patch prevents the compiler using multiple workers in a gang.  This
should be reverted when worker support is committed.

I will commit this with the reset of the series.

Andrew


2019-11-12  Andrew Stubbs  <ams@codesourcery.com>
	    Julian Brown  <julian@codesourcery.com>

	gcc/
	* config/gcn/gcn.c (gcn_goacc_validate_dims): Ensure
	flag_worker_partitioning is not set.
	(TARGET_GOACC_WORKER_PARTITIONING): Remove target hook definition.
	* config/gcn/gcn.opt (macc-experimental-workers): Default to off.
---
 gcc/config/gcn/gcn.c   | 4 ++--
 gcc/config/gcn/gcn.opt | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

Comments

Thomas Schwinge June 8, 2021, 10:07 a.m. | #1
Hi!

On 2019-11-12T13:29:15+0000, Andrew Stubbs <ams@codesourcery.com> wrote:
> This patch prevents the compiler using multiple workers in a gang.


Almost.  The GCN back end fails to enforce this for the case of run-time
variable 'num_workers': that's 'dims[GOMP_DIM_WORKER] == 0', and the
current 'gcc/config/gcn/gcn.c:gcn_goacc_validate_dims' logic doesn't
consider that case:

    /* Check the num workers is not too large.  */
    if (dims[GOMP_DIM_WORKER] > max_workers)
      {
        warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION,
                    OPT_Wopenacc_dims,
                    "using num_workers (%d), ignoring %d",
                    max_workers, dims[GOMP_DIM_WORKER]);
        dims[GOMP_DIM_WORKER] = max_workers;

We could fix that either here, or simply in the GCN libgomp plugin.  I've
pushed "[GCN] Fix run-time variable 'num_workers'" to master branch in
commit 30656822b3792712c7a69fe1a0a79739f8f29abc, see attached.  As
detailed there, this actually affects/fixes a small number of testcases.

> This

> should be reverted when worker support is committed.


ACK; working on that.


Grüße
 Thomas


> 2019-11-12  Andrew Stubbs  <ams@codesourcery.com>

>           Julian Brown  <julian@codesourcery.com>

>

>       gcc/

>       * config/gcn/gcn.c (gcn_goacc_validate_dims): Ensure

>       flag_worker_partitioning is not set.

>       (TARGET_GOACC_WORKER_PARTITIONING): Remove target hook definition.

>       * config/gcn/gcn.opt (macc-experimental-workers): Default to off.

> ---

>  gcc/config/gcn/gcn.c   | 4 ++--

>  gcc/config/gcn/gcn.opt | 2 +-

>  2 files changed, 3 insertions(+), 3 deletions(-)

>

> diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c

> index cdd24277cf6..1a69737f693 100644

> --- a/gcc/config/gcn/gcn.c

> +++ b/gcc/config/gcn/gcn.c

> @@ -4695,6 +4695,8 @@ gcn_goacc_validate_dims (tree decl, int dims[], int fn_level,

>    /* FIXME: remove -facc-experimental-workers when they're ready.  */

>    int max_workers = flag_worker_partitioning ? 16 : 1;

>

> +  gcc_assert (!flag_worker_partitioning);

> +

>    /* The vector size must appear to be 64, to the user, unless this is a

>       SEQ routine.  The real, internal value is always 1, which means use

>       autovectorization, but the user should not see that.  */

> @@ -6073,8 +6075,6 @@ print_operand (FILE *file, rtx x, int code)

>  #define TARGET_GOACC_REDUCTION gcn_goacc_reduction

>  #undef  TARGET_GOACC_VALIDATE_DIMS

>  #define TARGET_GOACC_VALIDATE_DIMS gcn_goacc_validate_dims

> -#undef  TARGET_GOACC_WORKER_PARTITIONING

> -#define TARGET_GOACC_WORKER_PARTITIONING true

>  #undef  TARGET_HARD_REGNO_MODE_OK

>  #define TARGET_HARD_REGNO_MODE_OK gcn_hard_regno_mode_ok

>  #undef  TARGET_HARD_REGNO_NREGS

> diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt

> index bdc878f35ad..402deb625bd 100644

> --- a/gcc/config/gcn/gcn.opt

> +++ b/gcc/config/gcn/gcn.opt

> @@ -65,7 +65,7 @@ Target Report RejectNegative Var(flag_bypass_init_error)

>  bool flag_worker_partitioning = false

>

>  macc-experimental-workers

> -Target Report Var(flag_worker_partitioning) Init(1)

> +Target Report Var(flag_worker_partitioning) Init(0)

>

>  int stack_size_opt = -1

>



-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank Thürauf
From 30656822b3792712c7a69fe1a0a79739f8f29abc Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>

Date: Sat, 5 Jun 2021 22:39:21 +0200
Subject: [PATCH] [GCN] Fix run-time variable 'num_workers'

... which currently has *not* been forced to 'num_workers (1)'.

In addition to the testcases modified here, this also fixes:

    FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/mode-transitions.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O0  execution test
    [Etc.]

    mode-transitions.exe: [...]/libgomp.oacc-c-c++-common/mode-transitions.c:702: t17: Assertion `arr_b[i] == (i ^ 31) * 8' failed.

	libgomp/
	* plugin/plugin-gcn.c (gcn_exec): Force 'num_workers (1)'
	unconditionally.
	* testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c:
	Update.
	* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Likewise.
---
 libgomp/plugin/plugin-gcn.c                                  | 5 ++---
 .../testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c | 4 +---
 libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c  | 5 +++--
 libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c   | 3 ++-
 4 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 8aab708b0ef..cfed42a2d4d 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -3041,10 +3041,9 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs,
      problem size, so let's do a reasonable number of single-worker gangs.
      64 gangs matches a typical Fiji device.  */
 
-  /* NOTE: Until support for middle-end worker partitioning is merged, use 1
-     for the default number of workers.  */
   if (dims[0] == 0) dims[0] = get_cu_count (kernel->agent); /* Gangs.  */
-  if (dims[1] == 0) dims[1] = 1;  /* Workers.  */
+  /* NOTE: Until support for middle-end worker partitioning is merged, force 'num_workers (1)'.  */
+  if (/*TODO dims[1] == 0*/ true) dims[1] = 1;  /* Workers.  */
 
   /* The incoming dimensions are expressed in terms of gangs, workers, and
      vectors.  The HSA dimensions are expressed in terms of "work-items",
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c
index 7f74ee922b7..6c136c26c93 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c
@@ -94,9 +94,7 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
   if (num_workers < 1)
     assert (event_info->launch_event.num_workers >= 1);
   /* GCN currently enforces 'num_workers (1)'.  */
-  else if (acc_device_type == acc_device_radeon
-	   /*TODO ... just not in the "Parallelism dimensions: variable" case.  */
-	   && /*TODO*/ num_gangs != 22)
+  else if (acc_device_type == acc_device_radeon)
     assert (event_info->launch_event.num_workers == 1);
   else
     {
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
index 974e1504534..fe0dacd5aac 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -313,8 +313,9 @@ int main ()
 	}
       else if (acc_on_device (acc_device_radeon))
 	{
-	  /* The GCC GCN back end is limited to num_workers (16).  */
-	  workers_actual = 16;
+	  /* The GCC GCN back end is limited to num_workers (16).
+	     Temporarily set this to 1 until multiple workers are permitted. */
+	  workers_actual = 1; // 16;
 	}
       else
 	__builtin_abort ();
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
index a03a2c2b163..624ec24e437 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
@@ -6,7 +6,8 @@
 #include <gomp-constants.h>
 
 #ifdef ACC_DEVICE_TYPE_radeon
-#define NUM_WORKERS 16
+/* Temporarily set this to 1 until multiple workers are permitted.  */
+#define NUM_WORKERS 1
 #define NUM_VECTORS 1
 #else
 #define NUM_WORKERS 16
-- 
2.30.2

Patch

diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index cdd24277cf6..1a69737f693 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -4695,6 +4695,8 @@  gcn_goacc_validate_dims (tree decl, int dims[], int fn_level,
   /* FIXME: remove -facc-experimental-workers when they're ready.  */
   int max_workers = flag_worker_partitioning ? 16 : 1;
 
+  gcc_assert (!flag_worker_partitioning);
+
   /* The vector size must appear to be 64, to the user, unless this is a
      SEQ routine.  The real, internal value is always 1, which means use
      autovectorization, but the user should not see that.  */
@@ -6073,8 +6075,6 @@  print_operand (FILE *file, rtx x, int code)
 #define TARGET_GOACC_REDUCTION gcn_goacc_reduction
 #undef  TARGET_GOACC_VALIDATE_DIMS
 #define TARGET_GOACC_VALIDATE_DIMS gcn_goacc_validate_dims
-#undef  TARGET_GOACC_WORKER_PARTITIONING
-#define TARGET_GOACC_WORKER_PARTITIONING true
 #undef  TARGET_HARD_REGNO_MODE_OK
 #define TARGET_HARD_REGNO_MODE_OK gcn_hard_regno_mode_ok
 #undef  TARGET_HARD_REGNO_NREGS
diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt
index bdc878f35ad..402deb625bd 100644
--- a/gcc/config/gcn/gcn.opt
+++ b/gcc/config/gcn/gcn.opt
@@ -65,7 +65,7 @@  Target Report RejectNegative Var(flag_bypass_init_error)
 bool flag_worker_partitioning = false
 
 macc-experimental-workers
-Target Report Var(flag_worker_partitioning) Init(1)
+Target Report Var(flag_worker_partitioning) Init(0)
 
 int stack_size_opt = -1