openmp: ignore nowait if async execution is unsupported [PR93481]

Message ID 3b2c0707-cc23-c3e7-82ad-de885abe85d5@codesourcery.com
State New
Headers show
Series
  • openmp: ignore nowait if async execution is unsupported [PR93481]
Related show

Commit Message

Frederik Harwath Feb. 13, 2020, 8:04 a.m.
Hi Jakub,

On 10.02.20 08:49, Harwath, Frederik wrote:

>> There has been even in some PR a suggestion that instead of failing

>> in nvptx async_run we should just ignore the nowait clause if the plugin

>> doesn't implement it properly.

> 

> This must be https://gcc.gnu.org/PR93481.


The attached patch implements the behavior that has been suggested in the PR.
It makes GOMP_OFFLOAD_async_run optional, removes the stub which produces
the error described in the PR from the nvptx plugin, and changes the nowait-handling
to ignore the clause if GOMP_OFFLOAD_async_run is not available for the executing
device's plugin. I have tested the patch by running the full libgomp testsuite with
nvptx-none offloading on x86_64-linux-gnu. I have observed no regressions.

Ok to push the commit to master?

For the record: Someone should implement GOMP_OFFLOAD_async_run properly
in the nvtpx plugin.

Best regards,
Frederik

Comments

Jakub Jelinek Feb. 13, 2020, 8:30 a.m. | #1
On Thu, Feb 13, 2020 at 09:04:36AM +0100, Harwath, Frederik wrote:
> --- a/libgomp/target.c

> +++ b/libgomp/target.c

> @@ -2022,6 +2022,16 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,

>    gomp_unmap_vars (tgt_vars, true);

>  }

>  

> +static unsigned int


Add inline?

> +clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)

> +{

> +  /* If we cannot run asynchronously, simply ignore nowait.  */

> +  if (devicep != NULL && devicep->async_run_func == NULL)

> +    flags &= ~GOMP_TARGET_FLAG_NOWAIT;

> +

> +  return flags;

> +}

> +

>  /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,

>     and several arguments have been added:

>     FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.

> @@ -2054,6 +2064,8 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,

>    size_t tgt_align = 0, tgt_size = 0;

>    bool fpc_done = false;

>  

> +  flags = clear_unsupported_flags (devicep, flags);

> +

>    if (flags & GOMP_TARGET_FLAG_NOWAIT)

>      {

>        struct gomp_thread *thr = gomp_thread ();


LGTM.

> @@ -2257,6 +2269,8 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,

>  {

>    struct gomp_device_descr *devicep = resolve_device (device);

>  

> +  flags = clear_unsupported_flags (devicep, flags);

> +

>    /* If there are depend clauses, but nowait is not present,

>       block the parent task until the dependencies are resolved

>       and then just continue with the rest of the function as if it

> @@ -2398,6 +2412,8 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,

>  {

>    struct gomp_device_descr *devicep = resolve_device (device);

>  

> +  flags = clear_unsupported_flags (devicep, flags);

> +

>    /* If there are depend clauses, but nowait is not present,

>       block the parent task until the dependencies are resolved

>       and then just continue with the rest of the function as if it


I don't see why you need to do the above two.  GOMP_TARGET_TASK_DATA
is done on the host side, async_run callback isn't called in that case
and while we create a task, all we do is wait for the (host) dependencies
in there and then perform the data transfer we need.
I think it is perfectly fine to ignore nowait on target but honor it
on target update or target {enter,exit} data.

> @@ -2524,6 +2540,7 @@ gomp_target_task_fn (void *data)

>  	}

>        ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

>  

> +      assert (devicep->async_run_func);

>        devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,

>  			       ttask->args, (void *) ttask);

>        return true;

> @@ -3040,7 +3057,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,

>    if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)

>      {

>        DLSYM (run);

> -      DLSYM (async_run);

> +      DLSYM_OPT (async_run, async_run);

>        DLSYM_OPT (can_run, can_run);

>        DLSYM (dev2dev);

>      }

> diff --git a/libgomp/testsuite/libgomp.c/target-33.c b/libgomp/testsuite/libgomp.c/target-33.c

> index 15d2d7e38ab..1bed4b6bc67 100644

> --- a/libgomp/testsuite/libgomp.c/target-33.c

> +++ b/libgomp/testsuite/libgomp.c/target-33.c

> @@ -1,6 +1,3 @@

> -/* { dg-xfail-run-if "GOMP_OFFLOAD_async_run not implemented" { offload_target_nvptx } }

> -   Cf. https://gcc.gnu.org/PR81688.  */

> -

>  extern void abort (void);

>  

>  int

> diff --git a/libgomp/testsuite/libgomp.c/target-34.c b/libgomp/testsuite/libgomp.c/target-34.c

> index 5a3596424d8..66d9f54202b 100644

> --- a/libgomp/testsuite/libgomp.c/target-34.c

> +++ b/libgomp/testsuite/libgomp.c/target-34.c

> @@ -1,6 +1,3 @@

> -/* { dg-xfail-run-if "GOMP_OFFLOAD_async_run not implemented" { offload_target_nvptx } }

> -   Cf. https://gcc.gnu.org/PR81688.  */

> -

>  extern void abort (void);

>  

>  int

> -- 

> 2.17.1

> 

> 


Otherwise LGTM.

	Jakub
Frederik Harwath Feb. 13, 2020, 9:36 a.m. | #2
Hi Jakub,

On 13.02.20 09:30, Jakub Jelinek wrote:
> On Thu, Feb 13, 2020 at 09:04:36AM +0100, Harwath, Frederik wrote:

>> --- a/libgomp/target.c

>> +++ b/libgomp/target.c

>> @@ -2022,6 +2022,16 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,

>>    gomp_unmap_vars (tgt_vars, true);

>>  }

>>  

>> +static unsigned int

> 

> Add inline?

> 


Added.

>> @@ -2257,6 +2269,8 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,

>>  {

>>    struct gomp_device_descr *devicep = resolve_device (device);

>>  

>> +  flags = clear_unsupported_flags (devicep, flags);


>> @@ -2398,6 +2412,8 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,

>>  {

>>    struct gomp_device_descr *devicep = resolve_device (device);

>>  

>> +  flags = clear_unsupported_flags (devicep, flags);


> I don't see why you need to do the above two.  GOMP_TARGET_TASK_DATA

> is done on the host side, async_run callback isn't called in that case

> and while we create a task, all we do is wait for the (host) dependencies

> in there and then perform the data transfer we need.

> I think it is perfectly fine to ignore nowait on target but honor it

> on target update or target {enter,exit} data.


I see. Removed.


> Otherwise LGTM.


Thanks for the review! I have committed the patch with those changes. I forgot to include the ChangeLog
entry which I had to add in a separate commit. Sorry for that! It seems that I have to adapt my workflow -
perhaps some pre-push hook ;-).

Best regards,
Frederik
From 001ab12e620c6f117b2e93c77d188bd62fe7ba03 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <frederik@codesourcery.com>

Date: Thu, 13 Feb 2020 07:30:16 +0100
Subject: [PATCH 1/2] openmp: ignore nowait if async execution is unsupported
 [PR93481]

An OpenMP "nowait" clause on a target construct currently leads to
a call to GOMP_OFFLOAD_async_run in the plugin that is used for
offloading at execution time. The nvptx plugin contains only a stub
of this function that always produces a fatal error if called.

This commit changes the "nowait" implementation to ignore the clause
if the executing device's plugin does not implement GOMP_OFFLOAD_async_run.
The stub in the nvptx plugin is removed which effectively means that
programs containing "nowait" can now be executed with nvptx offloading
as if the clause had not been used.
This behavior is consistent with the OpenMP specification which says that
"[...] execution of the target task *may* be deferred" (emphasis added),
cf. OpenMP 5.0, page 172.

libgomp/

	* plugin/plugin-nvptx.c: Remove GOMP_OFFLOAD_async_run stub.
	* target.c (gomp_load_plugin_for_device): Make "async_run" loading
	optional.
	(gomp_target_task_fn): Assert "devicep->async_run_func".
	(clear_unsupported_flags): New function to remove unsupported flags
	(right now only GOMP_TARGET_FLAG_NOWAIT) that can be be ignored.
	(GOMP_target_ext): Apply clear_unsupported_flags to flags.
	* testsuite/libgomp.c/target-33.c:
	Remove xfail for offload_target_nvptx.
	* testsuite/libgomp.c/target-34.c: Likewise.
---
 libgomp/plugin/plugin-nvptx.c           |  7 +------
 libgomp/target.c                        | 15 ++++++++++++++-
 libgomp/testsuite/libgomp.c/target-33.c |  3 ---
 libgomp/testsuite/libgomp.c/target-34.c |  3 ---
 4 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 6033c71a9db..ec103a2f40b 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1931,9 +1931,4 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
   nvptx_stacks_free (stacks, teams * threads);
 }
 
-void
-GOMP_OFFLOAD_async_run (int ord, void *tgt_fn, void *tgt_vars, void **args,
-			void *async_data)
-{
-  GOMP_PLUGIN_fatal ("GOMP_OFFLOAD_async_run unimplemented");
-}
+/* TODO: Implement GOMP_OFFLOAD_async_run. */
diff --git a/libgomp/target.c b/libgomp/target.c
index 3df007283f4..0ff727de47d 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -2022,6 +2022,16 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
   gomp_unmap_vars (tgt_vars, true);
 }
 
+static inline unsigned int
+clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
+{
+  /* If we cannot run asynchronously, simply ignore nowait.  */
+  if (devicep != NULL && devicep->async_run_func == NULL)
+    flags &= ~GOMP_TARGET_FLAG_NOWAIT;
+
+  return flags;
+}
+
 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
    and several arguments have been added:
    FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
@@ -2054,6 +2064,8 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
   size_t tgt_align = 0, tgt_size = 0;
   bool fpc_done = false;
 
+  flags = clear_unsupported_flags (devicep, flags);
+
   if (flags & GOMP_TARGET_FLAG_NOWAIT)
     {
       struct gomp_thread *thr = gomp_thread ();
@@ -2524,6 +2536,7 @@ gomp_target_task_fn (void *data)
 	}
       ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
 
+      assert (devicep->async_run_func);
       devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
 			       ttask->args, (void *) ttask);
       return true;
@@ -3040,7 +3053,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
     {
       DLSYM (run);
-      DLSYM (async_run);
+      DLSYM_OPT (async_run, async_run);
       DLSYM_OPT (can_run, can_run);
       DLSYM (dev2dev);
     }
diff --git a/libgomp/testsuite/libgomp.c/target-33.c b/libgomp/testsuite/libgomp.c/target-33.c
index 15d2d7e38ab..1bed4b6bc67 100644
--- a/libgomp/testsuite/libgomp.c/target-33.c
+++ b/libgomp/testsuite/libgomp.c/target-33.c
@@ -1,6 +1,3 @@
-/* { dg-xfail-run-if "GOMP_OFFLOAD_async_run not implemented" { offload_target_nvptx } }
-   Cf. https://gcc.gnu.org/PR81688.  */
-
 extern void abort (void);
 
 int
diff --git a/libgomp/testsuite/libgomp.c/target-34.c b/libgomp/testsuite/libgomp.c/target-34.c
index 5a3596424d8..66d9f54202b 100644
--- a/libgomp/testsuite/libgomp.c/target-34.c
+++ b/libgomp/testsuite/libgomp.c/target-34.c
@@ -1,6 +1,3 @@
-/* { dg-xfail-run-if "GOMP_OFFLOAD_async_run not implemented" { offload_target_nvptx } }
-   Cf. https://gcc.gnu.org/PR81688.  */
-
 extern void abort (void);
 
 int
-- 
2.17.1
From 2d9eb4e4ca7be5cbe289eef390afb53f36d313b1 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <frederik@codesourcery.com>

Date: Thu, 13 Feb 2020 10:24:39 +0100
Subject: [PATCH 2/2] Add ChangeLog entry for my last commit

---
 libgomp/ChangeLog | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index f77ce955ac7..ef1c5dae66c 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,16 @@
+2020-02-13  Frederik Harwath  <frederik@codesourcery.com>
+
+	PR libgomp/93481
+	* plugin/plugin-nvptx.c: Remove GOMP_OFFLOAD_async_run stub.
+	* target.c (gomp_load_plugin_for_device): Make "async_run" loading
+	optional.
+	(gomp_target_task_fn): Assert "devicep->async_run_func".
+	(clear_unsupported_flags): New function to remove unsupported flags
+	(right now only GOMP_TARGET_FLAG_NOWAIT) that can be be ignored.
+	(GOMP_target_ext): Apply clear_unsupported_flags to flags.
+	* testsuite/libgomp.c/target-33.c:
+	Remove xfail for offload_target_nvptx.
+	* testsuite/libgomp.c/target-34.c: Likewise.
 
 2020-02-10  Frederik Harwath  <frederik@codesourcery.com>
 
-- 
2.17.1

Patch

From 1258f713be317870e9171281e3f7c3a174773aa1 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <frederik@codesourcery.com>
Date: Thu, 13 Feb 2020 07:30:16 +0100
Subject: [PATCH] openmp: ignore nowait if async execution is unsupported
 [PR93481]

An OpenMP "nowait" clause on a target construct currently leads to
a call to GOMP_OFFLOAD_async_run in the plugin that is used for
offloading at execution time. The nvptx plugin contains only a stub
of this function that always produces a fatal error if called.

This commit changes the "nowait" implementation to ignore the clause
if the executing device's plugin does not implement GOMP_OFFLOAD_async_run.
The stub in the nvptx plugin is removed which effectively means that
programs containing "nowait" can now be executed with nvptx offloading
as if the clause had not been used.
This behavior is consistent with the OpenMP specification which says that
"[...] execution of the target task *may* be deferred" (emphasis added),
cf. OpenMP 5.0, page 172.

libgomp/

	* plugin/plugin-nvptx.c: Remove GOMP_OFFLOAD_async_run stub.
	* target.c (gomp_load_plugin_for_device): Make "async_run" loading
	optional.
	(gomp_target_task_fn): Assert "devicep->async_run_func".
	(clear_unsupported_flags): New function to remove unsupported flags
	(right now only GOMP_TARGET_FLAG_NOWAIT) that can be be ignored.
	(GOMP_target_ext): Apply clear_unsupported_flags to flags.
	(GOMP_target_update_ext): Likewise.
	(GOMP_target_enter_exit_data): Likewise.
	* testsuite/libgomp.c/target-33.c:
	Remove xfail for offload_target_nvptx.
	* testsuite/libgomp.c/target-34.c: Likewise.
---
 libgomp/plugin/plugin-nvptx.c           |  7 +------
 libgomp/target.c                        | 19 ++++++++++++++++++-
 libgomp/testsuite/libgomp.c/target-33.c |  3 ---
 libgomp/testsuite/libgomp.c/target-34.c |  3 ---
 4 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 6033c71a9db..ec103a2f40b 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1931,9 +1931,4 @@  GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
   nvptx_stacks_free (stacks, teams * threads);
 }
 
-void
-GOMP_OFFLOAD_async_run (int ord, void *tgt_fn, void *tgt_vars, void **args,
-			void *async_data)
-{
-  GOMP_PLUGIN_fatal ("GOMP_OFFLOAD_async_run unimplemented");
-}
+/* TODO: Implement GOMP_OFFLOAD_async_run. */
diff --git a/libgomp/target.c b/libgomp/target.c
index 3df007283f4..4fbf963f305 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -2022,6 +2022,16 @@  GOMP_target (int device, void (*fn) (void *), const void *unused,
   gomp_unmap_vars (tgt_vars, true);
 }
 
+static unsigned int
+clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
+{
+  /* If we cannot run asynchronously, simply ignore nowait.  */
+  if (devicep != NULL && devicep->async_run_func == NULL)
+    flags &= ~GOMP_TARGET_FLAG_NOWAIT;
+
+  return flags;
+}
+
 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
    and several arguments have been added:
    FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
@@ -2054,6 +2064,8 @@  GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
   size_t tgt_align = 0, tgt_size = 0;
   bool fpc_done = false;
 
+  flags = clear_unsupported_flags (devicep, flags);
+
   if (flags & GOMP_TARGET_FLAG_NOWAIT)
     {
       struct gomp_thread *thr = gomp_thread ();
@@ -2257,6 +2269,8 @@  GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
 {
   struct gomp_device_descr *devicep = resolve_device (device);
 
+  flags = clear_unsupported_flags (devicep, flags);
+
   /* If there are depend clauses, but nowait is not present,
      block the parent task until the dependencies are resolved
      and then just continue with the rest of the function as if it
@@ -2398,6 +2412,8 @@  GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
 {
   struct gomp_device_descr *devicep = resolve_device (device);
 
+  flags = clear_unsupported_flags (devicep, flags);
+
   /* If there are depend clauses, but nowait is not present,
      block the parent task until the dependencies are resolved
      and then just continue with the rest of the function as if it
@@ -2524,6 +2540,7 @@  gomp_target_task_fn (void *data)
 	}
       ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
 
+      assert (devicep->async_run_func);
       devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
 			       ttask->args, (void *) ttask);
       return true;
@@ -3040,7 +3057,7 @@  gomp_load_plugin_for_device (struct gomp_device_descr *device,
   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
     {
       DLSYM (run);
-      DLSYM (async_run);
+      DLSYM_OPT (async_run, async_run);
       DLSYM_OPT (can_run, can_run);
       DLSYM (dev2dev);
     }
diff --git a/libgomp/testsuite/libgomp.c/target-33.c b/libgomp/testsuite/libgomp.c/target-33.c
index 15d2d7e38ab..1bed4b6bc67 100644
--- a/libgomp/testsuite/libgomp.c/target-33.c
+++ b/libgomp/testsuite/libgomp.c/target-33.c
@@ -1,6 +1,3 @@ 
-/* { dg-xfail-run-if "GOMP_OFFLOAD_async_run not implemented" { offload_target_nvptx } }
-   Cf. https://gcc.gnu.org/PR81688.  */
-
 extern void abort (void);
 
 int
diff --git a/libgomp/testsuite/libgomp.c/target-34.c b/libgomp/testsuite/libgomp.c/target-34.c
index 5a3596424d8..66d9f54202b 100644
--- a/libgomp/testsuite/libgomp.c/target-34.c
+++ b/libgomp/testsuite/libgomp.c/target-34.c
@@ -1,6 +1,3 @@ 
-/* { dg-xfail-run-if "GOMP_OFFLOAD_async_run not implemented" { offload_target_nvptx } }
-   Cf. https://gcc.gnu.org/PR81688.  */
-
 extern void abort (void);
 
 int
-- 
2.17.1