[OpenACC] Fix potential race condition in OpenACC "exit data" operations

Message ID 20191212190121.42ea3f8c@squid.athome
State New
Headers show
Series
  • [OpenACC] Fix potential race condition in OpenACC "exit data" operations
Related show

Commit Message

Julian Brown Dec. 12, 2019, 7:01 p.m.
Hi,

This is a fix for PR92881, broken out of the larger "reference counting
overhaul" patch last posted here:

https://gcc.gnu.org/ml/gcc-patches/2019-11/msg02235.html

The current implementation (potentially with synchronous unmapping of
a variable/memory block immediately after an asynchronous copyout which
might not complete beforehand) is "obviously incorrect" by observation,
but does not appear to cause any issues with the current testsuite
(i.e., there is no test improvement with this patch).

Tested with offloading to AMD GCN. OK for trunk?

Thanks,

Julian

ChangeLog

2019-12-12  Julian Brown  <julian@codesourcery.com>
            Thomas Schwinge  <thomas@codesourcery.com>

    PR libgomp/92881

    libgomp/
    * libgomp.h (gomp_remove_var_async): Add prototype.
    * oacc-mem.c (delete_copyout): Call gomp_remove_var_async instead of
    gomp_remove_var.
    * target.c (gomp_unref_tgt): Change return type to bool, indicating
    whether target_mem_desc was unmapped.
    (gomp_unref_tgt_void): New.
    (gomp_remove_var): Reimplement in terms of...
    (gomp_remove_var_internal): ...this new helper function.
    (gomp_remove_var_async): New, implemented using above helper function.
    (gomp_unmap_vars_internal): Use gomp_unref_tgt_void instead of
    gomp_unref_tgt.

Comments

Thomas Schwinge Dec. 13, 2019, 2:42 p.m. | #1
Hi Julian!

On 2019-12-12T19:01:21+0000, Julian Brown <julian@codesourcery.com> wrote:
> This is a fix for PR92881, broken out of the larger "reference counting

> overhaul" patch last posted here:

>

> https://gcc.gnu.org/ml/gcc-patches/2019-11/msg02235.html


Thanks!

> The current implementation (potentially with synchronous unmapping of

> a variable/memory block immediately after an asynchronous copyout which

> might not complete beforehand) is "obviously incorrect" by observation


ACK.

> but does not appear to cause any issues with the current testsuite

> (i.e., there is no test improvement with this patch).


Ah, too bad.  ;-|

> Tested with offloading to AMD GCN. OK for trunk?


Yes, with a tiny change as noted below:

> --- a/libgomp/oacc-mem.c

> +++ b/libgomp/oacc-mem.c

> @@ -660,7 +660,6 @@ static void

>  delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)

>  {

>    splay_tree_key n;

> -  void *d;

>    struct goacc_thread *thr = goacc_thread ();

>    struct gomp_device_descr *acc_dev = thr->dev;

>  

> @@ -689,9 +688,6 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)

>        gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);

>      }

>  

> -  d = (void *) (n->tgt->tgt_start + n->tgt_offset

> -		+ (uintptr_t) h - n->host_start);

> -

>    if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)

>      {

>        size_t host_size = n->host_end - n->host_start;

> @@ -723,12 +719,15 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)

>        if (f & FLAG_COPYOUT)

>  	{

> -	  [...]

> +	  void *d = (void *) (n->tgt->tgt_start + n->tgt_offset

> +			      + (uintptr_t) h - n->host_start);


Not sure why you're moving 'd' as part of this patch, but OK if you'd
like.  ;-)

> --- a/libgomp/target.c

> +++ b/libgomp/target.c


(For anyone else reading, the diff displays a bit weird; what's really
going on is that 'gomp_remove_var' got moved further down, as is nicely
shown by 'git diff --patience'.)

> +static bool

> +gomp_unref_tgt (void *ptr)

>  {

>    bool is_tgt_unmapped = false;

> -  [...]

> +

> +  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

> +

> +  assert (tgt->refcount != REFCOUNT_INFINITY);


Please leave out this 'assert': we don't have any of these yet for
'tgt->refcount' (as far as I remember), and fixing that is a separate
change, to generally deal that issue;
<http://mid.mail-archive.com/87r22an29u.fsf@euler.schwinge.homeip.net>.


Grüße
 Thomas

Patch

commit 7a5134adb474c65269bb16b1e31bf92bb9d6a06a
Author: Julian Brown <julian@codesourcery.com>
Date:   Thu Dec 12 10:39:23 2019 -0800

    Fix potential race condition in OpenACC "exit data" operations
    
            PR libgomp/92881
    
            libgomp/
            * libgomp.h (gomp_remove_var_async): Add prototype.
            * oacc-mem.c (delete_copyout): Call gomp_remove_var_async instead of
            gomp_remove_var.
            * target.c (gomp_unref_tgt): Change return type to bool, indicating
            whether target_mem_desc was unmapped.
            (gomp_unref_tgt_void): New.
            (gomp_remove_var): Reimplement in terms of...
            (gomp_remove_var_internal): ...this new helper function.
            (gomp_remove_var_async): New, implemented using above helper function.
            (gomp_unmap_vars_internal): Use gomp_unref_tgt_void instead of
            gomp_unref_tgt.

diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 8cbfe0b1f30..6390187dff5 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1166,6 +1166,8 @@  extern bool gomp_fini_device (struct gomp_device_descr *);
 extern void gomp_free_memmap (struct splay_tree_s *);
 extern void gomp_unload_device (struct gomp_device_descr *);
 extern bool gomp_remove_var (struct gomp_device_descr *, splay_tree_key);
+extern void gomp_remove_var_async (struct gomp_device_descr *, splay_tree_key,
+				   struct goacc_asyncqueue *);
 extern bool gomp_offload_target_available_p (int);
 
 /* work.c */
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index a809d0495a6..196b7e2a520 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -660,7 +660,6 @@  static void
 delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
 {
   splay_tree_key n;
-  void *d;
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
@@ -689,9 +688,6 @@  delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
       gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
     }
 
-  d = (void *) (n->tgt->tgt_start + n->tgt_offset
-		+ (uintptr_t) h - n->host_start);
-
   if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
     {
       size_t host_size = n->host_end - n->host_start;
@@ -723,12 +719,15 @@  delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
 
   if (n->refcount == 0)
     {
+      goacc_aq aq = get_goacc_asyncqueue (async);
+
       if (f & FLAG_COPYOUT)
 	{
-	  goacc_aq aq = get_goacc_asyncqueue (async);
+	  void *d = (void *) (n->tgt->tgt_start + n->tgt_offset
+			      + (uintptr_t) h - n->host_start);
 	  gomp_copy_dev2host (acc_dev, aq, h, d, s);
 	}
-      gomp_remove_var (acc_dev, n);
+      gomp_remove_var_async (acc_dev, n, aq);
     }
 
   gomp_mutex_unlock (&acc_dev->lock);
diff --git a/libgomp/target.c b/libgomp/target.c
index d2fd90be539..d2990e06348 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -1128,32 +1128,65 @@  gomp_unmap_tgt (struct target_mem_desc *tgt)
   free (tgt);
 }
 
-attribute_hidden bool
-gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
+static bool
+gomp_unref_tgt (void *ptr)
 {
   bool is_tgt_unmapped = false;
-  splay_tree_remove (&devicep->mem_map, k);
-  if (k->link_key)
-    splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
-  if (k->tgt->refcount > 1)
-    k->tgt->refcount--;
+
+  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
+
+  assert (tgt->refcount != REFCOUNT_INFINITY);
+
+  if (tgt->refcount > 1)
+    tgt->refcount--;
   else
     {
+      gomp_unmap_tgt (tgt);
       is_tgt_unmapped = true;
-      gomp_unmap_tgt (k->tgt);
     }
+
   return is_tgt_unmapped;
 }
 
 static void
-gomp_unref_tgt (void *ptr)
+gomp_unref_tgt_void (void *ptr)
 {
-  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
+  (void) gomp_unref_tgt (ptr);
+}
 
-  if (tgt->refcount > 1)
-    tgt->refcount--;
+static inline __attribute__((always_inline)) bool
+gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
+			  struct goacc_asyncqueue *aq)
+{
+  bool is_tgt_unmapped = false;
+  splay_tree_remove (&devicep->mem_map, k);
+  if (k->link_key)
+    splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
+  if (aq)
+    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
+						(void *) k->tgt);
   else
-    gomp_unmap_tgt (tgt);
+    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
+  return is_tgt_unmapped;
+}
+
+attribute_hidden bool
+gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
+{
+  return gomp_remove_var_internal (devicep, k, NULL);
+}
+
+/* Remove a variable asynchronously.  This actually removes the variable
+   mapping immediately, but retains the linked target_mem_desc until the
+   asynchronous operation has completed (as it may still refer to target
+   memory).  The device lock must be held before entry, and remains locked on
+   exit.  */
+
+attribute_hidden void
+gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
+		       struct goacc_asyncqueue *aq)
+{
+  (void) gomp_remove_var_internal (devicep, k, aq);
 }
 
 /* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
@@ -1209,7 +1242,7 @@  gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
     }
 
   if (aq)
-    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt,
+    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
 						(void *) tgt);
   else
     gomp_unref_tgt ((void *) tgt);