[06/13] OpenACC 2.6 deep copy: attach/detach API routines

Message ID d803a16d815d3a71c4a447ac52f45b731abb19cf.1576648001.git.julian@codesourcery.com
State New
Headers show
Series
  • OpenACC 2.6 manual deep copy support
Related show

Commit Message

Julian Brown Dec. 18, 2019, 6:03 a.m.
This patch has been broken out of the "OpenACC 2.6 manual deep copy
support" patch, last posted here:

  https://gcc.gnu.org/ml/gcc-patches/2019-11/msg02376.html

It contains just the minimal libgomp bits necessary to support the OpenACC
runtime API routines (acc_attach, acc_detach and async/finalize versions
of same). This is essentially the same as the version posted by Thomas
here, modulo rebasing:

  https://gcc.gnu.org/ml/gcc-patches/2019-12/msg01212.html

Tested alongside other patches in this series with offloading to
NVPTX. OK?

Thanks,

Julian

ChangeLog

	libgomp/
	* libgomp.h (struct splay_tree_aux): Add attach_count field.
	(gomp_attach_pointer, gomp_detach_pointer): Add prototypes.
	* libgomp.map (OACC_2.6): New section. Add acc_attach,
	acc_attach_async, acc_detach, acc_detach_async, acc_detach_finalize,
	acc_detach_finalize_async.
	* oacc-mem.c (acc_attach_async, acc_attach, goacc_detach_internal,
	acc_detach, acc_detach_async, acc_detach_finalize,
	acc_detach_finalize_async): New functions.
	* openacc.h (acc_attach, acc_attach_async, acc_detach,
	(acc_detach_async, acc_detach_finalize, acc_detach_finalize_async): Add
	prototypes.
	* target.c (gomp_attach_pointer, gomp_detach_pointer): New functions.
	(gomp_remove_var_internal): Free attachment counts if present.
	* testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c: New test.
---
 libgomp/libgomp.h                             |  10 ++
 libgomp/libgomp.map                           |  10 ++
 libgomp/oacc-mem.c                            |  84 +++++++++++
 libgomp/openacc.h                             |   6 +
 libgomp/target.c                              | 130 ++++++++++++++++++
 .../libgomp.oacc-c-c++-common/deep-copy-3.c   |  34 +++++
 .../libgomp.oacc-c-c++-common/deep-copy-5.c   |  81 +++++++++++
 7 files changed, 355 insertions(+)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c

-- 
2.23.0

Patch

diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 248d8dc5d63..2017991b59c 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1002,6 +1002,9 @@  struct target_mem_desc {
 struct splay_tree_aux {
   /* Pointer to the original mapping of "omp declare target link" object.  */
   splay_tree_key link_key;
+  /* For a block with attached pointers, the attachment counters for each.
+     Only used for OpenACC.  */
+  uintptr_t *attach_count;
 };
 
 struct splay_tree_key_s {
@@ -1170,6 +1173,13 @@  extern void gomp_copy_dev2host (struct gomp_device_descr *,
 				struct goacc_asyncqueue *, void *, const void *,
 				size_t);
 extern uintptr_t gomp_map_val (struct target_mem_desc *, void **, size_t);
+extern void gomp_attach_pointer (struct gomp_device_descr *,
+				 struct goacc_asyncqueue *, splay_tree,
+				 splay_tree_key, uintptr_t, size_t,
+				 struct gomp_coalesce_buf *);
+extern void gomp_detach_pointer (struct gomp_device_descr *,
+				 struct goacc_asyncqueue *, splay_tree_key,
+				 uintptr_t, bool, struct gomp_coalesce_buf *);
 
 #ifdef RC_CHECKING
 extern void dump_tgt (const char *, struct target_mem_desc *);
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index c79430f8d8d..63276f7d29b 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -484,6 +484,16 @@  OACC_2.5.1 {
 	acc_register_library;
 } OACC_2.5;
 
+OACC_2.6 {
+  global:
+	acc_attach;
+	acc_attach_async;
+	acc_detach;
+	acc_detach_async;
+	acc_detach_finalize;
+	acc_detach_finalize_async;
+} OACC_2.5.1;
+
 GOACC_2.0 {
   global:
 	GOACC_data_end;
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 4a2185c58ad..08507791399 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -866,6 +866,90 @@  acc_update_self_async (void *h, size_t s, int async)
   update_dev_host (0, h, s, async);
 }
 
+void
+acc_attach_async (void **hostaddr, int async)
+{
+  struct goacc_thread *thr = goacc_thread ();
+  struct gomp_device_descr *acc_dev = thr->dev;
+  goacc_aq aq = get_goacc_asyncqueue (async);
+
+  struct splay_tree_key_s cur_node;
+  splay_tree_key n;
+
+  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    return;
+
+  gomp_mutex_lock (&acc_dev->lock);
+
+  cur_node.host_start = (uintptr_t) hostaddr;
+  cur_node.host_end = cur_node.host_start + sizeof (void *);
+  n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
+
+  if (n == NULL)
+    gomp_fatal ("struct not mapped for acc_attach");
+
+  gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr,
+		       0, NULL);
+
+  gomp_mutex_unlock (&acc_dev->lock);
+}
+
+void
+acc_attach (void **hostaddr)
+{
+  acc_attach_async (hostaddr, acc_async_sync);
+}
+
+static void
+goacc_detach_internal (void **hostaddr, int async, bool finalize)
+{
+  struct goacc_thread *thr = goacc_thread ();
+  struct gomp_device_descr *acc_dev = thr->dev;
+  struct splay_tree_key_s cur_node;
+  splay_tree_key n;
+  struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async);
+
+  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    return;
+
+  gomp_mutex_lock (&acc_dev->lock);
+
+  cur_node.host_start = (uintptr_t) hostaddr;
+  cur_node.host_end = cur_node.host_start + sizeof (void *);
+  n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
+
+  if (n == NULL)
+    gomp_fatal ("struct not mapped for acc_detach");
+
+  gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL);
+
+  gomp_mutex_unlock (&acc_dev->lock);
+}
+
+void
+acc_detach (void **hostaddr)
+{
+  goacc_detach_internal (hostaddr, acc_async_sync, false);
+}
+
+void
+acc_detach_async (void **hostaddr, int async)
+{
+  goacc_detach_internal (hostaddr, async, false);
+}
+
+void
+acc_detach_finalize (void **hostaddr)
+{
+  goacc_detach_internal (hostaddr, acc_async_sync, true);
+}
+
+void
+acc_detach_finalize_async (void **hostaddr, int async)
+{
+  goacc_detach_internal (hostaddr, async, true);
+}
+
 /* Some types of (pointer) variables use several consecutive mappings, which
    must be treated as a group for enter/exit data directives.  This function
    returns the last mapping in such a group (inclusive), or POS for singleton
diff --git a/libgomp/openacc.h b/libgomp/openacc.h
index 42c861caabf..d2e5c101f7f 100644
--- a/libgomp/openacc.h
+++ b/libgomp/openacc.h
@@ -109,12 +109,18 @@  void *acc_hostptr (void *) __GOACC_NOTHROW;
 int acc_is_present (void *, size_t) __GOACC_NOTHROW;
 void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW;
 void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW;
+void acc_attach (void **) __GOACC_NOTHROW;
+void acc_attach_async (void **, int) __GOACC_NOTHROW;
+void acc_detach (void **) __GOACC_NOTHROW;
+void acc_detach_async (void **, int) __GOACC_NOTHROW;
 
 /* Finalize versions of copyout/delete functions, specified in OpenACC 2.5.  */
 void acc_copyout_finalize (void *, size_t) __GOACC_NOTHROW;
 void acc_copyout_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
 void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW;
 void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
+void acc_detach_finalize (void **) __GOACC_NOTHROW;
+void acc_detach_finalize_async (void **, int) __GOACC_NOTHROW;
 
 /* Async functions, specified in OpenACC 2.5.  */
 void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW;
diff --git a/libgomp/target.c b/libgomp/target.c
index 46b20c04865..1f429900113 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -673,6 +673,134 @@  gomp_map_fields_existing (struct target_mem_desc *tgt,
 	      (void *) cur_node.host_end);
 }
 
+attribute_hidden void
+gomp_attach_pointer (struct gomp_device_descr *devicep,
+		     struct goacc_asyncqueue *aq, splay_tree mem_map,
+		     splay_tree_key n, uintptr_t attach_to, size_t bias,
+		     struct gomp_coalesce_buf *cbufp)
+{
+  struct splay_tree_key_s s;
+  size_t size, idx;
+
+  if (n == NULL)
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("enclosing struct not mapped for attach");
+    }
+
+  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
+  /* We might have a pointer in a packed struct: however we cannot have more
+     than one such pointer in each pointer-sized portion of the struct, so
+     this is safe.  */
+  idx = (attach_to - n->host_start) / sizeof (void *);
+
+  if (!n->aux)
+    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
+
+  if (!n->aux->attach_count)
+    n->aux->attach_count
+      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
+
+  if (n->aux->attach_count[idx] < UINTPTR_MAX)
+    n->aux->attach_count[idx]++;
+  else
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("attach count overflow");
+    }
+
+  if (n->aux->attach_count[idx] == 1)
+    {
+      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
+			 - n->host_start;
+      uintptr_t target = (uintptr_t) *(void **) attach_to;
+      splay_tree_key tn;
+      uintptr_t data;
+
+      if ((void *) target == NULL)
+	{
+	  gomp_mutex_unlock (&devicep->lock);
+	  gomp_fatal ("attempt to attach null pointer");
+	}
+
+      s.host_start = target + bias;
+      s.host_end = s.host_start + 1;
+      tn = splay_tree_lookup (mem_map, &s);
+
+      if (!tn)
+	{
+	  gomp_mutex_unlock (&devicep->lock);
+	  gomp_fatal ("pointer target not mapped for attach");
+	}
+
+      data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
+
+      gomp_debug (1,
+		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
+		  __FUNCTION__, (void *) attach_to, (void *) devptr,
+		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
+
+      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
+			  sizeof (void *), cbufp);
+    }
+  else
+    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
+		(void *) attach_to, (int) n->aux->attach_count[idx]);
+}
+
+attribute_hidden void
+gomp_detach_pointer (struct gomp_device_descr *devicep,
+		     struct goacc_asyncqueue *aq, splay_tree_key n,
+		     uintptr_t detach_from, bool finalize,
+		     struct gomp_coalesce_buf *cbufp)
+{
+  size_t idx;
+
+  if (n == NULL)
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("enclosing struct not mapped for detach");
+    }
+
+  idx = (detach_from - n->host_start) / sizeof (void *);
+
+  if (!n->aux || !n->aux->attach_count)
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("no attachment counters for struct");
+    }
+
+  if (finalize)
+    n->aux->attach_count[idx] = 1;
+
+  if (n->aux->attach_count[idx] == 0)
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("attach count underflow");
+    }
+  else
+    n->aux->attach_count[idx]--;
+
+  if (n->aux->attach_count[idx] == 0)
+    {
+      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
+			 - n->host_start;
+      uintptr_t target = (uintptr_t) *(void **) detach_from;
+
+      gomp_debug (1,
+		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
+		  __FUNCTION__, (void *) detach_from, (void *) devptr,
+		  (void *) (n->tgt->tgt_start + n->tgt_offset),
+		  (void *) target);
+
+      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
+			  sizeof (void *), cbufp);
+    }
+  else
+    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
+		(void *) detach_from, (int) n->aux->attach_count[idx]);
+}
+
 attribute_hidden uintptr_t
 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
 {
@@ -1348,6 +1476,8 @@  gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
       if (k->aux->link_key)
 	splay_tree_insert (&devicep->mem_map,
 			   (splay_tree_node) k->aux->link_key);
+      if (k->aux->attach_count)
+	free (k->aux->attach_count);
       free (k->aux);
       k->aux = NULL;
     }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c
new file mode 100644
index 00000000000..cec764bd3e7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c
@@ -0,0 +1,34 @@ 
+#include <assert.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main ()
+{
+  int n = 100, i;
+  int *a = (int *) malloc (sizeof (int) * n);
+  int *b;
+
+  for (i = 0; i < n; i++)
+    a[i] = i+1;
+
+#pragma acc enter data copyin(a[:n]) create(b)
+
+  b = a;
+  acc_attach ((void **)&b);
+
+#pragma acc parallel loop present (b[:n])
+  for (i = 0; i < n; i++)
+    b[i] = i+1;
+
+  acc_detach ((void **)&b);
+
+#pragma acc exit data copyout(a[:n], b)
+
+  for (i = 0; i < 10; i++)
+    assert (a[i] == b[i]);
+
+  free (a);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c
new file mode 100644
index 00000000000..89cafbb62ab
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c
@@ -0,0 +1,81 @@ 
+#include <assert.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+struct node
+{
+  struct node *next;
+  int val;
+};
+
+int
+sum_nodes (struct node *head)
+{
+  int i = 0, sum = 0;
+
+#pragma acc parallel reduction(+:sum) present(head[:1])
+  {
+    for (; head != NULL; head = head->next)
+      sum += head->val;
+  }
+
+  return sum;
+}
+
+void
+insert (struct node *head, int val)
+{
+  struct node *n = (struct node *) malloc (sizeof (struct node));
+
+  if (head->next)
+    acc_detach ((void **) &head->next);
+
+  n->val = val;
+  n->next = head->next;
+  head->next = n;
+
+  acc_copyin (n, sizeof (struct node));
+  acc_attach((void **) &head->next);
+
+  if (n->next)
+    acc_attach ((void **) &n->next);
+}
+
+void
+destroy (struct node *head)
+{
+  while (head->next != NULL)
+    {
+      acc_detach ((void **) &head->next);
+      struct node * n = head->next;
+      head->next = n->next;
+      if (n->next)
+	acc_detach ((void **) &n->next);
+
+      acc_delete (n, sizeof (struct node));
+      if (head->next)
+	acc_attach((void **) &head->next);
+
+      free (n);
+    }
+}
+
+int
+main ()
+{
+  struct node list = { .next = NULL, .val = 0 };
+  int i;
+
+  acc_copyin (&list, sizeof (struct node));
+
+  for (i = 0; i < 10; i++)
+    insert (&list, 2);
+
+  assert (sum_nodes (&list) == 10 * 2);
+
+  destroy (&list);
+
+  acc_delete (&list, sizeof (struct node));
+
+  return 0;
+}