[5/7,OpenACC,libgomp,v5,stage1] Async re-work, libgomp/target.c changes

Message ID 5d9cabcf-5b02-181c-0206-2fef7a35f343@mentor.com
State New
Headers show
Series
  • Async re-work
Related show

Commit Message

Chung-Lin Tang Jan. 22, 2019, 2:53 p.m.
Hi Jakub,
Since the last revision, the main change is that I've added at your request,
__builtin_expect (aq != NULL, 0) as the if-condition in gomp_copy_host2dev/dev2host.

Stashing the aq pointer into the coalesce-buf didn't match very well, as
the cbuf argument was sometimes null and didn't always exist.

If you are still not satisfied, then I guess we can still further optimize this code
by moving more stuff into headers as static inline.

Thanks,
Chung-Lin

     * target.c (goacc_device_copy_async): New function.
     (gomp_copy_host2dev): Remove 'static', add goacc_asyncqueue parameter,
     add goacc_device_copy_async case.
     (gomp_copy_dev2host): Likewise.
     (gomp_map_vars_existing): Add goacc_asyncqueue parameter, adjust code.
     (gomp_map_pointer): Likewise.
     (gomp_map_fields_existing): Likewise.
     (gomp_map_vars_internal): New always_inline function, renamed from gomp_map_vars.
     (gomp_map_vars): Implement by calling gomp_map_vars_internal.
     (gomp_map_vars_async): Implement by calling gomp_map_vars_internal,
     passing goacc_asyncqueue argument.
     (gomp_unmap_tgt): Remove statis, add attribute_hidden.
     (gomp_unmap_vars_internal): New always_inline function, renamed from gomp_unmap_vars.
     (gomp_unmap_vars): Implement by calling gomp_unmap_vars_internal.
     (gomp_unmap_vars_async): Implement by calling gomp_unmap_vars_internal,
     passing goacc_asyncqueue argument.
     (gomp_fini_device): New function.
     (gomp_exit_data): Adjust gomp_copy_dev2host call.
     (gomp_load_plugin_for_device): Remove old interface, adjust to load
     new async interface.
     (gomp_target_fini): Adjust code to call gomp_fini_device.

Patch

Index: libgomp/target.c
===================================================================
--- libgomp/target.c	(revision 268142)
+++ libgomp/target.c	(working copy)
@@ -177,6 +177,22 @@  gomp_device_copy (struct gomp_device_descr *device
     }
 }
 
+static inline void
+goacc_device_copy_async (struct gomp_device_descr *devicep,
+			 bool (*copy_func) (int, void *, const void *, size_t,
+					    struct goacc_asyncqueue *),
+			 const char *dst, void *dstaddr,
+			 const char *src, const void *srcaddr,
+			 size_t size, struct goacc_asyncqueue *aq)
+{
+  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
+		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
+    }
+}
+
 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
    host to device memory transfers.  */
 
@@ -269,8 +285,9 @@  gomp_to_device_kind_p (int kind)
     }
 }
 
-static void
+attribute_hidden void
 gomp_copy_host2dev (struct gomp_device_descr *devicep,
+		    struct goacc_asyncqueue *aq,
 		    void *d, const void *h, size_t sz,
 		    struct gomp_coalesce_buf *cbuf)
 {
@@ -299,14 +316,23 @@  gomp_copy_host2dev (struct gomp_device_descr *devi
 	    }
 	}
     }
-  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
+  if (__builtin_expect (aq != NULL, 0))
+    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
+			     "dev", d, "host", h, sz, aq);
+  else
+    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
 }
 
-static void
+attribute_hidden void
 gomp_copy_dev2host (struct gomp_device_descr *devicep,
+		    struct goacc_asyncqueue *aq,
 		    void *h, const void *d, size_t sz)
 {
-  gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
+  if (__builtin_expect (aq != NULL, 0))
+    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
+			     "host", h, "dev", d, sz, aq);
+  else
+    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
 }
 
 static void
@@ -324,7 +350,8 @@  gomp_free_device_memory (struct gomp_device_descr
    Helper function of gomp_map_vars.  */
 
 static inline void
-gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
+gomp_map_vars_existing (struct gomp_device_descr *devicep,
+			struct goacc_asyncqueue *aq, splay_tree_key oldn,
 			splay_tree_key newn, struct target_var_desc *tgt_var,
 			unsigned char kind, struct gomp_coalesce_buf *cbuf)
 {
@@ -346,7 +373,7 @@  static inline void
     }
 
   if (GOMP_MAP_ALWAYS_TO_P (kind))
-    gomp_copy_host2dev (devicep,
+    gomp_copy_host2dev (devicep, aq,
 			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
 				  + newn->host_start - oldn->host_start),
 			(void *) newn->host_start,
@@ -364,8 +391,8 @@  get_kind (bool short_mapkind, void *kinds, int idx
 }
 
 static void
-gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
-		  uintptr_t target_offset, uintptr_t bias,
+gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
+		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
 		  struct gomp_coalesce_buf *cbuf)
 {
   struct gomp_device_descr *devicep = tgt->device_descr;
@@ -376,7 +403,7 @@  static void
   if (cur_node.host_start == (uintptr_t) NULL)
     {
       cur_node.tgt_offset = (uintptr_t) NULL;
-      gomp_copy_host2dev (devicep,
+      gomp_copy_host2dev (devicep, aq,
 			  (void *) (tgt->tgt_start + target_offset),
 			  (void *) &cur_node.tgt_offset,
 			  sizeof (void *), cbuf);
@@ -398,12 +425,13 @@  static void
      array section.  Now subtract bias to get what we want
      to initialize the pointer with.  */
   cur_node.tgt_offset -= bias;
-  gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
+  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
 		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
 }
 
 static void
-gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
+gomp_map_fields_existing (struct target_mem_desc *tgt,
+			  struct goacc_asyncqueue *aq, splay_tree_key n,
 			  size_t first, size_t i, void **hostaddrs,
 			  size_t *sizes, void *kinds,
 			  struct gomp_coalesce_buf *cbuf)
@@ -423,7 +451,7 @@  static void
       && n2->tgt == n->tgt
       && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
     {
-      gomp_map_vars_existing (devicep, n2, &cur_node,
+      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
 			      &tgt->list[i], kind & typemask, cbuf);
       return;
     }
@@ -439,8 +467,8 @@  static void
 	      && n2->host_start - n->host_start
 		 == n2->tgt_offset - n->tgt_offset)
 	    {
-	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
-				      kind & typemask, cbuf);
+	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
+				      &tgt->list[i], kind & typemask, cbuf);
 	      return;
 	    }
 	}
@@ -451,7 +479,7 @@  static void
 	  && n2->tgt == n->tgt
 	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
 	{
-	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
+	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
 				  kind & typemask, cbuf);
 	  return;
 	}
@@ -483,10 +511,12 @@  gomp_map_val (struct target_mem_desc *tgt, void **
   return tgt->tgt_start + tgt->list[i].offset;
 }
 
-attribute_hidden struct target_mem_desc *
-gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
-	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
-	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
+static inline __attribute__((always_inline)) struct target_mem_desc *
+gomp_map_vars_internal (struct gomp_device_descr *devicep,
+			struct goacc_asyncqueue *aq, size_t mapnum,
+			void **hostaddrs, void **devaddrs, size_t *sizes,
+			void *kinds, bool short_mapkind,
+			enum gomp_map_vars_kind pragma_kind)
 {
   size_t i, tgt_align, tgt_size, not_found_cnt = 0;
   bool has_firstprivate = false;
@@ -600,7 +630,7 @@  gomp_map_val (struct target_mem_desc *tgt, void **
 	      continue;
 	    }
 	  for (i = first; i <= last; i++)
-	    gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
+	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
 				      sizes, kinds, NULL);
 	  i--;
 	  continue;
@@ -645,7 +675,7 @@  gomp_map_val (struct target_mem_desc *tgt, void **
       else
 	n = splay_tree_lookup (mem_map, &cur_node);
       if (n && n->refcount != REFCOUNT_LINK)
-	gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
+	gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
 				kind & typemask, NULL);
       else
 	{
@@ -756,7 +786,7 @@  gomp_map_val (struct target_mem_desc *tgt, void **
 		tgt_size = (tgt_size + align - 1) & ~(align - 1);
 		tgt->list[i].offset = tgt_size;
 		len = sizes[i];
-		gomp_copy_host2dev (devicep,
+		gomp_copy_host2dev (devicep, aq,
 				    (void *) (tgt->tgt_start + tgt_size),
 				    (void *) hostaddrs[i], len, cbufp);
 		tgt_size += len;
@@ -790,7 +820,7 @@  gomp_map_val (struct target_mem_desc *tgt, void **
 		    continue;
 		  }
 		for (i = first; i <= last; i++)
-		  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
+		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
 					    sizes, kinds, cbufp);
 		i--;
 		continue;
@@ -810,7 +840,7 @@  gomp_map_val (struct target_mem_desc *tgt, void **
 		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
 		if (cur_node.tgt_offset)
 		  cur_node.tgt_offset -= sizes[i];
-		gomp_copy_host2dev (devicep,
+		gomp_copy_host2dev (devicep, aq,
 				    (void *) (n->tgt->tgt_start
 					      + n->tgt_offset
 					      + cur_node.host_start
@@ -831,7 +861,7 @@  gomp_map_val (struct target_mem_desc *tgt, void **
 	      k->host_end = k->host_start + sizeof (void *);
 	    splay_tree_key n = splay_tree_lookup (mem_map, k);
 	    if (n && n->refcount != REFCOUNT_LINK)
-	      gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
+	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
 				      kind & typemask, cbufp);
 	    else
 	      {
@@ -884,7 +914,7 @@  gomp_map_val (struct target_mem_desc *tgt, void **
 		  case GOMP_MAP_FORCE_TOFROM:
 		  case GOMP_MAP_ALWAYS_TO:
 		  case GOMP_MAP_ALWAYS_TOFROM:
-		    gomp_copy_host2dev (devicep,
+		    gomp_copy_host2dev (devicep, aq,
 					(void *) (tgt->tgt_start
 						  + k->tgt_offset),
 					(void *) k->host_start,
@@ -891,11 +921,12 @@  gomp_map_val (struct target_mem_desc *tgt, void **
 					k->host_end - k->host_start, cbufp);
 		    break;
 		  case GOMP_MAP_POINTER:
-		    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
+		    gomp_map_pointer (tgt, aq,
+				      (uintptr_t) *(void **) k->host_start,
 				      k->tgt_offset, sizes[i], cbufp);
 		    break;
 		  case GOMP_MAP_TO_PSET:
-		    gomp_copy_host2dev (devicep,
+		    gomp_copy_host2dev (devicep, aq,
 					(void *) (tgt->tgt_start
 						  + k->tgt_offset),
 					(void *) k->host_start,
@@ -917,7 +948,7 @@  gomp_map_val (struct target_mem_desc *tgt, void **
 			  tgt->list[j].always_copy_from = false;
 			  if (k->refcount != REFCOUNT_INFINITY)
 			    k->refcount++;
-			  gomp_map_pointer (tgt,
+			  gomp_map_pointer (tgt, aq,
 					    (uintptr_t) *(void **) hostaddrs[j],
 					    k->tgt_offset
 					    + ((uintptr_t) hostaddrs[j]
@@ -946,7 +977,7 @@  gomp_map_val (struct target_mem_desc *tgt, void **
 		    break;
 		  case GOMP_MAP_FORCE_DEVICEPTR:
 		    assert (k->host_end - k->host_start == sizeof (void *));
-		    gomp_copy_host2dev (devicep,
+		    gomp_copy_host2dev (devicep, aq,
 					(void *) (tgt->tgt_start
 						  + k->tgt_offset),
 					(void *) k->host_start,
@@ -965,7 +996,7 @@  gomp_map_val (struct target_mem_desc *tgt, void **
 		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
 		    /* We intentionally do not use coalescing here, as it's not
 		       data allocated by the current call to this function.  */
-		    gomp_copy_host2dev (devicep, (void *) n->tgt_offset,
+		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
 					&tgt_addr, sizeof (void *), NULL);
 		  }
 		array++;
@@ -978,7 +1009,7 @@  gomp_map_val (struct target_mem_desc *tgt, void **
       for (i = 0; i < mapnum; i++)
 	{
 	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
-	  gomp_copy_host2dev (devicep,
+	  gomp_copy_host2dev (devicep, aq,
 			      (void *) (tgt->tgt_start + i * sizeof (void *)),
 			      (void *) &cur_node.tgt_offset, sizeof (void *),
 			      cbufp);
@@ -989,7 +1020,7 @@  gomp_map_val (struct target_mem_desc *tgt, void **
     {
       long c = 0;
       for (c = 0; c < cbuf.chunk_cnt; ++c)
-	gomp_copy_host2dev (devicep,
+	gomp_copy_host2dev (devicep, aq,
 			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
 			    (char *) cbuf.buf + (cbuf.chunks[c].start
 						 - cbuf.chunks[0].start),
@@ -1012,7 +1043,27 @@  gomp_map_val (struct target_mem_desc *tgt, void **
   return tgt;
 }
 
-static void
+attribute_hidden struct target_mem_desc *
+gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
+	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
+	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
+{
+  return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
+				 sizes, kinds, short_mapkind, pragma_kind);
+}
+
+attribute_hidden struct target_mem_desc *
+gomp_map_vars_async (struct gomp_device_descr *devicep,
+		     struct goacc_asyncqueue *aq, size_t mapnum,
+		     void **hostaddrs, void **devaddrs, size_t *sizes,
+		     void *kinds, bool short_mapkind,
+		     enum gomp_map_vars_kind pragma_kind)
+{
+  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
+				 sizes, kinds, short_mapkind, pragma_kind);
+}
+
+attribute_hidden void
 gomp_unmap_tgt (struct target_mem_desc *tgt)
 {
   /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
@@ -1040,12 +1091,24 @@  gomp_remove_var (struct gomp_device_descr *devicep
   return is_tgt_unmapped;
 }
 
+static void
+gomp_unref_tgt (void *ptr)
+{
+  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
+
+  if (tgt->refcount > 1)
+    tgt->refcount--;
+  else
+    gomp_unmap_tgt (tgt);
+}
+
 /* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
    variables back from device to host: if it is false, it is assumed that this
    has been done already.  */
 
-attribute_hidden void
-gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
+static inline __attribute__((always_inline)) void
+gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
+			  struct goacc_asyncqueue *aq)
 {
   struct gomp_device_descr *devicep = tgt->device_descr;
 
@@ -1082,7 +1145,7 @@  gomp_remove_var (struct gomp_device_descr *devicep
 
       if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
 	  || tgt->list[i].always_copy_from)
-	gomp_copy_dev2host (devicep,
+	gomp_copy_dev2host (devicep, aq,
 			    (void *) (k->host_start + tgt->list[i].offset),
 			    (void *) (k->tgt->tgt_start + k->tgt_offset
 				      + tgt->list[i].offset),
@@ -1091,14 +1154,28 @@  gomp_remove_var (struct gomp_device_descr *devicep
 	gomp_remove_var (devicep, k);
     }
 
-  if (tgt->refcount > 1)
-    tgt->refcount--;
+  if (aq)
+    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt,
+						(void *) tgt);
   else
-    gomp_unmap_tgt (tgt);
+    gomp_unref_tgt ((void *) tgt);
 
   gomp_mutex_unlock (&devicep->lock);
 }
 
+attribute_hidden void
+gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
+{
+  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
+}
+
+attribute_hidden void
+gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
+		       struct goacc_asyncqueue *aq)
+{
+  gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
+}
+
 static void
 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
 	     size_t *sizes, void *kinds, bool short_mapkind)
@@ -1148,9 +1225,10 @@  gomp_update (struct gomp_device_descr *devicep, si
 	    size_t size = cur_node.host_end - cur_node.host_start;
 
 	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
-	      gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
+	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
+				  NULL);
 	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
-	      gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
+	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
 	  }
       }
   gomp_mutex_unlock (&devicep->lock);
@@ -1443,9 +1521,21 @@  gomp_init_device (struct gomp_device_descr *device
 				   false);
     }
 
+  /* Initialize OpenACC asynchronous queues.  */
+  goacc_init_asyncqueues (devicep);
+
   devicep->state = GOMP_DEVICE_INITIALIZED;
 }
 
+attribute_hidden bool
+gomp_fini_device (struct gomp_device_descr *devicep)
+{
+  bool ret = goacc_fini_asyncqueues (devicep);
+  ret &= devicep->fini_device_func (devicep->target_id);
+  devicep->state = GOMP_DEVICE_FINALIZED;
+  return ret;
+}
+
 attribute_hidden void
 gomp_unload_device (struct gomp_device_descr *devicep)
 {
@@ -1954,7 +2044,7 @@  gomp_exit_data (struct gomp_device_descr *devicep,
 
 	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
 	      || kind == GOMP_MAP_ALWAYS_FROM)
-	    gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
+	    gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
 				(void *) (k->tgt->tgt_start + k->tgt_offset
 					  + cur_node.host_start
 					  - k->host_start),
@@ -2607,7 +2697,7 @@  gomp_load_plugin_for_device (struct gomp_device_de
    || (last_missing = #n, 0))
 
   DLSYM (version);
-  if (device->version_func () != GOMP_VERSION)
+  if (device->version_func () != GOMP_PLUGIN_IF_VERSION)
     {
       err = "plugin version mismatch";
       goto fail;
@@ -2636,20 +2726,20 @@  gomp_load_plugin_for_device (struct gomp_device_de
   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
     {
       if (!DLSYM_OPT (openacc.exec, openacc_exec)
-	  || !DLSYM_OPT (openacc.register_async_cleanup,
-			 openacc_register_async_cleanup)
-	  || !DLSYM_OPT (openacc.async_test, openacc_async_test)
-	  || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
-	  || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
-	  || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
-	  || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
-	  || !DLSYM_OPT (openacc.async_wait_all_async,
-			 openacc_async_wait_all_async)
-	  || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
 	  || !DLSYM_OPT (openacc.create_thread_data,
 			 openacc_create_thread_data)
 	  || !DLSYM_OPT (openacc.destroy_thread_data,
-			 openacc_destroy_thread_data))
+			 openacc_destroy_thread_data)
+	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
+	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
+	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
+	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
+	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
+	  || !DLSYM_OPT (openacc.async.queue_callback,
+			 openacc_async_queue_callback)
+	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
+	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
+	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev))
 	{
 	  /* Require all the OpenACC handlers if we have
 	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
@@ -2700,10 +2790,7 @@  gomp_target_fini (void)
       struct gomp_device_descr *devicep = &devices[i];
       gomp_mutex_lock (&devicep->lock);
       if (devicep->state == GOMP_DEVICE_INITIALIZED)
-	{
-	  ret = devicep->fini_device_func (devicep->target_id);
-	  devicep->state = GOMP_DEVICE_FINALIZED;
-	}
+	ret = gomp_fini_device (devicep);
       gomp_mutex_unlock (&devicep->lock);
       if (!ret)
 	gomp_fatal ("device finalization failed");