[1/7,OpenACC,libgomp,v5,stage1] Async re-work, interfaces

Message ID 2d574c4f-444e-6aad-121b-a82b075f6cca@mentor.com
State New
Headers show
Series
  • Async re-work
Related show

Commit Message

Chung-Lin Tang Jan. 22, 2019, 2:52 p.m.
This patch separates out the header interface changes.

A new GOMP_PLUGIN_IF_VERSION symbol has been defined in include/gomp-constants.h,
specifically for versioning the plugin interface. Corresponding places in libgomp
have been updated, leaving GOMP_VERSION* only for versioning libgomp proper and
the offload images.

     include/
     * gomp-constants.h (GOMP_ASYNC_DEFAULT): Define.
     (GOMP_PLUGIN_IF_VERSION): New version symbol for plugin interface.
     Update surrounding comments.

     libgomp/
     * libgomp-plugin.h (struct goacc_asyncqueue): Declare.
     (struct goacc_asyncqueue_list): Likewise.
     (goacc_aq): Likewise.
     (goacc_aq_list): Likewise.
     (GOMP_OFFLOAD_openacc_register_async_cleanup): Remove.
     (GOMP_OFFLOAD_openacc_async_test): Remove.
     (GOMP_OFFLOAD_openacc_async_test_all): Remove.
     (GOMP_OFFLOAD_openacc_async_wait): Remove.
     (GOMP_OFFLOAD_openacc_async_wait_async): Remove.
     (GOMP_OFFLOAD_openacc_async_wait_all): Remove.
     (GOMP_OFFLOAD_openacc_async_wait_all_async): Remove.
     (GOMP_OFFLOAD_openacc_async_set_async): Remove.
     (GOMP_OFFLOAD_openacc_exec): Adjust declaration.
     (GOMP_OFFLOAD_openacc_cuda_get_stream): Likewise.
     (GOMP_OFFLOAD_openacc_cuda_set_stream): Likewise.
     (GOMP_OFFLOAD_openacc_async_exec): Declare.
     (GOMP_OFFLOAD_openacc_async_construct): Declare.
     (GOMP_OFFLOAD_openacc_async_destruct): Declare.
     (GOMP_OFFLOAD_openacc_async_test): Declare.
     (GOMP_OFFLOAD_openacc_async_synchronize): Declare.
     (GOMP_OFFLOAD_openacc_async_serialize): Declare.
     (GOMP_OFFLOAD_openacc_async_queue_callback): Declare.
     (GOMP_OFFLOAD_openacc_async_host2dev): Declare.
     (GOMP_OFFLOAD_openacc_async_dev2host): Declare.

     * libgomp.h (struct acc_dispatch_t): Define 'async' sub-struct.
     Delete register_async_cleanup_func, async_test_func, async_test_all_func,
     async_wait_func, async_wait_async_func, async_wait_all_func,
     async_wait_all_async_func, async_set_async_func hook fields.
     (gomp_acc_insert_pointer): Adjust declaration.
     (gomp_copy_host2dev): New declaration.
     (gomp_copy_dev2host): Likewise.
     (gomp_map_vars_async): Likewise.
     (gomp_unmap_tgt): Likewise.
     (gomp_unmap_vars_async): Likewise.
     (gomp_fini_device): Likewise.

Patch

Index: include/gomp-constants.h
===================================================================
--- include/gomp-constants.h	(revision 268142)
+++ include/gomp-constants.h	(working copy)
@@ -160,6 +160,7 @@  enum gomp_map_kind
 /* Asynchronous behavior.  Keep in sync with
    libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_async_t.  */
 
+#define GOMP_ASYNC_DEFAULT		0
 #define GOMP_ASYNC_NOVAL		-1
 #define GOMP_ASYNC_SYNC			-2
 
@@ -208,10 +209,13 @@  enum gomp_map_kind
 #define GOACC_FLAGS_MARSHAL_OP		BIT_NOT_EXPR
 #define GOACC_FLAGS_UNMARSHAL(X)	(~(X))
 
+/* Version number of plugin interface between libgomp and device-specific
+   plugins.  GOMP_PLUGIN_IF_VERSION should be incremented whenever a change in
+   the plugin hook interface defined in libgomp/libgomp.h is introduced.
+   There is no compatibility between mismatched libgomp-proper/plugins.  */
+#define GOMP_PLUGIN_IF_VERSION 2
 
-/* Versions of libgomp and device-specific plugins.  GOMP_VERSION
-   should be incremented whenever an ABI-incompatible change is introduced
-   to the plugin interface defined in libgomp/libgomp.h.  */
+/* Version numbers of libgomp and device-specific offload images.  */
 #define GOMP_VERSION	1
 #define GOMP_VERSION_NVIDIA_PTX 1
 #define GOMP_VERSION_INTEL_MIC 0
Index: libgomp/libgomp-plugin.h
===================================================================
--- libgomp/libgomp-plugin.h	(revision 268142)
+++ libgomp/libgomp-plugin.h	(working copy)
@@ -53,6 +53,20 @@  enum offload_target_type
   OFFLOAD_TARGET_TYPE_HSA = 7
 };
 
+/* Opaque type to represent plugin-dependent implementation of an
+   OpenACC asynchronous queue.  */
+struct goacc_asyncqueue;
+
+/* Used to keep a list of active asynchronous queues.  */
+struct goacc_asyncqueue_list
+{
+  struct goacc_asyncqueue *aq;
+  struct goacc_asyncqueue_list *next;
+};
+
+typedef struct goacc_asyncqueue *goacc_aq;
+typedef struct goacc_asyncqueue_list *goacc_aq_list;
+
 /* Auxiliary struct, used for transferring pairs of addresses from plugin
    to libgomp.  */
 struct addr_pair
@@ -93,22 +107,31 @@  extern bool GOMP_OFFLOAD_dev2dev (int, void *, con
 extern bool GOMP_OFFLOAD_can_run (void *);
 extern void GOMP_OFFLOAD_run (int, void *, void *, void **);
 extern void GOMP_OFFLOAD_async_run (int, void *, void *, void **, void *);
+
 extern void GOMP_OFFLOAD_openacc_exec (void (*) (void *), size_t, void **,
-				       void **, int, unsigned *, void *);
-extern void GOMP_OFFLOAD_openacc_register_async_cleanup (void *, int);
-extern int GOMP_OFFLOAD_openacc_async_test (int);
-extern int GOMP_OFFLOAD_openacc_async_test_all (void);
-extern void GOMP_OFFLOAD_openacc_async_wait (int);
-extern void GOMP_OFFLOAD_openacc_async_wait_async (int, int);
-extern void GOMP_OFFLOAD_openacc_async_wait_all (void);
-extern void GOMP_OFFLOAD_openacc_async_wait_all_async (int);
-extern void GOMP_OFFLOAD_openacc_async_set_async (int);
+				       void **, unsigned *, void *);
 extern void *GOMP_OFFLOAD_openacc_create_thread_data (int);
 extern void GOMP_OFFLOAD_openacc_destroy_thread_data (void *);
+extern struct goacc_asyncqueue *GOMP_OFFLOAD_openacc_async_construct (void);
+extern bool GOMP_OFFLOAD_openacc_async_destruct (struct goacc_asyncqueue *);
+extern int GOMP_OFFLOAD_openacc_async_test (struct goacc_asyncqueue *);
+extern bool GOMP_OFFLOAD_openacc_async_synchronize (struct goacc_asyncqueue *);
+extern bool GOMP_OFFLOAD_openacc_async_serialize (struct goacc_asyncqueue *,
+						  struct goacc_asyncqueue *);
+extern void GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *,
+						       void (*)(void *), void *);
+extern void GOMP_OFFLOAD_openacc_async_exec (void (*) (void *), size_t, void **,
+					     void **, unsigned *, void *,
+					     struct goacc_asyncqueue *);
+extern bool GOMP_OFFLOAD_openacc_async_dev2host (int, void *, const void *, size_t,
+						 struct goacc_asyncqueue *);
+extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *, size_t,
+						 struct goacc_asyncqueue *);
 extern void *GOMP_OFFLOAD_openacc_cuda_get_current_device (void);
 extern void *GOMP_OFFLOAD_openacc_cuda_get_current_context (void);
-extern void *GOMP_OFFLOAD_openacc_cuda_get_stream (int);
-extern int GOMP_OFFLOAD_openacc_cuda_set_stream (int, void *);
+extern void *GOMP_OFFLOAD_openacc_cuda_get_stream (struct goacc_asyncqueue *);
+extern int GOMP_OFFLOAD_openacc_cuda_set_stream (struct goacc_asyncqueue *,
+						 void *);
 
 #ifdef __cplusplus
 }
Index: libgomp/libgomp.h
===================================================================
--- libgomp/libgomp.h	(revision 268142)
+++ libgomp/libgomp.h	(working copy)
@@ -949,25 +949,29 @@  typedef struct acc_dispatch_t
   /* Execute.  */
   __typeof (GOMP_OFFLOAD_openacc_exec) *exec_func;
 
-  /* Async cleanup callback registration.  */
-  __typeof (GOMP_OFFLOAD_openacc_register_async_cleanup)
-    *register_async_cleanup_func;
-
-  /* Asynchronous routines.  */
-  __typeof (GOMP_OFFLOAD_openacc_async_test) *async_test_func;
-  __typeof (GOMP_OFFLOAD_openacc_async_test_all) *async_test_all_func;
-  __typeof (GOMP_OFFLOAD_openacc_async_wait) *async_wait_func;
-  __typeof (GOMP_OFFLOAD_openacc_async_wait_async) *async_wait_async_func;
-  __typeof (GOMP_OFFLOAD_openacc_async_wait_all) *async_wait_all_func;
-  __typeof (GOMP_OFFLOAD_openacc_async_wait_all_async)
-    *async_wait_all_async_func;
-  __typeof (GOMP_OFFLOAD_openacc_async_set_async) *async_set_async_func;
-
   /* Create/destroy TLS data.  */
   __typeof (GOMP_OFFLOAD_openacc_create_thread_data) *create_thread_data_func;
   __typeof (GOMP_OFFLOAD_openacc_destroy_thread_data)
     *destroy_thread_data_func;
+  
+  struct {
+    gomp_mutex_t lock;
+    int nasyncqueue;
+    struct goacc_asyncqueue **asyncqueue;
+    struct goacc_asyncqueue_list *active;
 
+    __typeof (GOMP_OFFLOAD_openacc_async_construct) *construct_func;
+    __typeof (GOMP_OFFLOAD_openacc_async_destruct) *destruct_func;
+    __typeof (GOMP_OFFLOAD_openacc_async_test) *test_func;
+    __typeof (GOMP_OFFLOAD_openacc_async_synchronize) *synchronize_func;
+    __typeof (GOMP_OFFLOAD_openacc_async_serialize) *serialize_func;
+    __typeof (GOMP_OFFLOAD_openacc_async_queue_callback) *queue_callback_func;
+
+    __typeof (GOMP_OFFLOAD_openacc_async_exec) *exec_func;
+    __typeof (GOMP_OFFLOAD_openacc_async_dev2host) *dev2host_func;
+    __typeof (GOMP_OFFLOAD_openacc_async_host2dev) *host2dev_func;
+  } async;
+
   /* NVIDIA target specific routines.  */
   struct {
     __typeof (GOMP_OFFLOAD_openacc_cuda_get_current_device)
@@ -1053,17 +1057,33 @@  enum gomp_map_vars_kind
   GOMP_MAP_VARS_ENTER_DATA
 };
 
-extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *);
+extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *, int);
 extern void gomp_acc_remove_pointer (void *, size_t, bool, int, int, int);
 extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *,
 				       unsigned short *);
+struct gomp_coalesce_buf;
+extern void gomp_copy_host2dev (struct gomp_device_descr *,
+				struct goacc_asyncqueue *, void *, const void *,
+				size_t, struct gomp_coalesce_buf *);
+extern void gomp_copy_dev2host (struct gomp_device_descr *,
+				struct goacc_asyncqueue *, void *, const void *,
+				size_t);
 
 extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
 					      size_t, void **, void **,
 					      size_t *, void *, bool,
 					      enum gomp_map_vars_kind);
+extern struct target_mem_desc *gomp_map_vars_async (struct gomp_device_descr *,
+						    struct goacc_asyncqueue *,
+						    size_t, void **, void **,
+						    size_t *, void *, bool,
+						    enum gomp_map_vars_kind);
+extern void gomp_unmap_tgt (struct target_mem_desc *);
 extern void gomp_unmap_vars (struct target_mem_desc *, bool);
+extern void gomp_unmap_vars_async (struct target_mem_desc *, bool,
+				   struct goacc_asyncqueue *);
 extern void gomp_init_device (struct gomp_device_descr *);
+extern bool gomp_fini_device (struct gomp_device_descr *);
 extern void gomp_free_memmap (struct splay_tree_s *);
 extern void gomp_unload_device (struct gomp_device_descr *);
 extern bool gomp_remove_var (struct gomp_device_descr *, splay_tree_key);