[02/10,OpenACC] Add OpenACC target kinds for decomposed kernels regions

Message ID b305f639-9850-e46c-9f7b-302c1a444915@codesourcery.com
State New
Headers show
Series
  • Rework handling of OpenACC kernels regions
Related show

Commit Message

Kwok Cheung Yeung July 17, 2019, 9:04 p.m.
This patch is in preparation for changes that will cut up OpenACC kernels 
regions into individual parts. For the new sub-regions that will be generated, 
this adds the following new kinds of OpenACC regions for internal use:

- GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED for parts of kernels 
regions to be executed in gang-redundant mode
- GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE for parts of kernels 
regions to be executed in gang-single mode
- GF_OMP_TARGET_KIND_OACC_DATA_KERNELS for data regions generated around the 
body of a kernels region

2019-07-16  Thomas Schwinge  <thomas@codesourcery.com>

	gcc/
	* gimple.h (enum gf_mask): Add new target kinds
	GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED,
	GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE, and
	GF_OMP_TARGET_KIND_OACC_DATA_KERNELS.
	(is_gimple_omp_oacc): Handle new target kinds.
	(is_gimple_omp_offloaded): Likewise.
	* gimple-pretty-print.c (dump_gimple_omp_target): Likewise.
	* omp-expand.c (expand_omp_target): Likewise.
	(build_omp_regions_1): Likewise.
	(omp_make_gimple_edges): Likewise.
	* omp-low.c (is_oacc_parallel): Likewise.
	(was_originally_oacc_kernels): New function.
	(scan_omp_for): Update check for illegal nesting.
	(check_omp_nesting_restrictions): Handle new target kinds.
	(lower_oacc_reductions): Likewise.
	(lower_omp_target): Likewise.
	* omp-offload.c (execute_oacc_device_lower): Likewise.
---
  gcc/gimple-pretty-print.c |  9 +++++++++
  gcc/gimple.h              | 14 +++++++++++++
  gcc/omp-expand.c          | 34 ++++++++++++++++++++++++++++----
  gcc/omp-low.c             | 50 ++++++++++++++++++++++++++++++++++++++++++-----
  gcc/omp-offload.c         | 20 +++++++++++++++++++
  5 files changed, 118 insertions(+), 9 deletions(-)

-- 
2.8.1

Comments

Jakub Jelinek July 18, 2019, 9:24 a.m. | #1
On Wed, Jul 17, 2019 at 10:04:10PM +0100, Kwok Cheung Yeung wrote:
> @@ -2319,7 +2339,8 @@ scan_omp_for (gomp_for *stmt, omp_context *outer_ctx)

>      {

>        omp_context *tgt = enclosing_target_ctx (outer_ctx);

> 

> -      if (!tgt || is_oacc_parallel (tgt))

> +      if (!tgt || (is_oacc_parallel (tgt)

> +                    && !was_originally_oacc_kernels (tgt)))

>  	for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))

>  	  {

>  	    char const *check = NULL;


Please watch the formatting; the above doesn't use tabs where it should.
Have you run the series through contrib/check_GNU_style.sh ?

Otherwise, no concerns about this particular patch, assuming Thomas is ok
with it.

	Jakub
Kwok Cheung Yeung Aug. 5, 2019, 9:58 p.m. | #2
I have run the whole patch series through check_GNU_style.sh and fixed 
up the formatting where indicated. Do I need to post the reformatted 
patchset?

Thanks

Kwok

On 18/07/2019 10:24 am, Jakub Jelinek wrote:
> On Wed, Jul 17, 2019 at 10:04:10PM +0100, Kwok Cheung Yeung wrote:

>> @@ -2319,7 +2339,8 @@ scan_omp_for (gomp_for *stmt, omp_context *outer_ctx)

>>       {

>>         omp_context *tgt = enclosing_target_ctx (outer_ctx);

>>

>> -      if (!tgt || is_oacc_parallel (tgt))

>> +      if (!tgt || (is_oacc_parallel (tgt)

>> +                    && !was_originally_oacc_kernels (tgt)))

>>   	for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))

>>   	  {

>>   	    char const *check = NULL;

> 

> Please watch up formatting, the above doesn't use tabs where it should.

> Have you run the series through contrib/check_GNU_style.sh ?

> 

> Otherwise, no concerns about this particular patch, assuming Thomas is ok

> with it.

> 

> 	Jakub

>

Patch

diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
index ce339ee..cf4d0e0 100644
--- a/gcc/gimple-pretty-print.c
+++ b/gcc/gimple-pretty-print.c
@@ -1691,6 +1691,15 @@  dump_gimple_omp_target (pretty_printer *buffer, gomp_target *gs,
      case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
        kind = " oacc_host_data";
        break;
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+      kind = " oacc_parallel_kernels_parallelized";
+      break;
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
+      kind = " oacc_parallel_kernels_gang_single";
+      break;
+    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
+      kind = " oacc_data_kernels";
+      break;
      default:
        gcc_unreachable ();
      }
diff --git a/gcc/gimple.h b/gcc/gimple.h
index 47070e7..d8423be 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -184,6 +184,15 @@  enum gf_mask {
      GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA = 9,
      GF_OMP_TARGET_KIND_OACC_DECLARE = 10,
      GF_OMP_TARGET_KIND_OACC_HOST_DATA = 11,
+    /* A GF_OMP_TARGET_KIND_OACC_PARALLEL that originates from a 'kernels'
+       construct, parallelized.  */
+    GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED = 12,
+    /* A GF_OMP_TARGET_KIND_OACC_PARALLEL that originates from a 'kernels'
+       construct, "gang-single".  */
+    GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE = 13,
+    /* A GF_OMP_TARGET_KIND_OACC_DATA that originates from a 'kernels'
+       construct.  */
+    GF_OMP_TARGET_KIND_OACC_DATA_KERNELS = 14,
      GF_OMP_TEAMS_GRID_PHONY	= 1 << 0,
      GF_OMP_TEAMS_HOST		= 1 << 1,

@@ -6479,6 +6488,9 @@  is_gimple_omp_oacc (const gimple *stmt)
  	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
  	case GF_OMP_TARGET_KIND_OACC_DECLARE:
  	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
+	case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
  	  return true;
  	default:
  	  return false;
@@ -6503,6 +6515,8 @@  is_gimple_omp_offloaded (const gimple *stmt)
  	case GF_OMP_TARGET_KIND_REGION:
  	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
  	case GF_OMP_TARGET_KIND_OACC_KERNELS:
+	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
  	  return true;
  	default:
  	  return false;
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
index c007ec1..7e4d5a8 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -7914,6 +7914,8 @@  expand_omp_target (struct omp_region *region)
      case GF_OMP_TARGET_KIND_ENTER_DATA:
      case GF_OMP_TARGET_KIND_EXIT_DATA:
      case GF_OMP_TARGET_KIND_OACC_PARALLEL:
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
      case GF_OMP_TARGET_KIND_OACC_KERNELS:
      case GF_OMP_TARGET_KIND_OACC_UPDATE:
      case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
@@ -7923,6 +7925,7 @@  expand_omp_target (struct omp_region *region)
      case GF_OMP_TARGET_KIND_DATA:
      case GF_OMP_TARGET_KIND_OACC_DATA:
      case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
        data_region = true;
        break;
      default:
@@ -7945,16 +7948,30 @@  expand_omp_target (struct omp_region *region)
    entry_bb = region->entry;
    exit_bb = region->exit;

-  if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
+  /* Further down, all OpenACC compute constructs will be mapped to
+     BUILT_IN_GOACC_PARALLEL, and to distinguish between them, we now attach
+     attributes.  */
+  switch (gimple_omp_target_kind (entry_stmt))
      {
+    case GF_OMP_TARGET_KIND_OACC_KERNELS:
        mark_loops_in_oacc_kernels_region (region->entry, region->exit);

-      /* Further down, both OpenACC kernels and OpenACC parallel constructs
-	 will be mappted to BUILT_IN_GOACC_PARALLEL, and to distinguish the
-	 two, there is an "oacc kernels" attribute set for OpenACC kernels.  */
        DECL_ATTRIBUTES (child_fn)
  	= tree_cons (get_identifier ("oacc kernels"),
  		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
+      break;
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+      DECL_ATTRIBUTES (child_fn)
+	= tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
+		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
+      break;
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
+      DECL_ATTRIBUTES (child_fn)
+	= tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
+		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
+      break;
+    default:
+      break;
      }

    if (offloaded)
@@ -8159,10 +8176,13 @@  expand_omp_target (struct omp_region *region)
        break;
      case GF_OMP_TARGET_KIND_OACC_KERNELS:
      case GF_OMP_TARGET_KIND_OACC_PARALLEL:
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
        start_ix = BUILT_IN_GOACC_PARALLEL;
        break;
      case GF_OMP_TARGET_KIND_OACC_DATA:
      case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
        start_ix = BUILT_IN_GOACC_DATA_START;
        break;
      case GF_OMP_TARGET_KIND_OACC_UPDATE:
@@ -8916,6 +8936,9 @@  build_omp_regions_1 (basic_block bb, struct omp_region *parent,
  		case GF_OMP_TARGET_KIND_OACC_KERNELS:
  		case GF_OMP_TARGET_KIND_OACC_DATA:
  		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+		case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+		case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
+		case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
  		  break;
  		case GF_OMP_TARGET_KIND_UPDATE:
  		case GF_OMP_TARGET_KIND_ENTER_DATA:
@@ -9170,6 +9193,9 @@  omp_make_gimple_edges (basic_block bb, struct omp_region **region,
  	case GF_OMP_TARGET_KIND_OACC_KERNELS:
  	case GF_OMP_TARGET_KIND_OACC_DATA:
  	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
+	case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
  	  break;
  	case GF_OMP_TARGET_KIND_UPDATE:
  	case GF_OMP_TARGET_KIND_ENTER_DATA:
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index a855c5b..623da18 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -178,8 +178,12 @@  is_oacc_parallel (omp_context *ctx)
  {
    enum gimple_code outer_type = gimple_code (ctx->stmt);
    return ((outer_type == GIMPLE_OMP_TARGET)
-	  && (gimple_omp_target_kind (ctx->stmt)
-	      == GF_OMP_TARGET_KIND_OACC_PARALLEL));
+	  && ((gimple_omp_target_kind (ctx->stmt)
+	       == GF_OMP_TARGET_KIND_OACC_PARALLEL)
+	      || (gimple_omp_target_kind (ctx->stmt)
+		  == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED)
+	      || (gimple_omp_target_kind (ctx->stmt)
+		  == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE)));
  }

  /* Return true if CTX corresponds to an oacc kernels region.  */
@@ -193,6 +197,22 @@  is_oacc_kernels (omp_context *ctx)
  	      == GF_OMP_TARGET_KIND_OACC_KERNELS));
  }

+/* Return true if CTX corresponds to an oacc region that was generated from
+   an original kernels region that has been lowered to parallel regions.  */
+
+static bool
+was_originally_oacc_kernels (omp_context *ctx)
+{
+  enum gimple_code outer_type = gimple_code (ctx->stmt);
+  return ((outer_type == GIMPLE_OMP_TARGET)
+	  && ((gimple_omp_target_kind (ctx->stmt)
+	       == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED)
+	      || (gimple_omp_target_kind (ctx->stmt)
+		  == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE)
+	      || (gimple_omp_target_kind (ctx->stmt)
+		  == GF_OMP_TARGET_KIND_OACC_DATA_KERNELS)));
+}
+
  /* If DECL is the artificial dummy VAR_DECL created for non-static
     data member privatization, return the underlying "this" parameter,
     otherwise return NULL.  */
@@ -2319,7 +2339,8 @@  scan_omp_for (gomp_for *stmt, omp_context *outer_ctx)
      {
        omp_context *tgt = enclosing_target_ctx (outer_ctx);

-      if (!tgt || is_oacc_parallel (tgt))
+      if (!tgt || (is_oacc_parallel (tgt)
+                    && !was_originally_oacc_kernels (tgt)))
  	for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
  	  {
  	    char const *check = NULL;
@@ -2752,6 +2773,8 @@  check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx)
  		  {
  		  case GF_OMP_TARGET_KIND_OACC_PARALLEL:
  		  case GF_OMP_TARGET_KIND_OACC_KERNELS:
+		  case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+		  case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
  		    ok = true;
  		    break;

@@ -3207,6 +3230,11 @@  check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx)
  	    case GF_OMP_TARGET_KIND_OACC_DECLARE: stmt_name = "declare"; break;
  	    case GF_OMP_TARGET_KIND_OACC_HOST_DATA: stmt_name = "host_data";
  	      break;
+	    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+	    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
+	    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
+	      /* These three cases arise from kernels conversion.  */
+	      stmt_name = "kernels"; break;
  	    default: gcc_unreachable ();
  	    }
  	  switch (gimple_omp_target_kind (ctx->stmt))
@@ -3220,6 +3248,11 @@  check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx)
  	    case GF_OMP_TARGET_KIND_OACC_DATA: ctx_stmt_name = "data"; break;
  	    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
  	      ctx_stmt_name = "host_data"; break;
+	    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+	    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
+	    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
+	      /* These three cases arise from kernels conversion.  */
+	      ctx_stmt_name = "kernels"; break;
  	    default: gcc_unreachable ();
  	    }

@@ -6375,8 +6408,12 @@  lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
  		    break;

  		  case GIMPLE_OMP_TARGET:
-		    if (gimple_omp_target_kind (probe->stmt)
-			!= GF_OMP_TARGET_KIND_OACC_PARALLEL)
+		    if ((gimple_omp_target_kind (probe->stmt)
+			 != GF_OMP_TARGET_KIND_OACC_PARALLEL)
+			&& (gimple_omp_target_kind (probe->stmt)
+			    != GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED)
+			&& (gimple_omp_target_kind (probe->stmt)
+			    != GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE))
  		      goto do_lookup;

  		    cls = gimple_omp_target_clauses (probe->stmt);
@@ -11027,11 +11064,14 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
      case GF_OMP_TARGET_KIND_OACC_UPDATE:
      case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
      case GF_OMP_TARGET_KIND_OACC_DECLARE:
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
        data_region = false;
        break;
      case GF_OMP_TARGET_KIND_DATA:
      case GF_OMP_TARGET_KIND_OACC_DATA:
      case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
        data_region = true;
        break;
      default:
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index da788d9..4ebfa83 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -1525,6 +1525,20 @@  execute_oacc_device_lower ()
    bool is_oacc_kernels_parallelized
      = (lookup_attribute ("oacc kernels parallelized",
  			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
+  if (is_oacc_kernels_parallelized)
+    gcc_checking_assert (is_oacc_kernels);
+  bool is_oacc_parallel_kernels_parallelized
+    = (lookup_attribute ("oacc parallel_kernels_parallelized",
+			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
+  if (is_oacc_parallel_kernels_parallelized)
+    gcc_checking_assert (!is_oacc_kernels);
+  bool is_oacc_parallel_kernels_gang_single
+    = (lookup_attribute ("oacc parallel_kernels_gang_single",
+			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
+  if (is_oacc_parallel_kernels_gang_single)
+    gcc_checking_assert (!is_oacc_kernels);
+  gcc_checking_assert (!(is_oacc_parallel_kernels_parallelized
+			 && is_oacc_parallel_kernels_gang_single));

    /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
       kernels, so remove the parallelism dimensions function attributes
@@ -1548,6 +1562,12 @@  execute_oacc_device_lower ()
  	fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
  		 (is_oacc_kernels_parallelized
  		  ? "parallelized" : "unparallelized"));
+      else if (is_oacc_parallel_kernels_parallelized)
+	fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
+		 "parallel_kernels_parallelized");
+      else if (is_oacc_parallel_kernels_gang_single)
+	fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
+		 "parallel_kernels_gang_single");
        else
  	fprintf (dump_file, "Function is OpenACC parallel offload\n");
      }