[1/9,nvptx] Enable large vectors

Message ID 20190112222131.29519-2-tdevries@suse.de
State New
Headers show
Series
  • Add support for warp-multiple openacc vector length
Related show

Commit Message

Tom de Vries Jan. 12, 2019, 10:21 p.m.
Allow vector_length clauses to accept values larger than warp size.  Note that
this does not enable setting vector_length to values larger than warp size using
-fopenacc-dim.

2018-12-17  Tom de Vries  <tdevries@suse.de>

	* config/nvptx/nvptx.c (nvptx_goacc_validate_dims): Take larger vector
	lengths into account.

	* testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c: Expect
	vector length to be 128.
	* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Expect vector
	length 2097152 to be reduced to 1024 instead of 32.
---
 gcc/config/nvptx/nvptx.c                                          | 2 +-
 libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c       | 4 ++--
 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c | 5 ++---
 3 files changed, 5 insertions(+), 6 deletions(-)

-- 
2.16.4

Patch

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 1d9704543d9..8d2740cd50f 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -96,7 +96,7 @@ 
 #define PTX_NUM_PER_WORKER_BARRIERS (PTX_CTA_NUM_BARRIERS - PTX_NUM_PER_CTA_BARRIERS)
 
 #define PTX_DEFAULT_VECTOR_LENGTH PTX_WARP_SIZE
-#define PTX_MAX_VECTOR_LENGTH PTX_WARP_SIZE
+#define PTX_MAX_VECTOR_LENGTH PTX_CTA_SIZE
 #define PTX_WORKER_LENGTH 32
 #define PTX_DEFAULT_RUNTIME_DIM 0 /* Defer to runtime.  */
 
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
index 4a9854662cc..d7cd0461b53 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -350,7 +350,7 @@  int main ()
     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     gangs_min = workers_min = vectors_min = INT_MAX;
     gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring 2097152" "" { target openacc_nvidia_accel_configured } } */ \
+#pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(1024\\), ignoring 2097152" "" { target openacc_nvidia_accel_configured } } */ \
   vector_length (VECTORS)
     {
       if (acc_on_device (acc_device_host))
@@ -361,7 +361,7 @@  int main ()
       else if (acc_on_device (acc_device_nvidia))
 	{
 	  /* The GCC nvptx back end enforces vector_length (32).  */
-	  vectors_actual = 32;
+	  vectors_actual = 1024;
 	}
       else
 	__builtin_abort ();
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c
index fab5b0d25d1..18d77cc5ecb 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c
@@ -33,7 +33,6 @@  main (void)
 
   return 0;
 }
-/* { dg-prune-output "using vector_length \\(32\\), ignoring 128" } */
 
-/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */
-/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=128" } */