[9/9,nvptx] Enable setting vector length using -fopenacc-dim -- testcases

Message ID 20190112222131.29519-10-tdevries@suse.de
State New
Headers show
Series
  • Add support for warp-multiple openacc vector length
Related show

Commit Message

Tom de Vries Jan. 12, 2019, 10:21 p.m.
Add some test-cases that set vector length using -fopenacc-dim.

2019-01-12  Tom de Vries  <tdevries@suse.de>

	* testsuite/libgomp.oacc-c-c++-common/pr85486-2.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c: New test.
	* testsuite/libgomp.oacc-fortran/gemm-2.f90: New test.
---
 .../libgomp.oacc-c-c++-common/pr85486-2.c          | 52 ++++++++++++++
 .../vector-length-128-2.c                          | 39 +++++++++++
 .../vector-length-128-5.c                          | 41 +++++++++++
 libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90  | 80 ++++++++++++++++++++++
 4 files changed, 212 insertions(+)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90

-- 
2.16.4

Patch

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
new file mode 100644
index 00000000000..f6ca263166d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
@@ -0,0 +1,52 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-fopenacc-dim=::128" } */
+
+/* Minimized from ref-1.C.  */
+
+#include <stdio.h>
+
+#pragma acc routine vector
+void __attribute__((noinline, noclone))  /* Must stay a separate, uncloned function.  */
+Vector (int *ptr, int n, const int inc)  /* Add INC to ptr[0] .. ptr[n - 1].  */
+{
+  #pragma acc loop vector
+  for (unsigned ix = 0; ix < n; ix++)  /* n is converted to unsigned for this compare.  */
+    ptr[ix] += inc;
+}
+
+int
+main (void)  /* Returns 0 when every element matches, 1 otherwise.  */
+{
+  const int n = 32, m=32;
+
+  int ary[m][n];
+  unsigned ix,  iy;
+
+  for (ix = m; ix--;)  /* Walks ix from m - 1 down to 0.  */
+    for (iy = n; iy--;)
+      ary[ix][iy] = (1 << 16) + (ix << 8) + iy;  /* Tag 1 << 16 plus row and column.  */
+
+  int err = 0;
+
+#pragma acc parallel copy (ary)
+  {
+    Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));  /* Swaps the 1 << 16 tag for 1 << 24.  */
+  }
+
+  for (ix = m; ix--;)
+    for (iy = n; iy--;)
+      if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy))  /* Row/column bits must be unchanged.  */
+	{
+	  printf ("ary[%u][%u] = %x expected %x\n",
+		  ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy));
+	  err++;  /* Count mismatches rather than stopping at the first one.  */
+	}
+
+  if (err)
+    {
+      printf ("%d failed\n", err);
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c
new file mode 100644
index 00000000000..8b5b2a4a92d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c
@@ -0,0 +1,39 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-fopenacc-dim=::128" } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
+
+#include <stdlib.h>
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)  /* Aborts if any c[i] differs from a[i] + b[i].  */
+{
+  for (unsigned int i = 0; i < n; ++i)  /* Fill both inputs on the host.  */
+    {
+      a[i] = i % 3;
+      b[i] = i % 5;
+    }
+
+#pragma acc parallel copyin (a,b) copyout (c)
+  {
+#pragma acc loop vector
+    for (unsigned int i = 0; i < n; i++)  /* No vector_length clause is given in the source.  */
+      c[i] = a[i] + b[i];
+  }
+
+  for (unsigned int i = 0; i < n; ++i)
+    if (c[i] != (i % 3) + (i % 5))  /* Recompute the expected sum on the host.  */
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=128" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c
new file mode 100644
index 00000000000..e60f1c28db4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c
@@ -0,0 +1,41 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-fopenacc-dim=:2:128" } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
+
+#include <stdlib.h>
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)  /* Aborts if any c[i] differs from a[i] + b[i].  */
+{
+  for (unsigned int i = 0; i < n; ++i)  /* Fill both inputs on the host.  */
+    {
+      a[i] = i % 3;
+      b[i] = i % 5;
+    }
+
+#pragma acc parallel copyin (a,b) copyout (c)
+  {
+#pragma acc loop worker
+    for (unsigned int i = 0; i < 4; i++)  /* Four chunks of N / 4 elements each.  */
+#pragma acc loop vector
+      for (unsigned int j = 0; j < n / 4; j++)  /* j indexes within chunk i.  */
+	c[(i * N / 4) + j] = a[(i * N / 4) + j] + b[(i * N / 4) + j];
+  }
+
+  for (unsigned int i = 0; i < n; ++i)
+    if (c[i] != (i % 3) + (i % 5))  /* Recompute the expected sum on the host.  */
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 2, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=2, vectors=128" } */
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90
new file mode 100644
index 00000000000..fe108732a5f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90
@@ -0,0 +1,80 @@ 
+! Exercise three levels of parallelism using SGEMM from BLAS.
+
+! { dg-do run }
+! { dg-additional-options "-fopenacc-dim=::128" }
+
+! Implicitly set vector_length to 128 using -fopenacc-dim.
+subroutine openacc_sgemm (m, n, k, alpha, a, b, beta, c)  ! c = alpha * transpose(a) * b + beta * c
+  integer :: m, n, k  ! c is m x n; k is the length of each dot product
+  real :: alpha, beta
+  real :: a(k,*), b(k,*), c(m,*)
+
+  integer :: i, j, l
+  real :: temp
+
+  !$acc parallel loop copy(c(1:m,1:n)) copyin(a(1:k,1:m),b(1:k,1:n)) firstprivate (temp)
+  do j = 1, n
+     !$acc loop
+     do i = 1, m
+        temp = 0.0  ! reset the accumulator for element (i,j)
+        !$acc loop reduction(+:temp)
+        do l = 1, k
+           temp = temp + a(l,i)*b(l,j)  ! column i of a dotted with column j of b
+        end do
+        if(beta == 0.0) then
+           c(i,j) = alpha*temp  ! old c(i,j) is not read when beta is zero
+        else
+           c(i,j) = alpha*temp + beta*c(i,j)
+        end if
+     end do
+  end do
+end subroutine openacc_sgemm
+
+subroutine host_sgemm (m, n, k, alpha, a, b, beta, c)  ! c = alpha * transpose(a) * b + beta * c
+  integer :: m, n, k  ! c is m x n; k is the length of each dot product
+  real :: alpha, beta
+  real :: a(k,*), b(k,*), c(m,*)
+
+  integer :: i, j, l
+  real :: temp
+
+  do j = 1, n  ! plain sequential loops: no OpenACC directives in this routine
+     do i = 1, m
+        temp = 0.0  ! reset the accumulator for element (i,j)
+        do l = 1, k
+           temp = temp + a(l,i)*b(l,j)  ! column i of a dotted with column j of b
+        end do
+        if(beta == 0.0) then
+           c(i,j) = alpha*temp  ! old c(i,j) is not read when beta is zero
+        else
+           c(i,j) = alpha*temp + beta*c(i,j)
+        end if
+     end do
+  end do
+end subroutine host_sgemm
+
+program main
+  ! Cross-check the OpenACC SGEMM against the host one; exit status 1 on mismatch.
+  implicit none
+  integer, parameter :: M = 100, N = 50, K = 2000
+  real :: a(K, M), b(K, N), c(M, N), e(M, N)
+  real :: alpha, beta
+  integer :: i, j
+  ! Each product a*b is 0.25, so every partial sum is a multiple of 0.25 <= 500.
+  a(:,:) = 1.0
+  b(:,:) = 0.25
+  c(:,:) = 0.0
+  e(:,:) = 0.0
+  alpha = 1.05
+  beta = 1.25
+
+  call openacc_sgemm (M, N, K, alpha, a, b, beta, c)
+  call host_sgemm (M, N, K, alpha, a, b, beta, e)
+
+  ! Results are compared exactly, element by element; j is the outer loop so
+  ! that c and e are traversed in column-major (storage) order.
+  do j = 1, N
+     do i = 1, M
+        if (c(i,j) /= e(i,j)) stop 1
+     end do
+  end do
+end program main