[4/9,nvptx] Enable large vectors -- reduction testcases

Message ID 20190112222131.29519-5-tdevries@suse.de
State New
Headers show
Series
  • Add support for warp-multiple openacc vector length
Related show

Commit Message

Tom de Vries Jan. 12, 2019, 10:21 p.m.
Add various reduction test-cases with vector length 128.

2018-12-17  Tom de Vries  <tdevries@suse.de>

	* testsuite/libgomp.oacc-c-c++-common/vred2d-128.c: New test.
	* testsuite/libgomp.oacc-fortran/gemm.f90: New test.
	* testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c: New test.
---
 .../vector-length-128-10.c                         | 39 +++++++++++
 .../libgomp.oacc-c-c++-common/vred2d-128.c         | 55 +++++++++++++++
 libgomp/testsuite/libgomp.oacc-fortran/gemm.f90    | 79 ++++++++++++++++++++++
 3 files changed, 173 insertions(+)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/gemm.f90

-- 
2.16.4

Patch

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c
new file mode 100644
index 00000000000..0658cfde7ad
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c
@@ -0,0 +1,39 @@ 
+/* { dg-do run } */
+/* Sum reduction over N elements into two accumulators, using vector_length (128).  */
+#include <stdlib.h>
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];  /* Not referenced by the code in this file.  */
+unsigned int n = N;
+
+int
+main (void)
+{
+  for (unsigned int i = 0; i < n; ++i)  /* Fill the inputs with small periodic values.  */
+    {
+      a[i] = i % 3;
+      b[i] = i % 5;
+    }
+  /* Both accumulators start at 1, so the expected totals below include that 1.  */
+  unsigned int res = 1;
+  unsigned long long res2 = 1;
+#pragma acc parallel vector_length (128) copyin (a,b) reduction (+:res, res2) copy (res, res2)
+  {
+#pragma acc loop vector reduction (+:res, res2)
+    for (unsigned int i = 0; i < n; i++)
+      {
+	res += ((a[i] + b[i]) % 2);  /* Adds 1 when a[i] + b[i] is odd.  */
+	res2 += ((a[i] + b[i]) % 2);  /* Same sum, accumulated in unsigned long long.  */
+      }
+  }
+  /* 477 of the 1024 sums are odd; with the initial 1 each total is 478.  */
+  if (res != 478)
+    abort ();
+  if (res2 != 478)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c
new file mode 100644
index 00000000000..86171d456e0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c
@@ -0,0 +1,55 @@ 
+/* Test large vector lengths.  */
+/* Each generated test fills a1/a2 via inner-loop reductions and then asserts on them.  */
+#include <assert.h>
+
+#define n 10000
+int a1[n], a2[n];
+/* gentest (NAME, OUTER, INNER): define NAME; OUTER/INNER are pragma strings for the two loops.  */
+#define gentest(name, outer, inner)		\
+  void name ()					\
+  {						\
+  long i, j, t1, t2, t3; /* t3 is unused.  */	\
+  _Pragma(outer)				\
+  for (i = 0; i < n; i++)			\
+    {						\
+      t1 = 0;					\
+      t2 = 0;					\
+      _Pragma(inner)				\
+      for (j = i; j < n; j++) /* n - i trips.  */	\
+	{					\
+	  t1++;					\
+	  t2--;					\
+	}					\
+      a1[i] = t1;				\
+      a2[i] = t2;				\
+    }						\
+  for (i = 0; i < n; i++) /* Check; no pragma.  */	\
+    {						\
+      assert (a1[i] == n-i);			\
+      assert (a2[i] == -(n-i));			\
+    }						\
+  }						\
+
+gentest (test1, "acc parallel loop gang vector_length (128) firstprivate (t1, t2)",
+	 "acc loop vector reduction(+:t1) reduction(-:t2)")  /* Gang outer, vector inner.  */
+/* test2: same outer pragma as test1; the inner loop is marked worker vector.  */
+gentest (test2, "acc parallel loop gang vector_length (128) firstprivate (t1, t2)",
+	 "acc loop worker vector reduction(+:t1) reduction(-:t2)")
+/* test3: gang worker on the outer loop; vector on the inner loop.  */
+gentest (test3, "acc parallel loop gang worker vector_length (128) firstprivate (t1, t2)",
+	 "acc loop vector reduction(+:t1) reduction(-:t2)")
+/* test4: no gang/worker/vector clauses and no vector_length clause are given.  */
+gentest (test4, "acc parallel loop firstprivate (t1, t2)",
+	 "acc loop reduction(+:t1) reduction(-:t2)")
+
+/* Run the four generated tests; each one asserts on its own results.  */
+int
+main ()
+{
+  test1 ();
+  test2 ();
+  test3 ();
+  test4 ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90 b/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90
new file mode 100644
index 00000000000..de78148c7b3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90
@@ -0,0 +1,79 @@ 
+! Exercise three levels of parallelism using SGEMM from BLAS.
+
+! { dg-do run }
+
+! Explicitly set vector_length to 128 using a vector_length clause.
+subroutine openacc_sgemm_128 (m, n, k, alpha, a, b, beta, c)
+  integer :: m, n, k  ! c is indexed (1:m,1:n); k is the length of each dot product
+  real :: alpha, beta  ! alpha scales the dot product, beta scales the old c(i,j)
+  real :: a(k,*), b(k,*), c(m,*)
+  ! Result: c(i,j) = alpha*sum(a(1:k,i)*b(1:k,j)), plus beta*c(i,j) when beta /= 0.
+  integer :: i, j, l
+  real :: temp
+  ! The j loop carries the parallel construct; the i and l loops are nested acc loops.
+  !$acc parallel loop copy(c(1:m,1:n)) copyin(a(1:k,1:m),b(1:k,1:n)) vector_length (128) firstprivate (temp)
+  do j = 1, n
+     !$acc loop
+     do i = 1, m
+        temp = 0.0  ! restart the accumulator for each c(i,j)
+        !$acc loop reduction(+:temp)
+        do l = 1, k
+           temp = temp + a(l,i)*b(l,j)
+        end do
+        if(beta == 0.0) then  ! c(i,j) is not read in this branch
+           c(i,j) = alpha*temp
+        else
+           c(i,j) = alpha*temp + beta*c(i,j)
+        end if
+     end do
+  end do
+end subroutine openacc_sgemm_128
+! Host reference: the same loops as openacc_sgemm_128, without any OpenACC directives.
+subroutine host_sgemm (m, n, k, alpha, a, b, beta, c)
+  integer :: m, n, k  ! c is indexed (1:m,1:n); k is the length of each dot product
+  real :: alpha, beta  ! alpha scales the dot product, beta scales the old c(i,j)
+  real :: a(k,*), b(k,*), c(m,*)
+  ! Result: c(i,j) = alpha*sum(a(1:k,i)*b(1:k,j)), plus beta*c(i,j) when beta /= 0.
+  integer :: i, j, l
+  real :: temp
+  ! Plain nested loops; the l loop accumulates in temp in index order.
+  do j = 1, n
+     do i = 1, m
+        temp = 0.0  ! restart the accumulator for each c(i,j)
+        do l = 1, k
+           temp = temp + a(l,i)*b(l,j)
+        end do
+        if(beta == 0.0) then  ! c(i,j) is not read in this branch
+           c(i,j) = alpha*temp
+        else
+           c(i,j) = alpha*temp + beta*c(i,j)
+        end if
+     end do
+  end do
+end subroutine host_sgemm
+! Run the OpenACC and host routines on identical inputs and require identical results.
+program main
+  integer, parameter :: M = 100, N = 50, K = 2000
+  real :: a(K, M), b(K, N), c(M, N), d (M, N), e (M, N)
+  real alpha, beta
+  integer i, j
+  ! Constant inputs: every dot product sums K products of 1.0*0.25.
+  a(:,:) = 1.0
+  b(:,:) = 0.25
+  ! c is initialized but not used afterwards; d and e receive the two results.
+  c(:,:) = 0.0
+  d(:,:) = 0.0
+  e(:,:) = 0.0
+  ! beta is nonzero, so the branch that also reads the old c(i,j) runs in both routines.
+  alpha = 1.05
+  beta = 1.25
+  ! d: OpenACC version; e: host version.
+  call openacc_sgemm_128 (M, N, K, alpha, a, b, beta, d)
+  call host_sgemm (M, N, K, alpha, a, b, beta, e)
+  ! Exact element-wise comparison; m and n below are the parameters M and N.
+  do i = 1, m
+     do j = 1, n
+        if (d(i,j) /= e(i,j)) call abort  ! NOTE(review): abort is a GNU extension, not standard Fortran
+     end do
+  end do
+end program main