Fix nvptx_output_softstack_switch (PR target/90811)

Message ID 20190610214644.GE19695@tucnak
State New
Headers show
Series
  • Fix nvptx_output_softstack_switch (PR target/90811)
Related show

Commit Message

Jakub Jelinek June 10, 2019, 9:46 p.m.
Hi!

In PTX, and accepts only .pred, .b16, .b32 and .b64 types, not .u64 etc.
Fixed thusly, bootstrapped/regtested on x86_64-linux with nvptx-none
offloading, ok for trunk and 9.2/8.4?

I'll try to figure out tomorrow if we can avoid the overaligned variables in
this specific testcase, but generally there is always the possibility of
overaligned vars.

2019-06-10  Jakub Jelinek  <jakub@redhat.com>

	PR target/90811
	* config/nvptx/nvptx.c (nvptx_output_softstack_switch): Use and.b%d
	instead of and.u%d.

	* testsuite/libgomp.c/pr90811.c: New test.


	Jakub

Comments

Tom de Vries June 11, 2019, 4:35 p.m. | #1
On 10-06-19 23:46, Jakub Jelinek wrote:
> Hi!

> 

> In PTX, and accepts only .pred, .b16, .b32 and .b64 types, not .u64 etc.

> Fixed thusly, bootstrapped/regtested on x86_64-linux with nvptx-none

> offloading, ok for trunk and 9.2/8.4?

> 


LGTM [and to Alexander as mentioned in PR90811 comment 3].

Thanks,
- Tom

> I'll try to figure out tomorrow if we can avoid the overaligned variables in

> this specific testcase, but generally there is always the possibility of

> overaligned vars.

> 

> 2019-06-10  Jakub Jelinek  <jakub@redhat.com>

> 

> 	PR target/90811

> 	* config/nvptx/nvptx.c (nvptx_output_softstack_switch): Use and.b%d

> 	instead of and.u%d.

> 

> 	* testsuite/libgomp.c/pr90811.c: New test.

> 

> --- gcc/config/nvptx/nvptx.c.jj	2019-03-11 22:56:55.934666848 +0100

> +++ gcc/config/nvptx/nvptx.c	2019-06-10 15:20:43.154588406 +0200

> @@ -1475,7 +1475,7 @@ nvptx_output_softstack_switch (FILE *fil

>        fputs (";\n", file);

>        if (!CONST_INT_P (size) || UINTVAL (align) > GET_MODE_SIZE (DImode))

>  	fprintf (file,

> -		 "\t\tand.u%d %%r%d, %%r%d, -" HOST_WIDE_INT_PRINT_DEC ";\n",

> +		 "\t\tand.b%d %%r%d, %%r%d, -" HOST_WIDE_INT_PRINT_DEC ";\n",

>  		 bits, regno, regno, UINTVAL (align));

>      }

>    if (cfun->machine->has_softstack)

> --- libgomp/testsuite/libgomp.c/pr90811.c.jj	2019-06-10 15:43:30.464115978 +0200

> +++ libgomp/testsuite/libgomp.c/pr90811.c	2019-06-10 15:43:37.618003812 +0200

> @@ -0,0 +1,29 @@

> +/* PR target/90811 */

> +

> +int

> +main ()

> +{

> +  long long a[100], b[100];

> +  int i;

> +  for (i = 0; i < 100; i++)

> +    {

> +      a[i] = i;

> +      b[i] = i % 10;

> +    }

> +  #pragma omp target teams distribute parallel for simd map(tofrom: a[:100], b[:100])

> +  for (i = 0; i < 100; i++)

> +    {

> +      long long c = 0;

> +      const long long d[] = { 1, 3, 5, 7, 9 };

> +      for (int j = 4; j >= 0; j--)

> +         c = d[j] + b[i] * c;

> +      a[i] += c;

> +    }

> +  for (i = 0; i < 100; i++)

> +    {

> +      const long long r[] = { 1, 26, 229, 976, 2849, 6646, 13381, 24284, 40801, 64594 };

> +      if (a[i] != r[i % 10] + (i / 10 * 10))

> +	__builtin_abort ();

> +    }

> +  return 0;

> +}

> 

> 	Jakub

>

Patch

--- gcc/config/nvptx/nvptx.c.jj	2019-03-11 22:56:55.934666848 +0100
+++ gcc/config/nvptx/nvptx.c	2019-06-10 15:20:43.154588406 +0200
@@ -1475,7 +1475,7 @@  nvptx_output_softstack_switch (FILE *fil
       fputs (";\n", file);
       if (!CONST_INT_P (size) || UINTVAL (align) > GET_MODE_SIZE (DImode))
 	fprintf (file,
-		 "\t\tand.u%d %%r%d, %%r%d, -" HOST_WIDE_INT_PRINT_DEC ";\n",
+		 "\t\tand.b%d %%r%d, %%r%d, -" HOST_WIDE_INT_PRINT_DEC ";\n",
 		 bits, regno, regno, UINTVAL (align));
     }
   if (cfun->machine->has_softstack)
--- libgomp/testsuite/libgomp.c/pr90811.c.jj	2019-06-10 15:43:30.464115978 +0200
+++ libgomp/testsuite/libgomp.c/pr90811.c	2019-06-10 15:43:37.618003812 +0200
@@ -0,0 +1,29 @@ 
+/* PR target/90811 */
+
+int
+main ()
+{
+  long long a[100], b[100];
+  int i;
+  for (i = 0; i < 100; i++)
+    {
+      a[i] = i;
+      b[i] = i % 10;
+    }
+  #pragma omp target teams distribute parallel for simd map(tofrom: a[:100], b[:100])
+  for (i = 0; i < 100; i++)
+    {
+      long long c = 0;
+      const long long d[] = { 1, 3, 5, 7, 9 };
+      for (int j = 4; j >= 0; j--)
+         c = d[j] + b[i] * c;
+      a[i] += c;
+    }
+  for (i = 0; i < 100; i++)
+    {
+      const long long r[] = { 1, 26, 229, 976, 2849, 6646, 13381, 24284, 40801, 64594 };
+      if (a[i] != r[i % 10] + (i / 10 * 10))
+	__builtin_abort ();
+    }
+  return 0;
+}