[x86] Tweak testcases for PR82361

Message ID mpto8zi3mqs.fsf@arm.com
State New
Headers show
Series
  • [x86] Tweak testcases for PR82361
Related show

Commit Message

Richard Sandiford Sept. 17, 2019, 4:34 p.m.
gcc/testsuite/gcc.target/i386/pr82361-[12].c check whether we
can optimise away a 32-to-64-bit zero extension of a 32-bit
division or modulus result.  Currently this fails for the modulus
part of f1 and f2 in pr82361-1.c:

/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage
   one.  */
/* { dg-final { scan-assembler-times "movl\t%edx" 2 } } */

pr82361-2.c instead expects no failures:

/* Ditto %edx to %rdx zero extensions.  */
/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */

But we actually get the same zero-extensions for f1 and f2 in pr82361-2.c.
The reason they don't trigger a failure is that the RA allocates the
asm input for "d" to %rdi rather than %rdx, so we have:

	movl	%edx, %edi

instead of:

	movl	%edx, %edx

For the tests to work as expected, I think they have to force "c" and
"d" to be %rax and %rdx respectively.  We then see the same failure in
pr82361-2.c as for pr82361-1.c (but doubled, due to the 8-bit division
path).

Tested on x86_64-linux-gnu.  OK to install?

Richard


2019-09-17  Richard Sandiford  <richard.sandiford@arm.com>

gcc/testsuite/
	* gcc.target/i386/pr82361-1.c (f1, f2, f3, f4, f5, f6): Force
	"c" to be in %rax and "d" to be in %rdx.
	* gcc.target/i386/pr82361-2.c: Expect 4 instances of "movl\t%edx".

Comments

Uros Bizjak Sept. 18, 2019, 6:43 a.m. | #1
On Tue, Sep 17, 2019 at 6:34 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>

> gcc/testsuite/gcc.target/i386/pr82361-[12].c check whether we

> can optimise away a 32-to-64-bit zero extension of a 32-bit

> division or modulus result.  Currently this fails for the modulus

> part of f1 and f2 in pr82361-1.c:

>

> /* FIXME: We are still not able to optimize the modulo in f1/f2, only manage

>    one.  */

> /* { dg-final { scan-assembler-times "movl\t%edx" 2 } } */

>

> pr82361-2.c instead expects no failures:

>

> /* Ditto %edx to %rdx zero extensions.  */

> /* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */

>

> But we actually get the same zero-extensions for f1 and f2 in pr82361-2.c.

> The reason they don't trigger a failure is that the RA allocates the

> asm input for "d" to %rdi rather than %rdx, so we have:

>

>         movl    %rdi, %rdx

>

> instead of:

>

>         movl    %rdx, %rdx

>

> For the tests to work as expected, I think they have to force "c" and

> "d" to be %rax and %rdx respectively.  We then see the same failure in

> pr82361-2.c as for pr82361-1.c (but doubled, due to the 8-bit division

> path).

>

> Tested on x86_64-linux-gnu.  OK to install?

>

> Richard

>

>

> 2019-09-17  Richard Sandiford  <richard.sandiford@arm.com>

>

> gcc/testsuite/

>         * gcc.target/i386/pr82361-1.c (f1, f2, f3, f4, f5, f6): Force

>         "c" to be in %rax and "d" to be in %rdx.

>         * gcc.target/i386/pr82361-2.c: Expect 4 instances of "movl\t%edx".


OK, with a comment improvement below.

Thanks,
Uros.

> Index: gcc/testsuite/gcc.target/i386/pr82361-1.c

> ===================================================================

> --- gcc/testsuite/gcc.target/i386/pr82361-1.c   2019-03-08 18:14:39.040959532 +0000

> +++ gcc/testsuite/gcc.target/i386/pr82361-1.c   2019-09-17 17:32:00.930930762 +0100

> @@ -11,43 +11,43 @@

>  void

>  f1 (unsigned int a, unsigned int b)

>  {

> -  unsigned long long c = a / b;

> -  unsigned long long d = a % b;

> +  register unsigned long long c asm ("rax") = a / b;

> +  register unsigned long long d asm ("rdx") = a % b;

>    asm volatile ("" : : "r" (c), "r" (d));

>  }

>

>  void

>  f2 (int a, int b)

>  {

> -  unsigned long long c = (unsigned int) (a / b);

> -  unsigned long long d = (unsigned int) (a % b);

> +  register unsigned long long c asm ("rax") = (unsigned int) (a / b);

> +  register unsigned long long d asm ("rdx") = (unsigned int) (a % b);

>    asm volatile ("" : : "r" (c), "r" (d));

>  }

>

>  void

>  f3 (unsigned int a, unsigned int b)

>  {

> -  unsigned long long c = a / b;

> +  register unsigned long long c asm ("rax") = a / b;

>    asm volatile ("" : : "r" (c));

>  }

>

>  void

>  f4 (int a, int b)

>  {

> -  unsigned long long c = (unsigned int) (a / b);

> +  register unsigned long long c asm ("rax") = (unsigned int) (a / b);

>    asm volatile ("" : : "r" (c));

>  }

>

>  void

>  f5 (unsigned int a, unsigned int b)

>  {

> -  unsigned long long d = a % b;

> +  register unsigned long long d asm ("rdx") = a % b;

>    asm volatile ("" : : "r" (d));

>  }

>

>  void

>  f6 (int a, int b)

>  {

> -  unsigned long long d = (unsigned int) (a % b);

> +  register unsigned long long d asm ("rdx") = (unsigned int) (a % b);

>    asm volatile ("" : : "r" (d));

>  }

> Index: gcc/testsuite/gcc.target/i386/pr82361-2.c

> ===================================================================

> --- gcc/testsuite/gcc.target/i386/pr82361-2.c   2019-09-17 16:34:52.280124553 +0100

> +++ gcc/testsuite/gcc.target/i386/pr82361-2.c   2019-09-17 17:32:00.930930762 +0100

> @@ -4,7 +4,8 @@

>  /* We should be able to optimize all %eax to %rax zero extensions, because

>     div and idiv instructions with 32-bit operands zero-extend both results.   */

>  /* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */

> -/* Ditto %edx to %rdx zero extensions.  */

> -/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */

> +/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage

> +   one.  */


Can we please change the comment here and in pr82361-1.c to something like:

/* FIXME: The compiler does not merge zero-extension to the modulo part.  */


> +/* { dg-final { scan-assembler-times "movl\t%edx" 4 } } */

>

>  #include "pr82361-1.c"
Richard Sandiford Sept. 18, 2019, 7:40 a.m. | #2
Uros Bizjak <ubizjak@gmail.com> writes:
> On Tue, Sep 17, 2019 at 6:34 PM Richard Sandiford

> <richard.sandiford@arm.com> wrote:

>>

>> gcc/testsuite/gcc.target/i386/pr82361-[12].c check whether we

>> can optimise away a 32-to-64-bit zero extension of a 32-bit

>> division or modulus result.  Currently this fails for the modulus

>> part of f1 and f2 in pr82361-1.c:

>>

>> /* FIXME: We are still not able to optimize the modulo in f1/f2, only manage

>>    one.  */

>> /* { dg-final { scan-assembler-times "movl\t%edx" 2 } } */

>>

>> pr82361-2.c instead expects no failures:

>>

>> /* Ditto %edx to %rdx zero extensions.  */

>> /* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */

>>

>> But we actually get the same zero-extensions for f1 and f2 in pr82361-2.c.

>> The reason they don't trigger a failure is that the RA allocates the

>> asm input for "d" to %rdi rather than %rdx, so we have:

>>

>>         movl    %rdi, %rdx

>>

>> instead of:

>>

>>         movl    %rdx, %rdx

>>

>> For the tests to work as expected, I think they have to force "c" and

>> "d" to be %rax and %rdx respectively.  We then see the same failure in

>> pr82361-2.c as for pr82361-1.c (but doubled, due to the 8-bit division

>> path).

>>

>> Tested on x86_64-linux-gnu.  OK to install?

>>

>> Richard

>>

>>

>> 2019-09-17  Richard Sandiford  <richard.sandiford@arm.com>

>>

>> gcc/testsuite/

>>         * gcc.target/i386/pr82361-1.c (f1, f2, f3, f4, f5, f6): Force

>>         "c" to be in %rax and "d" to be in %rdx.

>>         * gcc.target/i386/pr82361-2.c: Expect 4 instances of "movl\t%edx".

>

> OK, with a comment improvement below.

>

> Thanks,

> Uros.

>

>> Index: gcc/testsuite/gcc.target/i386/pr82361-1.c

>> ===================================================================

>> --- gcc/testsuite/gcc.target/i386/pr82361-1.c   2019-03-08 18:14:39.040959532 +0000

>> +++ gcc/testsuite/gcc.target/i386/pr82361-1.c   2019-09-17 17:32:00.930930762 +0100

>> @@ -11,43 +11,43 @@

>>  void

>>  f1 (unsigned int a, unsigned int b)

>>  {

>> -  unsigned long long c = a / b;

>> -  unsigned long long d = a % b;

>> +  register unsigned long long c asm ("rax") = a / b;

>> +  register unsigned long long d asm ("rdx") = a % b;

>>    asm volatile ("" : : "r" (c), "r" (d));

>>  }

>>

>>  void

>>  f2 (int a, int b)

>>  {

>> -  unsigned long long c = (unsigned int) (a / b);

>> -  unsigned long long d = (unsigned int) (a % b);

>> +  register unsigned long long c asm ("rax") = (unsigned int) (a / b);

>> +  register unsigned long long d asm ("rdx") = (unsigned int) (a % b);

>>    asm volatile ("" : : "r" (c), "r" (d));

>>  }

>>

>>  void

>>  f3 (unsigned int a, unsigned int b)

>>  {

>> -  unsigned long long c = a / b;

>> +  register unsigned long long c asm ("rax") = a / b;

>>    asm volatile ("" : : "r" (c));

>>  }

>>

>>  void

>>  f4 (int a, int b)

>>  {

>> -  unsigned long long c = (unsigned int) (a / b);

>> +  register unsigned long long c asm ("rax") = (unsigned int) (a / b);

>>    asm volatile ("" : : "r" (c));

>>  }

>>

>>  void

>>  f5 (unsigned int a, unsigned int b)

>>  {

>> -  unsigned long long d = a % b;

>> +  register unsigned long long d asm ("rdx") = a % b;

>>    asm volatile ("" : : "r" (d));

>>  }

>>

>>  void

>>  f6 (int a, int b)

>>  {

>> -  unsigned long long d = (unsigned int) (a % b);

>> +  register unsigned long long d asm ("rdx") = (unsigned int) (a % b);

>>    asm volatile ("" : : "r" (d));

>>  }

>> Index: gcc/testsuite/gcc.target/i386/pr82361-2.c

>> ===================================================================

>> --- gcc/testsuite/gcc.target/i386/pr82361-2.c   2019-09-17 16:34:52.280124553 +0100

>> +++ gcc/testsuite/gcc.target/i386/pr82361-2.c   2019-09-17 17:32:00.930930762 +0100

>> @@ -4,7 +4,8 @@

>>  /* We should be able to optimize all %eax to %rax zero extensions, because

>>     div and idiv instructions with 32-bit operands zero-extend both results.   */

>>  /* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */

>> -/* Ditto %edx to %rdx zero extensions.  */

>> -/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */

>> +/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage

>> +   one.  */

>

> Can we please change comment here and in pr82361-2.c to something like:

>

> /* FIXME: The compiler does not merge zero-extension to the modulo part.  */


Thanks, here's what I applied.

Richard


2019-09-18  Richard Sandiford  <richard.sandiford@arm.com>

gcc/testsuite/
	* gcc.target/i386/pr82361-1.c (f1, f2, f3, f4, f5, f6): Force
	"c" to be in %rax and "d" to be in %rdx.
	* gcc.target/i386/pr82361-2.c: Expect 4 instances of "movl\t%edx".

Index: gcc/testsuite/gcc.target/i386/pr82361-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr82361-1.c	2019-09-17 18:00:14.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/pr82361-1.c	2019-09-18 08:37:39.030720198 +0100
@@ -4,50 +4,50 @@
 /* We should be able to optimize all %eax to %rax zero extensions, because
    div and idiv instructions with 32-bit operands zero-extend both results.   */
 /* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
-/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage
-   one.  */
+/* FIXME: The compiler does not merge zero-extension to the modulo part
+   of f1 and f2.  */
 /* { dg-final { scan-assembler-times "movl\t%edx" 2 } } */
 
 void
 f1 (unsigned int a, unsigned int b)
 {
-  unsigned long long c = a / b;
-  unsigned long long d = a % b;
+  register unsigned long long c asm ("rax") = a / b;
+  register unsigned long long d asm ("rdx") = a % b;
   asm volatile ("" : : "r" (c), "r" (d));
 }
 
 void
 f2 (int a, int b)
 {
-  unsigned long long c = (unsigned int) (a / b);
-  unsigned long long d = (unsigned int) (a % b);
+  register unsigned long long c asm ("rax") = (unsigned int) (a / b);
+  register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
   asm volatile ("" : : "r" (c), "r" (d));
 }
 
 void
 f3 (unsigned int a, unsigned int b)
 {
-  unsigned long long c = a / b;
+  register unsigned long long c asm ("rax") = a / b;
   asm volatile ("" : : "r" (c));
 }
 
 void
 f4 (int a, int b)
 {
-  unsigned long long c = (unsigned int) (a / b);
+  register unsigned long long c asm ("rax") = (unsigned int) (a / b);
   asm volatile ("" : : "r" (c));
 }
 
 void
 f5 (unsigned int a, unsigned int b)
 {
-  unsigned long long d = a % b;
+  register unsigned long long d asm ("rdx") = a % b;
   asm volatile ("" : : "r" (d));
 }
 
 void
 f6 (int a, int b)
 {
-  unsigned long long d = (unsigned int) (a % b);
+  register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
   asm volatile ("" : : "r" (d));
 }
Index: gcc/testsuite/gcc.target/i386/pr82361-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr82361-2.c	2019-09-17 18:00:14.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/pr82361-2.c	2019-09-18 08:37:39.034720166 +0100
@@ -4,7 +4,8 @@
 /* We should be able to optimize all %eax to %rax zero extensions, because
    div and idiv instructions with 32-bit operands zero-extend both results.   */
 /* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
-/* Ditto %edx to %rdx zero extensions.  */
-/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */
+/* FIXME: The compiler does not merge zero-extension to the modulo part
+   of f1 and f2.  */
+/* { dg-final { scan-assembler-times "movl\t%edx" 4 } } */
 
 #include "pr82361-1.c"

Patch

Index: gcc/testsuite/gcc.target/i386/pr82361-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr82361-1.c	2019-03-08 18:14:39.040959532 +0000
+++ gcc/testsuite/gcc.target/i386/pr82361-1.c	2019-09-17 17:32:00.930930762 +0100
@@ -11,43 +11,43 @@ 
 void
 f1 (unsigned int a, unsigned int b)
 {
-  unsigned long long c = a / b;
-  unsigned long long d = a % b;
+  register unsigned long long c asm ("rax") = a / b;
+  register unsigned long long d asm ("rdx") = a % b;
   asm volatile ("" : : "r" (c), "r" (d));
 }
 
 void
 f2 (int a, int b)
 {
-  unsigned long long c = (unsigned int) (a / b);
-  unsigned long long d = (unsigned int) (a % b);
+  register unsigned long long c asm ("rax") = (unsigned int) (a / b);
+  register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
   asm volatile ("" : : "r" (c), "r" (d));
 }
 
 void
 f3 (unsigned int a, unsigned int b)
 {
-  unsigned long long c = a / b;
+  register unsigned long long c asm ("rax") = a / b;
   asm volatile ("" : : "r" (c));
 }
 
 void
 f4 (int a, int b)
 {
-  unsigned long long c = (unsigned int) (a / b);
+  register unsigned long long c asm ("rax") = (unsigned int) (a / b);
   asm volatile ("" : : "r" (c));
 }
 
 void
 f5 (unsigned int a, unsigned int b)
 {
-  unsigned long long d = a % b;
+  register unsigned long long d asm ("rdx") = a % b;
   asm volatile ("" : : "r" (d));
 }
 
 void
 f6 (int a, int b)
 {
-  unsigned long long d = (unsigned int) (a % b);
+  register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
   asm volatile ("" : : "r" (d));
 }
Index: gcc/testsuite/gcc.target/i386/pr82361-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr82361-2.c	2019-09-17 16:34:52.280124553 +0100
+++ gcc/testsuite/gcc.target/i386/pr82361-2.c	2019-09-17 17:32:00.930930762 +0100
@@ -4,7 +4,8 @@ 
 /* We should be able to optimize all %eax to %rax zero extensions, because
    div and idiv instructions with 32-bit operands zero-extend both results.   */
 /* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
-/* Ditto %edx to %rdx zero extensions.  */
-/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */
+/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage
+   one.  */
+/* { dg-final { scan-assembler-times "movl\t%edx" 4 } } */
 
 #include "pr82361-1.c"