[3/9] x86-64: honor REX prefixes for SSE2AVX

Message ID 19b2cc0f-7c8b-18d8-2325-6334291dbcc2@suse.com
State New
Headers show
Series
  • x86: (mainly) prefix handling adjustments
Related show

Commit Message

Jan Beulich June 24, 2020, 12:12 p.m.
Legacy encoded insns honor REX prefixes, and their automatic conversion to AVX ones
ought to produce functionally identical code. Therefore explicit REX
prefixes cannot simply be ignored. This is in particular relevant
because at least PCMPESTR{I,M}'s 64-bit forms couldn't be expressed in
older gas other than by using a REX64 prefix.

gas/
2020-06-XX  Jan Beulich  <jbeulich@suse.com>

	* config/tc-i386.c (process_operands): Translate explicit REX
	prefix into i.rex for SSE2AVX templates.
	(set_rex_vrex): New helper.
	(build_modrm_byte): Use it.
	* testsuite/gas/i386/x86-64-sse2avx.s: Add cases with explicit
	REX prefixes.
	* testsuite/gas/i386/x86-64-sse2avx.d: Adjust expectations.

Patch

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -7407,6 +7407,15 @@  process_operands (void)
      unnecessary segment overrides.  */
   const seg_entry *default_seg = 0;
 
+  if (i.tm.opcode_modifier.sse2avx)
+    {
+      /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
+	 need converting.  */
+      i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
+      i.prefix[REX_PREFIX] = 0;
+      i.rex_encoding = 0;
+    }
+
   if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
     {
       unsigned int dupl = i.operands;
@@ -7644,6 +7653,25 @@  process_operands (void)
   return 1;
 }
 
+static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
+				 bfd_boolean do_sse2avx)
+{
+  if (r->reg_flags & RegRex)
+    {
+      if (i.rex & rex_bit)
+	as_bad (_("same type of prefix used twice"));
+      i.rex |= rex_bit;
+    }
+  else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
+    {
+      gas_assert (i.vex.register_specifier == r);
+      i.vex.register_specifier += 8;
+    }
+
+  if (r->reg_flags & RegVRex)
+    i.vrex |= rex_bit;
+}
+
 static const seg_entry *
 build_modrm_byte (void)
 {
@@ -7874,27 +7902,15 @@  build_modrm_byte (void)
 	      else
 		i.has_regxmm = TRUE;
 	    }
-	  if ((i.op[dest].regs->reg_flags & RegRex) != 0)
-	    i.rex |= REX_R;
-	  if ((i.op[dest].regs->reg_flags & RegVRex) != 0)
-	    i.vrex |= REX_R;
-	  if ((i.op[source].regs->reg_flags & RegRex) != 0)
-	    i.rex |= REX_B;
-	  if ((i.op[source].regs->reg_flags & RegVRex) != 0)
-	    i.vrex |= REX_B;
+	  set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
+	  set_rex_vrex (i.op[source].regs, REX_B, FALSE);
 	}
       else
 	{
 	  i.rm.reg = i.op[source].regs->reg_num;
 	  i.rm.regmem = i.op[dest].regs->reg_num;
-	  if ((i.op[dest].regs->reg_flags & RegRex) != 0)
-	    i.rex |= REX_B;
-	  if ((i.op[dest].regs->reg_flags & RegVRex) != 0)
-	    i.vrex |= REX_B;
-	  if ((i.op[source].regs->reg_flags & RegRex) != 0)
-	    i.rex |= REX_R;
-	  if ((i.op[source].regs->reg_flags & RegVRex) != 0)
-	    i.vrex |= REX_R;
+	  set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
+	  set_rex_vrex (i.op[source].regs, REX_R, FALSE);
 	}
       if (flag_code != CODE_64BIT && (i.rex & REX_R))
 	{
@@ -7944,10 +7960,7 @@  build_modrm_byte (void)
 		    }
 		}
 	      i.sib.index = i.index_reg->reg_num;
-	      if ((i.index_reg->reg_flags & RegRex) != 0)
-		i.rex |= REX_X;
-	      if ((i.index_reg->reg_flags & RegVRex) != 0)
-		i.vrex |= REX_X;
+	      set_rex_vrex (i.index_reg, REX_X, FALSE);
 	    }
 
 	  default_seg = &ds;
@@ -8313,18 +8326,14 @@  build_modrm_byte (void)
 	      if (i.tm.extension_opcode != None)
 		{
 		  i.rm.regmem = i.op[op].regs->reg_num;
-		  if ((i.op[op].regs->reg_flags & RegRex) != 0)
-		    i.rex |= REX_B;
-		  if ((i.op[op].regs->reg_flags & RegVRex) != 0)
-		    i.vrex |= REX_B;
+		  set_rex_vrex (i.op[op].regs, REX_B,
+				i.tm.opcode_modifier.sse2avx);
 		}
 	      else
 		{
 		  i.rm.reg = i.op[op].regs->reg_num;
-		  if ((i.op[op].regs->reg_flags & RegRex) != 0)
-		    i.rex |= REX_R;
-		  if ((i.op[op].regs->reg_flags & RegVRex) != 0)
-		    i.vrex |= REX_R;
+		  set_rex_vrex (i.op[op].regs, REX_R,
+				i.tm.opcode_modifier.sse2avx);
 		}
 	    }
 
--- a/gas/testsuite/gas/i386/x86-64-sse2avx.d
+++ b/gas/testsuite/gas/i386/x86-64-sse2avx.d
@@ -711,6 +711,33 @@  Disassembly of section .text:
 [ 	]*[a-f0-9]+:	c5 d9 73 d4 64       	vpsrlq \$0x64,%xmm4,%xmm4
 [ 	]*[a-f0-9]+:	c5 d9 71 d4 64       	vpsrlw \$0x64,%xmm4,%xmm4
 [ 	]*[a-f0-9]+:	c5 f9 c5 cc 64       	vpextrw \$0x64,%xmm4,%ecx
+[ 	]*[a-f0-9]+:	c5 f8 58 c0          	vaddps %xmm0,%xmm0,%xmm0
+[ 	]*[a-f0-9]+:	c5 f8 58 04 00       	vaddps \(%rax,%rax(,1)?\),%xmm0,%xmm0
+[ 	]*[a-f0-9]+:	c5 f8 58 c0          	vaddps %xmm0,%xmm0,%xmm0
+[ 	]*[a-f0-9]+:	c5 f8 58 04 00       	vaddps \(%rax,%rax(,1)?\),%xmm0,%xmm0
+[ 	]*[a-f0-9]+:	c5 38 58 c0          	vaddps %xmm0,%xmm8,%xmm8
+[ 	]*[a-f0-9]+:	c5 38 58 04 00       	vaddps \(%rax,%rax(,1)?\),%xmm8,%xmm8
+[ 	]*[a-f0-9]+:	c4 a1 78 58 c0       	vaddps %xmm0,%xmm0,%xmm0
+[ 	]*[a-f0-9]+:	c4 a1 78 58 04 00    	vaddps \(%rax,%r8(,1)?\),%xmm0,%xmm0
+[ 	]*[a-f0-9]+:	c4 c1 78 58 c0       	vaddps %xmm8,%xmm0,%xmm0
+[ 	]*[a-f0-9]+:	c4 c1 78 58 04 00    	vaddps \(%r8,%rax(,1)?\),%xmm0,%xmm0
+[ 	]*[a-f0-9]+:	c5 3a 10 c0          	vmovss %xmm0,%xmm8,%xmm8
+[ 	]*[a-f0-9]+:	c4 c1 7a 10 c0       	vmovss %xmm8,%xmm0,%xmm0
+[ 	]*[a-f0-9]+:	c5 7a 11 c0          	vmovss %xmm8,%xmm0,%xmm0
+[ 	]*[a-f0-9]+:	c4 c1 3a 11 c0       	vmovss %xmm0,%xmm8,%xmm8
+[ 	]*[a-f0-9]+:	c4 c1 39 71 f0 00    	vpsllw \$(0x)?0,%xmm8,%xmm8
+[ 	]*[a-f0-9]+:	c5 79 c5 c0 00       	vpextrw \$(0x)?0,%xmm0,%r8d
+[ 	]*[a-f0-9]+:	c4 c1 79 c5 c0 00    	vpextrw \$(0x)?0,%xmm8,%eax
+[ 	]*[a-f0-9]+:	c4 63 79 14 c0 00    	vpextrb \$(0x)?0,%xmm8,%eax
+[ 	]*[a-f0-9]+:	c4 c3 79 14 c0 00    	vpextrb \$(0x)?0,%xmm0,%r8d
+[ 	]*[a-f0-9]+:	c4 63 39 4a c0 00    	vblendvps %xmm0,%xmm0,%xmm8,%xmm8
+[ 	]*[a-f0-9]+:	c4 c3 79 4a c0 00    	vblendvps %xmm0,%xmm8,%xmm0,%xmm0
+[ 	]*[a-f0-9]+:	c4 63 39 4a c0 00    	vblendvps %xmm0,%xmm0,%xmm8,%xmm8
+[ 	]*[a-f0-9]+:	c4 c3 79 4a c0 00    	vblendvps %xmm0,%xmm8,%xmm0,%xmm0
+[ 	]*[a-f0-9]+:	c4 e1 fb 2a 00       	vcvtsi2sdq \(%rax\),%xmm0,%xmm0
+[ 	]*[a-f0-9]+:	c4 e1 fa 2a 00       	vcvtsi2ssq \(%rax\),%xmm0,%xmm0
+[ 	]*[a-f0-9]+:	c4 e3 f9 61 c0 00    	vpcmpestriq \$(0x)?0,%xmm0,%xmm0
+[ 	]*[a-f0-9]+:	c4 e3 f9 60 c0 00    	vpcmpestrmq \$(0x)?0,%xmm0,%xmm0
 [ 	]*[a-f0-9]+:	c5 f8 ae 11          	vldmxcsr \(%rcx\)
 [ 	]*[a-f0-9]+:	c5 f8 ae 19          	vstmxcsr \(%rcx\)
 [ 	]*[a-f0-9]+:	c5 f8 5b f4          	vcvtdq2ps %xmm4,%xmm6
--- a/gas/testsuite/gas/i386/x86-64-sse2avx.s
+++ b/gas/testsuite/gas/i386/x86-64-sse2avx.s
@@ -802,6 +802,44 @@  _start:
 # Tests for op imm8, xmm, regl
 	pextrw $100,%xmm4,%ecx
 
+# Tests for REX prefix conversion
+	{rex} addps %xmm0, %xmm0
+	{rex} addps (%rax,%rax), %xmm0
+	rex addps %xmm0, %xmm0
+	rex addps (%rax,%rax), %xmm0
+	rexx addps %xmm0, %xmm0
+	rexx addps (%rax,%rax), %xmm0
+	rexy addps %xmm0, %xmm0
+	rexy addps (%rax,%rax), %xmm0
+	rexz addps %xmm0, %xmm0
+	rexz addps (%rax,%rax), %xmm0
+
+	{load} rexx movss %xmm0, %xmm0
+	{load} rexz movss %xmm0, %xmm0
+
+	{store} rexx movss %xmm0, %xmm0
+	{store} rexz movss %xmm0, %xmm0
+
+	rexz psllw $0, %xmm0
+
+	rexx pextrw $0, %xmm0, %eax
+	rexz pextrw $0, %xmm0, %eax
+
+	rexx pextrb $0, %xmm0, %eax
+	rexz pextrb $0, %xmm0, %eax
+
+	rexx blendvps %xmm0, %xmm0, %xmm0
+	rexz blendvps %xmm0, %xmm0, %xmm0
+
+	rexx blendvps %xmm0, %xmm0
+	rexz blendvps %xmm0, %xmm0
+
+	rex64 cvtsi2sd (%rax), %xmm0
+	rex64 cvtsi2ss (%rax), %xmm0
+
+	rex64 pcmpestri $0, %xmm0, %xmm0
+	rex64 pcmpestrm $0, %xmm0, %xmm0
+
 
 	.intel_syntax noprefix
 # Tests for op mem64
@@ -1505,4 +1543,3 @@  _start:
 
 # Tests for op imm8, xmm, regl
 	pextrw ecx,xmm4,100
-