[v2,1/9] x86: drop stray W

Message ID 3af878a2-2d47-b3a9-3d00-2e6fa53caaff@suse.com
State New
Headers show
Series
  • x86: operand size handling improvements
Related show

Commit Message

Jan Beulich Oct. 28, 2019, 8:02 a.m.
The flag is used to indicate opcodes which can be switched between byte
and word/dword/qword forms (in a "canonical" way). Obviously it's quite
odd then to see it on insns not allowing for byte operands in the first
place. As a result the opcode bytes need to be adjusted accordingly,
which includes comparisons done in optimize_encoding().

To make re-introduction of such issues less likely have i386-gen
diagnose it (in a generally non-fatal way for now).

gas/
2019-10-XX  Jan Beulich  <jbeulich@suse.com>

	* config/tc-i386.c (optimize_encoding): Adjust opcodes compared
	against. Adjust replacement opcode and clear .w.

opcodes/
2019-10-XX  Jan Beulich  <jbeulich@suse.com>

	* i386-gen.c (process_i386_opcode_modifier): Report bogus uses
	of W.
	* i386-opc.h (W): Extend comment.
	* i386-opc.tbl (mov, movabs, movq): Drop W and adjust opcodes of
	general purpose variants not allowing for byte operands.
	* i386-tbl.h: Re-generate.
---
v2: Diagnose misuse of W. Extend comment.

Comments

H.J. Lu Oct. 29, 2019, 5:30 p.m. | #1
On Mon, Oct 28, 2019 at 1:02 AM Jan Beulich <jbeulich@suse.com> wrote:
>

> The flag is used to indicate opcodes which can be switched between byte

> and word/dword/qword forms (in a "canonical" way). Obviously it's quite

> odd then to see it on insns not allowing for byte operands in the first

> place. As a result the opcode bytes need to be adjusted accordingly,

> which includes comparisons done in optimize_encoding().

>

> To make re-introduction of such issues less likely have i386-gen

> diagnose it (in a generally non-fatal way for now).

>

> gas/

> 2019-10-XX  Jan Beulich  <jbeulich@suse.com>

>

>         * config/tc-i386.c (optimize_encoding): Adjust opcodes compared

>         against. Adjust replacement opcode and clear .w.

>

> opcodes/

> 2019-10-XX  Jan Beulich  <jbeulich@suse.com>

>

>         * i386-gen.c (process_i386_opcode_modifier): Report bogus uses

>         of W.

>         * i386-opc.h (W): Extend comment.

>         * i386-opc.tbl (mov, movabs, movq): Drop W and adjust opcodes of

>         general purpose variants not allowing for byte operands.

>         * i386-tbl.h: Re-generate.

> ---

> v2: Diagnose misuse of W. Extend comment.

>

> --- a/gas/config/tc-i386.c

> +++ b/gas/config/tc-i386.c

> @@ -3974,7 +3974,7 @@ optimize_encoding (void)

>                 && i.reg_operands == 1

>                 && i.imm_operands == 1

>                 && i.op[0].imms->X_op == O_constant

> -               && ((i.tm.base_opcode == 0xb0

> +               && ((i.tm.base_opcode == 0xb8

>                      && i.tm.extension_opcode == None

>                      && fits_in_unsigned_long (i.op[0].imms->X_add_number))

>                     || (fits_in_imm31 (i.op[0].imms->X_add_number)

> @@ -3984,7 +3984,7 @@ optimize_encoding (void)

>                             || (i.tm.base_opcode == 0x80

>                                 && i.tm.extension_opcode == 0x4)

>                             || ((i.tm.base_opcode == 0xf6

> -                                || i.tm.base_opcode == 0xc6)

> +                                || (i.tm.base_opcode | 1) == 0xc7)

>                                 && i.tm.extension_opcode == 0x0)))

>                     || (fits_in_imm7 (i.op[0].imms->X_add_number)

>                         && i.tm.base_opcode == 0x83

> @@ -4010,7 +4010,7 @@ optimize_encoding (void)

>            movq $imm32, %r64   -> movl $imm32, %r32

>          */

>        i.tm.opcode_modifier.norex64 = 1;

> -      if (i.tm.base_opcode == 0xb0 || i.tm.base_opcode == 0xc6)

> +      if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)

>         {

>           /* Handle

>                movq $imm31, %r64   -> movl $imm31, %r32

> @@ -4024,13 +4024,14 @@ optimize_encoding (void)

>           i.types[0].bitfield.imm64 = 0;

>           i.types[1].bitfield.dword = 1;

>           i.types[1].bitfield.qword = 0;

> -         if (i.tm.base_opcode == 0xc6)

> +         if ((i.tm.base_opcode | 1) == 0xc7)

>             {

>               /* Handle

>                    movq $imm31, %r64   -> movl $imm31, %r32

>                */

> -             i.tm.base_opcode = 0xb0;

> +             i.tm.base_opcode = 0xb8;

>               i.tm.extension_opcode = None;

> +             i.tm.opcode_modifier.w = 0;

>               i.tm.opcode_modifier.shortform = 1;

>               i.tm.opcode_modifier.modrm = 0;

>             }

> --- a/opcodes/i386-gen.c

> +++ b/opcodes/i386-gen.c

> @@ -1107,6 +1107,8 @@ process_i386_opcode_modifier (FILE *tabl

>

>    if (strcmp (mod, "0"))

>      {

> +      unsigned int have_w = 0, bwlq_suf = 0xf;

> +

>        last = mod + strlen (mod);

>        for (next = mod; next && next < last; )

>         {

> @@ -1120,8 +1122,30 @@ process_i386_opcode_modifier (FILE *tabl

>                           lineno);

>               if (strcasecmp(str, "IsString") == 0)

>                 active_isstring = 1;

> +

> +             if (strcasecmp(str, "W") == 0)

> +               have_w = 1;

> +

> +             if (strcasecmp(str, "No_bSuf") == 0)

> +               bwlq_suf &= ~1;

> +             if (strcasecmp(str, "No_wSuf") == 0)

> +               bwlq_suf &= ~2;

> +             if (strcasecmp(str, "No_lSuf") == 0)

> +               bwlq_suf &= ~4;

> +             if (strcasecmp(str, "No_qSuf") == 0)

> +               bwlq_suf &= ~8;

>             }

>         }

> +

> +      if (have_w && !bwlq_suf)

> +       fail ("%s: %d: stray W modifier\n", filename, lineno);

> +      if (have_w && !(bwlq_suf & 1))

> +       fprintf (stderr, "%s: %d: W modifier without Byte operand(s)\n",

> +                filename, lineno);

> +      if (have_w && !(bwlq_suf & ~1))

> +       fprintf (stderr,

> +                "%s: %d: W modifier without Word/Dword/Qword operand(s)\n",

> +                filename, lineno);

>      }

>    output_opcode_modifier (table, modifiers, ARRAY_SIZE (modifiers));

>  }

> --- a/opcodes/i386-opc.h

> +++ b/opcodes/i386-opc.h

> @@ -387,7 +387,9 @@ enum

>  {

>    /* has direction bit. */

>    D = 0,

> -  /* set if operands can be words or dwords encoded the canonical way */

> +  /* set if operands can be both bytes and words/dwords/qwords, encoded the

> +     canonical way; the base_opcode field should hold the encoding for byte

> +     operands  */

>    W,

>    /* load form instruction. Must be placed before store form.  */

>    Load,

> --- a/opcodes/i386-opc.tbl

> +++ b/opcodes/i386-opc.tbl

> @@ -60,7 +60,7 @@ mov, 2, 0x88, None, 1, 0, D|W|CheckRegSi

>  // 64bit value.

>  mov, 2, 0xb0, None, 1, 0, W|ShortForm|No_sSuf|No_qSuf|No_ldSuf, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32 }

>  mov, 2, 0xc6, 0x0, 1, 0, W|Modrm|No_sSuf|No_ldSuf|HLEPrefixOk=3|Optimize, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }

> -mov, 2, 0xb0, None, 1, Cpu64, W|ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|Optimize, { Imm64, Reg64 }

> +mov, 2, 0xb8, None, 1, Cpu64, ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|Optimize, { Imm64, Reg64 }

>  // The segment register moves accept WordReg so that a segment register

>  // can be copied to a 32 bit register, and vice versa, without using a

>  // size prefix.  When moving to a 32 bit register, the upper 16 bits

> @@ -77,7 +77,7 @@ mov, 2, 0xf21, None, 2, Cpu386|CpuNo64,

>  mov, 2, 0xf21, None, 2, Cpu64, D|RegMem|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|NoRex64, { Debug, Reg64 }

>  mov, 2, 0xf24, None, 2, Cpu386|CpuNo64, D|RegMem|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_qSuf|No_ldSuf, { Test, Reg32 }

>  movabs, 2, 0xa0, None, 1, Cpu64, D|W|No_sSuf|No_ldSuf, { Disp64|Unspecified|Byte|Word|Dword|Qword, Acc|Byte|Word|Dword|Qword }

> -movabs, 2, 0xb0, None, 1, Cpu64, W|ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf, { Imm64, Reg64 }

> +movabs, 2, 0xb8, None, 1, Cpu64, ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf, { Imm64, Reg64 }

>

>  // Move after swapping the bytes

>  movbe, 2, 0x0f38f0, None, 3, CpuMovbe, Modrm|No_bSuf|No_sSuf|No_ldSuf, { Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }

> @@ -974,10 +974,10 @@ movd, 2, 0xf6e, None, 2, CpuMMX|Cpu64, D

>  // In the 64bit mode the short form mov immediate is redefined to have

>  // 64bit displacement value.  We put the 64bit displacement first and

>  // we only mark constants larger than 32bit as Disp64.

> -movq, 2, 0xa0, None, 1, Cpu64, D|W|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Disp64|Unspecified|Qword, Acc|Qword }

> -movq, 2, 0x88, None, 1, Cpu64, D|W|Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3, { Reg64, Reg64|Unspecified|Qword|BaseIndex }

> -movq, 2, 0xc6, 0x0, 1, Cpu64, W|Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3|Optimize, { Imm32S, Reg64|Qword|Unspecified|BaseIndex }

> -movq, 2, 0xb0, None, 1, Cpu64, W|ShortForm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { Imm64, Reg64 }

> +movq, 2, 0xa1, None, 1, Cpu64, D|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Disp64|Unspecified|Qword, Acc|Qword }

> +movq, 2, 0x89, None, 1, Cpu64, D|Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3, { Reg64, Reg64|Unspecified|Qword|BaseIndex }

> +movq, 2, 0xc7, 0x0, 1, Cpu64, Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3|Optimize, { Imm32S, Reg64|Qword|Unspecified|BaseIndex }

> +movq, 2, 0xb8, None, 1, Cpu64, ShortForm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { Imm64, Reg64 }

>  movq, 2, 0xf37e, None, 1, CpuAVX, Load|Modrm|Vex=1|VexOpcode=0|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }

>  movq, 2, 0x66d6, None, 1, CpuAVX, Modrm|Vex=1|VexOpcode=0|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex|RegXMM }

>  movq, 2, 0x666e, None, 1, CpuAVX|Cpu64, D|Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64|SSE2AVX, { Reg64|Unspecified|BaseIndex, RegXMM }

>


OK.

Thanks.

-- 
H.J.

Patch

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -3974,7 +3974,7 @@  optimize_encoding (void)
 		&& i.reg_operands == 1
 		&& i.imm_operands == 1
 		&& i.op[0].imms->X_op == O_constant
-		&& ((i.tm.base_opcode == 0xb0
+		&& ((i.tm.base_opcode == 0xb8
 		     && i.tm.extension_opcode == None
 		     && fits_in_unsigned_long (i.op[0].imms->X_add_number))
 		    || (fits_in_imm31 (i.op[0].imms->X_add_number)
@@ -3984,7 +3984,7 @@  optimize_encoding (void)
 			    || (i.tm.base_opcode == 0x80
 				&& i.tm.extension_opcode == 0x4)
 			    || ((i.tm.base_opcode == 0xf6
-				 || i.tm.base_opcode == 0xc6)
+				 || (i.tm.base_opcode | 1) == 0xc7)
 				&& i.tm.extension_opcode == 0x0)))
 		    || (fits_in_imm7 (i.op[0].imms->X_add_number)
 			&& i.tm.base_opcode == 0x83
@@ -4010,7 +4010,7 @@  optimize_encoding (void)
 	   movq $imm32, %r64   -> movl $imm32, %r32
         */
       i.tm.opcode_modifier.norex64 = 1;
-      if (i.tm.base_opcode == 0xb0 || i.tm.base_opcode == 0xc6)
+      if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)
 	{
 	  /* Handle
 	       movq $imm31, %r64   -> movl $imm31, %r32
@@ -4024,13 +4024,14 @@  optimize_encoding (void)
 	  i.types[0].bitfield.imm64 = 0;
 	  i.types[1].bitfield.dword = 1;
 	  i.types[1].bitfield.qword = 0;
-	  if (i.tm.base_opcode == 0xc6)
+	  if ((i.tm.base_opcode | 1) == 0xc7)
 	    {
 	      /* Handle
 		   movq $imm31, %r64   -> movl $imm31, %r32
 	       */
-	      i.tm.base_opcode = 0xb0;
+	      i.tm.base_opcode = 0xb8;
 	      i.tm.extension_opcode = None;
+	      i.tm.opcode_modifier.w = 0;
 	      i.tm.opcode_modifier.shortform = 1;
 	      i.tm.opcode_modifier.modrm = 0;
 	    }
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -1107,6 +1107,8 @@  process_i386_opcode_modifier (FILE *tabl
 
   if (strcmp (mod, "0"))
     {
+      unsigned int have_w = 0, bwlq_suf = 0xf;
+
       last = mod + strlen (mod);
       for (next = mod; next && next < last; )
 	{
@@ -1120,8 +1122,30 @@  process_i386_opcode_modifier (FILE *tabl
 			  lineno);
 	      if (strcasecmp(str, "IsString") == 0)
 		active_isstring = 1;
+
+	      if (strcasecmp(str, "W") == 0)
+		have_w = 1;
+
+	      if (strcasecmp(str, "No_bSuf") == 0)
+		bwlq_suf &= ~1;
+	      if (strcasecmp(str, "No_wSuf") == 0)
+		bwlq_suf &= ~2;
+	      if (strcasecmp(str, "No_lSuf") == 0)
+		bwlq_suf &= ~4;
+	      if (strcasecmp(str, "No_qSuf") == 0)
+		bwlq_suf &= ~8;
 	    }
 	}
+
+      if (have_w && !bwlq_suf)
+	fail ("%s: %d: stray W modifier\n", filename, lineno);
+      if (have_w && !(bwlq_suf & 1))
+	fprintf (stderr, "%s: %d: W modifier without Byte operand(s)\n",
+		 filename, lineno);
+      if (have_w && !(bwlq_suf & ~1))
+	fprintf (stderr,
+		 "%s: %d: W modifier without Word/Dword/Qword operand(s)\n",
+		 filename, lineno);
     }
   output_opcode_modifier (table, modifiers, ARRAY_SIZE (modifiers));
 }
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -387,7 +387,9 @@  enum
 {
   /* has direction bit. */
   D = 0,
-  /* set if operands can be words or dwords encoded the canonical way */
+  /* set if operands can be both bytes and words/dwords/qwords, encoded the
+     canonical way; the base_opcode field should hold the encoding for byte
+     operands  */
   W,
   /* load form instruction. Must be placed before store form.  */
   Load,
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -60,7 +60,7 @@  mov, 2, 0x88, None, 1, 0, D|W|CheckRegSi
 // 64bit value.
 mov, 2, 0xb0, None, 1, 0, W|ShortForm|No_sSuf|No_qSuf|No_ldSuf, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32 }
 mov, 2, 0xc6, 0x0, 1, 0, W|Modrm|No_sSuf|No_ldSuf|HLEPrefixOk=3|Optimize, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
-mov, 2, 0xb0, None, 1, Cpu64, W|ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|Optimize, { Imm64, Reg64 }
+mov, 2, 0xb8, None, 1, Cpu64, ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|Optimize, { Imm64, Reg64 }
 // The segment register moves accept WordReg so that a segment register
 // can be copied to a 32 bit register, and vice versa, without using a
 // size prefix.  When moving to a 32 bit register, the upper 16 bits
@@ -77,7 +77,7 @@  mov, 2, 0xf21, None, 2, Cpu386|CpuNo64,
 mov, 2, 0xf21, None, 2, Cpu64, D|RegMem|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|NoRex64, { Debug, Reg64 }
 mov, 2, 0xf24, None, 2, Cpu386|CpuNo64, D|RegMem|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_qSuf|No_ldSuf, { Test, Reg32 }
 movabs, 2, 0xa0, None, 1, Cpu64, D|W|No_sSuf|No_ldSuf, { Disp64|Unspecified|Byte|Word|Dword|Qword, Acc|Byte|Word|Dword|Qword }
-movabs, 2, 0xb0, None, 1, Cpu64, W|ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf, { Imm64, Reg64 }
+movabs, 2, 0xb8, None, 1, Cpu64, ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf, { Imm64, Reg64 }
 
 // Move after swapping the bytes
 movbe, 2, 0x0f38f0, None, 3, CpuMovbe, Modrm|No_bSuf|No_sSuf|No_ldSuf, { Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
@@ -974,10 +974,10 @@  movd, 2, 0xf6e, None, 2, CpuMMX|Cpu64, D
 // In the 64bit mode the short form mov immediate is redefined to have
 // 64bit displacement value.  We put the 64bit displacement first and
 // we only mark constants larger than 32bit as Disp64.
-movq, 2, 0xa0, None, 1, Cpu64, D|W|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Disp64|Unspecified|Qword, Acc|Qword }
-movq, 2, 0x88, None, 1, Cpu64, D|W|Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3, { Reg64, Reg64|Unspecified|Qword|BaseIndex }
-movq, 2, 0xc6, 0x0, 1, Cpu64, W|Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3|Optimize, { Imm32S, Reg64|Qword|Unspecified|BaseIndex }
-movq, 2, 0xb0, None, 1, Cpu64, W|ShortForm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { Imm64, Reg64 }
+movq, 2, 0xa1, None, 1, Cpu64, D|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Disp64|Unspecified|Qword, Acc|Qword }
+movq, 2, 0x89, None, 1, Cpu64, D|Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3, { Reg64, Reg64|Unspecified|Qword|BaseIndex }
+movq, 2, 0xc7, 0x0, 1, Cpu64, Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3|Optimize, { Imm32S, Reg64|Qword|Unspecified|BaseIndex }
+movq, 2, 0xb8, None, 1, Cpu64, ShortForm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { Imm64, Reg64 }
 movq, 2, 0xf37e, None, 1, CpuAVX, Load|Modrm|Vex=1|VexOpcode=0|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
 movq, 2, 0x66d6, None, 1, CpuAVX, Modrm|Vex=1|VexOpcode=0|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex|RegXMM }
 movq, 2, 0x666e, None, 1, CpuAVX|Cpu64, D|Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64|SSE2AVX, { Reg64|Unspecified|BaseIndex, RegXMM }