[02/10] x86: drop stray W

Message ID f0035bb7-5b85-a3c1-2721-4540de241025@suse.com
State New
Headers show
Series
  • x86: operand size handling improvements
Related show

Commit Message

Jan Beulich Aug. 6, 2019, 2:25 p.m.
The flag is used to indicate opcodes which can be switched between byte
and word/dword/qword forms (in a "canonical" way). Obviously it's quite
odd then to see it on insns not allowing for byte operands in the first
place. As a result the opcode bytes need to be adjusted accordingly,
which includes comparisons done in optimize_encoding().

gas/
2019-08-XX  Jan Beulich  <jbeulich@suse.com>

	* config/tc-i386.c (optimize_encoding): Adjust opcodes compared
	against. Adjust replacement opcode and clear .w.

opcodes/
2019-08-XX  Jan Beulich  <jbeulich@suse.com>

	* i386-opc.tbl (mov, movabs, movq): Drop W and adjust opcodes of
	general purpose variants not allowing for byte operands.
	* i386-tbl.h: Re-generate.

Comments

H.J. Lu Aug. 6, 2019, 7:37 p.m. | #1
On Tue, Aug 6, 2019 at 7:25 AM Jan Beulich <jbeulich@suse.com> wrote:
>

> The flag is used to indicate opcodes which can be switched between byte

> and word/dword/qword forms (in a "canonical" way). Obviously it's quite

> odd then to see it on insns not allowing for byte operands in the first

> place. As a result the opcode bytes need to be adjusted accordingly,

> which includes comparisons done in optimize_encoding().


These encodings do allow byte operand.  The reason why they are in
a separate entry is Cpu64.  Can they be merged without Cpu64?

> gas/

> 2019-08-XX  Jan Beulich  <jbeulich@suse.com>

>

>         * config/tc-i386.c (optimize_encoding): Adjust opcodes compared

>         against. Adjust replacement opcode and clear .w.

>

> opcodes/

> 2019-08-XX  Jan Beulich  <jbeulich@suse.com>

>

>         * i386-opc.tbl (mov, movabs, movq): Drop W and adjust opcodes of

>         general purpose variants not allowing for byte operands.

>         * i386-tbl.h: Re-generate.

>

> --- a/gas/config/tc-i386.c

> +++ b/gas/config/tc-i386.c

> @@ -3974,7 +3974,7 @@ optimize_encoding (void)

>                 && i.reg_operands == 1

>                 && i.imm_operands == 1

>                 && i.op[0].imms->X_op == O_constant

> -               && ((i.tm.base_opcode == 0xb0

> +               && ((i.tm.base_opcode == 0xb8

>                      && i.tm.extension_opcode == None

>                      && fits_in_unsigned_long (i.op[0].imms->X_add_number))

>                     || (fits_in_imm31 (i.op[0].imms->X_add_number)

> @@ -3984,7 +3984,7 @@ optimize_encoding (void)

>                             || (i.tm.base_opcode == 0x80

>                                 && i.tm.extension_opcode == 0x4)

>                             || ((i.tm.base_opcode == 0xf6

> -                                || i.tm.base_opcode == 0xc6)

> +                                || (i.tm.base_opcode | 1) == 0xc7)

>                                 && i.tm.extension_opcode == 0x0)))

>                     || (fits_in_imm7 (i.op[0].imms->X_add_number)

>                         && i.tm.base_opcode == 0x83

> @@ -4010,7 +4010,7 @@ optimize_encoding (void)

>            movq $imm32, %r64   -> movl $imm32, %r32

>           */

>         i.tm.opcode_modifier.norex64 = 1;

> -      if (i.tm.base_opcode == 0xb0 || i.tm.base_opcode == 0xc6)

> +      if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)

>         {

>           /* Handle

>                movq $imm31, %r64   -> movl $imm31, %r32

> @@ -4024,13 +4024,14 @@ optimize_encoding (void)

>           i.types[0].bitfield.imm64 = 0;

>           i.types[1].bitfield.dword = 1;

>           i.types[1].bitfield.qword = 0;

> -         if (i.tm.base_opcode == 0xc6)

> +         if ((i.tm.base_opcode | 1) == 0xc7)

>             {

>               /* Handle

>                    movq $imm31, %r64   -> movl $imm31, %r32

>                */

> -             i.tm.base_opcode = 0xb0;

> +             i.tm.base_opcode = 0xb8;

>               i.tm.extension_opcode = None;

> +             i.tm.opcode_modifier.w = 0;

>               i.tm.opcode_modifier.shortform = 1;

>               i.tm.opcode_modifier.modrm = 0;

>             }

> --- a/opcodes/i386-opc.tbl

> +++ b/opcodes/i386-opc.tbl

> @@ -60,7 +60,7 @@ mov, 2, 0x88, None, 1, 0, D|W|CheckRegSi

>   // 64bit value.

>   mov, 2, 0xb0, None, 1, 0, W|ShortForm|No_sSuf|No_qSuf|No_ldSuf, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32 }

>   mov, 2, 0xc6, 0x0, 1, 0, W|Modrm|No_sSuf|No_ldSuf|HLEPrefixOk=3|Optimize, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }

> -mov, 2, 0xb0, None, 1, Cpu64, W|ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|Optimize, { Imm64, Reg64 }

> +mov, 2, 0xb8, None, 1, Cpu64, ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|Optimize, { Imm64, Reg64 }

>   // The segment register moves accept WordReg so that a segment register

>   // can be copied to a 32 bit register, and vice versa, without using a

>   // size prefix.  When moving to a 32 bit register, the upper 16 bits

> @@ -77,7 +77,7 @@ mov, 2, 0xf21, None, 2, Cpu386|CpuNo64,

>   mov, 2, 0xf21, None, 2, Cpu64, D|RegMem|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|NoRex64, { Debug, Reg64 }

>   mov, 2, 0xf24, None, 2, Cpu386|CpuNo64, D|RegMem|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_qSuf|No_ldSuf, { Test, Reg32 }

>   movabs, 2, 0xa0, None, 1, Cpu64, D|W|No_sSuf|No_ldSuf, { Disp64|Unspecified|Byte|Word|Dword|Qword, Acc|Byte|Word|Dword|Qword }

> -movabs, 2, 0xb0, None, 1, Cpu64, W|ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf, { Imm64, Reg64 }

> +movabs, 2, 0xb8, None, 1, Cpu64, ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf, { Imm64, Reg64 }

>

>   // Move after swapping the bytes

>   movbe, 2, 0x0f38f0, None, 3, CpuMovbe, Modrm|No_bSuf|No_sSuf|No_ldSuf, { Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }

> @@ -972,10 +972,10 @@ movd, 2, 0xf6e, None, 2, CpuMMX|Cpu64, D

>   // In the 64bit mode the short form mov immediate is redefined to have

>   // 64bit displacement value.  We put the 64bit displacement first and

>   // we only mark constants larger than 32bit as Disp64.

> -movq, 2, 0xa0, None, 1, Cpu64, D|W|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Disp64|Unspecified|Qword, Acc|Qword }

> -movq, 2, 0x88, None, 1, Cpu64, D|W|Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3, { Reg64, Reg64|Unspecified|Qword|BaseIndex }

> -movq, 2, 0xc6, 0x0, 1, Cpu64, W|Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3|Optimize, { Imm32S, Reg64|Qword|Unspecified|BaseIndex }

> -movq, 2, 0xb0, None, 1, Cpu64, W|ShortForm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { Imm64, Reg64 }

> +movq, 2, 0xa1, None, 1, Cpu64, D|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Disp64|Unspecified|Qword, Acc|Qword }

> +movq, 2, 0x89, None, 1, Cpu64, D|Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3, { Reg64, Reg64|Unspecified|Qword|BaseIndex }

> +movq, 2, 0xc7, 0x0, 1, Cpu64, Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3|Optimize, { Imm32S, Reg64|Qword|Unspecified|BaseIndex }

> +movq, 2, 0xb8, None, 1, Cpu64, ShortForm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { Imm64, Reg64 }

>   movq, 2, 0xf37e, None, 1, CpuAVX, Load|Modrm|Vex=1|VexOpcode=0|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }

>   movq, 2, 0x66d6, None, 1, CpuAVX, Modrm|Vex=1|VexOpcode=0|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex|RegXMM }

>   movq, 2, 0x666e, None, 1, CpuAVX|Cpu64, D|Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64|SSE2AVX, { Reg64|Unspecified|BaseIndex, RegXMM }

>



-- 
H.J.
Jan Beulich Aug. 7, 2019, 7:43 a.m. | #2
On 06.08.2019 21:37,  H.J. Lu  wrote:
> On Tue, Aug 6, 2019 at 7:25 AM Jan Beulich <jbeulich@suse.com> wrote:

>>

>> The flag is used to indicate opcodes which can be switched between byte

>> and word/dword/qword forms (in a "canonical" way). Obviously it's quite

>> odd then to see it on insns not allowing for byte operands in the first

>> place. As a result the opcode bytes need to be adjusted accordingly,

>> which includes comparisons done in optimize_encoding().

> 

> These encodings do allow byte operand.


By "encodings" I assume you mean the opcodes, not the templates. The
templates modified here all clearly don't allow byte operands, and
that's what counts when considering whether W is applicable.

>  The reason why they are in

> a separate entry is Cpu64.  Can they be merged without Cpu64?


I don't think they can, let me go through:

>> --- a/opcodes/i386-opc.tbl

>> +++ b/opcodes/i386-opc.tbl

>> @@ -60,7 +60,7 @@ mov, 2, 0x88, None, 1, 0, D|W|CheckRegSi

>>    // 64bit value.

>>    mov, 2, 0xb0, None, 1, 0, W|ShortForm|No_sSuf|No_qSuf|No_ldSuf, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32 }

>>    mov, 2, 0xc6, 0x0, 1, 0, W|Modrm|No_sSuf|No_ldSuf|HLEPrefixOk=3|Optimize, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }

>> -mov, 2, 0xb0, None, 1, Cpu64, W|ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|Optimize, { Imm64, Reg64 }

>> +mov, 2, 0xb8, None, 1, Cpu64, ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|Optimize, { Imm64, Reg64 }


While the Reg64 part would be fine to be merged, I'm not convinced the
Imm64 part can sensibly be (in a regression free manner). Nevertheless
it might be possible, but to be honest I'm not up to investing time
here. If this could be sensibly folded, it shouldn't have been a
separate template to begin with. Furthermore, folding this one when
(see below) the others can't be folded into anything would mean further
(even if just slightly) more complicated code in optimize_encoding().

>> @@ -77,7 +77,7 @@ mov, 2, 0xf21, None, 2, Cpu386|CpuNo64,

>>    mov, 2, 0xf21, None, 2, Cpu64, D|RegMem|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|NoRex64, { Debug, Reg64 }

>>    mov, 2, 0xf24, None, 2, Cpu386|CpuNo64, D|RegMem|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_qSuf|No_ldSuf, { Test, Reg32 }

>>    movabs, 2, 0xa0, None, 1, Cpu64, D|W|No_sSuf|No_ldSuf, { Disp64|Unspecified|Byte|Word|Dword|Qword, Acc|Byte|Word|Dword|Qword }

>> -movabs, 2, 0xb0, None, 1, Cpu64, W|ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf, { Imm64, Reg64 }

>> +movabs, 2, 0xb8, None, 1, Cpu64, ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf, { Imm64, Reg64 }


movabs is a 64-bit only mnemonic.

>> @@ -972,10 +972,10 @@ movd, 2, 0xf6e, None, 2, CpuMMX|Cpu64, D

>>    // In the 64bit mode the short form mov immediate is redefined to have

>>    // 64bit displacement value.  We put the 64bit displacement first and

>>    // we only mark constants larger than 32bit as Disp64.

>> -movq, 2, 0xa0, None, 1, Cpu64, D|W|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Disp64|Unspecified|Qword, Acc|Qword }

>> -movq, 2, 0x88, None, 1, Cpu64, D|W|Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3, { Reg64, Reg64|Unspecified|Qword|BaseIndex }

>> -movq, 2, 0xc6, 0x0, 1, Cpu64, W|Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3|Optimize, { Imm32S, Reg64|Qword|Unspecified|BaseIndex }

>> -movq, 2, 0xb0, None, 1, Cpu64, W|ShortForm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { Imm64, Reg64 }

>> +movq, 2, 0xa1, None, 1, Cpu64, D|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Disp64|Unspecified|Qword, Acc|Qword }

>> +movq, 2, 0x89, None, 1, Cpu64, D|Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3, { Reg64, Reg64|Unspecified|Qword|BaseIndex }

>> +movq, 2, 0xc7, 0x0, 1, Cpu64, Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3|Optimize, { Imm32S, Reg64|Qword|Unspecified|BaseIndex }

>> +movq, 2, 0xb8, None, 1, Cpu64, ShortForm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { Imm64, Reg64 }

>>    movq, 2, 0xf37e, None, 1, CpuAVX, Load|Modrm|Vex=1|VexOpcode=0|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }

>>    movq, 2, 0x66d6, None, 1, CpuAVX, Modrm|Vex=1|VexOpcode=0|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex|RegXMM }

>>    movq, 2, 0x666e, None, 1, CpuAVX|Cpu64, D|Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64|SSE2AVX, { Reg64|Unspecified|BaseIndex, RegXMM }


There are no suitable movq templates without Cpu64 that these could
be merged with.

Jan
H.J. Lu Aug. 7, 2019, 3:13 p.m. | #3
On Wed, Aug 7, 2019 at 12:43 AM Jan Beulich <jbeulich@suse.com> wrote:
>

> On 06.08.2019 21:37,  H.J. Lu  wrote:

> > On Tue, Aug 6, 2019 at 7:25 AM Jan Beulich <jbeulich@suse.com> wrote:

> >>

> >> The flag is used to indicate opcodes which can be switched between byte

> >> and word/dword/qword forms (in a "canonical" way). Obviously it's quite

> >> odd then to see it on insns not allowing for byte operands in the first

> >> place. As a result the opcode bytes need to be adjusted accordingly,

> >> which includes comparisons done in optimize_encoding().

> >

> > These encodings do allow byte operand.

>

> By "encodings" I assume you mean the opcodes, not the templates. The

> templates modified here all clearly don't allow byte operands, and

> that's what counts when considering whether W is applicable.


i.tm.opcode_modifier.w is checked only in process_suffix.  This part

      /* It's not a byte, select word/dword operation.  */
      if (i.tm.opcode_modifier.w)
        {
          if (i.tm.opcode_modifier.shortform)
            i.tm.base_opcode |= 8;
          else
            i.tm.base_opcode |= 1;
        }

applies to encoding.  Even if we can't merge entries in i386-opc.tbl,
W still makes sense.   Will keeping W cause any issues?

> >  The reason why they are in

> > a separate entry is Cpu64.  Can they be merged without Cpu64?

>

> I don't think they can, let me got through:

>



-- 
H.J.
Jan Beulich Aug. 7, 2019, 3:49 p.m. | #4
On 07.08.2019 17:13,  H.J. Lu  wrote:
> On Wed, Aug 7, 2019 at 12:43 AM Jan Beulich <jbeulich@suse.com> wrote:

>>

>> On 06.08.2019 21:37,  H.J. Lu  wrote:

>>> On Tue, Aug 6, 2019 at 7:25 AM Jan Beulich <jbeulich@suse.com> wrote:

>>>>

>>>> The flag is used to indicate opcodes which can be switched between byte

>>>> and word/dword/qword forms (in a "canonical" way). Obviously it's quite

>>>> odd then to see it on insns not allowing for byte operands in the first

>>>> place. As a result the opcode bytes need to be adjusted accordingly,

>>>> which includes comparisons done in optimize_encoding().

>>>

>>> These encodings do allow byte operand.

>>

>> By "encodings" I assume you mean the opcodes, not the templates. The

>> templates modified here all clearly don't allow byte operands, and

>> that's what counts when considering whether W is applicable.

> 

> i.tm.opcode_modifier.w is checked only in process_suffix.  This part

> 

>        /* It's not a byte, select word/dword operation.  */

>        if (i.tm.opcode_modifier.w)

>          {

>            if (i.tm.opcode_modifier.shortform)

>              i.tm.base_opcode |= 8;

>            else

>              i.tm.base_opcode |= 1;

>          }

> 

> applies to encoding.  Even if we can't merge entries in i386-opc.tbl,

> W still makes senses.   Will keeping W cause any issues?


Probably not right now, but I'd have to invest time to re-check the
rest of the series without it. But I still don't get it: Contrary to
what you say, W does _not_ make sense when no accepted operand
combination allows the if() above to be bypassed. It is an "alter
the encoding if the operand is word/dword/qword, i.e. not byte" flag,
implying that the encoding should remain unchanged for byte operands,
which the templates in question don't accept in the first place.

Let me state my position in another way: Every, absolutely every
attribute in the templates should have a reason to be there.
Everything else should be dropped. Over the last couple of years
I've managed to get rid of quite a few pointlessly present
attributes. The W flags here are just another example. Not
following this fundamental way of handling things has led to the
mess that the opcode table was and to a fair degree still is.
This is actively hindering maintainability.

Jan
H.J. Lu Aug. 8, 2019, 3:59 p.m. | #5
On Wed, Aug 7, 2019 at 8:49 AM Jan Beulich <jbeulich@suse.com> wrote:
>

> On 07.08.2019 17:13,  H.J. Lu  wrote:

> > On Wed, Aug 7, 2019 at 12:43 AM Jan Beulich <jbeulich@suse.com> wrote:

> >>

> >> On 06.08.2019 21:37,  H.J. Lu  wrote:

> >>> On Tue, Aug 6, 2019 at 7:25 AM Jan Beulich <jbeulich@suse.com> wrote:

> >>>>

> >>>> The flag is used to indicate opcodes which can be switched between byte

> >>>> and word/dword/qword forms (in a "canonical" way). Obviously it's quite

> >>>> odd then to see it on insns not allowing for byte operands in the first

> >>>> place. As a result the opcode bytes need to be adjusted accordingly,

> >>>> which includes comparisons done in optimize_encoding().

> >>>

> >>> These encodings do allow byte operand.

> >>

> >> By "encodings" I assume you mean the opcodes, not the templates. The

> >> templates modified here all clearly don't allow byte operands, and

> >> that's what counts when considering whether W is applicable.

> >

> > i.tm.opcode_modifier.w is checked only in process_suffix.  This part

> >

> >        /* It's not a byte, select word/dword operation.  */

> >        if (i.tm.opcode_modifier.w)

> >          {

> >            if (i.tm.opcode_modifier.shortform)

> >              i.tm.base_opcode |= 8;

> >            else

> >              i.tm.base_opcode |= 1;

> >          }

> >

> > applies to encoding.  Even if we can't merge entries in i386-opc.tbl,

> > W still makes senses.   Will keeping W cause any issues?

>

> Probably not right now, but I'd have to invest time to re-check the

> rest of the series without it. But I still don't get it: Other than

> what you say, W does _not_ make sense when no accepted operand

> combination allows the if() above to be bypassed. It is an "alter

> the encoding if the operand is word/dword/qword, i.e. not byte" flag,

> implying that the encoding should remain unchanged for byte operands,

> which the templates in question don't accept in the first place.

>

> Let me state my position in another way: Every, absolutely every

> attribute in the templates should have a reason to be there.

> Everything else should be dropped. Over the last couple of years

> I've managed to get rid of quite a few pointlessly present

> attributes. The W flags here are just another example. Not

> following this fundamental way of handling things has led to the

> mess that the opcode table was and to a fair degree still is.

> This is actively hindering maintainability.

>


W is set on instructions with the operand size encoding bit (w) in SDM.
It isn't set on anything else.   How will it be a problem?

-- 
H.J.
Jan Beulich Aug. 9, 2019, 7:42 a.m. | #6
On 08.08.2019 17:59,  H.J. Lu  wrote:
> On Wed, Aug 7, 2019 at 8:49 AM Jan Beulich <jbeulich@suse.com> wrote:

>>

>> On 07.08.2019 17:13,  H.J. Lu  wrote:

>>> On Wed, Aug 7, 2019 at 12:43 AM Jan Beulich <jbeulich@suse.com> wrote:

>>>>

>>>> On 06.08.2019 21:37,  H.J. Lu  wrote:

>>>>> On Tue, Aug 6, 2019 at 7:25 AM Jan Beulich <jbeulich@suse.com> wrote:

>>>>>>

>>>>>> The flag is used to indicate opcodes which can be switched between byte

>>>>>> and word/dword/qword forms (in a "canonical" way). Obviously it's quite

>>>>>> odd then to see it on insns not allowing for byte operands in the first

>>>>>> place. As a result the opcode bytes need to be adjusted accordingly,

>>>>>> which includes comparisons done in optimize_encoding().

>>>>>

>>>>> These encodings do allow byte operand.

>>>>

>>>> By "encodings" I assume you mean the opcodes, not the templates. The

>>>> templates modified here all clearly don't allow byte operands, and

>>>> that's what counts when considering whether W is applicable.

>>>

>>> i.tm.opcode_modifier.w is checked only in process_suffix.  This part

>>>

>>>         /* It's not a byte, select word/dword operation.  */

>>>         if (i.tm.opcode_modifier.w)

>>>           {

>>>             if (i.tm.opcode_modifier.shortform)

>>>               i.tm.base_opcode |= 8;

>>>             else

>>>               i.tm.base_opcode |= 1;

>>>           }

>>>

>>> applies to encoding.  Even if we can't merge entries in i386-opc.tbl,

>>> W still makes senses.   Will keeping W cause any issues?

>>

>> Probably not right now, but I'd have to invest time to re-check the

>> rest of the series without it. But I still don't get it: Other than

>> what you say, W does _not_ make sense when no accepted operand

>> combination allows the if() above to be bypassed. It is an "alter

>> the encoding if the operand is word/dword/qword, i.e. not byte" flag,

>> implying that the encoding should remain unchanged for byte operands,

>> which the templates in question don't accept in the first place.

>>

>> Let me state my position in another way: Every, absolutely every

>> attribute in the templates should have a reason to be there.

>> Everything else should be dropped. Over the last couple of years

>> I've managed to get rid of quite a few pointlessly present

>> attributes. The W flags here are just another example. Not

>> following this fundamental way of handling things has led to the

>> mess that the opcode table was and to a fair degree still is.

>> This is actively hindering maintainability.

>>

> 

> W is set on instructions with the operand size encoding bit (w) in SDM.

> It isn't set on anything else.   How will it be a problem?


"Problem" has to be seen from two perspectives here: There's no
problem with the generated code. But there is a problem in that
people other than you may legitimately wonder why the attribute
is there. I.e. its unnecessary presence is potentially confusing.
Guess how I came to put together this patch?

Furthermore at least up until patch 9 the attribute is not used
consistently - it's been missing for MOVSX/MOVZX. Similarly
prior to commit 556059dd13 it hadn't been used for CRC32. Hence
your "its use follows what the SDM says" isn't really applicable.
(Note also how we've recently moved away from the sreg2 / sreg3
distinction the SDM makes.)

Jan
H.J. Lu Aug. 9, 2019, 7:12 p.m. | #7
On Fri, Aug 9, 2019 at 12:42 AM Jan Beulich <jbeulich@suse.com> wrote:
>

> On 08.08.2019 17:59,  H.J. Lu  wrote:

> > On Wed, Aug 7, 2019 at 8:49 AM Jan Beulich <jbeulich@suse.com> wrote:

> >>

> >> On 07.08.2019 17:13,  H.J. Lu  wrote:

> >>> On Wed, Aug 7, 2019 at 12:43 AM Jan Beulich <jbeulich@suse.com> wrote:

> >>>>

> >>>> On 06.08.2019 21:37,  H.J. Lu  wrote:

> >>>>> On Tue, Aug 6, 2019 at 7:25 AM Jan Beulich <jbeulich@suse.com> wrote:

> >>>>>>

> >>>>>> The flag is used to indicate opcodes which can be switched between byte

> >>>>>> and word/dword/qword forms (in a "canonical" way). Obviously it's quite

> >>>>>> odd then to see it on insns not allowing for byte operands in the first

> >>>>>> place. As a result the opcode bytes need to be adjusted accordingly,

> >>>>>> which includes comparisons done in optimize_encoding().

> >>>>>

> >>>>> These encodings do allow byte operand.

> >>>>

> >>>> By "encodings" I assume you mean the opcodes, not the templates. The

> >>>> templates modified here all clearly don't allow byte operands, and

> >>>> that's what counts when considering whether W is applicable.

> >>>

> >>> i.tm.opcode_modifier.w is checked only in process_suffix.  This part

> >>>

> >>>         /* It's not a byte, select word/dword operation.  */

> >>>         if (i.tm.opcode_modifier.w)

> >>>           {

> >>>             if (i.tm.opcode_modifier.shortform)

> >>>               i.tm.base_opcode |= 8;

> >>>             else

> >>>               i.tm.base_opcode |= 1;

> >>>           }

> >>>

> >>> applies to encoding.  Even if we can't merge entries in i386-opc.tbl,

> >>> W still makes senses.   Will keeping W cause any issues?

> >>

> >> Probably not right now, but I'd have to invest time to re-check the

> >> rest of the series without it. But I still don't get it: Other than

> >> what you say, W does _not_ make sense when no accepted operand

> >> combination allows the if() above to be bypassed. It is an "alter

> >> the encoding if the operand is word/dword/qword, i.e. not byte" flag,

> >> implying that the encoding should remain unchanged for byte operands,

> >> which the templates in question don't accept in the first place.

> >>

> >> Let me state my position in another way: Every, absolutely every

> >> attribute in the templates should have a reason to be there.

> >> Everything else should be dropped. Over the last couple of years

> >> I've managed to get rid of quite a few pointlessly present

> >> attributes. The W flags here are just another example. Not

> >> following this fundamental way of handling things has led to the

> >> mess that the opcode table was and to a fair degree still is.

> >> This is actively hindering maintainability.

> >>

> >

> > W is set on instructions with the operand size encoding bit (w) in SDM.

> > It isn't set on anything else.   How will it be a problem?

>

> "Problem" has to be seen from two perspectives here: There's no

> problem with the generated code. But there is a problem in that

> people other than you may legitimately wonder why the attribute

> is there. I.e. its unnecessary presence is potentially confusing.

> Guess how I came to put together this patch?


At first, I didn't know why W was used on these instructions.   I
found my answer in SDM.   BTW, some other bits can be traced
to SDM.

> Furthermore at least up until patch 9 the attribute is not used

> consistently - it's been missing for MOVSX/MOVZX. Similarly

> prior to commit 556059dd13 it hadn't been used for CRC32. Hence

> your "its use follows what the SDM says" isn't really applicable.

> (Note also how we've recently moved away from the sreg2 / sreg3

> distinction the SDM makes.)

>


W may not be used in all places where it should be.   If we don't
want to totally remove W, we could add it where it should be used,
remove it where it is used now.
-- 
H.J.
Jan Beulich Aug. 12, 2019, 8:11 a.m. | #8
On 09.08.2019 21:12,  H.J. Lu  wrote:
> On Fri, Aug 9, 2019 at 12:42 AM Jan Beulich <jbeulich@suse.com> wrote:

>>

>> On 08.08.2019 17:59,  H.J. Lu  wrote:

>>> On Wed, Aug 7, 2019 at 8:49 AM Jan Beulich <jbeulich@suse.com> wrote:

>>>>

>>>> On 07.08.2019 17:13,  H.J. Lu  wrote:

>>>>> On Wed, Aug 7, 2019 at 12:43 AM Jan Beulich <jbeulich@suse.com> wrote:

>>>>>>

>>>>>> On 06.08.2019 21:37,  H.J. Lu  wrote:

>>>>>>> On Tue, Aug 6, 2019 at 7:25 AM Jan Beulich <jbeulich@suse.com> wrote:

>>>>>>>>

>>>>>>>> The flag is used to indicate opcodes which can be switched between byte

>>>>>>>> and word/dword/qword forms (in a "canonical" way). Obviously it's quite

>>>>>>>> odd then to see it on insns not allowing for byte operands in the first

>>>>>>>> place. As a result the opcode bytes need to be adjusted accordingly,

>>>>>>>> which includes comparisons done in optimize_encoding().

>>>>>>>

>>>>>>> These encodings do allow byte operand.

>>>>>>

>>>>>> By "encodings" I assume you mean the opcodes, not the templates. The

>>>>>> templates modified here all clearly don't allow byte operands, and

>>>>>> that's what counts when considering whether W is applicable.

>>>>>

>>>>> i.tm.opcode_modifier.w is checked only in process_suffix.  This part

>>>>>

>>>>>          /* It's not a byte, select word/dword operation.  */

>>>>>          if (i.tm.opcode_modifier.w)

>>>>>            {

>>>>>              if (i.tm.opcode_modifier.shortform)

>>>>>                i.tm.base_opcode |= 8;

>>>>>              else

>>>>>                i.tm.base_opcode |= 1;

>>>>>            }

>>>>>

>>>>> applies to encoding.  Even if we can't merge entries in i386-opc.tbl,

>>>>> W still makes senses.   Will keeping W cause any issues?

>>>>

>>>> Probably not right now, but I'd have to invest time to re-check the

>>>> rest of the series without it. But I still don't get it: Other than

>>>> what you say, W does _not_ make sense when no accepted operand

>>>> combination allows the if() above to be bypassed. It is an "alter

>>>> the encoding if the operand is word/dword/qword, i.e. not byte" flag,

>>>> implying that the encoding should remain unchanged for byte operands,

>>>> which the templates in question don't accept in the first place.

>>>>

>>>> Let me state my position in another way: Every, absolutely every

>>>> attribute in the templates should have a reason to be there.

>>>> Everything else should be dropped. Over the last couple of years

>>>> I've managed to get rid of quite a few pointlessly present

>>>> attributes. The W flags here are just another example. Not

>>>> following this fundamental way of handling things has led to the

>>>> mess that the opcode table was and to a fair degree still is.

>>>> This is actively hindering maintainability.

>>>>

>>>

>>> W is set on instructions with the operand size encoding bit (w) in SDM.

>>> It isn't set on anything else.   How will it be a problem?

>>

>> "Problem" has to be seen from two perspectives here: There's no

>> problem with the generated code. But there is a problem in that

>> people other than you may legitimately wonder why the attribute

>> is there. I.e. its unnecessary presence is potentially confusing.

>> Guess how I came to put together this patch?

> 

> At first, I didn't know why W was used on these instructions.   I

> found my answer in SDM.   BTW, some other bits can be traced

> to SDW.

> 

>> Furthermore at least up until patch 9 the attribute is not used

>> consistently - it's been missing for MOVSX/MOVZX. Similarly

>> prior to commit 556059dd13 it hadn't been used for CRC32. Hence

>> your "its use follows what the SDM says" isn't really applicable.

>> (Note also how we've recently moved away from the sreg2 / sreg3

>> distinction the SDM makes.)

>>

> 

> W may not be used on all places where it should be.   If we don't

> want to totally remove W, we could add it where it should be used,

> remove it where it is used now.


I'm afraid I don't understand: Are you suggesting to cripple the
code just to be more in line with the SDM? The SDM _may_ be used
as a guideline, but I think there should be no requirement that
the code exactly match what the SDM says. Otherwise you'd also
be suggesting to revert quite a bit of other (more or less
recent) work of mine. Plus you'd risk getting into trouble if
SDM (Intel) and PM (AMD) would use differing classification or
terminology. (I hope it goes without saying that there are no
grounds for either of the two to be treated "better" than the
other.)

Instead the code should be written with ease of maintenance in
mind, which in particular includes having neither unduly many
templates, nor unduly many/stray attributes on individual
templates.

Jan
H.J. Lu Aug. 13, 2019, 10:50 p.m. | #9
On Mon, Aug 12, 2019 at 1:11 AM Jan Beulich <jbeulich@suse.com> wrote:
>

> On 09.08.2019 21:12,  H.J. Lu  wrote:

> > On Fri, Aug 9, 2019 at 12:42 AM Jan Beulich <jbeulich@suse.com> wrote:

> >>

> >> On 08.08.2019 17:59,  H.J. Lu  wrote:

> >>> On Wed, Aug 7, 2019 at 8:49 AM Jan Beulich <jbeulich@suse.com> wrote:

> >>>>

> >>>> On 07.08.2019 17:13,  H.J. Lu  wrote:

> >>>>> On Wed, Aug 7, 2019 at 12:43 AM Jan Beulich <jbeulich@suse.com> wrote:

> >>>>>>

> >>>>>> On 06.08.2019 21:37,  H.J. Lu  wrote:

> >>>>>>> On Tue, Aug 6, 2019 at 7:25 AM Jan Beulich <jbeulich@suse.com> wrote:

> >>>>>>>>

> >>>>>>>> The flag is used to indicate opcodes which can be switched between byte

> >>>>>>>> and word/dword/qword forms (in a "canonical" way). Obviously it's quite

> >>>>>>>> odd then to see it on insns not allowing for byte operands in the first

> >>>>>>>> place. As a result the opcode bytes need to be adjusted accordingly,

> >>>>>>>> which includes comparisons done in optimize_encoding().

> >>>>>>>

> >>>>>>> These encodings do allow byte operand.

> >>>>>>

> >>>>>> By "encodings" I assume you mean the opcodes, not the templates. The

> >>>>>> templates modified here all clearly don't allow byte operands, and

> >>>>>> that's what counts when considering whether W is applicable.

> >>>>>

> >>>>> i.tm.opcode_modifier.w is checked only in process_suffix.  This part

> >>>>>

> >>>>>          /* It's not a byte, select word/dword operation.  */

> >>>>>          if (i.tm.opcode_modifier.w)

> >>>>>            {

> >>>>>              if (i.tm.opcode_modifier.shortform)

> >>>>>                i.tm.base_opcode |= 8;

> >>>>>              else

> >>>>>                i.tm.base_opcode |= 1;

> >>>>>            }

> >>>>>

> >>>>> applies to encoding.  Even if we can't merge entries in i386-opc.tbl,

> >>>>> W still makes sense.   Will keeping W cause any issues?

> >>>>

> >>>> Probably not right now, but I'd have to invest time to re-check the

> >>>> rest of the series without it. But I still don't get it: Other than

> >>>> what you say, W does _not_ make sense when no accepted operand

> >>>> combination allows the if() above to be bypassed. It is an "alter

> >>>> the encoding if the operand is word/dword/qword, i.e. not byte" flag,

> >>>> implying that the encoding should remain unchanged for byte operands,

> >>>> which the templates in question don't accept in the first place.

> >>>>

> >>>> Let me state my position in another way: Every, absolutely every

> >>>> attribute in the templates should have a reason to be there.

> >>>> Everything else should be dropped. Over the last couple of years

> >>>> I've managed to get rid of quite a few pointlessly present

> >>>> attributes. The W flags here are just another example. Not

> >>>> following this fundamental way of handling things has led to the

> >>>> mess that the opcode table was and to a fair degree still is.

> >>>> This is actively hindering maintainability.

> >>>>

> >>>

> >>> W is set on instructions with the operand size encoding bit (w) in SDM.

> >>> It isn't set on anything else.   How will it be a problem?

> >>

> >> "Problem" has to be seen from two perspectives here: There's no

> >> problem with the generated code. But there is a problem in that

> >> people other than you may legitimately wonder why the attribute

> >> is there. I.e. its unnecessary presence is potentially confusing.

> >> Guess how I came to put together this patch?

> >

> > At first, I didn't know why W was used on these instructions.   I

> > found my answer in SDM.   BTW, some other bits can be traced

> > to SDM.

> >

> >> Furthermore at least up until patch 9 the attribute is not used

> >> consistently - it's been missing for MOVSX/MOVZX. Similarly

> >> prior to commit 556059dd13 it hadn't been used for CRC32. Hence

> >> your "its use follows what the SDM says" isn't really applicable.

> >> (Note also how we've recently moved away from the sreg2 / sreg3

> >> distinction the SDM makes.)

> >>

> >

> > W may not be used on all places where it should be.   If we don't

> > want to totally remove W, we could add it where it should be used,

> > remove it where it is used now.

>

> I'm afraid I don't understand: Are you suggesting to cripple the

> code just to be more in line with the SDM? The SDM _may_ be used

> as a guideline, but I think there should be no requirement that

> the code exactly match what the SDM says. Otherwise you'd also

> be suggesting to revert quite a bit of other (more or less

> recent) work of mine. Plus you'd risk getting into trouble if

> SDM (Intel) and PM (AMD) would use differing classification or

> terminology. (I hope it goes without saying that there are no

> grounds for either of the two to be treated "better" than the

> other.)


I just pointed out why W was done this way.

> Instead the code should be written with ease of maintenance in

> mind, which in particular includes having neither unduly many

> templates, nor unduly many/stray attributes on individual

> templates.


I am OK with removing W from these instructions.   But please document
how it should be used so that the next person can understand it.

Thanks.

-- 
H.J.

Patch

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -3974,7 +3974,7 @@  optimize_encoding (void)
  		&& i.reg_operands == 1
  		&& i.imm_operands == 1
  		&& i.op[0].imms->X_op == O_constant
-		&& ((i.tm.base_opcode == 0xb0
+		&& ((i.tm.base_opcode == 0xb8
  		     && i.tm.extension_opcode == None
  		     && fits_in_unsigned_long (i.op[0].imms->X_add_number))
  		    || (fits_in_imm31 (i.op[0].imms->X_add_number)
@@ -3984,7 +3984,7 @@  optimize_encoding (void)
  			    || (i.tm.base_opcode == 0x80
  				&& i.tm.extension_opcode == 0x4)
  			    || ((i.tm.base_opcode == 0xf6
-				 || i.tm.base_opcode == 0xc6)
+				 || (i.tm.base_opcode | 1) == 0xc7)
  				&& i.tm.extension_opcode == 0x0)))
  		    || (fits_in_imm7 (i.op[0].imms->X_add_number)
  			&& i.tm.base_opcode == 0x83
@@ -4010,7 +4010,7 @@  optimize_encoding (void)
  	   movq $imm32, %r64   -> movl $imm32, %r32
          */
        i.tm.opcode_modifier.norex64 = 1;
-      if (i.tm.base_opcode == 0xb0 || i.tm.base_opcode == 0xc6)
+      if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)
  	{
  	  /* Handle
  	       movq $imm31, %r64   -> movl $imm31, %r32
@@ -4024,13 +4024,14 @@  optimize_encoding (void)
  	  i.types[0].bitfield.imm64 = 0;
  	  i.types[1].bitfield.dword = 1;
  	  i.types[1].bitfield.qword = 0;
-	  if (i.tm.base_opcode == 0xc6)
+	  if ((i.tm.base_opcode | 1) == 0xc7)
  	    {
  	      /* Handle
  		   movq $imm31, %r64   -> movl $imm31, %r32
  	       */
-	      i.tm.base_opcode = 0xb0;
+	      i.tm.base_opcode = 0xb8;
  	      i.tm.extension_opcode = None;
+	      i.tm.opcode_modifier.w = 0;
  	      i.tm.opcode_modifier.shortform = 1;
  	      i.tm.opcode_modifier.modrm = 0;
  	    }
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -60,7 +60,7 @@  mov, 2, 0x88, None, 1, 0, D|W|CheckRegSi
  // 64bit value.
  mov, 2, 0xb0, None, 1, 0, W|ShortForm|No_sSuf|No_qSuf|No_ldSuf, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32 }
  mov, 2, 0xc6, 0x0, 1, 0, W|Modrm|No_sSuf|No_ldSuf|HLEPrefixOk=3|Optimize, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
-mov, 2, 0xb0, None, 1, Cpu64, W|ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|Optimize, { Imm64, Reg64 }
+mov, 2, 0xb8, None, 1, Cpu64, ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|Optimize, { Imm64, Reg64 }
  // The segment register moves accept WordReg so that a segment register
  // can be copied to a 32 bit register, and vice versa, without using a
  // size prefix.  When moving to a 32 bit register, the upper 16 bits
@@ -77,7 +77,7 @@  mov, 2, 0xf21, None, 2, Cpu386|CpuNo64,
  mov, 2, 0xf21, None, 2, Cpu64, D|RegMem|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|NoRex64, { Debug, Reg64 }
  mov, 2, 0xf24, None, 2, Cpu386|CpuNo64, D|RegMem|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_qSuf|No_ldSuf, { Test, Reg32 }
  movabs, 2, 0xa0, None, 1, Cpu64, D|W|No_sSuf|No_ldSuf, { Disp64|Unspecified|Byte|Word|Dword|Qword, Acc|Byte|Word|Dword|Qword }
-movabs, 2, 0xb0, None, 1, Cpu64, W|ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf, { Imm64, Reg64 }
+movabs, 2, 0xb8, None, 1, Cpu64, ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf, { Imm64, Reg64 }
  
  // Move after swapping the bytes
  movbe, 2, 0x0f38f0, None, 3, CpuMovbe, Modrm|No_bSuf|No_sSuf|No_ldSuf, { Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
@@ -972,10 +972,10 @@  movd, 2, 0xf6e, None, 2, CpuMMX|Cpu64, D
  // In the 64bit mode the short form mov immediate is redefined to have
  // 64bit displacement value.  We put the 64bit displacement first and
  // we only mark constants larger than 32bit as Disp64.
-movq, 2, 0xa0, None, 1, Cpu64, D|W|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Disp64|Unspecified|Qword, Acc|Qword }
-movq, 2, 0x88, None, 1, Cpu64, D|W|Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3, { Reg64, Reg64|Unspecified|Qword|BaseIndex }
-movq, 2, 0xc6, 0x0, 1, Cpu64, W|Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3|Optimize, { Imm32S, Reg64|Qword|Unspecified|BaseIndex }
-movq, 2, 0xb0, None, 1, Cpu64, W|ShortForm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { Imm64, Reg64 }
+movq, 2, 0xa1, None, 1, Cpu64, D|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Disp64|Unspecified|Qword, Acc|Qword }
+movq, 2, 0x89, None, 1, Cpu64, D|Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3, { Reg64, Reg64|Unspecified|Qword|BaseIndex }
+movq, 2, 0xc7, 0x0, 1, Cpu64, Modrm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|HLEPrefixOk=3|Optimize, { Imm32S, Reg64|Qword|Unspecified|BaseIndex }
+movq, 2, 0xb8, None, 1, Cpu64, ShortForm|Size64|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { Imm64, Reg64 }
  movq, 2, 0xf37e, None, 1, CpuAVX, Load|Modrm|Vex=1|VexOpcode=0|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
  movq, 2, 0x66d6, None, 1, CpuAVX, Modrm|Vex=1|VexOpcode=0|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex|RegXMM }
  movq, 2, 0x666e, None, 1, CpuAVX|Cpu64, D|Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64|SSE2AVX, { Reg64|Unspecified|BaseIndex, RegXMM }