x86: Add support for Intel AMX instructions

Message ID 20200626194004.853516-1-hjl.tools@gmail.com
State Superseded
Headers show
Series
  • x86: Add support for Intel AMX instructions
Related show

Commit Message

Alan Modra via Binutils June 26, 2020, 7:40 p.m.
From: Lili Cui <lili.cui@intel.com>


gas/

	* doc/c-i386.texi: Document amx_int8, amx_bf16 and amx_tile.
	* config/tc-i386.c (i386_error): Add invalid_sib_address.
	(cpu_arch): Add .amx-int8, .amx-bf16 and .amx-tile.
	(cpu_noarch): Add noamx_int8, noamx_bf16 and noamx_tile.
	(type_names): Add rTMM.
	(check_VecOperands): Disallow RegIP for non-vector SIB.
	(match_template): Handle invalid_sib_address.
	(build_modrm_byte): Handle VEXOP3 and non-vector SIB.
	* testsuite/gas/i386/x86-64-amx-intel.d: New.
	* testsuite/gas/i386/x86-64-amx-sibmem-inval.l: New.
	* testsuite/gas/i386/x86-64-amx-sibmem-inval.s: New.
	* testsuite/gas/i386/x86-64-amx.d: New.
	* testsuite/gas/i386/x86-64-amx.s: New.
	* testsuite/gas/i386/i386.exp: Run above new tests.

opcodes/

	* i386-dis.c (EV): New for generic memory operand.
	(XMT): New.
	(EXtmm): Likewise.
	(Vextmm): Likewise.
	(tmm_mode): Likewise.
	(void_mode): Likewise.
	(REG_VEX_W_0_0F3849_P_0_M_3): Likewise.
	(MOD_VEX_W_0_0F3849_P_0): Likewise.
	(MOD_VEX_W_0_0F3849_P_2): Likewise.
	(MOD_VEX_W_0_0F3849_P_3): Likewise.
	(MOD_VEX_W_0_0F384B_P_1): Likewise.
	(MOD_VEX_W_0_0F384B_P_2): Likewise.
	(MOD_VEX_W_0_0F384B_P_3): Likewise.
	(MOD_VEX_W_0_0F385C_P_1): Likewise.
	(MOD_VEX_W_0_0F385E_P_0): Likewise.
	(MOD_VEX_W_0_0F385E_P_1): Likewise.
	(MOD_VEX_W_0_0F385E_P_2): Likewise.
	(MOD_VEX_W_0_0F385E_P_3): Likewise.
	(RM_VEX_W_0_0F3849_P_0_M_3_R_0): Likewise.
	(PREFIX_VEX_0F3849): Likewise.
	(PREFIX_VEX_0F384B): Likewise.
	(PREFIX_VEX_0F385C): Likewise.
	(PREFIX_VEX_0F385E): Likewise.
	(X86_64_0F01_REG_3): Likewise.
	(X86_64_VEX_W_0_0F3849_P_0_M_0): Likewise.
	(X86_64_0F3849_MOD_3_REG_0_RM_0): Likewise.
	(X86_64_VEX_W_0_0F3849_P_2_M_0): Likewise.
	(X86_64_VEX_W_0_0F3849_P_3_M_0): Likewise.
	(X86_64_MOD_VEX_W_0_0F384B_P_1): Likewise.
	(X86_64_MOD_VEX_W_0_0F384B_P_2): Likewise.
	(X86_64_MOD_VEX_W_0_0F384B_P_3): Likewise.
	(X86_64_MOD_VEX_W_0_0F385C_P_1): Likewise.
	(X86_64_MOD_VEX_W_0_0F385E_P_0): Likewise.
	(X86_64_MOD_VEX_W_0_0F385E_P_1): Likewise.
	(X86_64_MOD_VEX_W_0_0F385E_P_2): Likewise.
	(X86_64_MOD_VEX_W_0_0F385E_P_3): Likewise.
	(VEX_W_0F3849_P_0): Likewise.
	(VEX_W_0F3849_P_2): Likewise.
	(VEX_W_0F3849_P_3): Likewise.
	(VEX_W_0F384B_P_1): Likewise.
	(VEX_W_0F384B_P_2): Likewise.
	(VEX_W_0F384B_P_3): Likewise.
	(VEX_W_0F385C_P_1): Likewise.
	(VEX_W_0F385E_P_0): Likewise.
	(VEX_W_0F385E_P_1): Likewise.
	(VEX_W_0F385E_P_2): Likewise.
	(VEX_W_0F385E_P_3): Likewise.
	(names_tmm): Likewise.
	(intel_names_tmm): Likewise.
	(att_names_tmm): Likewise.
	(intel_operand_size): Handle void_mode.
	(OP_XMM): Handle tmm_mode.
	(OP_EX): Likewise.
	(OP_VEX): Likewise.
	* i386-gen.c (cpu_flag_init): Add entries for
	CpuAMX_INT8, CpuAMX_BF16 and CpuAMX_TILE.
	(operand_type_shorthands): Add RegTMM.
	(operand_type_init): Likewise.
	(operand_types): Add Tmmword.
	(cpu_flag_init): Add CPU_AMX_INT8, CPU_AMX_BF16 and CPU_AMX_TILE.
	(cpu_flags): Add CpuAMX_INT8, CpuAMX_BF16 and CpuAMX_TILE.
	* i386-opc.h (CpuAMX_INT8): New.
	(CpuAMX_BF16): Likewise.
	(CpuAMX_TILE): Likewise.
	(VEXOP3): Likewise.
	(SIBMEM): Likewise.
	(Tmmword): Likewise.
	(i386_cpu_flags): Add cpuamx_int8, cpuamx_bf16 and cpuamx_tile.
	(i386_opcode_modifier): Extend width of fields vexvvvv and sib.
	(i386_operand_type): Add tmmword.
	* i386-opc.tbl: Add AMX instructions.
	* i386-reg.tbl: Add AMX registers.
	* i386-init.h: Regenerated.
	* i386-tbl.h: Likewise.
---
 gas/config/tc-i386.c                          |    93 +-
 gas/doc/c-i386.texi                           |     7 +
 gas/testsuite/gas/i386/i386.exp               |     3 +
 gas/testsuite/gas/i386/x86-64-amx-intel.d     |    69 +
 .../gas/i386/x86-64-amx-sibmem-inval.l        |     7 +
 .../gas/i386/x86-64-amx-sibmem-inval.s        |    12 +
 gas/testsuite/gas/i386/x86-64-amx.d           |    69 +
 gas/testsuite/gas/i386/x86-64-amx.s           |    61 +
 opcodes/i386-dis.c                            |   290 +-
 opcodes/i386-gen.c                            |    18 +
 opcodes/i386-init.h                           |   506 +-
 opcodes/i386-opc.h                            |    20 +-
 opcodes/i386-opc.tbl                          |    28 +
 opcodes/i386-reg.tbl                          |     9 +
 opcodes/i386-tbl.h                            | 28388 ++++++++--------
 15 files changed, 15244 insertions(+), 14336 deletions(-)
 create mode 100644 gas/testsuite/gas/i386/x86-64-amx-intel.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-amx-sibmem-inval.l
 create mode 100644 gas/testsuite/gas/i386/x86-64-amx-sibmem-inval.s
 create mode 100644 gas/testsuite/gas/i386/x86-64-amx.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-amx.s

Patch

diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index ae2a2c1a53..8c37ef5ce6 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -290,6 +290,7 @@  enum i386_error
     unsupported_with_intel_mnemonic,
     unsupported_syntax,
     unsupported,
+    invalid_sib_address,
     invalid_vsib_address,
     invalid_vector_register_set,
     unsupported_vector_index_register,
@@ -372,6 +373,9 @@  struct _i386_insn
     /* Has ZMM register operands.  */
     bfd_boolean has_regzmm;
 
+    /* Has TMM register operands.  */
+    bfd_boolean has_regtmm;
+
     /* Has GOTPC or TLS relocation.  */
     bfd_boolean has_gotpc_tls_reloc;
 
@@ -1202,6 +1206,12 @@  static const arch_entry cpu_arch[] =
     CPU_WAITPKG_FLAGS, 0 },
   { STRING_COMMA_LEN (".cldemote"), PROCESSOR_UNKNOWN,
     CPU_CLDEMOTE_FLAGS, 0 },
+  { STRING_COMMA_LEN (".amx-int8"), PROCESSOR_UNKNOWN,
+    CPU_AMX_INT8_FLAGS, 0 },
+  { STRING_COMMA_LEN (".amx-bf16"), PROCESSOR_UNKNOWN,
+    CPU_AMX_BF16_FLAGS, 0 },
+  { STRING_COMMA_LEN (".amx-tile"), PROCESSOR_UNKNOWN,
+    CPU_AMX_TILE_FLAGS, 0 },
   { STRING_COMMA_LEN (".movdiri"), PROCESSOR_UNKNOWN,
     CPU_MOVDIRI_FLAGS, 0 },
   { STRING_COMMA_LEN (".movdir64b"), PROCESSOR_UNKNOWN,
@@ -1260,6 +1270,9 @@  static const noarch_entry cpu_noarch[] =
   { STRING_COMMA_LEN ("noavx512_bitalg"), CPU_ANY_AVX512_BITALG_FLAGS },
   { STRING_COMMA_LEN ("noibt"), CPU_ANY_IBT_FLAGS },
   { STRING_COMMA_LEN ("noshstk"), CPU_ANY_SHSTK_FLAGS },
+  { STRING_COMMA_LEN ("noamx_int8"), CPU_ANY_AMX_INT8_FLAGS },
+  { STRING_COMMA_LEN ("noamx_bf16"), CPU_ANY_AMX_BF16_FLAGS },
+  { STRING_COMMA_LEN ("noamx_tile"), CPU_ANY_AMX_TILE_FLAGS },
   { STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS },
   { STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS },
   { STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
@@ -2297,6 +2310,7 @@  operand_type_match (i386_operand_type overlap,
   temp.bitfield.xmmword = 0;
   temp.bitfield.ymmword = 0;
   temp.bitfield.zmmword = 0;
+  temp.bitfield.tmmword = 0;
   if (operand_type_all_zero (&temp))
     goto mismatch;
 
@@ -3305,6 +3319,7 @@  const type_names[] =
   { OPERAND_TYPE_REGXMM, "rXMM" },
   { OPERAND_TYPE_REGYMM, "rYMM" },
   { OPERAND_TYPE_REGZMM, "rZMM" },
+  { OPERAND_TYPE_REGTMM, "rTMM" },
   { OPERAND_TYPE_REGMASK, "Mask reg" },
 };
 
@@ -5793,9 +5808,18 @@  check_VecOperands (const insn_template *t)
       return 1;
     }
 
+  /* Disallow using IP register for the mandatory non-vector SIB.  */
+  if (t->opcode_modifier.sib == SIBMEM
+      && i.base_reg
+      && i.base_reg->reg_num == RegIP)
+    {
+	i.error = invalid_sib_address;
+	return 1;
+    }
+
   /* For VSIB byte, we need a vector register for index, and all vector
      registers must be distinct.  */
-  if (t->opcode_modifier.sib)
+  if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
     {
       if (!i.index_reg
 	  || !((t->opcode_modifier.sib == VECSIB128
@@ -6589,6 +6613,9 @@  match_template (char mnem_suffix)
 	  as_bad (_("unsupported instruction `%s'"),
 		  current_templates->start->name);
 	  return NULL;
+	case invalid_sib_address:
+	  err_msg = _("invalid SIB address");
+	  break;
 	case invalid_vsib_address:
 	  err_msg = _("invalid VSIB address");
 	  break;
@@ -7791,12 +7818,22 @@  build_modrm_byte (void)
      operands, it must be a instruction with VexNDS.  For a
      instruction with VexNDD, the destination register is encoded
      in VEX prefix.  If there are 4 register operands, it must be
-     a instruction with VEX prefix and 3 sources.  */
+     an instruction with VEX prefix and 3 sources.  For an instruction
+     with 3 register operands, VEXOP3 indicates that the VEX.vvvv
+     field is used to encode the third operand, which differs from
+     the VEXXDS case where VEX.vvvv is normally used to encode the
+     second operand.  To be clear, the second operand means operand
+     OP2 and the third operand means operand OP3 in the Intel-syntax
+     assembly code below:
+
+        INST_OP OP1, OP2, OP3
+   */
   if (i.mem_operands == 0
       && ((i.reg_operands == 2
 	   && i.tm.opcode_modifier.vexvvvv <= VEXXDS)
 	  || (i.reg_operands == 3
-	      && i.tm.opcode_modifier.vexvvvv == VEXXDS)
+	      && (i.tm.opcode_modifier.vexvvvv == VEXXDS
+		  || i.tm.opcode_modifier.vexvvvv == VEXOP3))
 	  || (i.reg_operands == 4 && vex_3_sources)))
     {
       switch (i.operands)
@@ -7808,10 +7845,11 @@  build_modrm_byte (void)
 	  /* When there are 3 operands, one of them may be immediate,
 	     which may be the first or the last operand.  Otherwise,
 	     the first operand must be shift count register (cl) or it
-	     is an instruction with VexNDS. */
+	     is an instruction with VexNDS or VEXOP3. */
 	  gas_assert (i.imm_operands == 1
 		      || (i.imm_operands == 0
 			  && (i.tm.opcode_modifier.vexvvvv == VEXXDS
+			      || i.tm.opcode_modifier.vexvvvv == VEXOP3
 			      || (i.types[0].bitfield.instance == RegC
 				  && i.types[0].bitfield.byte))));
 	  if (operand_type_check (i.types[0], imm)
@@ -7910,6 +7948,19 @@  build_modrm_byte (void)
 	      i.vex.register_specifier = i.op[vvvv].regs;
 	      dest++;
 	    }
+	  /* Unlike VEXXDS, we are going to use VEX.vvvv to encode
+	     the third operand which is i.op[source].  At this stage
+	     the variable source is 0 and the dest is 1.  Then we need
+	     to increase source to represent the second operand and dest
+	     to represent the first operand, which is also the destination
+	     register; the rest of the code will take care of the encoding
+	     of those two operands.  */
+	  else if (i.tm.opcode_modifier.vexvvvv == VEXOP3)
+	    {
+	      i.vex.register_specifier = i.op[source].regs;
+	      source++;
+	      dest++;
+	    }
 	}
 
       i.rm.mode = 3;
@@ -7936,6 +7987,9 @@  build_modrm_byte (void)
 	      else if (i.types[dest].bitfield.ymmword
 		       || i.types[source].bitfield.ymmword)
 		i.has_regymm = TRUE;
+	      else if (i.types[dest].bitfield.tmmword
+		       || i.types[source].bitfield.tmmword)
+		i.has_regtmm = TRUE;
 	      else
 		i.has_regxmm = TRUE;
 	    }
@@ -7973,7 +8027,9 @@  build_modrm_byte (void)
 
 	  if (i.tm.opcode_modifier.sib)
 	    {
-	      if (i.index_reg->reg_num == RegIZ)
+	      /* The index register of VSIB shouldn't be RegIZ.  */
+	      if (i.tm.opcode_modifier.sib != SIBMEM
+		  && i.index_reg->reg_num == RegIZ)
 		abort ();
 
 	      i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
@@ -7996,8 +8052,22 @@  build_modrm_byte (void)
 		      i.types[op].bitfield.disp32s = 1;
 		    }
 		}
-	      i.sib.index = i.index_reg->reg_num;
-	      set_rex_vrex (i.index_reg, REX_X, FALSE);
+
+	      /* The mandatory SIB always has an index register, so
+		 the code logic remains unchanged.  The non-mandatory SIB
+		 without an index register is allowed and will be handled
+		 later.  */
+	      if (i.index_reg)
+		{
+		  if (i.index_reg->reg_num == RegIZ)
+		    i.sib.index = NO_INDEX_REGISTER;
+		  else
+		    i.sib.index = i.index_reg->reg_num;
+		  if ((i.index_reg->reg_flags & RegRex) != 0)
+		    i.rex |= REX_X;
+		  if ((i.index_reg->reg_flags & RegVRex) != 0)
+		    i.vrex |= REX_X;
+		}
 	    }
 
 	  default_seg = &ds;
@@ -8011,7 +8081,9 @@  build_modrm_byte (void)
 		{
 		  i386_operand_type newdisp;
 
-		  gas_assert (!i.tm.opcode_modifier.sib);
+		  /* Only check for VSIB.  */
+		  gas_assert (!i.tm.opcode_modifier.sib
+			      || i.tm.opcode_modifier.sib == SIBMEM);
 		  /* Operand is just <disp>  */
 		  if (flag_code == CODE_64BIT)
 		    {
@@ -8149,7 +8221,10 @@  build_modrm_byte (void)
 	      i.sib.scale = i.log2_scale_factor;
 	      if (i.index_reg == 0)
 		{
-		  gas_assert (!i.tm.opcode_modifier.sib);
+		  /* Only check for VSIB.  */
+		  gas_assert (!i.tm.opcode_modifier.sib
+			      || i.tm.opcode_modifier.sib == SIBMEM);
+
 		  /* <disp>(%esp) becomes two byte modrm with no index
 		     register.  We've already stored the code for esp
 		     in i.rm.regmem ie. ESCAPE_TO_TWO_BYTE_ADDRESSING.
diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi
index d4e6fcb698..cb86cc7968 100644
--- a/gas/doc/c-i386.texi
+++ b/gas/doc/c-i386.texi
@@ -226,6 +226,12 @@  accept various extension mnemonics.  For example,
 @code{noenqcmd},
 @code{noserialize},
 @code{notsxldtrk},
+@code{amx_int8},
+@code{noamx_int8},
+@code{amx_bf16},
+@code{noamx_bf16},
+@code{amx_tile},
+@code{noamx_tile},
 @code{vmx},
 @code{vmfunc},
 @code{smx},
@@ -1504,6 +1510,7 @@  supported on the CPU specified.  The choices for @var{cpu_type} are:
 @item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote}
 @item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
 @item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
+@item @samp{.amx_int8} @tab @samp{.amx_bf16} @tab @samp{.amx_tile}
 @item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.sse4a} @tab @samp{.sse5}
 @item @samp{.syscall} @tab @samp{.rdtscp} @tab @samp{.svme}
 @item @samp{.lwp} @tab @samp{.fma4} @tab @samp{.xop} @tab @samp{.cx16}
diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp
index 6bee5fc9d8..fffa7e456b 100644
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -1139,6 +1139,9 @@  if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_64_check]] t
     run_dump_test "x86-64-lfence-ret-d"
     run_dump_test "x86-64-lfence-ret-e"
     run_dump_test "x86-64-lfence-byte"
+    run_list_test "x86-64-amx-sibmem-inval"
+    run_dump_test "x86-64-amx"
+    run_dump_test "x86-64-amx-intel"
 
     if { ![istarget "*-*-aix*"]
       && ![istarget "*-*-beos*"]
diff --git a/gas/testsuite/gas/i386/x86-64-amx-intel.d b/gas/testsuite/gas/i386/x86-64-amx-intel.d
new file mode 100644
index 0000000000..d875f08bf3
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-intel.d
@@ -0,0 +1,69 @@ 
+#as:
+#objdump: -d -Mintel
+#name: x86_64 AMX insns in Intel syntax
+#source: x86-64-amx.s
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <_start>:
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 78 49 04 51[ 	]*ldtilecfg \[rcx\+rdx\*2\]
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 49 04 51[ 	]*sttilecfg \[rcx\+rdx\*2\]
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 52 5c dc[ 	]*tdpbf16ps tmm3,tmm4,tmm5
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 63 5e ca[ 	]*tdpbssd tmm1,tmm2,tmm3
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 62 5e ca[ 	]*tdpbsud tmm1,tmm2,tmm3
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 61 5e ca[ 	]*tdpbusd tmm1,tmm2,tmm3
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 60 5e ca[ 	]*tdpbuud tmm1,tmm2,tmm3
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 4b 2c 25 00[ 	]*tileloadd tmm5,ds:0x0
+[ 	]*[a-f0-9]+:[ 	]*00 00 00[ 	]*
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 4b 2c 21[ 	]*tileloadd tmm5,\[rcx\+riz\*1\]
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 7b 4b 2c 21[ 	]*tileloadd tmm5,\[ecx\+eiz\*1\]
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 4b 2c 11[ 	]*tileloadd tmm5,\[rcx\+rdx\*1\]
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 7b 4b 0c 51[ 	]*tileloadd tmm1,\[ecx\+edx\*2\]
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 4b 2c 25 00[ 	]*tileloaddt1 tmm5,ds:0x0
+[ 	]*[a-f0-9]+:[ 	]*00 00 00[ 	]*
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 4b 2c 21[ 	]*tileloaddt1 tmm5,\[rcx\+riz\*1\]
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 79 4b 2c 21[ 	]*tileloaddt1 tmm5,\[ecx\+eiz\*1\]
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 4b 2c 11[ 	]*tileloaddt1 tmm5,\[rcx\+rdx\*1\]
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 79 4b 0c 51[ 	]*tileloaddt1 tmm1,\[ecx\+edx\*2\]
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 4b 0c 61[ 	]*tileloaddt1 tmm1,\[rcx\+riz\*2\]
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 78 49 c0[ 	]*tilerelease *
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7a 4b 2c 21[ 	]*tilestored \[rcx\+riz\*1\],tmm5
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 7a 4b 2c 21[ 	]*tilestored \[ecx\+eiz\*1\],tmm5
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7a 4b 2c 11[ 	]*tilestored \[rcx\+rdx\*1\],tmm5
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 7a 4b 0c 51[ 	]*tilestored \[ecx\+edx\*2\],tmm1
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 49 c0[ 	]*tilezero tmm0
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 49 e8[ 	]*tilezero tmm5
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 49 f8[ 	]*tilezero tmm7
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 78 49 01[ 	]*ldtilecfg \[rcx\]
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 78 49 03[ 	]*ldtilecfg \[rbx\]
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 49 01[ 	]*sttilecfg \[rcx\]
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 49 03[ 	]*sttilecfg \[rbx\]
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 52 5c dc[ 	]*tdpbf16ps tmm3,tmm4,tmm5
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 63 5e ca[ 	]*tdpbssd tmm1,tmm2,tmm3
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 62 5e ca[ 	]*tdpbsud tmm1,tmm2,tmm3
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 61 5e ca[ 	]*tdpbusd tmm1,tmm2,tmm3
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 60 5e ca[ 	]*tdpbuud tmm1,tmm2,tmm3
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 4b 2c 25 00[ 	]*tileloadd tmm5,ds:0x0
+[ 	]*[a-f0-9]+:[ 	]*00 00 00[ 	]*
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 4b 2c 21[ 	]*tileloadd tmm5,\[rcx\+riz\*1\]
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 7b 4b 2c 21[ 	]*tileloadd tmm5,\[ecx\+eiz\*1\]
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 4b 2c 11[ 	]*tileloadd tmm5,\[rcx\+rdx\*1\]
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 7b 4b 0c 51[ 	]*tileloadd tmm1,\[ecx\+edx\*2\]
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 4b 2c 25 00[ 	]*tileloaddt1 tmm5,ds:0x0
+[ 	]*[a-f0-9]+:[ 	]*00 00 00[ 	]*
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 4b 2c 21[ 	]*tileloaddt1 tmm5,\[rcx\+riz\*1\]
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 79 4b 2c 21[ 	]*tileloaddt1 tmm5,\[ecx\+eiz\*1\]
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 4b 2c 11[ 	]*tileloaddt1 tmm5,\[rcx\+rdx\*1\]
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 79 4b 0c 51[ 	]*tileloaddt1 tmm1,\[ecx\+edx\*2\]
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 4b 0c 61[ 	]*tileloaddt1 tmm1,\[rcx\+riz\*2\]
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 78 49 c0[ 	]*tilerelease *
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7a 4b 2c 21[ 	]*tilestored \[rcx\+riz\*1\],tmm5
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 7a 4b 2c 21[ 	]*tilestored \[ecx\+eiz\*1\],tmm5
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7a 4b 2c 11[ 	]*tilestored \[rcx\+rdx\*1\],tmm5
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 7a 4b 0c 51[ 	]*tilestored \[ecx\+edx\*2\],tmm1
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 49 c0[ 	]*tilezero tmm0
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 49 e8[ 	]*tilezero tmm5
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 49 f8[ 	]*tilezero tmm7
diff --git a/gas/testsuite/gas/i386/x86-64-amx-sibmem-inval.l b/gas/testsuite/gas/i386/x86-64-amx-sibmem-inval.l
new file mode 100644
index 0000000000..d3a84646f4
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-sibmem-inval.l
@@ -0,0 +1,7 @@ 
+.* Assembler messages:
+.*:5: Error: invalid SIB address for `tileloadd'
+.*:6: Error: invalid SIB address for `tileloaddt1'
+.*:7: Error: invalid SIB address for `tilestored'
+.*:10: Error: invalid SIB address for `tileloadd'
+.*:11: Error: invalid SIB address for `tileloaddt1'
+.*:12: Error: invalid SIB address for `tilestored'
diff --git a/gas/testsuite/gas/i386/x86-64-amx-sibmem-inval.s b/gas/testsuite/gas/i386/x86-64-amx-sibmem-inval.s
new file mode 100644
index 0000000000..31efebfb8f
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-sibmem-inval.s
@@ -0,0 +1,12 @@ 
+# Check for SIBMEM operand used in certain AMX instructions
+
+    .text
+_start:
+    tileloadd (%rip), %tmm1
+    tileloaddt1 (%rip), %tmm1
+    tilestored  %tmm1, (%rip)
+
+    .intel_syntax noprefix
+    tileloadd tmm1, [rip]
+    tileloaddt1 tmm1, [rip]
+    tilestored  [rip], tmm1
diff --git a/gas/testsuite/gas/i386/x86-64-amx.d b/gas/testsuite/gas/i386/x86-64-amx.d
new file mode 100644
index 0000000000..5df3614de8
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx.d
@@ -0,0 +1,69 @@ 
+#as:
+#objdump: -d
+#name: x86_64 AMX insns
+#source: x86-64-amx.s
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <_start>:
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 78 49 04 51[ 	]*ldtilecfg \(%rcx,%rdx,2\)
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 49 04 51[ 	]*sttilecfg \(%rcx,%rdx,2\)
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 52 5c dc[ 	]*tdpbf16ps %tmm5,%tmm4,%tmm3
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 63 5e ca[ 	]*tdpbssd %tmm3,%tmm2,%tmm1
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 62 5e ca[ 	]*tdpbsud %tmm3,%tmm2,%tmm1
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 61 5e ca[ 	]*tdpbusd %tmm3,%tmm2,%tmm1
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 60 5e ca[ 	]*tdpbuud %tmm3,%tmm2,%tmm1
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 4b 2c 25 00[ 	]*tileloadd 0x0,%tmm5
+[ 	]*[a-f0-9]+:[ 	]*00 00 00[ 	]*
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 4b 2c 21[ 	]*tileloadd \(%rcx,%riz,1\),%tmm5
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 7b 4b 2c 21[ 	]*tileloadd \(%ecx,%eiz,1\),%tmm5
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 4b 2c 11[ 	]*tileloadd \(%rcx,%rdx,1\),%tmm5
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 7b 4b 0c 51[ 	]*tileloadd \(%ecx,%edx,2\),%tmm1
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 4b 2c 25 00[ 	]*tileloaddt1 0x0,%tmm5
+[ 	]*[a-f0-9]+:[ 	]*00 00 00[ 	]*
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 4b 2c 21[ 	]*tileloaddt1 \(%rcx,%riz,1\),%tmm5
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 79 4b 2c 21[ 	]*tileloaddt1 \(%ecx,%eiz,1\),%tmm5
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 4b 2c 11[ 	]*tileloaddt1 \(%rcx,%rdx,1\),%tmm5
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 79 4b 0c 51[ 	]*tileloaddt1 \(%ecx,%edx,2\),%tmm1
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 4b 0c 61[ 	]*tileloaddt1 \(%rcx,%riz,2\),%tmm1
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 78 49 c0[ 	]*tilerelease *
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7a 4b 2c 21[ 	]*tilestored %tmm5,\(%rcx,%riz,1\)
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 7a 4b 2c 21[ 	]*tilestored %tmm5,\(%ecx,%eiz,1\)
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7a 4b 2c 11[ 	]*tilestored %tmm5,\(%rcx,%rdx,1\)
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 7a 4b 0c 51[ 	]*tilestored %tmm1,\(%ecx,%edx,2\)
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 49 c0[ 	]*tilezero %tmm0
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 49 e8[ 	]*tilezero %tmm5
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 49 f8[ 	]*tilezero %tmm7
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 78 49 01[ 	]*ldtilecfg \(%rcx\)
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 78 49 03[ 	]*ldtilecfg \(%rbx\)
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 49 01[ 	]*sttilecfg \(%rcx\)
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 49 03[ 	]*sttilecfg \(%rbx\)
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 52 5c dc[ 	]*tdpbf16ps %tmm5,%tmm4,%tmm3
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 63 5e ca[ 	]*tdpbssd %tmm3,%tmm2,%tmm1
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 62 5e ca[ 	]*tdpbsud %tmm3,%tmm2,%tmm1
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 61 5e ca[ 	]*tdpbusd %tmm3,%tmm2,%tmm1
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 60 5e ca[ 	]*tdpbuud %tmm3,%tmm2,%tmm1
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 4b 2c 25 00[ 	]*tileloadd 0x0,%tmm5
+[ 	]*[a-f0-9]+:[ 	]*00 00 00[ 	]*
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 4b 2c 21[ 	]*tileloadd \(%rcx,%riz,1\),%tmm5
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 7b 4b 2c 21[ 	]*tileloadd \(%ecx,%eiz,1\),%tmm5
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 4b 2c 11[ 	]*tileloadd \(%rcx,%rdx,1\),%tmm5
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 7b 4b 0c 51[ 	]*tileloadd \(%ecx,%edx,2\),%tmm1
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 4b 2c 25 00[ 	]*tileloaddt1 0x0,%tmm5
+[ 	]*[a-f0-9]+:[ 	]*00 00 00[ 	]*
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 4b 2c 21[ 	]*tileloaddt1 \(%rcx,%riz,1\),%tmm5
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 79 4b 2c 21[ 	]*tileloaddt1 \(%ecx,%eiz,1\),%tmm5
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 4b 2c 11[ 	]*tileloaddt1 \(%rcx,%rdx,1\),%tmm5
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 79 4b 0c 51[ 	]*tileloaddt1 \(%ecx,%edx,2\),%tmm1
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 79 4b 0c 61[ 	]*tileloaddt1 \(%rcx,%riz,2\),%tmm1
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 78 49 c0[ 	]*tilerelease *
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7a 4b 2c 21[ 	]*tilestored %tmm5,\(%rcx,%riz,1\)
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 7a 4b 2c 21[ 	]*tilestored %tmm5,\(%ecx,%eiz,1\)
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7a 4b 2c 11[ 	]*tilestored %tmm5,\(%rcx,%rdx,1\)
+[ 	]*[a-f0-9]+:[ 	]*67 c4 e2 7a 4b 0c 51[ 	]*tilestored %tmm1,\(%ecx,%edx,2\)
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 49 c0[ 	]*tilezero %tmm0
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 49 e8[ 	]*tilezero %tmm5
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7b 49 f8[ 	]*tilezero %tmm7
diff --git a/gas/testsuite/gas/i386/x86-64-amx.s b/gas/testsuite/gas/i386/x86-64-amx.s
new file mode 100644
index 0000000000..c70543152b
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx.s
@@ -0,0 +1,61 @@ 
+
+  .allow_index_reg
+  .text
+_start:
+  ldtilecfg  (%rcx,%rdx,2)
+  sttilecfg  (%rcx,%rdx,2)
+  tdpbf16ps %tmm5, %tmm4, %tmm3
+  tdpbssd %tmm3, %tmm2, %tmm1
+  tdpbsud %tmm3, %tmm2, %tmm1
+  tdpbusd %tmm3, %tmm2, %tmm1
+  tdpbuud %tmm3, %tmm2, %tmm1
+  tileloadd foo, %tmm5
+  tileloadd (%rcx), %tmm5
+  tileloadd (%ecx), %tmm5
+  tileloadd (%rcx,%rdx,1), %tmm5
+  tileloadd (%ecx,%edx,2), %tmm1
+  tileloaddt1 foo, %tmm5
+  tileloaddt1 (%rcx), %tmm5
+  tileloaddt1 (%ecx), %tmm5
+  tileloaddt1 (%rcx,%rdx,1), %tmm5
+  tileloaddt1 (%ecx,%edx,2), %tmm1
+  tileloaddt1 (%rcx,%riz,2), %tmm1
+  tilerelease
+  tilestored %tmm5, (%rcx)
+  tilestored %tmm5, (%ecx)
+  tilestored %tmm5, (%rcx,%rdx,1)
+  tilestored %tmm1, (%ecx,%edx,2)
+  tilezero %tmm0
+  tilezero %tmm5
+  tilezero %tmm7
+
+
+  .intel_syntax noprefix
+  ldtilecfg  [rcx]
+  ldtilecfg  [rbx]
+  sttilecfg  [rcx]
+  sttilecfg  [rbx]
+  tdpbf16ps tmm3, tmm4, tmm5
+  tdpbssd tmm1, tmm2, tmm3
+  tdpbsud tmm1, tmm2, tmm3
+  tdpbusd tmm1, tmm2, tmm3
+  tdpbuud tmm1, tmm2, tmm3
+  tileloadd tmm5, foo
+  tileloadd tmm5, [rcx]
+  tileloadd tmm5, [ecx]
+  tileloadd tmm5, [rcx+rdx]
+  tileloadd tmm1, [ecx+edx*2]
+  tileloaddt1 tmm5, foo
+  tileloaddt1 tmm5, [rcx]
+  tileloaddt1 tmm5, [ecx]
+  tileloaddt1 tmm5, [rcx+rdx]
+  tileloaddt1 tmm1, [ecx+edx*2]
+  tileloaddt1 tmm1, [rcx+riz*2]
+  tilerelease
+  tilestored [rcx], tmm5
+  tilestored [ecx], tmm5
+  tilestored [rcx+rdx], tmm5
+  tilestored [ecx+edx*2], tmm1
+  tilezero tmm0
+  tilezero tmm5
+  tilezero tmm7
diff --git a/opcodes/i386-dis.c b/opcodes/i386-dis.c
index e1ebb48553..e443918499 100644
--- a/opcodes/i386-dis.c
+++ b/opcodes/i386-dis.c
@@ -244,6 +244,7 @@  fetch_data (struct disassemble_info *info, bfd_byte *addr)
 #define Bad_Opcode NULL, { { NULL, 0 } }, 0
 
 #define Eb { OP_E, b_mode }
+#define EV { OP_E, void_mode }
 #define Ebnd { OP_E, bnd_mode }
 #define EbS { OP_E, b_swap_mode }
 #define EbndS { OP_E, bnd_swap_mode }
@@ -374,6 +375,7 @@  fetch_data (struct disassemble_info *info, bfd_byte *addr)
 #define XMScalar { OP_XMM, scalar_mode }
 #define XMGatherQ { OP_XMM, vex_vsib_q_w_dq_mode }
 #define XMM { OP_XMM, xmm_mode }
+#define XMT { OP_XMM, tmm_mode }
 #define XMxmmq { OP_XMM, xmmq_mode }
 #define EM { OP_EM, v_mode }
 #define EMS { OP_EM, v_swap_mode }
@@ -393,6 +395,7 @@  fetch_data (struct disassemble_info *info, bfd_byte *addr)
 #define EXxS { OP_EX, x_swap_mode }
 #define EXxmm { OP_EX, xmm_mode }
 #define EXymm { OP_EX, ymm_mode }
+#define EXtmm { OP_EX, tmm_mode }
 #define EXxmmq { OP_EX, xmmq_mode }
 #define EXEvexHalfBcstXmmq { OP_EX, evex_half_bcst_xmmq_mode }
 #define EXxmm_mb { OP_EX, xmm_mb_mode }
@@ -423,6 +426,7 @@  fetch_data (struct disassemble_info *info, bfd_byte *addr)
 #define Vex128 { OP_VEX, vex128_mode }
 #define Vex256 { OP_VEX, vex256_mode }
 #define VexGdq { OP_VEX, dq_mode }
+#define Vextmm { OP_VEX, tmm_mode }
 #define EXdVexScalarS { OP_EX_Vex, d_scalar_swap_mode }
 #define EXqVexScalarS { OP_EX_Vex, q_scalar_swap_mode }
 #define EXVexW { OP_EX_VexW, x_mode }
@@ -544,8 +548,12 @@  enum
   ymmq_mode,
   /* 32-byte YMM or 16-byte word operand */
   ymmxmm_mode,
+  /* TMM operand */
+  tmm_mode,
   /* d_mode in 32bit, q_mode in 64bit mode.  */
   m_mode,
+  /* A generic memory operand.  */
+  void_mode,
   /* pair of v_mode operands */
   a_mode,
   cond_jump_mode,
@@ -749,6 +757,7 @@  enum
   REG_VEX_0F72,
   REG_VEX_0F73,
   REG_VEX_0FAE,
+  REG_VEX_W_0_0F3849_P_0_M_3,
   REG_VEX_0F38F3,
   REG_XOP_LWPCB,
   REG_XOP_LWP,
@@ -832,6 +841,17 @@  enum
   MOD_0FE7_PREFIX_2,
   MOD_0FF0_PREFIX_3,
   MOD_0F382A_PREFIX_2,
+  MOD_VEX_W_0_0F3849_P_0,
+  MOD_VEX_W_0_0F3849_P_2,
+  MOD_VEX_W_0_0F3849_P_3,
+  MOD_VEX_W_0_0F384B_P_1,
+  MOD_VEX_W_0_0F384B_P_2,
+  MOD_VEX_W_0_0F384B_P_3,
+  MOD_VEX_W_0_0F385C_P_1,
+  MOD_VEX_W_0_0F385E_P_0,
+  MOD_VEX_W_0_0F385E_P_1,
+  MOD_VEX_W_0_0F385E_P_2,
+  MOD_VEX_W_0_0F385E_P_3,
   MOD_0F38F5_PREFIX_2,
   MOD_0F38F6_PREFIX_0,
   MOD_0F38F8_PREFIX_1,
@@ -961,6 +981,7 @@  enum
   RM_0F1E_P_1_MOD_3_REG_7,
   RM_0FAE_REG_6_MOD_3_P_0,
   RM_0FAE_REG_7_MOD_3,
+  RM_VEX_W_0_0F3849_P_0_M_3_R_0
 };
 
 enum
@@ -1296,9 +1317,13 @@  enum
   PREFIX_VEX_0F3845,
   PREFIX_VEX_0F3846,
   PREFIX_VEX_0F3847,
+  PREFIX_VEX_0F3849,
+  PREFIX_VEX_0F384B,
   PREFIX_VEX_0F3858,
   PREFIX_VEX_0F3859,
   PREFIX_VEX_0F385A,
+  PREFIX_VEX_0F385C,
+  PREFIX_VEX_0F385E,
   PREFIX_VEX_0F3878,
   PREFIX_VEX_0F3879,
   PREFIX_VEX_0F388C,
@@ -1767,7 +1792,19 @@  enum
   X86_64_0F01_REG_0,
   X86_64_0F01_REG_1,
   X86_64_0F01_REG_2,
-  X86_64_0F01_REG_3
+  X86_64_0F01_REG_3,
+  X86_64_VEX_W_0_0F3849_P_0_M_0,
+  X86_64_0F3849_MOD_3_REG_0_RM_0,
+  X86_64_VEX_W_0_0F3849_P_2_M_0,
+  X86_64_VEX_W_0_0F3849_P_3_M_0,
+  X86_64_MOD_VEX_W_0_0F384B_P_1,
+  X86_64_MOD_VEX_W_0_0F384B_P_2,
+  X86_64_MOD_VEX_W_0_0F384B_P_3,
+  X86_64_MOD_VEX_W_0_0F385C_P_1,
+  X86_64_MOD_VEX_W_0_0F385E_P_0,
+  X86_64_MOD_VEX_W_0_0F385E_P_1,
+  X86_64_MOD_VEX_W_0_0F385E_P_2,
+  X86_64_MOD_VEX_W_0_0F385E_P_3
 };
 
 enum
@@ -2006,9 +2043,20 @@  enum
   VEX_W_0F382F_P_2_M_0,
   VEX_W_0F3836_P_2,
   VEX_W_0F3846_P_2,
+  VEX_W_0F3849_P_0,
+  VEX_W_0F3849_P_2,
+  VEX_W_0F3849_P_3,
+  VEX_W_0F384B_P_1,
+  VEX_W_0F384B_P_2,
+  VEX_W_0F384B_P_3,
   VEX_W_0F3858_P_2,
   VEX_W_0F3859_P_2,
   VEX_W_0F385A_P_2_M_0,
+  VEX_W_0F385C_P_1,
+  VEX_W_0F385E_P_0,
+  VEX_W_0F385E_P_1,
+  VEX_W_0F385E_P_2,
+  VEX_W_0F385E_P_3,
   VEX_W_0F3878_P_2,
   VEX_W_0F3879_P_2,
   VEX_W_0F38CF_P_2,
@@ -3153,6 +3201,16 @@  static const char *att_names_zmm[] = {
   "%zmm28", "%zmm29", "%zmm30", "%zmm31"
 };
 
+static const char **names_tmm;
+static const char *intel_names_tmm[] = {
+  "tmm0", "tmm1", "tmm2", "tmm3",
+  "tmm4", "tmm5", "tmm6", "tmm7"
+};
+static const char *att_names_tmm[] = {
+  "%tmm0", "%tmm1", "%tmm2", "%tmm3",
+  "%tmm4", "%tmm5", "%tmm6", "%tmm7"
+};
+
 static const char **names_mask;
 static const char *intel_names_mask[] = {
   "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7"
@@ -3521,6 +3579,10 @@  static const struct dis386 reg_table[][8] = {
     { MOD_TABLE (MOD_VEX_0FAE_REG_2) },
     { MOD_TABLE (MOD_VEX_0FAE_REG_3) },
   },
+  /* REG_VEX_W_0_0F3849_P_0_M_3 */
+  {
+    { RM_TABLE (RM_VEX_W_0_0F3849_P_0_M_3_R_0) },
+  },
   /* REG_VEX_0F38F3 */
   {
     { Bad_Opcode },
@@ -5902,6 +5964,22 @@  static const struct dis386 prefix_table[][4] = {
     { "vpsllv%LW", { XM, Vex, EXx }, 0 },
   },
 
+  /* PREFIX_VEX_0F3849 */
+  {
+    { VEX_W_TABLE (VEX_W_0F3849_P_0) },
+    { Bad_Opcode },
+    { VEX_W_TABLE (VEX_W_0F3849_P_2) },
+    { VEX_W_TABLE (VEX_W_0F3849_P_3) },
+  },
+
+  /* PREFIX_VEX_0F384B */
+  {
+    { Bad_Opcode },
+    { VEX_W_TABLE (VEX_W_0F384B_P_1) },
+    { VEX_W_TABLE (VEX_W_0F384B_P_2) },
+    { VEX_W_TABLE (VEX_W_0F384B_P_3) },
+  },
+
   /* PREFIX_VEX_0F3858 */
   {
     { Bad_Opcode },
@@ -5923,6 +6001,21 @@  static const struct dis386 prefix_table[][4] = {
     { MOD_TABLE (MOD_VEX_0F385A_PREFIX_2) },
   },
 
+  /* PREFIX_VEX_0F385C */
+  {
+    { Bad_Opcode },
+    { VEX_W_TABLE (VEX_W_0F385C_P_1) },
+    { Bad_Opcode },
+  },
+
+  /* PREFIX_VEX_0F385E */
+  {
+    { VEX_W_TABLE (VEX_W_0F385E_P_0) },
+    { VEX_W_TABLE (VEX_W_0F385E_P_1) },
+    { VEX_W_TABLE (VEX_W_0F385E_P_2) },
+    { VEX_W_TABLE (VEX_W_0F385E_P_3) },
+  },
+
   /* PREFIX_VEX_0F3878 */
   {
     { Bad_Opcode },
@@ -6938,6 +7031,78 @@  static const struct dis386 x86_64_table[][2] = {
     { "lidt{Q|Q}", { M }, 0 },
     { "lidt", { M }, 0 },
   },
+
+  /* X86_64_VEX_W_0_0F3849_P_0_M_0 */
+  {
+    { Bad_Opcode },
+    { "ldtilecfg", { EV }, 0 },
+  },
+
+  /* X86_64_0F3849_MOD_3_REG_0_RM_0 */
+  {
+    { Bad_Opcode },
+    { "tilerelease", { Skip_MODRM }, 0 },
+  },
+
+  /* X86_64_VEX_W_0_0F3849_P_2_M_0 */
+  {
+    { Bad_Opcode },
+    { "sttilecfg", { EV }, 0 },
+  },
+
+  /* X86_64_VEX_W_0_0F3849_P_3_M_0 */
+  {
+    { Bad_Opcode },
+    { "tilezero", { XMT, Skip_MODRM }, 0 },
+  },
+
+  /* X86_64_MOD_VEX_W_0_0F384B_P_1 */
+  {
+    { Bad_Opcode },
+    { "tilestored", { EV, XMT }, 0 },
+  },
+
+  /* X86_64_MOD_VEX_W_0_0F384B_P_2 */
+  {
+    { Bad_Opcode },
+    { "tileloaddt1", { XMT, EV }, 0 },
+  },
+
+  /* X86_64_MOD_VEX_W_0_0F384B_P_3 */
+  {
+    { Bad_Opcode },
+    { "tileloadd", { XMT, EV }, 0 },
+  },
+
+  /* X86_64_MOD_VEX_W_0_0F385C_P_1 */
+  {
+    { Bad_Opcode },
+    { "tdpbf16ps", { XMT, EXtmm, Vextmm }, 0 },
+  },
+
+  /* X86_64_MOD_VEX_W_0_0F385E_P_0 */
+  {
+    { Bad_Opcode },
+    { "tdpbuud", { XMT, EXtmm, Vextmm }, 0 },
+  },
+
+  /* X86_64_MOD_VEX_W_0_0F385E_P_1 */
+  {
+    { Bad_Opcode },
+    { "tdpbsud", { XMT, EXtmm, Vextmm }, 0 },
+  },
+
+  /* X86_64_MOD_VEX_W_0_0F385E_P_2 */
+  {
+    { Bad_Opcode },
+    { "tdpbusd", { XMT, EXtmm, Vextmm }, 0 },
+  },
+
+  /* X86_64_MOD_VEX_W_0_0F385E_P_3 */
+  {
+    { Bad_Opcode },
+    { "tdpbssd", { XMT, EXtmm, Vextmm }, 0 },
+  },
 };
 
 static const struct dis386 three_byte_table[][256] = {
@@ -8779,9 +8944,9 @@  static const struct dis386 vex_table[][256] = {
     { PREFIX_TABLE (PREFIX_VEX_0F3847) },
     /* 48 */
     { Bad_Opcode },
+    { PREFIX_TABLE (PREFIX_VEX_0F3849) },
     { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
+    { PREFIX_TABLE (PREFIX_VEX_0F384B) },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
@@ -8800,9 +8965,9 @@  static const struct dis386 vex_table[][256] = {
     { PREFIX_TABLE (PREFIX_VEX_0F3859) },
     { PREFIX_TABLE (PREFIX_VEX_0F385A) },
     { Bad_Opcode },
+    { PREFIX_TABLE (PREFIX_VEX_0F385C) },
     { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
+    { PREFIX_TABLE (PREFIX_VEX_0F385E) },
     { Bad_Opcode },
     /* 60 */
     { Bad_Opcode },
@@ -10036,6 +10201,30 @@  static const struct dis386 vex_w_table[][2] = {
     /* VEX_W_0F3846_P_2 */
     { "vpsravd",	{ XM, Vex, EXx }, 0 },
   },
+  {
+    /* VEX_W_0F3849_P_0 */
+    { MOD_TABLE (MOD_VEX_W_0_0F3849_P_0) },
+  },
+  {
+    /* VEX_W_0F3849_P_2 */
+    { MOD_TABLE (MOD_VEX_W_0_0F3849_P_2) },
+  },
+  {
+    /* VEX_W_0F3849_P_3 */
+    { MOD_TABLE (MOD_VEX_W_0_0F3849_P_3) },
+  },
+  {
+    /* VEX_W_0F384B_P_1 */
+    { MOD_TABLE (MOD_VEX_W_0_0F384B_P_1) },
+  },
+  {
+    /* VEX_W_0F384B_P_2 */
+    { MOD_TABLE (MOD_VEX_W_0_0F384B_P_2) },
+  },
+  {
+    /* VEX_W_0F384B_P_3 */
+    { MOD_TABLE (MOD_VEX_W_0_0F384B_P_3) },
+  },
   {
     /* VEX_W_0F3858_P_2 */
     { "vpbroadcastd", { XM, EXxmm_md }, 0 },
@@ -10048,6 +10237,26 @@  static const struct dis386 vex_w_table[][2] = {
     /* VEX_W_0F385A_P_2_M_0 */
     { "vbroadcasti128", { XM, Mxmm }, 0 },
   },
+  {
+    /* VEX_W_0F385C_P_1 */
+    { MOD_TABLE (MOD_VEX_W_0_0F385C_P_1) },
+  },
+  {
+    /* VEX_W_0F385E_P_0 */
+    { MOD_TABLE (MOD_VEX_W_0_0F385E_P_0) },
+  },
+  {
+    /* VEX_W_0F385E_P_1 */
+    { MOD_TABLE (MOD_VEX_W_0_0F385E_P_1) },
+  },
+  {
+    /* VEX_W_0F385E_P_2 */
+    { MOD_TABLE (MOD_VEX_W_0_0F385E_P_2) },
+  },
+  {
+    /* VEX_W_0F385E_P_3 */
+    { MOD_TABLE (MOD_VEX_W_0_0F385E_P_3) },
+  },
   {
     /* VEX_W_0F3878_P_2 */
     { "vpbroadcastb",	{ XM, EXxmm_mb }, 0 },
@@ -10474,6 +10683,57 @@  static const struct dis386 mod_table[][2] = {
     /* MOD_0F382A_PREFIX_2 */
     { "movntdqa",	{ XM, Mx }, 0 },
   },
+  {
+    /* MOD_VEX_W_0_0F3849_P_0 */
+    { X86_64_TABLE (X86_64_VEX_W_0_0F3849_P_0_M_0) },
+    { REG_TABLE (REG_VEX_W_0_0F3849_P_0_M_3) },
+  },
+  {
+    /* MOD_VEX_W_0_0F3849_P_2 */
+    { X86_64_TABLE (X86_64_VEX_W_0_0F3849_P_2_M_0) },
+  },
+  {
+    /* MOD_VEX_W_0_0F3849_P_3 */
+    { Bad_Opcode },
+    { X86_64_TABLE (X86_64_VEX_W_0_0F3849_P_3_M_0) },
+  },
+  {
+    /* MOD_VEX_W_0_0F384B_P_1 */
+    { X86_64_TABLE (X86_64_MOD_VEX_W_0_0F384B_P_1) },
+  },
+  {
+    /* MOD_VEX_W_0_0F384B_P_2 */
+    { X86_64_TABLE (X86_64_MOD_VEX_W_0_0F384B_P_2) },
+  },
+  {
+    /* MOD_VEX_W_0_0F384B_P_3 */
+    { X86_64_TABLE (X86_64_MOD_VEX_W_0_0F384B_P_3) },
+  },
+  {
+    /* MOD_VEX_W_0_0F385C_P_1 */
+    { Bad_Opcode },
+    { X86_64_TABLE (X86_64_MOD_VEX_W_0_0F385C_P_1) },
+  },
+  {
+    /* MOD_VEX_W_0_0F385E_P_0 */
+    { Bad_Opcode },
+    { X86_64_TABLE (X86_64_MOD_VEX_W_0_0F385E_P_0) },
+  },
+  {
+    /* MOD_VEX_W_0_0F385E_P_1 */
+    { Bad_Opcode },
+    { X86_64_TABLE (X86_64_MOD_VEX_W_0_0F385E_P_1) },
+  },
+  {
+    /* MOD_VEX_W_0_0F385E_P_2 */
+    { Bad_Opcode },
+    { X86_64_TABLE (X86_64_MOD_VEX_W_0_0F385E_P_2) },
+  },
+  {
+    /* MOD_VEX_W_0_0F385E_P_3 */
+    { Bad_Opcode },
+    { X86_64_TABLE (X86_64_MOD_VEX_W_0_0F385E_P_3) },
+  },
   {
     /* MOD_0F38F5_PREFIX_2 */
     { "wrussK",		{ M, Gdq }, PREFIX_OPCODE },
@@ -11035,6 +11295,10 @@  static const struct dis386 rm_table[][8] = {
     { "sfence",		{ Skip_MODRM }, 0 },
 
   },
+  {
+    /* RM_VEX_W_0_0F3849_P_0_M_3_R_0 */
+    { X86_64_TABLE (X86_64_0F3849_MOD_3_REG_0_RM_0) },
+  },
 };
 
 #define INTERNAL_DISASSEMBLER_ERROR _("<internal disassembler error>")
@@ -11926,6 +12190,7 @@  print_insn (bfd_vma pc, disassemble_info *info)
       names_xmm = intel_names_xmm;
       names_ymm = intel_names_ymm;
       names_zmm = intel_names_zmm;
+      names_tmm = intel_names_tmm;
       index64 = intel_index64;
       index32 = intel_index32;
       names_mask = intel_names_mask;
@@ -11948,6 +12213,7 @@  print_insn (bfd_vma pc, disassemble_info *info)
       names_xmm = att_names_xmm;
       names_ymm = att_names_ymm;
       names_zmm = att_names_zmm;
+      names_tmm = att_names_tmm;
       index64 = att_index64;
       index32 = att_index32;
       names_mask = att_names_mask;
@@ -13451,6 +13717,8 @@  intel_operand_size (int bytemode, int sizeflag)
     }
   switch (bytemode)
     {
+    case void_mode:
+      break;
     case b_mode:
     case b_swap_mode:
     case dqb_mode:
@@ -15172,6 +15440,7 @@  OP_XMM (int bytemode, int sizeflag ATTRIBUTE_UNUSED)
       && bytemode != xmmq_mode
       && bytemode != evex_half_bcst_xmmq_mode
       && bytemode != ymm_mode
+      && bytemode != tmm_mode
       && bytemode != scalar_mode)
     {
       switch (vex.length)
@@ -15210,6 +15479,8 @@  OP_XMM (int bytemode, int sizeflag ATTRIBUTE_UNUSED)
 	  abort ();
 	}
     }
+  else if (bytemode == tmm_mode)
+    names = names_tmm;
   else if (bytemode == ymm_mode)
     names = names_ymm;
   else
@@ -15334,6 +15605,7 @@  OP_EX (int bytemode, int sizeflag)
       && bytemode != xmmq_mode
       && bytemode != evex_half_bcst_xmmq_mode
       && bytemode != ymm_mode
+      && bytemode != tmm_mode
       && bytemode != d_scalar_mode
       && bytemode != d_scalar_swap_mode
       && bytemode != q_scalar_mode
@@ -15371,6 +15643,8 @@  OP_EX (int bytemode, int sizeflag)
 	  abort ();
 	}
     }
+  else if (bytemode == tmm_mode)
+    names = names_tmm;
   else if (bytemode == ymm_mode)
     names = names_ymm;
   else
@@ -15926,6 +16200,13 @@  OP_VEX (int bytemode, int sizeflag ATTRIBUTE_UNUSED)
       return;
     }
 
+  if (bytemode == tmm_mode)
+    {
+      /* names_tmm has only 8 entries (tmm0-tmm7).  */
+      oappend (reg < 8 ? names_tmm[reg] : "(bad)");
+      return;
+    }
+
   switch (vex.length)
     {
     case 128:
diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c
index e7454db5d4..bc900bdb76 100644
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -297,6 +297,12 @@  static initializer cpu_flag_init[] =
     "CpuWAITPKG" },
   { "CPU_CLDEMOTE_FLAGS",
     "CpuCLDEMOTE" },
+  { "CPU_AMX_INT8_FLAGS",
+    "CpuAMX_INT8" },
+  { "CPU_AMX_BF16_FLAGS",
+    "CpuAMX_BF16" },
+  { "CPU_AMX_TILE_FLAGS",
+    "CpuAMX_TILE" },
   { "CPU_MOVDIRI_FLAGS",
     "CpuMOVDIRI" },
   { "CPU_MOVDIR64B_FLAGS",
@@ -383,6 +389,12 @@  static initializer cpu_flag_init[] =
     "CpuAVX512_BITALG" },
   { "CPU_ANY_AVX512_BF16_FLAGS",
     "CpuAVX512_BF16" },
+  { "CPU_ANY_AMX_INT8_FLAGS",
+    "CpuAMX_INT8" },
+  { "CPU_ANY_AMX_BF16_FLAGS",
+    "CpuAMX_BF16" },
+  { "CPU_ANY_AMX_TILE_FLAGS",
+    "CpuAMX_TILE" },
   { "CPU_ANY_MOVDIRI_FLAGS",
     "CpuMOVDIRI" },
   { "CPU_ANY_MOVDIR64B_FLAGS",
@@ -459,6 +471,8 @@  static initializer operand_type_init[] =
     "Class=RegSIMD|Ymmword" },
   { "OPERAND_TYPE_REGZMM",
     "Class=RegSIMD|Zmmword" },
+  { "OPERAND_TYPE_REGTMM",
+    "Class=RegSIMD|Tmmword" },
   { "OPERAND_TYPE_REGMASK",
     "Class=RegMask" },
   { "OPERAND_TYPE_REGBND",
@@ -611,6 +625,9 @@  static bitfield cpu_flags[] =
   BITFIELD (CpuPCONFIG),
   BITFIELD (CpuWAITPKG),
   BITFIELD (CpuCLDEMOTE),
+  BITFIELD (CpuAMX_INT8),
+  BITFIELD (CpuAMX_BF16),
+  BITFIELD (CpuAMX_TILE),
   BITFIELD (CpuMOVDIRI),
   BITFIELD (CpuMOVDIR64B),
   BITFIELD (CpuENQCMD),
@@ -740,6 +757,7 @@  static bitfield operand_types[] =
   BITFIELD (Xmmword),
   BITFIELD (Ymmword),
   BITFIELD (Zmmword),
+  BITFIELD (Tmmword),
   BITFIELD (Unspecified),
 #ifdef OTUnused
   BITFIELD (OTUnused),
diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h
index 174438698e..fa0e64ab63 100644
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -223,6 +223,12 @@  enum
   /* CET instructions support required */
   CpuIBT,
   CpuSHSTK,
+  /* AMX-INT8 instructions required */
+  CpuAMX_INT8,
+  /* AMX-BF16 instructions required */
+  CpuAMX_BF16,
+  /* AMX-TILE instructions required */
+  CpuAMX_TILE,
   /* GFNI instructions required */
   CpuGFNI,
   /* VAES instructions required */
@@ -372,6 +378,9 @@  typedef union i386_cpu_flags
       unsigned int cpuptwrite:1;
       unsigned int cpuibt:1;
       unsigned int cpushstk:1;
+      unsigned int cpuamx_int8:1;
+      unsigned int cpuamx_bf16:1;
+      unsigned int cpuamx_tile:1;
       unsigned int cpugfni:1;
       unsigned int cpuvaes:1;
       unsigned int cpuvpclmulqdq:1;
@@ -528,10 +537,12 @@  enum
      instructions with 1 destination register operand.
      3. VEX.LWP.  Register destination is encoded in VEX.vvvv and one
 	of the operands can access a memory location.
+     4. VEX.OP3.  Use VEX.vvvv to encode the third operand.
    */
 #define VEXXDS	1
 #define VEXNDD	2
 #define VEXLWP	3
+#define VEXOP3	4
   VexVVVV,
   /* How the VEX.W bit is used:
      0: Set by the REX.W bit.
@@ -574,7 +585,9 @@  enum
 #define VECSIB128	1
 #define VECSIB256	2
 #define VECSIB512	3
+#define SIBMEM		4
   SIB,
+
   /* SSE to AVX support required */
   SSE2AVX,
   /* No AVX equivalent */
@@ -695,11 +708,11 @@  typedef struct i386_opcode_modifier
   unsigned int norex64:1;
   unsigned int ugh:1;
   unsigned int vex:2;
-  unsigned int vexvvvv:2;
+  unsigned int vexvvvv:3;
   unsigned int vexw:2;
   unsigned int vexopcode:3;
   unsigned int vexsources:2;
-  unsigned int sib:2;
+  unsigned int sib:3;
   unsigned int sse2avx:1;
   unsigned int noavx:1;
   unsigned int evex:3;
@@ -803,6 +816,8 @@  enum
   Ymmword,
   /* ZMMWORD size.  */
   Zmmword,
+  /* TMMWORD size.  */
+  Tmmword,
   /* Unspecified memory size.  */
   Unspecified,
 
@@ -847,6 +862,7 @@  typedef union i386_operand_type
       unsigned int xmmword:1;
       unsigned int ymmword:1;
       unsigned int zmmword:1;
+      unsigned int tmmword:1;
       unsigned int unspecified:1;
 #ifdef OTUnused
       unsigned int unused:(OTNumOfBits - OTUnused);
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index ded96884c0..904a95e6e4 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -52,6 +52,7 @@ 
 #define RegXMM Class=RegSIMD|Xmmword
 #define RegYMM Class=RegSIMD|Ymmword
 #define RegZMM Class=RegSIMD|Zmmword
+#define RegTMM Class=RegSIMD|Tmmword
 
 #define RegMask Class=RegMask
 
@@ -80,6 +81,11 @@ 
 #define VexW0 VexW=VEXW0
 #define VexW1 VexW=VEXW1
 #define VexWIG VexW=VEXWIG
+#define VexOP3 VexVVVV=VEXOP3
+#define VecSIB128 SIB=VECSIB128
+#define VecSIB256 SIB=VECSIB256
+#define VecSIB512 SIB=VECSIB512
+#define Sibmem SIB=SIBMEM
 
 #define Vex128 Vex=VEX128
 #define Vex256 Vex=VEX256
@@ -4093,3 +4099,25 @@  xsusldtrk, 0, 0xf20f01e8, None, 3, CpuTSXLDTRK, No_bSuf|No_wSuf|No_lSuf|No_sSuf|
 xresldtrk, 0, 0xf20f01e9, None, 3, CpuTSXLDTRK, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { 0 }
 
 // TSXLDTRK instructions end.
+
+// AMX instructions.
+
+ldtilecfg, 1, 0x49, None, 1, CpuAMX_TILE|Cpu64, Modrm|Vex|VexOpcode=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex }
+sttilecfg, 1, 0x6649, None, 1, CpuAMX_TILE|Cpu64, Modrm|Vex|VexOpcode=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex }
+
+// Use VexOP3 to indicate we are going to use Vex.vvvv field to encode the third operand.
+tdpbf16ps, 3, 0xf35c, None, 1, CpuAMX_BF16|Cpu64, Modrm|Vex|VexOpcode=1|VexOP3|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
+tdpbssd, 3, 0xf25e, None, 1, CpuAMX_INT8|Cpu64, Modrm|Vex|VexOpcode=1|VexOP3|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
+tdpbuud, 3, 0x5e,   None, 1, CpuAMX_INT8|Cpu64, Modrm|Vex|VexOpcode=1|VexOP3|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
+tdpbusd, 3, 0x665e, None, 1, CpuAMX_INT8|Cpu64, Modrm|Vex|VexOpcode=1|VexOP3|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
+tdpbsud, 3, 0xf35e, None, 1, CpuAMX_INT8|Cpu64, Modrm|Vex|VexOpcode=1|VexOP3|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
+
+tileloadd, 2, 0xf24b, None, 1, CpuAMX_TILE|Cpu64, Modrm|Sibmem|Vex|VexOpcode=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex, RegTMM }
+tileloaddt1, 2, 0x664b, None, 1, CpuAMX_TILE|Cpu64, Modrm|Sibmem|Vex|VexOpcode=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex, RegTMM }
+tilestored, 2, 0xf34b, None, 1, CpuAMX_TILE|Cpu64, Modrm|Sibmem|Vex|VexOpcode=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, Unspecified|BaseIndex }
+
+tilerelease, 0, 0x49, 0xc0, 1, CpuAMX_TILE|Cpu64, Vex|VexOpcode=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, { 0 }
+
+tilezero, 1, 0xf249, None, 1, CpuAMX_TILE|Cpu64, Modrm|Vex|VexOpcode=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM }
+
+// AMX instructions end.
diff --git a/opcodes/i386-reg.tbl b/opcodes/i386-reg.tbl
index cdff763ca7..ca7eeba488 100644
--- a/opcodes/i386-reg.tbl
+++ b/opcodes/i386-reg.tbl
@@ -278,6 +278,15 @@  zmm28, Class=RegSIMD|Zmmword, RegVRex|RegRex, 4, Dw2Inval, Dw2Inval
 zmm29, Class=RegSIMD|Zmmword, RegVRex|RegRex, 5, Dw2Inval, Dw2Inval
 zmm30, Class=RegSIMD|Zmmword, RegVRex|RegRex, 6, Dw2Inval, Dw2Inval
 zmm31, Class=RegSIMD|Zmmword, RegVRex|RegRex, 7, Dw2Inval, Dw2Inval
+// TMM registers for AMX
+tmm0, Class=RegSIMD|Tmmword, 0, 0, Dw2Inval, Dw2Inval
+tmm1, Class=RegSIMD|Tmmword, 0, 1, Dw2Inval, Dw2Inval
+tmm2, Class=RegSIMD|Tmmword, 0, 2, Dw2Inval, Dw2Inval
+tmm3, Class=RegSIMD|Tmmword, 0, 3, Dw2Inval, Dw2Inval
+tmm4, Class=RegSIMD|Tmmword, 0, 4, Dw2Inval, Dw2Inval
+tmm5, Class=RegSIMD|Tmmword, 0, 5, Dw2Inval, Dw2Inval
+tmm6, Class=RegSIMD|Tmmword, 0, 6, Dw2Inval, Dw2Inval
+tmm7, Class=RegSIMD|Tmmword, 0, 7, Dw2Inval, Dw2Inval
 // Bound registers for MPX
 bnd0, Class=RegBND, 0, 0, Dw2Inval, Dw2Inval
 bnd1, Class=RegBND, 0, 1, Dw2Inval, Dw2Inval