[RFC,15/X,libsanitizer] Add in MTE stubs

Message ID VI1PR08MB5471CA3016166350102AE8D4E0BA0@VI1PR08MB5471.eurprd08.prod.outlook.com
State New
Headers show
Series
  • [RFC,15/X,libsanitizer] Add in MTE stubs
Related show

Commit Message

Matthew Malcomson Sept. 6, 2019, 2:46 p.m.
This patch in the series is just for demonstration; here we add stubs
where MTE would be implemented.

At the moment all implementations are dummies of some sort, the assembly
generated uses `mov` instead of `irg`, `add` instead of `addg`, and
`sub` instead of `subg`.  This should mean the binaries do all the same
actions except ignoring tags.

For a hardware implementation of memory tagging, checks are done
automatically, so adding HWASAN_CHECK is not needed.  This means that the
`hwasan` pass is no longer needed.
Similarly, the `sanopt` pass is not run when compiling for hardware
memory tagging since it provides no benefit without the HWASAN_CHECK
functions.

This patch also gives backends extra control over how a tag is stored in
a pointer and how many real-memory bytes are represented by each byte in
the shadow space.

gcc/ChangeLog:

2019-09-06  Matthew Malcomson  <matthew.malcomson@arm.com>

	* asan.c (hwasan_increment_tag): Avoid special handling around
	background tag for hardware implementation.
	(hwasan_copy_tag): New.
	(hwasan_tag_init): Choose initialisation value based on
	hardware/software tagging.
	(hwasan_emit_prologue): Account for hardware checking.
	(hwasan_emit_uncolour_frame): Account for hardware checking.
	(hwasan_finish_file): Assert not called for hardware checking.
	(hwasan_expand_check_ifn): Assert not called for hardware
	checking.
	(gate_hwasan): Don't run when have hardware checking.
	* asan.h (hwasan_copy_tag): New decl.
	(HWASAN_TAG_SIZE): Use backend hook if hardware checking.
	(HWASAN_TAG_GRANULE_SIZE): Use backend hook if hardware
	checking.
	* builtins.c (expand_builtin_alloca): Extra TODO comment.
	(expand_stack_restore): Extra TODO comment.
	* cfgexpand.c (expand_stack_vars): Only record untagged bases
	for hardware checking.
	* config/aarch64/aarch64.c (aarch64_tag_memory): Add dummy hook.
	(aarch64_gentag): Add dummy hook.
	(TARGET_MEMTAG_TAG): New.
	(TARGET_MEMTAG_GENTAG): New.
	* config/aarch64/aarch64.h (AARCH64_ISA_MEMTAG): New macro.
	(HARDWARE_MEMORY_TAGGING): Test for MTE.
	* config/aarch64/aarch64.md (random_tag,
	plain_offset_tag<mode>): New.
	(addtag<mode>4): Implement for MTE.
	* config/aarch64/predicates.md (aarch64_MTE_value_offset): New
	predicate.
	* defaults.h (HARDWARE_MEMORY_TAGGING): New macro, defaults to 0.
	* doc/tm.texi: Document new hooks.
	* doc/tm.texi.in: Document new hooks.
	* internal-fn.c (expand_HWASAN_MARK): Account for hardware
	checking.
	* sanopt.c (sanitize_asan_mark_unpoison):
	* target.def (targetm.memtag.tag_size): New.
	(targetm.memtag.granule_size): New.
	(targetm.memtag.copy_tag): New.
	(targetm.memtag.tag): New.
	* targhooks.c (default_memtag_tag_size): New.
	(default_memtag_granule_size): New.
	(default_memtag_copy_tag): New.
	* targhooks.h (default_memtag_tag_size): New decl.
	(default_memtag_granule_size): New decl.
	(default_memtag_copy_tag): New decl.



###############     Attachment also inlined for ease of reply    ###############
diff --git a/gcc/asan.h b/gcc/asan.h
index 6e5ba8be606e9a1eae2afe57f17ccca5562167fd..2d697158a15e7e3078902c4fb742819f90b9a0c4 100644
--- a/gcc/asan.h
+++ b/gcc/asan.h
@@ -27,10 +27,10 @@ extern void hwasan_finish_file (void);
 extern void hwasan_record_base (rtx);
 extern uint8_t hwasan_current_tag ();
 extern void hwasan_increment_tag ();
+extern rtx hwasan_extract_tag (rtx);
 extern rtx hwasan_with_tag (rtx, poly_int64);
 extern void hwasan_tag_init ();
 extern rtx hwasan_create_untagged_base (rtx);
-extern rtx hwasan_extract_tag (rtx tagged_pointer);
 extern rtx hwasan_base ();
 extern void hwasan_emit_prologue (rtx *, rtx *, poly_int64 *, uint8_t *, size_t);
 extern rtx_insn *hwasan_emit_uncolour_frame (rtx, rtx, rtx_insn *);
@@ -100,7 +100,7 @@ extern hash_set <tree> *asan_used_labels;
    required.
    If changing this value, be careful of the predicates/constraints on the
    addtag<mode>4 patterns in the backend.  */
-#define HWASAN_TAG_SIZE 4
+#define HWASAN_TAG_SIZE (HARDWARE_MEMORY_TAGGING ? targetm.memtag.tag_size () : 4)
 /* Tag Granule of HWASAN shadow stack.
    This is the size in real memory that each byte in the shadow memory refers
    to.  I.e. if a variable is X bytes long in memory then it's colour in shadow
@@ -109,12 +109,12 @@ extern hash_set <tree> *asan_used_labels;
    two variables that are neighbours in memory and share a tag granule would
    need to share the same colour (as the shared tag granule can only store one
    colour).  */
-#define HWASAN_TAG_GRANULE_SIZE (1ULL << HWASAN_TAG_SIZE)
-/* How many bits to shift in order to access the tag bits.
-   This approach assumes that the tag is stored in the top N bits of a pointer,
-   and hence that shifting a known amount will leave just the tag bits.  */
-#define HWASAN_SHIFT 56
-#define HWASAN_SHIFT_RTX const_int_rtx[MAX_SAVED_CONST_INT + HWASAN_SHIFT]
+#define HWASAN_TAG_GRANULE_SIZE (HARDWARE_MEMORY_TAGGING ? targetm.memtag.granule_size () : (1ULL << HWASAN_TAG_SIZE))
+
+/*
+   The following HWASAN_* macros are only used when HARDWARE_MEMORY_TAGGING is
+   false, which is why we don't define anything for the case where it's true.
+ */
 /* Define the tag for the stack background.
    NOTE: Having a background colour of zero is hard-coded in the runtime
    library, so we can't really change this.
@@ -125,6 +125,11 @@ extern hash_set <tree> *asan_used_labels;
    ensure things like the return address etc can't be affected by accesses
    through pointer to a user-object.  */
 #define HWASAN_STACK_BACKGROUND 0
+/* How many bits to shift in order to access the tag bits.
+   This approach assumes that the tag is stored in the top N bits of a pointer,
+   and hence that shifting a known amount will leave just the tag bits.  */
+#define HWASAN_SHIFT 56
+#define HWASAN_SHIFT_RTX const_int_rtx[MAX_SAVED_CONST_INT + HWASAN_SHIFT]
 
 /* Various flags for Asan builtins.  */
 enum asan_check_flags
diff --git a/gcc/asan.c b/gcc/asan.c
index ad3d5a6451d3ecd9ff79b768c1e9a3fb92272a7e..5fc8e36865e5c442e7e68aa481c6899fde3ad16a 100644
--- a/gcc/asan.c
+++ b/gcc/asan.c
@@ -3914,7 +3914,7 @@ hwasan_increment_tag ()
 
      That's a help when debugging -- every variable should have a non-zero
      colour.  */
-  if (tag_offset == HWASAN_STACK_BACKGROUND)
+  if (! HARDWARE_MEMORY_TAGGING && tag_offset == HWASAN_STACK_BACKGROUND)
     tag_offset += 1;
 }
 
@@ -3947,7 +3947,9 @@ hwasan_tag_init ()
   asan_used_labels = NULL;
 
   hwasan_base_ptr = NULL_RTX;
-  tag_offset = HWASAN_STACK_BACKGROUND + 1;
+  tag_offset = HARDWARE_MEMORY_TAGGING
+    ? 0
+    : HWASAN_STACK_BACKGROUND + 1;
 }
 
 void
@@ -3958,7 +3960,8 @@ hwasan_emit_prologue (rtx *bases,
 		      size_t length)
 {
   /*
-    NOTE: bases contains both the tagged and untagged base.
+    NOTE: When running the software emulation we record both the tagged and
+    untagged bases.
     This allows us to get both the original frame tag and the untagged variable
     pointer with a minimal of extra instructions.
 
@@ -3969,8 +3972,8 @@ hwasan_emit_prologue (rtx *bases,
     pointers in __hwasan_tag_memory.  We need the tagged base pointer to obtain
     the base tag for an offset.
 
-    We also will need the tagged base pointer for MTE, since the ADDTAG
-    instruction takes a tagged pointer.
+    We need the tagged base pointer for MTE, since the ADDTAG instruction takes
+    a tagged pointer.
  */
   for (size_t i = 0; (i * 2) + 1 < length; i++)
     {
@@ -4002,19 +4005,29 @@ hwasan_emit_prologue (rtx *bases,
 	gcc_assert (tmp % HWASAN_TAG_GRANULE_SIZE == 0);
 
       /* TODO Other options (i.e. inline options)  */
-      /* TODO At the moment we don't generate a random base tag for each
-         frame.  When that happens we will need to generate the tag by
-         adding tags[i] to the frame tag fetched from `bases[i]`.  */
-      rtx ret = init_one_libfunc ("__hwasan_tag_memory");
-      emit_library_call (ret,
-	  LCT_NORMAL,
-	  VOIDmode,
-	  plus_constant (ptr_mode, untagged_bases[i], bot),
-	  ptr_mode,
-	  const_int_rtx[MAX_SAVED_CONST_INT + tags[i]],
-	  QImode,
-	  gen_int_mode (size, ptr_mode),
-	  ptr_mode);
+      if (! HARDWARE_MEMORY_TAGGING )
+	{
+	  /* TODO At the moment we don't generate a random base tag for each
+	     frame.  When that happens we will need to generate the tag by
+	     adding tags[i] to the frame tag fetched from `bases[i]`.  */
+	  rtx ret = init_one_libfunc ("__hwasan_tag_memory");
+	  emit_library_call (ret,
+			     LCT_NORMAL,
+			     VOIDmode,
+			     plus_constant (ptr_mode, untagged_bases[i], bot),
+			     ptr_mode,
+			     const_int_rtx[MAX_SAVED_CONST_INT + tags[i]],
+			     QImode,
+			     gen_int_mode (size, ptr_mode),
+			     ptr_mode);
+	}
+      else
+	{
+	  targetm.memtag.tag (bases[i],
+			      bot,
+			      tags[i],
+			      gen_int_mode (size, ptr_mode));
+	}
     }
 }
 
@@ -4046,11 +4059,19 @@ hwasan_emit_uncolour_frame (rtx dynamic, rtx vars, rtx_insn *before)
 				  NULL_RTX, /* unsignedp = */0, OPTAB_DIRECT);
 
   /* TODO Other options (i.e. inline options)  */
-  rtx ret = init_one_libfunc ("__hwasan_tag_memory");
-  emit_library_call (ret, LCT_NORMAL, VOIDmode,
-      bot_rtx, ptr_mode,
-      const0_rtx, QImode,
-      size_rtx, ptr_mode);
+  if (! HARDWARE_MEMORY_TAGGING )
+    {
+      rtx ret = init_one_libfunc ("__hwasan_tag_memory");
+      emit_library_call (ret, LCT_NORMAL, VOIDmode,
+			 bot_rtx, ptr_mode,
+			 const0_rtx, QImode,
+			 size_rtx, ptr_mode);
+    }
+  else
+    {
+      targetm.memtag.copy_tag (bot_rtx, stack_pointer_rtx);
+      targetm.memtag.tag (bot_rtx, 0, 0, size_rtx);
+    }
 
   do_pending_stack_adjust ();
   rtx_insn *insns = get_insns ();
@@ -4096,6 +4117,7 @@ static GTY(()) tree hwasan_ctor_statements;
 void
 hwasan_finish_file (void)
 {
+  gcc_assert (! HARDWARE_MEMORY_TAGGING);
   /* Avoid instrumenting code in the hwasan constructors/destructors.  */
   flag_sanitize &= ~SANITIZE_HWADDRESS;
   /* TODO Only do this if in userspace.
@@ -4113,6 +4135,7 @@ hwasan_finish_file (void)
 bool
 hwasan_expand_check_ifn (gimple_stmt_iterator *iter, bool)
 {
+  gcc_assert (! HARDWARE_MEMORY_TAGGING);
   // TODO For now only implementing the function when using calls.
   // This is a little easier, and means I can rely on the library
   // implementation while checking my instrumentation code for now.
@@ -4215,7 +4238,7 @@ hwasan_expand_mark_ifn (gimple_stmt_iterator *)
 bool
 gate_hwasan ()
 {
-  return memory_tagging_p ();
+  return memory_tagging_p () && ! HARDWARE_MEMORY_TAGGING;
 }
 bool
 hardware_memory_tagging_p ()
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 3f32754c4d35fc34af7c53156d2a356f69a94a8f..544d07b67a74875fdd93b152b5720f58a10931a1 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -5312,6 +5312,17 @@ expand_builtin_frame_address (tree fndecl, tree exp)
 static rtx
 expand_builtin_alloca (tree exp)
 {
+  /* TODO For hardware memory tagging we will need to call the backend to tag
+     this memory since the `hwasan` pass will not be run.
+
+     The `hwasan` pass is mainly to add HWASAN_CHECK internal functions where
+     checks should be made.  With hardware memory tagging the checks are done
+     automatically by the architecture.
+
+     The `hwasan` pass also modifies the behaviour of the alloca builtin
+     function in a target-independent manner, but when memory tagging is
+     handled by the backend it is more convenient to handle the tagging in the
+     alloca hook.  */
   rtx op0;
   rtx result;
   unsigned int align;
@@ -6932,6 +6943,9 @@ expand_builtin_set_thread_pointer (tree exp)
 static void
 expand_stack_restore (tree var)
 {
+  /* TODO If memory tagging is enabled through the hardware we need to uncolour
+     the stack from where we are to where we're going. (i.e. colour in the
+     background stack colour).  */
   rtx_insn *prev;
   rtx sa = expand_normal (var);
 
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 9f0872b32354cbc3186f3f2d2600f711a46926d1..061f00c2e1cf5d1b86fb3dd03d27fd0bf905721a 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -1041,6 +1041,8 @@ struct stack_vars_data
      ASAN records HOST_WIDE_INT offsets (that was enough before the
      introduction of SVE vectors) which  */
   auto_vec<poly_int64> hwasan_vec;
+  /* HWASAN needs to record untagged base pointers when there isn't hardware
+     memory tagging enabled by the architecture.  */
   auto_vec<rtx> hwasan_untagged_base_vec;
   auto_vec<rtx> hwasan_base_vec;
 
@@ -1174,7 +1176,8 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
 	      gcc_assert (stack_vars[i].alignb >= HWASAN_TAG_GRANULE_SIZE);
 	      offset = alloc_stack_frame_space (0, HWASAN_TAG_GRANULE_SIZE);
 	      data->hwasan_vec.safe_push (offset);
-	      data->hwasan_untagged_base_vec.safe_push (virtual_stack_vars_rtx);
+	      if (! HARDWARE_MEMORY_TAGGING)
+		data->hwasan_untagged_base_vec.safe_push (virtual_stack_vars_rtx);
 	    }
 	  /* ASAN description strings don't yet have a syntax for expressing
 	     polynomial offsets.  */
@@ -1290,10 +1293,18 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
 		 requirement means that the alignment requirement is greater
 		 than the required alignment for tags.
 		*/
-	      if (!large_untagged_base)
-		large_untagged_base = hwasan_create_untagged_base (large_base);
 	      data->hwasan_vec.safe_push (large_alloc);
-	      data->hwasan_untagged_base_vec.safe_push (large_untagged_base);
+
+	      if (! HARDWARE_MEMORY_TAGGING )
+	      {
+		/* We only need to record the untagged bases for HWASAN, since
+		   the runtime library for that doesn't accept tagged pointers.
+		   For hardware implementations of memory tagging there is no
+		   use of recording these untagged versions.  */
+		if (!large_untagged_base)
+		  large_untagged_base = hwasan_create_untagged_base (large_base);
+		data->hwasan_untagged_base_vec.safe_push (large_untagged_base);
+	      }
 	    }
 	  offset = large_alloc;
 	  large_alloc += stack_vars[i].size;
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 7bd3bf525dd71347a12ed9cd2227bc2cd6e9cc55..8ea219f25ecb13d26e5d84ee45e8ced61c3e72a9 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -235,6 +235,7 @@ extern unsigned aarch64_architecture_version;
 #define AARCH64_ISA_F16FML	   (aarch64_isa_flags & AARCH64_FL_F16FML)
 #define AARCH64_ISA_RCPC8_4	   (aarch64_isa_flags & AARCH64_FL_RCPC8_4)
 #define AARCH64_ISA_V8_5	   (aarch64_isa_flags & AARCH64_FL_V8_5)
+#define AARCH64_ISA_MEMTAG	   (aarch64_isa_flags & AARCH64_FL_MEMTAG)
 
 /* Crypto is an optional extension to AdvSIMD.  */
 #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO)
@@ -509,6 +510,10 @@ extern unsigned aarch64_architecture_version;
 #define EH_RETURN_STACKADJ_RTX	gen_rtx_REG (Pmode, R4_REGNUM)
 #define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
+/* We have memory tag checking if we have the MEMTAG extension enabled and
+   hence want to handle that and colouring memory ourselves.  */
+#define HARDWARE_MEMORY_TAGGING AARCH64_ISA_MEMTAG
+
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
 #define DONT_USE_BUILTIN_SETJMP 1
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 8a290dcd9046be4775627dec0e9b3bf826ce3770..444021f81191a2eee3e50a4fcb4ae6ccb33182ab 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -18772,6 +18772,22 @@ aarch64_stack_protect_guard (void)
   return NULL_TREE;
 }
 
+/* Implement TARGET_MEMTAG_TAG for AArch64. This is only available when
+   AARCH64_ISA_MEMTAG is available.  TODO Eventually we would just want
+   something to emit a loop of STG or ST2G.  Currently unimplemented.  */
+void
+aarch64_tag_memory (rtx tagged_start, poly_int64 address_offset, uint8_t tag_offset,
+		    rtx size)
+{
+  return;
+}
+
+void
+aarch64_gentag (rtx a, rtx b)
+{
+  emit_insn (gen_random_tag (a, b));
+}
+
 /* Implement TARGET_ASM_FILE_END for AArch64.  This adds the AArch64 GNU NOTE
    section at the end if needed.  */
 #define GNU_PROPERTY_AARCH64_FEATURE_1_AND	0xc0000000
@@ -19336,6 +19352,14 @@ aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_GET_MULTILIB_ABI_NAME
 #define TARGET_GET_MULTILIB_ABI_NAME aarch64_get_multilib_abi_name
 
+#ifdef AARCH64_ISA_MEMTAG
+#undef TARGET_MEMTAG_TAG
+#define TARGET_MEMTAG_TAG aarch64_tag_memory
+
+#undef TARGET_MEMTAG_GENTAG
+#define TARGET_MEMTAG_GENTAG aarch64_gentag
+#endif
+
 #if CHECKING_P
 #undef TARGET_RUN_TARGET_SELFTESTS
 #define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index cb0d3ae6bbf3ed439c7b27683726f4c30b04777d..70a927b6b70e4e498d962c5e9cf1344f2661e377 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -231,6 +231,7 @@ (define_c_enum "unspec" [
     UNSPEC_REV_SUBREG
     UNSPEC_SPECULATION_TRACKER
     UNSPEC_COPYSIGN
+    UNSPEC_GENTAG
 ])
 
 (define_c_enum "unspecv" [
@@ -409,6 +410,30 @@ (define_expand "cbranch<mode>4"
   "
 )
 
+;; TODO
+;;    Need to put in some sort of random tag here.
+;;    At the moment skipping it because I don't know what instructions to use.
+;;    (plus, starting at zero means that I can know what to expect during
+;;    development.
+(define_insn "random_tag"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_GENTAG))]
+  ""
+  ;; "AARCH64_ISA_MEMTAG"
+  "mov\\t%0, %1 // irg\\t%0, %1"
+)
+
+(define_insn "plain_offset_tag<mode>"
+  [(set (match_operand:GPI 0 "register_operand" "=r,r")
+    (addtag:GPI (match_operand:GPI 1 "register_operand" "r,r")
+     (match_operand:GPI 2 "aarch64_MTE_value_offset" "I,J")
+     (match_operand:GPI 3 "aarch64_MTE_tag_offset" "i,i")))]
+  "AARCH64_ISA_MEMTAG"
+  "@
+  add\\t%0, %1, %2     // addg\\t%0, %1, %2, %3
+  sub\\t%0, %1, #%n2   // subg\\t%0, %1, #%n2, %3"
+)
+
 (define_expand "addtag<mode>4"
   [(set (match_operand:GPI 0 "register_operand" "")
     (addtag:GPI (match_operand:GPI 1 "register_operand" "")
@@ -417,13 +442,41 @@ (define_expand "addtag<mode>4"
   ""
 {
   gcc_assert (can_create_pseudo_p ());
-  /* Simply add the two values as a constant and use that.  The adddi pattern
-     will handle the fact that the integer is out of range for ADD.  */
-  poly_int64 val = rtx_to_poly_int64 (operands[2]);
-  val += ((uint64_t)INTVAL(operands[3]) << 56);
-  emit_insn (gen_add<mode>3 (operands[0], operands[1],
-			     immed_wide_int_const (val, <MODE>mode)));
-  DONE;
+
+  if (!AARCH64_ISA_MEMTAG)
+    {
+      /* TODO
+	  Need to look into what the most efficient code sequence is.
+	  Right now just want something to work so I can bootstrap again and
+	  check for mistakes.
+
+	  This is a code sequence that would be emitted *many* times, so we
+	  want it as small as possible.
+	*/
+    /* Simply add the two values as a constant and use that.  The adddi
+       pattern will handle the fact that the integer is out of the
+       representable range.  */
+      poly_int64 val = rtx_to_poly_int64 (operands[2]);
+      val += ((uint64_t)INTVAL(operands[3]) << 56);
+      emit_insn (gen_add<mode>3 (operands[0], operands[1],
+				 immed_wide_int_const (val, <MODE>mode)));
+      DONE;
+    }
+  else if (!aarch64_MTE_value_offset (operands[2], <MODE>mode))
+    {
+      /* Must always be able to create a pseudo register.
+	 This pattern requires the ability to generate new pseudo registers,
+	 since there's no way to handle a constant that's too large other than
+         adding that constant into the current register.
+         If we had a too-large constant at the point where we can't create a
+         pseudo register, then using the basic `plus` pattern would end up with
+         a `plus` pattern that has a too-large constant, which would fail there
+         instead of here. */
+      rtx newreg = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_add<mode>3 (newreg, operands[1], operands[2]));
+      operands[2] = const0_rtx;
+      operands[1] = newreg;
+    }
 })
 
 (define_expand "cbranchcc4"
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index e2aa0290f833fbffedec1d8dab219f72eb17419e..0edfb6de7d1ddb466d114b7510e58499117061c5 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -141,6 +141,13 @@ (define_predicate "aarch64_MTE_tag_offset"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 16)")))
 
+;; TODO
+;;    Will have to change the constant from 4096 to 64 when switching to addg.
+(define_predicate "aarch64_MTE_value_offset"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), -4096, 4096)")))
+
+
 (define_predicate "aarch64_pluslong_strict_immedate"
   (and (match_operand 0 "aarch64_pluslong_immediate")
        (not (match_operand 0 "aarch64_plus_immediate"))))
diff --git a/gcc/defaults.h b/gcc/defaults.h
index b7534256119bd7834f2fa9d5f32863822d3b393a..b47fe6b35554091c9f3228169ad1f6ff4c692b14 100644
--- a/gcc/defaults.h
+++ b/gcc/defaults.h
@@ -1294,6 +1294,16 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define EH_RETURN_HANDLER_RTX NULL
 #endif
 
+/* Indicate whether this backend has automatic access checks for tagged
+   pointers (i.e. HWASAN) if so then HWASAN memory tagging can be implemented
+   with much less instrumentation.
+   If a backend advertises that they have this it must also handle tagging
+   shadow memory themselves by implementing TARGET_MEMORY_TAG and avoiding the
+   background stack colour automatically in the addtag pattern.  */
+#ifndef HARDWARE_MEMORY_TAGGING
+#define HARDWARE_MEMORY_TAGGING 0
+#endif
+
 #ifdef GCC_INSN_FLAGS_H
 /* Dependent default target macro definitions
 
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 67d79a23799cd3057f7d91bd538c7ee76c836f82..7f0cc257a6634e3b0a2013b7fb9ffa4083f19038 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -2968,6 +2968,30 @@ This hook defines the machine mode to use for the boolean result of  conditional
 A target hook which lets a backend compute the set of pressure classes to  be used by those optimization passes which take register pressure into  account, as opposed to letting IRA compute them.  It returns the number of  register classes stored in the array @var{pressure_classes}.
 @end deftypefn
 
+@deftypefn {Target Hook} uint8_t TARGET_MEMTAG_TAG_SIZE ()
+Return the size in bits of a tag for this platform.
+@end deftypefn
+
+@deftypefn {Target Hook} uint8_t TARGET_MEMTAG_GRANULE_SIZE ()
+Return how many bytes in real memory each byte in shadow memory represents.
+I.e. one byte in shadow memory being colour 1 implies the assocaiated
+targetm.memtag.granule_size () bytes in real memory must all be accessed by
+pointers tagged as 1.
+@end deftypefn
+
+@deftypefn {Target Hook} void TARGET_MEMTAG_COPY_TAG (rtx @var{to}, rtx @var{from})
+Emit insns to copy the tag in FROM to TO.
+@end deftypefn
+
+@deftypefn {Target Hook} void TARGET_MEMTAG_TAG (rtx @var{tagged_start}, poly_int64 @var{address_offset}, uint8_t @var{tag_offset}, rtx @var{size})
+This function should emit an RTX to colour memory.
+It's given arguments TAGGED_START, ADDRESS_OFFSET, TAG_OFFSET, SIZE, where
+TAGGED_START and SIZE are RTL expressions, ADDRESS_OFFSET is a poly_int64
+and TAG_OFFSET is a uint8_t.
+It should emit RTL to colour "shadow memory" for the relevant range the
+colour of the tag it was given.
+@end deftypefn
+
 @deftypefn {Target Hook} void TARGET_MEMTAG_GENTAG (rtx @var{base}, rtx @var{untagged})
 Set the BASE argument to UNTAGGED with some random tag.
 This function is used to generate a tagged base for the current stack frame.
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index e1ec503befadb4061fbd3b95e55757fe22d33c39..6c77c09fca161dc3ade98d81b16c1b01af3c0bc7 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -2370,6 +2370,14 @@ in the reload pass.
 
 @hook TARGET_COMPUTE_PRESSURE_CLASSES
 
+@hook TARGET_MEMTAG_TAG_SIZE
+
+@hook TARGET_MEMTAG_GRANULE_SIZE
+
+@hook TARGET_MEMTAG_COPY_TAG
+
+@hook TARGET_MEMTAG_TAG
+
 @hook TARGET_MEMTAG_GENTAG
 
 @node Stack and Calling
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index c530fe8951c30987c874df83e74be6d058730134..a58a55ad59b2ad7a6b93e2f5f2bbb40b8da51c5e 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -503,9 +503,6 @@ expand_HWASAN_MARK (internal_fn, gcall *gc)
   gcc_checking_assert (TREE_CODE (base) == ADDR_EXPR);
   rtx base_rtx = expand_normal (base);
 
-  rtx tag = is_poison ? const0_rtx : hwasan_extract_tag (base_rtx);
-  rtx address = hwasan_create_untagged_base (base_rtx);
-
   tree len = gimple_call_arg (gc, 2);
   gcc_assert (tree_fits_shwi_p (len));
   unsigned HOST_WIDE_INT size_in_bytes = tree_to_shwi (len);
@@ -515,13 +512,24 @@ expand_HWASAN_MARK (internal_fn, gcall *gc)
   rtx size = gen_int_mode (size_in_bytes, Pmode);
 
   /* TODO Other options (i.e. inline options)  */
-  rtx func = init_one_libfunc ("__hwasan_tag_memory");
-  emit_library_call (func,
-      LCT_NORMAL,
-      VOIDmode,
-      address, ptr_mode,
-      tag, QImode,
-      size, ptr_mode);
+  if (! HARDWARE_MEMORY_TAGGING )
+    {
+      rtx func = init_one_libfunc ("__hwasan_tag_memory");
+      rtx address = hwasan_create_untagged_base (base_rtx);
+      rtx tag = is_poison ? const0_rtx : hwasan_extract_tag (base_rtx);
+      emit_library_call (func,
+			 LCT_NORMAL,
+			 VOIDmode,
+			 address, ptr_mode,
+			 tag, QImode,
+			 size, ptr_mode);
+    }
+  else
+    {
+      if (is_poison)
+	targetm.memtag.copy_tag (base_rtx, stack_pointer_rtx);
+      targetm.memtag.tag (base_rtx, 0, 0, size);
+    }
 }
 
 /* This should get expanded in the sanopt pass.  */
diff --git a/gcc/target.def b/gcc/target.def
index 5326cb070dec78f19bfe0844a9d5e50c69e7dcc1..e0c543254538c802e1e0a059e1a3e60a045a0cdf 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -6709,6 +6709,34 @@ HOOK_VECTOR_END (mode_switching)
 HOOK_VECTOR (TARGET_MEMTAG_, memtag)
 
 DEFHOOK
+(tag_size,
+ "Return the size in bits of a tag for this platform.",
+ uint8_t, (), default_memtag_tag_size)
+
+DEFHOOK
+(granule_size,
+ "Return how many bytes in real memory each byte in shadow memory represents.\n\
+I.e. one byte in shadow memory being colour 1 implies the assocaiated\n\
+targetm.memtag.granule_size () bytes in real memory must all be accessed by\n\
+pointers tagged as 1.",
+uint8_t, (), default_memtag_granule_size)
+
+DEFHOOK
+(copy_tag,
+ "Emit insns to copy the tag in FROM to TO.",
+void, (rtx to, rtx from), default_memtag_copy_tag)
+
+DEFHOOK
+(tag,
+ "This function should emit an RTX to colour memory.\n\
+It's given arguments TAGGED_START, ADDRESS_OFFSET, TAG_OFFSET, SIZE, where\n\
+TAGGED_START and SIZE are RTL expressions, ADDRESS_OFFSET is a poly_int64\n\
+and TAG_OFFSET is a uint8_t.\n\
+It should emit RTL to colour \"shadow memory\" for the relevant range the\n\
+colour of the tag it was given.",
+  void, (rtx tagged_start, poly_int64 address_offset, uint8_t tag_offset, rtx size), NULL)
+
+DEFHOOK
 (gentag,
  "Set the BASE argument to UNTAGGED with some random tag.\n\
 This function is used to generate a tagged base for the current stack frame.",
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 493ff3bb29263f8360bea0f7ead1092b4d0c646a..1c09bbad964b7705eea493a828d5906ad07481b6 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -288,5 +288,8 @@ extern rtx default_speculation_safe_value (machine_mode, rtx, rtx, rtx);
 extern void default_remove_extra_call_preserved_regs (rtx_insn *,
 						      HARD_REG_SET *);
 
+extern uint8_t default_memtag_tag_size ();
+extern uint8_t default_memtag_granule_size ();
 extern void default_memtag_gentag (rtx, rtx);
+extern void default_memtag_copy_tag (rtx, rtx);
 #endif /* GCC_TARGHOOKS_H */
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 58ec711ce81ee67d692e81d2616bd3422f6bd092..bf078295cbd9514778454cef775dd288fd502641 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -70,6 +70,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "varasm.h"
 #include "flags.h"
 #include "explow.h"
+#include "expmed.h"
 #include "calls.h"
 #include "expr.h"
 #include "output.h"
@@ -83,6 +84,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "real.h"
 #include "langhooks.h"
 #include "sbitmap.h"
+#include "attribs.h"
+#include "asan.h"
 
 bool
 default_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
@@ -2391,6 +2394,18 @@ default_remove_extra_call_preserved_regs (rtx_insn *, HARD_REG_SET *)
 {
 }
 
+uint8_t
+default_memtag_tag_size ()
+{
+  return 4;
+}
+
+uint8_t
+default_memtag_granule_size ()
+{
+  return 1ULL << default_memtag_tag_size ();
+}
+
 void
 default_memtag_gentag (rtx base, rtx untagged)
 {
@@ -2402,5 +2417,40 @@ default_memtag_gentag (rtx base, rtx untagged)
   emit_move_insn (base, untagged);
 }
 
+void
+default_memtag_copy_tag (rtx to, rtx from)
+{
+  /* TODO: For AArch64 this can cause a problem in LRA if the `to`
+     RTX eventually resolves to being the stack pointer.
+     This happens because the instruction that gets emitted from
+     `store_bit_field` corresponds to a pattern that can't handle the stack
+     pointer and LRA can't figure out to use a temporary register in the `bfi`
+     instruction's place.
+
+     This doesn't cause a problem at the moment since there's currently no way
+     the stack pointer should be given to this function.  The hook is only used
+     when poisoning variables with HWASAN_MARK, and in that function the `to`
+     RTX should always be pointing to a tagged variable on the stack (since
+     the variable is tagged it can't be the stack pointer since that is
+     untagged).
+
+     Eventually we will be generating random tags as the "start" tag for each
+     frame.  When this happens we can no longer avoid the background colour at
+     compile time since we will not know what offset to avoid.
+     This will mean we no longer avoid a `tag_offset` of 0, and hence
+     `hwasan_with_tag` could emit simple PLUS statements.
+
+     When that happens, the last variable on the stack could very well have
+     a zero tag offset and somewhere else in the compiler could optimise that
+     to simply use the stack pointer.
+
+     That would trigger an ICE due to LRA being unable to reload the
+     `insv_regdi` pattern.
+
+     This is another thing that needs to be fixed but I'll deal with later.  */
+  rtx temp = hwasan_extract_tag (from);
+  store_bit_field (to, 8, 56, 0, 0,
+		   QImode, temp, false);
+}
 
 #include "gt-targhooks.h"

Patch

diff --git a/gcc/asan.h b/gcc/asan.h
index 6e5ba8be606e9a1eae2afe57f17ccca5562167fd..2d697158a15e7e3078902c4fb742819f90b9a0c4 100644
--- a/gcc/asan.h
+++ b/gcc/asan.h
@@ -27,10 +27,10 @@  extern void hwasan_finish_file (void);
 extern void hwasan_record_base (rtx);
 extern uint8_t hwasan_current_tag ();
 extern void hwasan_increment_tag ();
+extern rtx hwasan_extract_tag (rtx);
 extern rtx hwasan_with_tag (rtx, poly_int64);
 extern void hwasan_tag_init ();
 extern rtx hwasan_create_untagged_base (rtx);
-extern rtx hwasan_extract_tag (rtx tagged_pointer);
 extern rtx hwasan_base ();
 extern void hwasan_emit_prologue (rtx *, rtx *, poly_int64 *, uint8_t *, size_t);
 extern rtx_insn *hwasan_emit_uncolour_frame (rtx, rtx, rtx_insn *);
@@ -100,7 +100,7 @@  extern hash_set <tree> *asan_used_labels;
    required.
    If changing this value, be careful of the predicates/constraints on the
    addtag<mode>4 patterns in the backend.  */
-#define HWASAN_TAG_SIZE 4
+#define HWASAN_TAG_SIZE (HARDWARE_MEMORY_TAGGING ? targetm.memtag.tag_size () : 4)
 /* Tag Granule of HWASAN shadow stack.
    This is the size in real memory that each byte in the shadow memory refers
    to.  I.e. if a variable is X bytes long in memory then it's colour in shadow
@@ -109,12 +109,12 @@  extern hash_set <tree> *asan_used_labels;
    two variables that are neighbours in memory and share a tag granule would
    need to share the same colour (as the shared tag granule can only store one
    colour).  */
-#define HWASAN_TAG_GRANULE_SIZE (1ULL << HWASAN_TAG_SIZE)
-/* How many bits to shift in order to access the tag bits.
-   This approach assumes that the tag is stored in the top N bits of a pointer,
-   and hence that shifting a known amount will leave just the tag bits.  */
-#define HWASAN_SHIFT 56
-#define HWASAN_SHIFT_RTX const_int_rtx[MAX_SAVED_CONST_INT + HWASAN_SHIFT]
+#define HWASAN_TAG_GRANULE_SIZE (HARDWARE_MEMORY_TAGGING ? targetm.memtag.granule_size () : (1ULL << HWASAN_TAG_SIZE))
+
+/*
+   The following HWASAN_* macros are only used when HARDWARE_MEMORY_TAGGING is
+   false, which is why we don't define anything for the case where it's true.
+ */
 /* Define the tag for the stack background.
    NOTE: Having a background colour of zero is hard-coded in the runtime
    library, so we can't really change this.
@@ -125,6 +125,11 @@  extern hash_set <tree> *asan_used_labels;
    ensure things like the return address etc can't be affected by accesses
    through pointer to a user-object.  */
 #define HWASAN_STACK_BACKGROUND 0
+/* How many bits to shift in order to access the tag bits.
+   This approach assumes that the tag is stored in the top N bits of a pointer,
+   and hence that shifting a known amount will leave just the tag bits.  */
+#define HWASAN_SHIFT 56
+#define HWASAN_SHIFT_RTX const_int_rtx[MAX_SAVED_CONST_INT + HWASAN_SHIFT]
 
 /* Various flags for Asan builtins.  */
 enum asan_check_flags
diff --git a/gcc/asan.c b/gcc/asan.c
index ad3d5a6451d3ecd9ff79b768c1e9a3fb92272a7e..5fc8e36865e5c442e7e68aa481c6899fde3ad16a 100644
--- a/gcc/asan.c
+++ b/gcc/asan.c
@@ -3914,7 +3914,7 @@  hwasan_increment_tag ()
 
      That's a help when debugging -- every variable should have a non-zero
      colour.  */
-  if (tag_offset == HWASAN_STACK_BACKGROUND)
+  if (! HARDWARE_MEMORY_TAGGING && tag_offset == HWASAN_STACK_BACKGROUND)
     tag_offset += 1;
 }
 
@@ -3947,7 +3947,9 @@  hwasan_tag_init ()
   asan_used_labels = NULL;
 
   hwasan_base_ptr = NULL_RTX;
-  tag_offset = HWASAN_STACK_BACKGROUND + 1;
+  tag_offset = HARDWARE_MEMORY_TAGGING
+    ? 0
+    : HWASAN_STACK_BACKGROUND + 1;
 }
 
 void
@@ -3958,7 +3960,8 @@  hwasan_emit_prologue (rtx *bases,
 		      size_t length)
 {
   /*
-    NOTE: bases contains both the tagged and untagged base.
+    NOTE: When running the software emulation we record both the tagged and
+    untagged bases.
     This allows us to get both the original frame tag and the untagged variable
     pointer with a minimal of extra instructions.
 
@@ -3969,8 +3972,8 @@  hwasan_emit_prologue (rtx *bases,
     pointers in __hwasan_tag_memory.  We need the tagged base pointer to obtain
     the base tag for an offset.
 
-    We also will need the tagged base pointer for MTE, since the ADDTAG
-    instruction takes a tagged pointer.
+    We need the tagged base pointer for MTE, since the ADDTAG instruction takes
+    a tagged pointer.
  */
   for (size_t i = 0; (i * 2) + 1 < length; i++)
     {
@@ -4002,19 +4005,29 @@  hwasan_emit_prologue (rtx *bases,
 	gcc_assert (tmp % HWASAN_TAG_GRANULE_SIZE == 0);
 
       /* TODO Other options (i.e. inline options)  */
-      /* TODO At the moment we don't generate a random base tag for each
-         frame.  When that happens we will need to generate the tag by
-         adding tags[i] to the frame tag fetched from `bases[i]`.  */
-      rtx ret = init_one_libfunc ("__hwasan_tag_memory");
-      emit_library_call (ret,
-	  LCT_NORMAL,
-	  VOIDmode,
-	  plus_constant (ptr_mode, untagged_bases[i], bot),
-	  ptr_mode,
-	  const_int_rtx[MAX_SAVED_CONST_INT + tags[i]],
-	  QImode,
-	  gen_int_mode (size, ptr_mode),
-	  ptr_mode);
+      if (! HARDWARE_MEMORY_TAGGING )
+	{
+	  /* TODO At the moment we don't generate a random base tag for each
+	     frame.  When that happens we will need to generate the tag by
+	     adding tags[i] to the frame tag fetched from `bases[i]`.  */
+	  rtx ret = init_one_libfunc ("__hwasan_tag_memory");
+	  emit_library_call (ret,
+			     LCT_NORMAL,
+			     VOIDmode,
+			     plus_constant (ptr_mode, untagged_bases[i], bot),
+			     ptr_mode,
+			     const_int_rtx[MAX_SAVED_CONST_INT + tags[i]],
+			     QImode,
+			     gen_int_mode (size, ptr_mode),
+			     ptr_mode);
+	}
+      else
+	{
+	  targetm.memtag.tag (bases[i],
+			      bot,
+			      tags[i],
+			      gen_int_mode (size, ptr_mode));
+	}
     }
 }
 
@@ -4046,11 +4059,19 @@  hwasan_emit_uncolour_frame (rtx dynamic, rtx vars, rtx_insn *before)
 				  NULL_RTX, /* unsignedp = */0, OPTAB_DIRECT);
 
   /* TODO Other options (i.e. inline options)  */
-  rtx ret = init_one_libfunc ("__hwasan_tag_memory");
-  emit_library_call (ret, LCT_NORMAL, VOIDmode,
-      bot_rtx, ptr_mode,
-      const0_rtx, QImode,
-      size_rtx, ptr_mode);
+  if (! HARDWARE_MEMORY_TAGGING )
+    {
+      rtx ret = init_one_libfunc ("__hwasan_tag_memory");
+      emit_library_call (ret, LCT_NORMAL, VOIDmode,
+			 bot_rtx, ptr_mode,
+			 const0_rtx, QImode,
+			 size_rtx, ptr_mode);
+    }
+  else
+    {
+      targetm.memtag.copy_tag (bot_rtx, stack_pointer_rtx);
+      targetm.memtag.tag (bot_rtx, 0, 0, size_rtx);
+    }
 
   do_pending_stack_adjust ();
   rtx_insn *insns = get_insns ();
@@ -4096,6 +4117,7 @@  static GTY(()) tree hwasan_ctor_statements;
 void
 hwasan_finish_file (void)
 {
+  gcc_assert (! HARDWARE_MEMORY_TAGGING);
   /* Avoid instrumenting code in the hwasan constructors/destructors.  */
   flag_sanitize &= ~SANITIZE_HWADDRESS;
   /* TODO Only do this if in userspace.
@@ -4113,6 +4135,7 @@  hwasan_finish_file (void)
 bool
 hwasan_expand_check_ifn (gimple_stmt_iterator *iter, bool)
 {
+  gcc_assert (! HARDWARE_MEMORY_TAGGING);
   // TODO For now only implementing the function when using calls.
   // This is a little easier, and means I can rely on the library
   // implementation while checking my instrumentation code for now.
@@ -4215,7 +4238,7 @@  hwasan_expand_mark_ifn (gimple_stmt_iterator *)
 bool
 gate_hwasan ()
 {
-  return memory_tagging_p ();
+  return memory_tagging_p () && ! HARDWARE_MEMORY_TAGGING;
 }
 bool
 hardware_memory_tagging_p ()
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 3f32754c4d35fc34af7c53156d2a356f69a94a8f..544d07b67a74875fdd93b152b5720f58a10931a1 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -5312,6 +5312,17 @@  expand_builtin_frame_address (tree fndecl, tree exp)
 static rtx
 expand_builtin_alloca (tree exp)
 {
+  /* TODO For hardware memory tagging we will need to call the backend to tag
+     this memory since the `hwasan` pass will not be run.
+
+     The `hwasan` pass is mainly to add HWASAN_CHECK internal functions where
+     checks should be made.  With hardware memory tagging the checks are done
+     automatically by the architecture.
+
+     The `hwasan` pass also modifies the behaviour of the alloca builtin
+     function in a target-independent manner, but when memory tagging is
+     handled by the backend it is more convenient to handle the tagging in the
+     alloca hook.  */
   rtx op0;
   rtx result;
   unsigned int align;
@@ -6932,6 +6943,9 @@  expand_builtin_set_thread_pointer (tree exp)
 static void
 expand_stack_restore (tree var)
 {
+  /* TODO If memory tagging is enabled through the hardware we need to uncolour
+     the stack from where we are to where we're going. (i.e. colour in the
+     background stack colour).  */
   rtx_insn *prev;
   rtx sa = expand_normal (var);
 
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 9f0872b32354cbc3186f3f2d2600f711a46926d1..061f00c2e1cf5d1b86fb3dd03d27fd0bf905721a 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -1041,6 +1041,8 @@  struct stack_vars_data
      ASAN records HOST_WIDE_INT offsets (that was enough before the
      introduction of SVE vectors) which  */
   auto_vec<poly_int64> hwasan_vec;
+  /* HWASAN needs to record untagged base pointers when there isn't hardware
+     memory tagging enabled by the architecture.  */
   auto_vec<rtx> hwasan_untagged_base_vec;
   auto_vec<rtx> hwasan_base_vec;
 
@@ -1174,7 +1176,8 @@  expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
 	      gcc_assert (stack_vars[i].alignb >= HWASAN_TAG_GRANULE_SIZE);
 	      offset = alloc_stack_frame_space (0, HWASAN_TAG_GRANULE_SIZE);
 	      data->hwasan_vec.safe_push (offset);
-	      data->hwasan_untagged_base_vec.safe_push (virtual_stack_vars_rtx);
+	      if (! HARDWARE_MEMORY_TAGGING)
+		data->hwasan_untagged_base_vec.safe_push (virtual_stack_vars_rtx);
 	    }
 	  /* ASAN description strings don't yet have a syntax for expressing
 	     polynomial offsets.  */
@@ -1290,10 +1293,18 @@  expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
 		 requirement means that the alignment requirement is greater
 		 than the required alignment for tags.
 		*/
-	      if (!large_untagged_base)
-		large_untagged_base = hwasan_create_untagged_base (large_base);
 	      data->hwasan_vec.safe_push (large_alloc);
-	      data->hwasan_untagged_base_vec.safe_push (large_untagged_base);
+
+	      if (! HARDWARE_MEMORY_TAGGING )
+	      {
+		/* We only need to record the untagged bases for HWASAN, since
+		   the runtime library for that doesn't accept tagged pointers.
+		   For hardware implementations of memory tagging there is no
+		   use of recording these untagged versions.  */
+		if (!large_untagged_base)
+		  large_untagged_base = hwasan_create_untagged_base (large_base);
+		data->hwasan_untagged_base_vec.safe_push (large_untagged_base);
+	      }
 	    }
 	  offset = large_alloc;
 	  large_alloc += stack_vars[i].size;
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 7bd3bf525dd71347a12ed9cd2227bc2cd6e9cc55..8ea219f25ecb13d26e5d84ee45e8ced61c3e72a9 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -235,6 +235,7 @@  extern unsigned aarch64_architecture_version;
 #define AARCH64_ISA_F16FML	   (aarch64_isa_flags & AARCH64_FL_F16FML)
 #define AARCH64_ISA_RCPC8_4	   (aarch64_isa_flags & AARCH64_FL_RCPC8_4)
 #define AARCH64_ISA_V8_5	   (aarch64_isa_flags & AARCH64_FL_V8_5)
+#define AARCH64_ISA_MEMTAG	   (aarch64_isa_flags & AARCH64_FL_MEMTAG)
 
 /* Crypto is an optional extension to AdvSIMD.  */
 #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO)
@@ -509,6 +510,10 @@  extern unsigned aarch64_architecture_version;
 #define EH_RETURN_STACKADJ_RTX	gen_rtx_REG (Pmode, R4_REGNUM)
 #define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
+/* We have memory tag checking if we have the MEMTAG extension enabled and
+   hence want to handle that and colouring memory ourselves.  */
+#define HARDWARE_MEMORY_TAGGING AARCH64_ISA_MEMTAG
+
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
 #define DONT_USE_BUILTIN_SETJMP 1
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 8a290dcd9046be4775627dec0e9b3bf826ce3770..444021f81191a2eee3e50a4fcb4ae6ccb33182ab 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -18772,6 +18772,22 @@  aarch64_stack_protect_guard (void)
   return NULL_TREE;
 }
 
+/* Implement TARGET_MEMTAG_TAG for AArch64. This is only available when
+   AARCH64_ISA_MEMTAG is available.  TODO Eventually we would just want
+   something to emit a loop of STG or ST2G.  Currently unimplemented.  */
+void
+aarch64_tag_memory (rtx tagged_start, poly_int64 address_offset, uint8_t tag_offset,
+		    rtx size)
+{
+  return;
+}
+
+void
+aarch64_gentag (rtx a, rtx b)
+{
+  emit_insn (gen_random_tag (a, b));
+}
+
 /* Implement TARGET_ASM_FILE_END for AArch64.  This adds the AArch64 GNU NOTE
    section at the end if needed.  */
 #define GNU_PROPERTY_AARCH64_FEATURE_1_AND	0xc0000000
@@ -19336,6 +19352,14 @@  aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_GET_MULTILIB_ABI_NAME
 #define TARGET_GET_MULTILIB_ABI_NAME aarch64_get_multilib_abi_name
 
+#ifdef AARCH64_ISA_MEMTAG
+#undef TARGET_MEMTAG_TAG
+#define TARGET_MEMTAG_TAG aarch64_tag_memory
+
+#undef TARGET_MEMTAG_GENTAG
+#define TARGET_MEMTAG_GENTAG aarch64_gentag
+#endif
+
 #if CHECKING_P
 #undef TARGET_RUN_TARGET_SELFTESTS
 #define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index cb0d3ae6bbf3ed439c7b27683726f4c30b04777d..70a927b6b70e4e498d962c5e9cf1344f2661e377 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -231,6 +231,7 @@  (define_c_enum "unspec" [
     UNSPEC_REV_SUBREG
     UNSPEC_SPECULATION_TRACKER
     UNSPEC_COPYSIGN
+    UNSPEC_GENTAG
 ])
 
 (define_c_enum "unspecv" [
@@ -409,6 +410,30 @@  (define_expand "cbranch<mode>4"
   "
 )
 
+;; TODO
+;;    Need to put in some sort of random tag here.
+;;    At the moment skipping it because I don't know what instructions to use.
+;;    (plus, starting at zero means that I can know what to expect during
+;;    development).
+(define_insn "random_tag"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_GENTAG))]
+  ""
+  ;; "AARCH64_ISA_MEMTAG"
+  "mov\\t%0, %1 // irg\\t%0, %1"
+)
+
+(define_insn "plain_offset_tag<mode>"
+  [(set (match_operand:GPI 0 "register_operand" "=r,r")
+    (addtag:GPI (match_operand:GPI 1 "register_operand" "r,r")
+     (match_operand:GPI 2 "aarch64_MTE_value_offset" "I,J")
+     (match_operand:GPI 3 "aarch64_MTE_tag_offset" "i,i")))]
+  "AARCH64_ISA_MEMTAG"
+  "@
+  add\\t%0, %1, %2     // addg\\t%0, %1, %2, %3
+  sub\\t%0, %1, #%n2   // subg\\t%0, %1, #%n2, %3"
+)
+
 (define_expand "addtag<mode>4"
   [(set (match_operand:GPI 0 "register_operand" "")
     (addtag:GPI (match_operand:GPI 1 "register_operand" "")
@@ -417,13 +442,41 @@  (define_expand "addtag<mode>4"
   ""
 {
   gcc_assert (can_create_pseudo_p ());
-  /* Simply add the two values as a constant and use that.  The adddi pattern
-     will handle the fact that the integer is out of range for ADD.  */
-  poly_int64 val = rtx_to_poly_int64 (operands[2]);
-  val += ((uint64_t)INTVAL(operands[3]) << 56);
-  emit_insn (gen_add<mode>3 (operands[0], operands[1],
-			     immed_wide_int_const (val, <MODE>mode)));
-  DONE;
+
+  if (!AARCH64_ISA_MEMTAG)
+    {
+      /* TODO
+	  Need to look into what the most efficient code sequence is.
+	  Right now just want something to work so I can bootstrap again and
+	  check for mistakes.
+
+	  This is a code sequence that would be emitted *many* times, so we
+	  want it as small as possible.
+	*/
+    /* Simply add the two values as a constant and use that.  The adddi
+       pattern will handle the fact that the integer is out of the
+       representable range.  */
+      poly_int64 val = rtx_to_poly_int64 (operands[2]);
+      val += ((uint64_t)INTVAL(operands[3]) << 56);
+      emit_insn (gen_add<mode>3 (operands[0], operands[1],
+				 immed_wide_int_const (val, <MODE>mode)));
+      DONE;
+    }
+  else if (!aarch64_MTE_value_offset (operands[2], <MODE>mode))
+    {
+      /* Must always be able to create a pseudo register.
+	 This pattern requires the ability to generate new pseudo registers,
+	 since there's no way to handle a constant that's too large other than
+         adding that constant into the current register.
+         If we had a too-large constant at the point where we can't create a
+         pseudo register, then using the basic `plus` pattern would end up with
+         a `plus` pattern that has a too-large constant, which would fail there
+         instead of here. */
+      rtx newreg = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_add<mode>3 (newreg, operands[1], operands[2]));
+      operands[2] = const0_rtx;
+      operands[1] = newreg;
+    }
 })
 
 (define_expand "cbranchcc4"
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index e2aa0290f833fbffedec1d8dab219f72eb17419e..0edfb6de7d1ddb466d114b7510e58499117061c5 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -141,6 +141,13 @@  (define_predicate "aarch64_MTE_tag_offset"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 16)")))
 
+;; TODO
+;;    Will have to change the constant from 4096 to 64 when switching to addg.
+(define_predicate "aarch64_MTE_value_offset"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), -4096, 4096)")))
+
+
 (define_predicate "aarch64_pluslong_strict_immedate"
   (and (match_operand 0 "aarch64_pluslong_immediate")
        (not (match_operand 0 "aarch64_plus_immediate"))))
diff --git a/gcc/defaults.h b/gcc/defaults.h
index b7534256119bd7834f2fa9d5f32863822d3b393a..b47fe6b35554091c9f3228169ad1f6ff4c692b14 100644
--- a/gcc/defaults.h
+++ b/gcc/defaults.h
@@ -1294,6 +1294,16 @@  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define EH_RETURN_HANDLER_RTX NULL
 #endif
 
+/* Indicate whether this backend has automatic access checks for tagged
+   pointers (i.e. HWASAN).  If so, HWASAN memory tagging can be implemented
+   with much less instrumentation.
+   If a backend advertises that it has this, it must also handle tagging
+   shadow memory itself by implementing TARGET_MEMTAG_TAG and avoiding the
+   background stack colour automatically in the addtag pattern.  */
+#ifndef HARDWARE_MEMORY_TAGGING
+#define HARDWARE_MEMORY_TAGGING 0
+#endif
+
 #ifdef GCC_INSN_FLAGS_H
 /* Dependent default target macro definitions
 
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 67d79a23799cd3057f7d91bd538c7ee76c836f82..7f0cc257a6634e3b0a2013b7fb9ffa4083f19038 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -2968,6 +2968,30 @@  This hook defines the machine mode to use for the boolean result of  conditional
 A target hook which lets a backend compute the set of pressure classes to  be used by those optimization passes which take register pressure into  account, as opposed to letting IRA compute them.  It returns the number of  register classes stored in the array @var{pressure_classes}.
 @end deftypefn
 
+@deftypefn {Target Hook} uint8_t TARGET_MEMTAG_TAG_SIZE ()
+Return the size in bits of a tag for this platform.
+@end deftypefn
+
+@deftypefn {Target Hook} uint8_t TARGET_MEMTAG_GRANULE_SIZE ()
+Return how many bytes in real memory each byte in shadow memory represents.
+I.e. one byte in shadow memory being colour 1 implies the associated
+targetm.memtag.granule_size () bytes in real memory must all be accessed by
+pointers tagged as 1.
+@end deftypefn
+
+@deftypefn {Target Hook} void TARGET_MEMTAG_COPY_TAG (rtx @var{to}, rtx @var{from})
+Emit insns to copy the tag in FROM to TO.
+@end deftypefn
+
+@deftypefn {Target Hook} void TARGET_MEMTAG_TAG (rtx @var{tagged_start}, poly_int64 @var{address_offset}, uint8_t @var{tag_offset}, rtx @var{size})
+This function should emit an RTX to colour memory.
+It's given arguments TAGGED_START, ADDRESS_OFFSET, TAG_OFFSET, SIZE, where
+TAGGED_START and SIZE are RTL expressions, ADDRESS_OFFSET is a poly_int64
+and TAG_OFFSET is a uint8_t.
+It should emit RTL to colour "shadow memory" for the relevant range with the
+colour of the tag it was given.
+@end deftypefn
+
 @deftypefn {Target Hook} void TARGET_MEMTAG_GENTAG (rtx @var{base}, rtx @var{untagged})
 Set the BASE argument to UNTAGGED with some random tag.
 This function is used to generate a tagged base for the current stack frame.
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index e1ec503befadb4061fbd3b95e55757fe22d33c39..6c77c09fca161dc3ade98d81b16c1b01af3c0bc7 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -2370,6 +2370,14 @@  in the reload pass.
 
 @hook TARGET_COMPUTE_PRESSURE_CLASSES
 
+@hook TARGET_MEMTAG_TAG_SIZE
+
+@hook TARGET_MEMTAG_GRANULE_SIZE
+
+@hook TARGET_MEMTAG_COPY_TAG
+
+@hook TARGET_MEMTAG_TAG
+
 @hook TARGET_MEMTAG_GENTAG
 
 @node Stack and Calling
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index c530fe8951c30987c874df83e74be6d058730134..a58a55ad59b2ad7a6b93e2f5f2bbb40b8da51c5e 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -503,9 +503,6 @@  expand_HWASAN_MARK (internal_fn, gcall *gc)
   gcc_checking_assert (TREE_CODE (base) == ADDR_EXPR);
   rtx base_rtx = expand_normal (base);
 
-  rtx tag = is_poison ? const0_rtx : hwasan_extract_tag (base_rtx);
-  rtx address = hwasan_create_untagged_base (base_rtx);
-
   tree len = gimple_call_arg (gc, 2);
   gcc_assert (tree_fits_shwi_p (len));
   unsigned HOST_WIDE_INT size_in_bytes = tree_to_shwi (len);
@@ -515,13 +512,24 @@  expand_HWASAN_MARK (internal_fn, gcall *gc)
   rtx size = gen_int_mode (size_in_bytes, Pmode);
 
   /* TODO Other options (i.e. inline options)  */
-  rtx func = init_one_libfunc ("__hwasan_tag_memory");
-  emit_library_call (func,
-      LCT_NORMAL,
-      VOIDmode,
-      address, ptr_mode,
-      tag, QImode,
-      size, ptr_mode);
+  if (! HARDWARE_MEMORY_TAGGING )
+    {
+      rtx func = init_one_libfunc ("__hwasan_tag_memory");
+      rtx address = hwasan_create_untagged_base (base_rtx);
+      rtx tag = is_poison ? const0_rtx : hwasan_extract_tag (base_rtx);
+      emit_library_call (func,
+			 LCT_NORMAL,
+			 VOIDmode,
+			 address, ptr_mode,
+			 tag, QImode,
+			 size, ptr_mode);
+    }
+  else
+    {
+      if (is_poison)
+	targetm.memtag.copy_tag (base_rtx, stack_pointer_rtx);
+      targetm.memtag.tag (base_rtx, 0, 0, size);
+    }
 }
 
 /* This should get expanded in the sanopt pass.  */
diff --git a/gcc/target.def b/gcc/target.def
index 5326cb070dec78f19bfe0844a9d5e50c69e7dcc1..e0c543254538c802e1e0a059e1a3e60a045a0cdf 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -6709,6 +6709,34 @@  HOOK_VECTOR_END (mode_switching)
 HOOK_VECTOR (TARGET_MEMTAG_, memtag)
 
 DEFHOOK
+(tag_size,
+ "Return the size in bits of a tag for this platform.",
+ uint8_t, (), default_memtag_tag_size)
+
+DEFHOOK
+(granule_size,
+ "Return how many bytes in real memory each byte in shadow memory represents.\n\
+I.e. one byte in shadow memory being colour 1 implies the associated\n\
+targetm.memtag.granule_size () bytes in real memory must all be accessed by\n\
+pointers tagged as 1.",
+uint8_t, (), default_memtag_granule_size)
+
+DEFHOOK
+(copy_tag,
+ "Emit insns to copy the tag in FROM to TO.",
+void, (rtx to, rtx from), default_memtag_copy_tag)
+
+DEFHOOK
+(tag,
+ "This function should emit an RTX to colour memory.\n\
+It's given arguments TAGGED_START, ADDRESS_OFFSET, TAG_OFFSET, SIZE, where\n\
+TAGGED_START and SIZE are RTL expressions, ADDRESS_OFFSET is a poly_int64\n\
+and TAG_OFFSET is a uint8_t.\n\
+It should emit RTL to colour \"shadow memory\" for the relevant range with the\n\
+colour of the tag it was given.",
+  void, (rtx tagged_start, poly_int64 address_offset, uint8_t tag_offset, rtx size), NULL)
+
+DEFHOOK
 (gentag,
  "Set the BASE argument to UNTAGGED with some random tag.\n\
 This function is used to generate a tagged base for the current stack frame.",
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 493ff3bb29263f8360bea0f7ead1092b4d0c646a..1c09bbad964b7705eea493a828d5906ad07481b6 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -288,5 +288,8 @@  extern rtx default_speculation_safe_value (machine_mode, rtx, rtx, rtx);
 extern void default_remove_extra_call_preserved_regs (rtx_insn *,
 						      HARD_REG_SET *);
 
+extern uint8_t default_memtag_tag_size ();
+extern uint8_t default_memtag_granule_size ();
 extern void default_memtag_gentag (rtx, rtx);
+extern void default_memtag_copy_tag (rtx, rtx);
 #endif /* GCC_TARGHOOKS_H */
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 58ec711ce81ee67d692e81d2616bd3422f6bd092..bf078295cbd9514778454cef775dd288fd502641 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -70,6 +70,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "varasm.h"
 #include "flags.h"
 #include "explow.h"
+#include "expmed.h"
 #include "calls.h"
 #include "expr.h"
 #include "output.h"
@@ -83,6 +84,8 @@  along with GCC; see the file COPYING3.  If not see
 #include "real.h"
 #include "langhooks.h"
 #include "sbitmap.h"
+#include "attribs.h"
+#include "asan.h"
 
 bool
 default_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
@@ -2391,6 +2394,18 @@  default_remove_extra_call_preserved_regs (rtx_insn *, HARD_REG_SET *)
 {
 }
 
+uint8_t
+default_memtag_tag_size ()
+{
+  return 4;
+}
+
+uint8_t
+default_memtag_granule_size ()
+{
+  return 1ULL << default_memtag_tag_size ();
+}
+
 void
 default_memtag_gentag (rtx base, rtx untagged)
 {
@@ -2402,5 +2417,40 @@  default_memtag_gentag (rtx base, rtx untagged)
   emit_move_insn (base, untagged);
 }
 
+void
+default_memtag_copy_tag (rtx to, rtx from)
+{
+  /* TODO: For AArch64 this can cause a problem in LRA if the `to`
+     RTX eventually resolves to being the stack pointer.
+     This happens because the instruction that gets emitted from
+     `store_bit_field` corresponds to a pattern that can't handle the stack
+     pointer and LRA can't figure out to use a temporary register in the `bfi`
+     instruction's place.
+
+     This doesn't cause a problem at the moment since there's currently no way
+     the stack pointer should be given to this function.  The hook is only used
+     when poisoning variables with HWASAN_MARK, and in that function the `to`
+     RTX should always be pointing to a tagged variable on the stack (since
+     the variable is tagged it can't be the stack pointer since that is
+     untagged).
+
+     Eventually we will be generating random tags as the "start" tag for each
+     frame.  When this happens we can no longer avoid the background colour at
+     compile time since we will not know what offset to avoid.
+     This will mean we no longer avoid a `tag_offset` of 0, and hence
+     `hwasan_with_tag` could emit simple PLUS statements.
+
+     When that happens, the last variable on the stack could very well have
+     a zero tag offset and somewhere else in the compiler could optimise that
+     to simply use the stack pointer.
+
+     That would trigger an ICE due to LRA being unable to reload the
+     `insv_regdi` pattern.
+
+     This is another thing that needs fixing, but I'll deal with it later.  */
+  rtx temp = hwasan_extract_tag (from);
+  store_bit_field (to, 8, 56, 0, 0,
+		   QImode, temp, false);
+}
 
 #include "gt-targhooks.h"