PowerPC64 ELFv2 -fpatchable-function-entry

Message ID 54f8546fad8fd8dd74d2b532d0eece816bc49e68.1620345817.git.amodra@gmail.com
State New
Headers show
Series
  • PowerPC64 ELFv2 -fpatchable-function-entry
Related show

Commit Message

Jonathan Wakely via Gcc-patches May 7, 2021, 2:49 a.m.
PowerPC64 ELFv2 dual entry point functions have a couple of problems
with -fpatchable-function-entry.  One is that the nops added after the
global entry land in the global entry code which is constrained to be
a power of two number of instructions, and zero global entry code has
special meaning for linkage.  The other is that the global entry code
isn't always used on function entry, and some uses of
-fpatchable-function-entry might want to affect all entries to the
function.  So this patch arranges to put one batch of nops before the
global entry, and the other after the local entry point.

	PR target/98125
	* config/rs6000/rs6000.c (rs6000_print_patchable_function_entry): New
	function.
	(TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY): Define.
	* config/rs6000/rs6000-logue.c: Include targhooks.h.
	(rs6000_output_function_prologue): Handle nops for
	-fpatchable-function-entry after local entry point.

Comments

Jonathan Wakely via Gcc-patches May 7, 2021, 1:46 p.m. | #1
On Fri, 2021-05-07 at 12:19 +0930, Alan Modra via Gcc-patches wrote:
> PowerPC64 ELFv2 dual entry point functions have a couple of problems

> with -fpatchable-function-entry.  One is that the nops added after the

> global entry land in the global entry code which is constrained to be

> a power of two number of instructions, and zero global entry code has

> special meaning for linkage.  The other is that the global entry code

> isn't always used on function entry, and some uses of

> -fpatchable-function-entry might want to affect all entries to the

> function.  So this patch arranges to put one batch of nops before the

> global entry, and the other after the local entry point.

> 


Hi,

Description good.  :-)

> 	PR target/98125

> 	* config/rs6000/rs6000.c (rs6000_print_patchable_function_entry): New

> 	function.

> 	(TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY): Define.

> 	* config/rs6000/rs6000-logue.c: Include targhooks.h.

> 	(rs6000_output_function_prologue): Handle nops for

> 	-fpatchable-function-entry after local entry point.

> 

> diff --git a/gcc/config/rs6000/rs6000-logue.c b/gcc/config/rs6000/rs6000-logue.c

> index b0ac183ceff..ffa3bb3dcf1 100644

> --- a/gcc/config/rs6000/rs6000-logue.c

> +++ b/gcc/config/rs6000/rs6000-logue.c

> @@ -51,6 +51,7 @@

>  #include "gstab.h"  /* for N_SLINE */

>  #include "dbxout.h" /* dbxout_ */

>  #endif

> +#include "targhooks.h"

> 

>  static int rs6000_ra_ever_killed (void);

>  static void is_altivec_return_reg (rtx, void *);

> @@ -3991,6 +3992,10 @@ rs6000_output_function_prologue (FILE *file)

>        fputs (",1\n", file);

>      }

> 

> +  int nops_after_entry = crtl->patch_area_size - crtl->patch_area_entry;

> +  if (nops_after_entry > 0)

> +    default_print_patchable_function_entry (file, nops_after_entry, false);

> +

>    /* Output -mprofile-kernel code.  This needs to be done here instead of

>       in output_function_profile since it must go after the ELFv2 ABI

>       local entry point.  */

ok


> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c

> index d43c36e7f1a..97f1b3e0674 100644

> --- a/gcc/config/rs6000/rs6000.c

> +++ b/gcc/config/rs6000/rs6000.c

> @@ -1404,6 +1404,10 @@ static const struct attribute_spec rs6000_attribute_table[] =

>  #undef TARGET_ASM_FUNCTION_EPILOGUE

>  #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

> 

> +#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY

> +#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \

> +  rs6000_print_patchable_function_entry

> +


ok

>  #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA

>  #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

> 

> @@ -14953,6 +14957,33 @@ rs6000_assemble_visibility (tree decl, int vis)

>  }

>  #endif

>  

> +/* Write NOPs into the asm outfile FILE around a function entry.  This

> +   routine may be called twice per function to put NOPs before and after

> +   the function entry.  If RECORD_P is true the location of the NOPs will

> +   be recorded by default_print_patchable_function_entry in a special

> +   object section called "__patchable_function_entries".  Disable output

> +   of any NOPs for the second call.  Those, if any, are output by

> +   rs6000_output_function_prologue.  This means that for ELFv2 any NOPs

> +   after the function entry are placed after the local entry point, not

> +   the global entry point.  NOPs after the entry may be found at

> +   record_loc + nops_before * 4 + local_entry_offset.  This holds true

> +   when nops_before is zero.  */

> +

> +static void

> +rs6000_print_patchable_function_entry (FILE *file,

> +				       unsigned HOST_WIDE_INT patch_area_size ATTRIBUTE_UNUSED,

> +				       bool record_p)

> +{

> +  /* Always call default_print_patchable_function_entry when RECORD_P in


when RECORD_P is true?  (implied, but I like to be specific..)
> +     order to output the location of the NOPs, but use the size of the

> +     area before the entry on both possible calls.  If RECORD_P is true

> +     on the second call then the area before the entry was zero size and

> +     thus no NOPs will be output.  */

> +  if (record_p)

> +    default_print_patchable_function_entry (file, crtl->patch_area_entry,

> +					    record_p);

> +}


ok.

lgtm,thx
-Will

> +

>  enum rtx_code

>  rs6000_reverse_condition (machine_mode mode, enum rtx_code code)

>  {
Segher Boessenkool May 10, 2021, 9:39 p.m. | #2
Hi!

On Fri, May 07, 2021 at 12:19:52PM +0930, Alan Modra wrote:
> PowerPC64 ELFv2 dual entry point functions have a couple of problems

> with -fpatchable-function-entry.  One is that the nops added after the

> global entry land in the global entry code which is constrained to be

> a power of two number of instructions, and zero global entry code has

> special meaning for linkage.  The other is that the global entry code

> isn't always used on function entry, and some uses of

> -fpatchable-function-entry might want to affect all entries to the

> function.  So this patch arranges to put one batch of nops before the

> global entry, and the other after the local entry point.


Wow ugly.  Not worse than patchable-funtion-enbtry itself I suppose :-)

> 	* config/rs6000/rs6000-logue.c: Include targhooks.h.


Huh, did it not already do that?!  Hrm, all the other hooks seem to be
called via rs6000.c currently.  But you do the same, so why do you need
to include the header in rs6000-logue.c?

> +/* Write NOPs into the asm outfile FILE around a function entry.  This

> +   routine may be called twice per function to put NOPs before and after

> +   the function entry.  If RECORD_P is true the location of the NOPs will

> +   be recorded by default_print_patchable_function_entry in a special

> +   object section called "__patchable_function_entries".  Disable output

> +   of any NOPs for the second call.  Those, if any, are output by

> +   rs6000_output_function_prologue.  This means that for ELFv2 any NOPs

> +   after the function entry are placed after the local entry point, not

> +   the global entry point.  NOPs after the entry may be found at

> +   record_loc + nops_before * 4 + local_entry_offset.  This holds true

> +   when nops_before is zero.  */

> +

> +static void

> +rs6000_print_patchable_function_entry (FILE *file,

> +				       unsigned HOST_WIDE_INT patch_area_size ATTRIBUTE_UNUSED,

> +				       bool record_p)


It is not a predicate.  Do not call it _p please.  Yes, I know the
generic code calls is this.  That needs fixing.

A better name (from the viewpoint of callers, which is what matters!)
might be first_time or similar?  Or something that really says what it
*does*?

> +{

> +  /* Always call default_print_patchable_function_entry when RECORD_P in

> +     order to output the location of the NOPs, but use the size of the

> +     area before the entry on both possible calls.  If RECORD_P is true

> +     on the second call then the area before the entry was zero size and

> +     thus no NOPs will be output.  */

> +  if (record_p)

> +    default_print_patchable_function_entry (file, crtl->patch_area_entry,

> +					    record_p);

> +}


That is trickiness that will break later.

Can you make this less hacky please?  Changing the generic code is just
fine as well, it needs some love.


Segher
Jonathan Wakely via Gcc-patches May 18, 2021, 10:13 a.m. | #3
On Mon, May 10, 2021 at 04:39:55PM -0500, Segher Boessenkool wrote:
> Hi!

> 

> On Fri, May 07, 2021 at 12:19:52PM +0930, Alan Modra wrote:

> > PowerPC64 ELFv2 dual entry point functions have a couple of problems

> > with -fpatchable-function-entry.  One is that the nops added after the

> > global entry land in the global entry code which is constrained to be

> > a power of two number of instructions, and zero global entry code has

> > special meaning for linkage.  The other is that the global entry code

> > isn't always used on function entry, and some uses of

> > -fpatchable-function-entry might want to affect all entries to the

> > function.  So this patch arranges to put one batch of nops before the

> > global entry, and the other after the local entry point.

> 

> Wow ugly.  Not worse than patchable-funtion-enbtry itself I suppose :-)

> 

> > 	* config/rs6000/rs6000-logue.c: Include targhooks.h.

> 

> Huh, did it not already do that?!  Hrm, all the other hooks seem to be

> called via rs6000.c currently.  But you do the same, so why do you need

> to include the header in rs6000-logue.c?


For the new call to default_print_patchable_function_entry in
rs6000_output_function_prologue.

> 

> > +/* Write NOPs into the asm outfile FILE around a function entry.  This

> > +   routine may be called twice per function to put NOPs before and after

> > +   the function entry.  If RECORD_P is true the location of the NOPs will

> > +   be recorded by default_print_patchable_function_entry in a special

> > +   object section called "__patchable_function_entries".  Disable output

> > +   of any NOPs for the second call.  Those, if any, are output by

> > +   rs6000_output_function_prologue.  This means that for ELFv2 any NOPs

> > +   after the function entry are placed after the local entry point, not

> > +   the global entry point.  NOPs after the entry may be found at

> > +   record_loc + nops_before * 4 + local_entry_offset.  This holds true

> > +   when nops_before is zero.  */

> > +

> > +static void

> > +rs6000_print_patchable_function_entry (FILE *file,

> > +				       unsigned HOST_WIDE_INT patch_area_size ATTRIBUTE_UNUSED,

> > +				       bool record_p)

> 

> It is not a predicate.  Do not call it _p please.  Yes, I know the

> generic code calls is this.  That needs fixing.

> 

> A better name (from the viewpoint of callers, which is what matters!)

> might be first_time or similar?  Or something that really says what it

> *does*?

> 

> > +{

> > +  /* Always call default_print_patchable_function_entry when RECORD_P in

> > +     order to output the location of the NOPs, but use the size of the

> > +     area before the entry on both possible calls.  If RECORD_P is true

> > +     on the second call then the area before the entry was zero size and

> > +     thus no NOPs will be output.  */

> > +  if (record_p)

> > +    default_print_patchable_function_entry (file, crtl->patch_area_entry,

> > +					    record_p);

> > +}

> 

> That is trickiness that will break later.


There is a fine line between trickiness and elegance.  Given the
current available crtl variables dealing with the patch area and the
current patchable_function_entry parameters, any supposedly "less
hacky" but correct expression will simplify down to what I wrote
here.

-- 
Alan Modra
Australia Development Lab, IBM
Segher Boessenkool May 18, 2021, 10:48 a.m. | #4
Hi!

On Tue, May 18, 2021 at 07:43:49PM +0930, Alan Modra wrote:
> On Mon, May 10, 2021 at 04:39:55PM -0500, Segher Boessenkool wrote:

> > Huh, did it not already do that?!  Hrm, all the other hooks seem to be

> > called via rs6000.c currently.  But you do the same, so why do you need

> > to include the header in rs6000-logue.c?

> 

> For the new call to default_print_patchable_function_entry in

> rs6000_output_function_prologue.


I see, thanks.

> > > +{

> > > +  /* Always call default_print_patchable_function_entry when RECORD_P in

> > > +     order to output the location of the NOPs, but use the size of the

> > > +     area before the entry on both possible calls.  If RECORD_P is true

> > > +     on the second call then the area before the entry was zero size and

> > > +     thus no NOPs will be output.  */

> > > +  if (record_p)

> > > +    default_print_patchable_function_entry (file, crtl->patch_area_entry,

> > > +					    record_p);

> > > +}

> > 

> > That is trickiness that will break later.

> 

> There is a fine line between trickiness and elegance.  Given the

> current available crtl variables dealing with the patch area and the

> current patchable_function_entry parameters, any supposedly "less

> hacky" but correct expression will simplify down to what I wrote

> here.


I wrote a bit later:

> > Can you make this less hacky please?  Changing the generic code is just

> > fine as well, it needs some love.


In effect making a callback / hook without making that explicit is bad
for maintainability.  We are in stage 1, now is a good time for
infrastructure changes.


Segher
Jonathan Wakely via Gcc-patches May 19, 2021, 12:09 p.m. | #5
On Tue, May 18, 2021 at 05:48:40AM -0500, Segher Boessenkool wrote:
> I wrote a bit later:

> 

> > > Can you make this less hacky please?  Changing the generic code is just

> > > fine as well, it needs some love.

> 

> In effect making a callback / hook without making that explicit is bad

> for maintainability.  We are in stage 1, now is a good time for

> infrastructure changes.


Yes, perhaps I could do that, and possibly even write a patch that is
accepted.  I'm not going to though, because I made a decision a little
while ago that I'm not going to contribute new gcc work.  This one was
just flushing some patches that I wrote a while ago.

-- 
Alan Modra
Australia Development Lab, IBM
Segher Boessenkool May 21, 2021, 10:21 a.m. | #6
On Wed, May 19, 2021 at 09:39:30PM +0930, Alan Modra wrote:
> On Tue, May 18, 2021 at 05:48:40AM -0500, Segher Boessenkool wrote:

> > I wrote a bit later:

> > 

> > > > Can you make this less hacky please?  Changing the generic code is just

> > > > fine as well, it needs some love.

> > 

> > In effect making a callback / hook without making that explicit is bad

> > for maintainability.  We are in stage 1, now is a good time for

> > infrastructure changes.

> 

> Yes, perhaps I could do that, and possibly even write a patch that is

> accepted.  I'm not going to though, because I made a decision a little

> while ago that I'm not going to contribute new gcc work.  This one was

> just flushing some patches that I wrote a while ago.


Okay, thanks.  I opened PR100714 to keep track of the work that will
need to be done.

Cheers,


Segher

Patch

diff --git a/gcc/config/rs6000/rs6000-logue.c b/gcc/config/rs6000/rs6000-logue.c
index b0ac183ceff..ffa3bb3dcf1 100644
--- a/gcc/config/rs6000/rs6000-logue.c
+++ b/gcc/config/rs6000/rs6000-logue.c
@@ -51,6 +51,7 @@ 
 #include "gstab.h"  /* for N_SLINE */
 #include "dbxout.h" /* dbxout_ */
 #endif
+#include "targhooks.h"
 
 static int rs6000_ra_ever_killed (void);
 static void is_altivec_return_reg (rtx, void *);
@@ -3991,6 +3992,10 @@  rs6000_output_function_prologue (FILE *file)
       fputs (",1\n", file);
     }
 
+  int nops_after_entry = crtl->patch_area_size - crtl->patch_area_entry;
+  if (nops_after_entry > 0)
+    default_print_patchable_function_entry (file, nops_after_entry, false);
+
   /* Output -mprofile-kernel code.  This needs to be done here instead of
      in output_function_profile since it must go after the ELFv2 ABI
      local entry point.  */
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index d43c36e7f1a..97f1b3e0674 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1404,6 +1404,10 @@  static const struct attribute_spec rs6000_attribute_table[] =
 #undef TARGET_ASM_FUNCTION_EPILOGUE
 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
 
+#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
+#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
+  rs6000_print_patchable_function_entry
+
 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
 
@@ -14953,6 +14957,33 @@  rs6000_assemble_visibility (tree decl, int vis)
 }
 #endif
 
+/* Write NOPs into the asm outfile FILE around a function entry.  This
+   routine may be called twice per function to put NOPs before and after
+   the function entry.  If RECORD_P is true the location of the NOPs will
+   be recorded by default_print_patchable_function_entry in a special
+   object section called "__patchable_function_entries".  Disable output
+   of any NOPs for the second call.  Those, if any, are output by
+   rs6000_output_function_prologue.  This means that for ELFv2 any NOPs
+   after the function entry are placed after the local entry point, not
+   the global entry point.  NOPs after the entry may be found at
+   record_loc + nops_before * 4 + local_entry_offset.  This holds true
+   when nops_before is zero.  */
+
+static void
+rs6000_print_patchable_function_entry (FILE *file,
+				       unsigned HOST_WIDE_INT patch_area_size ATTRIBUTE_UNUSED,
+				       bool record_p)
+{
+  /* Always call default_print_patchable_function_entry when RECORD_P in
+     order to output the location of the NOPs, but use the size of the
+     area before the entry on both possible calls.  If RECORD_P is true
+     on the second call then the area before the entry was zero size and
+     thus no NOPs will be output.  */
+  if (record_p)
+    default_print_patchable_function_entry (file, crtl->patch_area_entry,
+					    record_p);
+}
+
 enum rtx_code
 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
 {