x86: Skip ISA check for always_inline in system headers

Message ID 20210324172344.153078-1-hjl.tools@gmail.com
State New
Headers show
Series
  • x86: Skip ISA check for always_inline in system headers
Related show

Commit Message

H.J. Lu via Gcc-patches March 24, 2021, 5:23 p.m.
For always_inline in system headers, we don't know if caller's ISAs are
compatible with callee's ISAs until much later.  Skip ISA check for
always_inline in system headers if caller has target attribute.

gcc/

	PR target/98209
	PR target/99744
	* config/i386/i386.c (ix86_can_inline_p): Don't check ISA for
	always_inline in system headers.

gcc/testsuite/

	PR target/98209
	PR target/99744
	* gcc.target/i386/pr98209.c: New test.
	* gcc.target/i386/pr99744-1.c: Likewise.
	* gcc.target/i386/pr99744-2.c: Likewise.
---
 gcc/config/i386/i386.c                    | 24 +++++++++++++++--------
 gcc/testsuite/gcc.target/i386/pr98209.c   | 13 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr99744-1.c | 16 +++++++++++++++
 gcc/testsuite/gcc.target/i386/pr99744-2.c | 13 ++++++++++++
 4 files changed, 58 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr98209.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-2.c

-- 
2.30.2

Comments

H.J. Lu via Gcc-patches March 25, 2021, 6:59 a.m. | #1
On Wed, Mar 24, 2021 at 6:23 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>

> For always_inline in system headers, we don't know if caller's ISAs are

> compatible with callee's ISAs until much later.  Skip ISA check for

> always_inline in system headers if caller has target attribute.

>

> gcc/

>

>         PR target/98209

>         PR target/99744

>         * config/i386/i386.c (ix86_can_inline_p): Don't check ISA for

>         always_inline in system headers.

>

> gcc/testsuite/

>

>         PR target/98209

>         PR target/99744

>         * gcc.target/i386/pr98209.c: New test.

>         * gcc.target/i386/pr99744-1.c: Likewise.

>         * gcc.target/i386/pr99744-2.c: Likewise.


LGTM.

Thanks,
Uros.

> ---

>  gcc/config/i386/i386.c                    | 24 +++++++++++++++--------

>  gcc/testsuite/gcc.target/i386/pr98209.c   | 13 ++++++++++++

>  gcc/testsuite/gcc.target/i386/pr99744-1.c | 16 +++++++++++++++

>  gcc/testsuite/gcc.target/i386/pr99744-2.c | 13 ++++++++++++

>  4 files changed, 58 insertions(+), 8 deletions(-)

>  create mode 100644 gcc/testsuite/gcc.target/i386/pr98209.c

>  create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-1.c

>  create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-2.c

>

> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c

> index 7c41302c75b..1b4567e34ba 100644

> --- a/gcc/config/i386/i386.c

> +++ b/gcc/config/i386/i386.c

> @@ -578,21 +578,29 @@ ix86_can_inline_p (tree caller, tree callee)

>         && lookup_attribute ("always_inline",

>                             DECL_ATTRIBUTES (callee)));

>

> +  /* NB: Skip ISA check for always_inline in system headers if caller

> +     has target attribute.  */

> +  bool skip_isa_check = (always_inline

> +                        && caller_tree != target_option_default_node

> +                        && DECL_IN_SYSTEM_HEADER (callee));

> +

>    cgraph_node *callee_node = cgraph_node::get (callee);

>    /* Callee's isa options should be a subset of the caller's, i.e. a SSE4

>       function can inline a SSE2 function but a SSE2 function can't inline

>       a SSE4 function.  */

> -  if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)

> -       != callee_opts->x_ix86_isa_flags)

> -      || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)

> -         != callee_opts->x_ix86_isa_flags2))

> +  if (!skip_isa_check

> +      && (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)

> +          != callee_opts->x_ix86_isa_flags)

> +         || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)

> +             != callee_opts->x_ix86_isa_flags2)))

>      ret = false;

>

>    /* See if we have the same non-isa options.  */

> -  else if ((!always_inline

> -           && caller_opts->x_target_flags != callee_opts->x_target_flags)

> -          || (caller_opts->x_target_flags & ~always_inline_safe_mask)

> -              != (callee_opts->x_target_flags & ~always_inline_safe_mask))

> +  else if (!skip_isa_check

> +          && ((!always_inline

> +               && caller_opts->x_target_flags != callee_opts->x_target_flags)

> +              || ((caller_opts->x_target_flags & ~always_inline_safe_mask)

> +                  != (callee_opts->x_target_flags & ~always_inline_safe_mask))))

>      ret = false;

>

>    /* See if arch, tune, etc. are the same.  */

> diff --git a/gcc/testsuite/gcc.target/i386/pr98209.c b/gcc/testsuite/gcc.target/i386/pr98209.c

> new file mode 100644

> index 00000000000..4566d4c7fc3

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/pr98209.c

> @@ -0,0 +1,13 @@

> +/* { dg-do run { target *-*-linux* } } */

> +/* { dg-options "-O2 -D_FORTIFY_SOURCE=2" } */

> +

> +#include <stdio.h>

> +

> +extern int main(int argc, char** argv)

> +  __attribute__ ((__target__ ("no-sse,no-mmx")));

> +

> +int main(int argc, char** argv)

> +{

> +  printf ("hello!\n");

> +  return 0;

> +}

> diff --git a/gcc/testsuite/gcc.target/i386/pr99744-1.c b/gcc/testsuite/gcc.target/i386/pr99744-1.c

> new file mode 100644

> index 00000000000..92535923a56

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/pr99744-1.c

> @@ -0,0 +1,16 @@

> +/* { dg-do compile } */

> +/* { dg-options "-O0" } */

> +

> +#include <x86intrin.h>

> +

> +extern unsigned long long int curr_deadline;

> +extern void bar (void);

> +

> +__attribute__ ((target("general-regs-only")))

> +void

> +foo (void)

> +{

> +  if (__rdtsc () < curr_deadline)

> +    return;

> +  bar ();

> +}

> diff --git a/gcc/testsuite/gcc.target/i386/pr99744-2.c b/gcc/testsuite/gcc.target/i386/pr99744-2.c

> new file mode 100644

> index 00000000000..4bb2a6bb6dc

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/pr99744-2.c

> @@ -0,0 +1,13 @@

> +/* { dg-do compile { target { ! ia32 } } } */

> +/* { dg-options "-O2" } */

> +

> +#include <x86intrin.h>

> +

> +extern __m128i x, y;

> +

> +__attribute__ ((target("general-regs-only")))

> +void

> +foo (void)

> +{

> +  x = _mm_move_epi64 (y); /* { dg-error "SSE register return with SSE disabled" } */

> +}

> --

> 2.30.2

>
H.J. Lu via Gcc-patches March 25, 2021, 12:54 p.m. | #2
On Wed, Mar 24, 2021 at 10:23:44AM -0700, H.J. Lu via Gcc-patches wrote:
> For always_inline in system headers, we don't know if caller's ISAs are

> compatible with callee's ISAs until much later.  Skip ISA check for

> always_inline in system headers if caller has target attribute.

> 

> gcc/

> 

> 	PR target/98209

> 	PR target/99744

> 	* config/i386/i386.c (ix86_can_inline_p): Don't check ISA for

> 	always_inline in system headers.


Aren't *intrin.h system headers too?
Doesn't this mean we can now inline all the intrinsics if the caller doesn't
have the default target options and doesn't have the needed ISA?

Consider e.g.
#include <x86intrin.h>

#ifdef FOO
void
foo (__m512 *p)
{
  *p = _mm512_setzero_ps ();
}
#else
__attribute__((target ("avx"))) void
bar (__m512 *p)
{
  *p = _mm512_setzero_ps ();
}
#endif

#ifdef FOO
void
baz (__m512d *p, __m512d *q, int mask)
{
  *p = _mm512_mask_mov_pd (*p, mask, *q);
}
#else
__attribute__((target ("avx"))) void
qux (__m512d *p, __m512d *q, int mask)
{
  *p = _mm512_mask_mov_pd (*p, mask, *q);
}
#endif

If you compile this without your patch, you'll get
inlining failed in call to ‘always_inline’ ‘_mm512_setzero_ps’: target specific option mismatch
errors in all cases (always the first one), but with your patch
the _mm512_setzero_ps (); gets through completely and on the mask move
one gets instead
‘__builtin_ia32_movapd512_mask’ needs isa option -mavx512f
error and
the ABI for passing parameters with 64-byte alignment has changed in GCC 4.6
note.  IMNSHO this change needs to be reverted and we need to come up with a
way (some attribute) to say explicitly whether we can or can't inline that
always_inline function despite target specific option mismatches.

	Jakub
H.J. Lu via Gcc-patches March 25, 2021, 1:02 p.m. | #3
On Thu, Mar 25, 2021 at 1:54 PM Jakub Jelinek <jakub@redhat.com> wrote:
>

> On Wed, Mar 24, 2021 at 10:23:44AM -0700, H.J. Lu via Gcc-patches wrote:

> > For always_inline in system headers, we don't know if caller's ISAs are

> > compatible with callee's ISAs until much later.  Skip ISA check for

> > always_inline in system headers if caller has target attribute.

> >

> > gcc/

> >

> >       PR target/98209

> >       PR target/99744

> >       * config/i386/i386.c (ix86_can_inline_p): Don't check ISA for

> >       always_inline in system headers.

>

> Aren't *intrin.h system headers too?


I was under impression that they are not, since they live outside of
/usr/include.

> Doesn't this mean we can now inline all the intrinsics if the caller doesn't

> have the default target options and doesn't have the needed ISA?


No, we should not.


> Consider e.g.

> #include <x86intrin.h>

>

> #ifdef FOO

> void

> foo (__m512 *p)

> {

>   *p = _mm512_setzero_ps ();

> }

> #else

> __attribute__((target ("avx"))) void

> bar (__m512 *p)

> {

>   *p = _mm512_setzero_ps ();

> }

> #endif

>

> #ifdef FOO

> void

> baz (__m512d *p, __m512d *q, int mask)

> {

>   *p = _mm512_mask_mov_pd (*p, mask, *q);

> }

> #else

> __attribute__((target ("avx"))) void

> qux (__m512d *p, __m512d *q, int mask)

> {

>   *p = _mm512_mask_mov_pd (*p, mask, *q);

> }

> #endif

>

> If you compile this without your patch, you'll get

> inlining failed in call to ‘always_inline’ ‘_mm512_setzero_ps’: target specific option mismatch

> errors in all cases (always the first one), but with your patch

> the _mm512_setzero_ps (); gets through completely and on the mask move

> one gets instead

> ‘__builtin_ia32_movapd512_mask’ needs isa option -mavx512f

> error and

> the ABI for passing parameters with 64-byte alignment has changed in GCC 4.6

> note.  IMNSHO this change needs to be reverted and we need to come up with a

> way (some attribute) to say explicitly whether we can or can't inline that

> always_inline function despite target specific option mismatches.


If the patch does not differentiate between system and user headers,
then please revert it.

Uros.
H.J. Lu via Gcc-patches March 25, 2021, 1:11 p.m. | #4
On Thu, Mar 25, 2021 at 2:03 PM Uros Bizjak via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>

> On Thu, Mar 25, 2021 at 1:54 PM Jakub Jelinek <jakub@redhat.com> wrote:

> >

> > On Wed, Mar 24, 2021 at 10:23:44AM -0700, H.J. Lu via Gcc-patches wrote:

> > > For always_inline in system headers, we don't know if caller's ISAs are

> > > compatible with callee's ISAs until much later.  Skip ISA check for

> > > always_inline in system headers if caller has target attribute.

> > >

> > > gcc/

> > >

> > >       PR target/98209

> > >       PR target/99744

> > >       * config/i386/i386.c (ix86_can_inline_p): Don't check ISA for

> > >       always_inline in system headers.

> >

> > Aren't *intrin.h system headers too?

>

> I was under impression that they are not, since they live outside of

> /usr/include.

>

> > Doesn't this mean we can now inline all the intrinsics if the caller doesn't

> > have the default target options and doesn't have the needed ISA?

>

> No, we should not.

>

>

> > Consider e.g.

> > #include <x86intrin.h>

> >

> > #ifdef FOO

> > void

> > foo (__m512 *p)

> > {

> >   *p = _mm512_setzero_ps ();

> > }

> > #else

> > __attribute__((target ("avx"))) void

> > bar (__m512 *p)

> > {

> >   *p = _mm512_setzero_ps ();

> > }

> > #endif

> >

> > #ifdef FOO

> > void

> > baz (__m512d *p, __m512d *q, int mask)

> > {

> >   *p = _mm512_mask_mov_pd (*p, mask, *q);

> > }

> > #else

> > __attribute__((target ("avx"))) void

> > qux (__m512d *p, __m512d *q, int mask)

> > {

> >   *p = _mm512_mask_mov_pd (*p, mask, *q);

> > }

> > #endif

> >

> > If you compile this without your patch, you'll get

> > inlining failed in call to ‘always_inline’ ‘_mm512_setzero_ps’: target specific option mismatch

> > errors in all cases (always the first one), but with your patch

> > the _mm512_setzero_ps (); gets through completely and on the mask move

> > one gets instead

> > ‘__builtin_ia32_movapd512_mask’ needs isa option -mavx512f

> > error and

> > the ABI for passing parameters with 64-byte alignment has changed in GCC 4.6

> > note.  IMNSHO this change needs to be reverted and we need to come up with a

> > way (some attribute) to say explicitly whether we can or can't inline that

> > always_inline function despite target specific option mismatches.

>

> If the patch does not differentiate between system and user headers,

> then please revert it.


Note that my suggestion to give leeway to always_inline annotated functions
wasn't restricted to system headers but would apply generally with the logic
that if people use always_inline then they should better make sure that
such inlining is valid - after all always_inline is an attribute that should be
used if inlining is required for functional correctness, it is _not_
an optimization
hint.

Now IIRC there were some cases where we end up with obscure ICEs when
using the "wrong" intrinsics in a function context that has certain ISA features
disabled.  But then I might misremember.

Richard.

> Uros.
H.J. Lu via Gcc-patches March 25, 2021, 1:13 p.m. | #5
On Thu, Mar 25, 2021 at 02:02:16PM +0100, Uros Bizjak wrote:
> > Aren't *intrin.h system headers too?

> 

> I was under impression that they are not, since they live outside of

> /usr/include.


Yes, they aren't in /usr/include, but they are still system headers.
If I preprocess something that #include <x86intrin.h> with my system
compiler, I get:
# 1 "/usr/lib/gcc/x86_64-redhat-linux/10/include/x86intrin.h" 1 3 4
where that 3 stands for system header.

My preference would be a new attribute that for always_inline says it is ok
to inline even when there are target or optimization mismatches (and
effectively get the target/optimization options from the caller for the
body) and start using that new attribute in glibc headers (for
-D_FORTIFY_SOURCE wrappers at least, those really don't have any target
dependencies nor anything floating point that might e.g. depend on
-ffast-math etc.) and perhaps the __rdtsc and similar intrinsics in
*intrin.h.
Even that can be a can of worms, because some target or optimization options
are used already in the FE processing or during the GIMPLE passes before
inlining, and while it might work somehow if e.g. during those passes we
treat it like -ffast-math and after inlining not like that or vice versa,
there is a risk that we e.g. fold/lower something with some assumptions and
later assume that (with different options) such constructs can't appear in
the IL.

> If the patch does not differentiate between system and user headers,

> then please revert it.


It does but intrinsic headers are system headers.

	Jakub
H.J. Lu via Gcc-patches March 25, 2021, 1:21 p.m. | #6
On Thu, Mar 25, 2021 at 2:14 PM Jakub Jelinek via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>

> On Thu, Mar 25, 2021 at 02:02:16PM +0100, Uros Bizjak wrote:

> > > Aren't *intrin.h system headers too?

> >

> > I was under impression that they are not, since they live outside of

> > /usr/include.

>

> Yes, they aren't in /usr/include, but they are still system headers.

> If I preprocess something that #include <x86intrin.h> with my system

> compiler, I get:

> # 1 "/usr/lib/gcc/x86_64-redhat-linux/10/include/x86intrin.h" 1 3 4

> where that 3 stands for system header.

>

> My preference would be a new attribute that for always_inline says it is ok

> to inline even when there are target or optimization mismatches (and

> effectively get the target/optimization options from the caller for the

> body) and start using that new attribute in glibc headers (for

> -D_FORTIFY_SOURCE wrappers at least, those really don't have any target

> dependencies nor anything floating point that might e.g. depend on

> -ffast-math etc.) and perhaps the __rdtsc and similar intrinsics in

> *intrin.h.


Err, but _which_ mismatches do you ignore with such new attribute?
If I have __rdtsc and compile that into a -mno-rdtsc unit/function would
that be OK?

IMHO always-inline obviously means "ignore any option mismatches", the
user has to make sure to not introduce incompatible caller/callee pairs, if
the pair is incompatible the unit is ill-formed (we can't inline, and thus
violate always-inline) - we might want to document "no diagnostic required"
here, eventually even "behavior is undefined".

Everything else is just putting a new label on exactly the same problem.

Either a function is always-inline or it is not.  It looks to me the x86
intrinsic functions are not?  At -O0 we have many of them as macros and
thus "inline" them with any kind of option mismatch as well.  Those are
likely the cases where inlining is required - ignoring option mismatches
there is thus obviously OK.

Richard.

> Even that can be a can of worms, because some target or optimization options

> are used already in the FE processing or during the GIMPLE passes before

> inlining, and while it might work somehow if e.g. during those passes we

> treat it like -ffast-math and after inlining not like that or vice versa,

> there is a risk that we e.g. fold/lower something with some assumptions and

> later assume that (with different options) such constructs can't appear in

> the IL.

>

> > If the patch does not differentiate between system and user headers,

> > then please revert it.

>

> It does but intrinsic headers are system headers.

>

>         Jakub

>
H.J. Lu via Gcc-patches March 25, 2021, 1:21 p.m. | #7
On Thu, Mar 25, 2021 at 6:13 AM Jakub Jelinek <jakub@redhat.com> wrote:
>

> On Thu, Mar 25, 2021 at 02:02:16PM +0100, Uros Bizjak wrote:

> > > Aren't *intrin.h system headers too?

> >

> > I was under impression that they are not, since they live outside of

> > /usr/include.

>

> Yes, they aren't in /usr/include, but they are still system headers.

> If I preprocess something that #include <x86intrin.h> with my system

> compiler, I get:

> # 1 "/usr/lib/gcc/x86_64-redhat-linux/10/include/x86intrin.h" 1 3 4

> where that 3 stands for system header.

>

> My preference would be a new attribute that for always_inline says it is ok

> to inline even when there are target or optimization mismatches (and

> effectively get the target/optimization options from the caller for the

> body) and start using that new attribute in glibc headers (for

> -D_FORTIFY_SOURCE wrappers at least, those really don't have any target

> dependencies nor anything floating point that might e.g. depend on

> -ffast-math etc.) and perhaps the __rdtsc and similar intrinsics in

> *intrin.h.

> Even that can be a can of worms, because some target or optimization options

> are used already in the FE processing or during the GIMPLE passes before

> inlining, and while it might work somehow if e.g. during those passes we

> treat it like -ffast-math and after inlining not like that or vice versa,

> there is a risk that we e.g. fold/lower something with some assumptions and

> later assume that (with different options) such constructs can't appear in

> the IL.

>

> > If the patch does not differentiate between system and user headers,

> > then please revert it.

>

> It does but intrinsic headers are system headers.

>

>         Jakub

>


Before my patch:

[hjl@gnu-cfl-2 gcc]$ cat y.c
#include <x86intrin.h>

#ifdef FOO
void
foo (__m512 *p)
{
  *p = _mm512_setzero_ps ();
}
#else
__attribute__((target ("avx"))) void
bar (__m512 *p)
{
  *p = _mm512_setzero_ps ();
}
#endif
[hjl@gnu-cfl-2 gcc]$ gcc -O2 -S y.c
y.c: In function ‘bar’:
y.c:13:6: warning: AVX512F vector return without AVX512F enabled
changes the ABI [-Wpsabi]
   13 |   *p = _mm512_setzero_ps ();
      |   ~~~^~~~~~~~~~~~~~~~~~~~~~
In file included from
/usr/lib/gcc/x86_64-redhat-linux/10/include/immintrin.h:55,
                 from
/usr/lib/gcc/x86_64-redhat-linux/10/include/x86intrin.h:32,
                 from y.c:1:
/usr/lib/gcc/x86_64-redhat-linux/10/include/avx512fintrin.h:310:1:
error: inlining failed in call to ‘always_inline’ ‘_mm512_setzero_ps’:
target specific option mismatch
  310 | _mm512_setzero_ps (void)
      | ^~~~~~~~~~~~~~~~~
y.c:13:8: note: called from here
   13 |   *p = _mm512_setzero_ps ();
      |        ^~~~~~~~~~~~~~~~~~~~
[hjl@gnu-cfl-2 gcc]$ gcc -O2 -S y.c -DFOO
y.c: In function ‘foo’:
y.c:7:6: warning: AVX512F vector return without AVX512F enabled
changes the ABI [-Wpsabi]
    7 |   *p = _mm512_setzero_ps ();
      |   ~~~^~~~~~~~~~~~~~~~~~~~~~
In file included from
/usr/lib/gcc/x86_64-redhat-linux/10/include/immintrin.h:55,
                 from
/usr/lib/gcc/x86_64-redhat-linux/10/include/x86intrin.h:32,
                 from y.c:1:
/usr/lib/gcc/x86_64-redhat-linux/10/include/avx512fintrin.h:310:1:
error: inlining failed in call to ‘always_inline’ ‘_mm512_setzero_ps’:
target specific option mismatch
  310 | _mm512_setzero_ps (void)
      | ^~~~~~~~~~~~~~~~~
y.c:7:8: note: called from here
    7 |   *p = _mm512_setzero_ps ();
      |        ^~~~~~~~~~~~~~~~~~~~
[hjl@gnu-cfl-2 gcc]$

After my patch,

[hjl@gnu-cfl-2 gcc]$ ./xgcc -B./ -O2 -S y.c -DFOO
y.c: In function ‘foo’:
y.c:7:6: warning: AVX512F vector return without AVX512F enabled
changes the ABI [-Wpsabi]
    7 |   *p = _mm512_setzero_ps ();
      |   ~~~^~~~~~~~~~~~~~~~~~~~~~
In file included from ./include/immintrin.h:49,
                 from ./include/x86intrin.h:32,
                 from y.c:1:
./include/avx512fintrin.h:305:1: error: inlining failed in call to
‘always_inline’ ‘_mm512_setzero_ps’: target specific option mismatch
  305 | _mm512_setzero_ps (void)
      | ^~~~~~~~~~~~~~~~~
y.c:7:8: note: called from here
    7 |   *p = _mm512_setzero_ps ();
      |        ^~~~~~~~~~~~~~~~~~~~
[hjl@gnu-cfl-2 gcc]$ ./xgcc -B./ -O2 -S y.c
y.c: In function ‘bar’:
y.c:13:6: warning: AVX512F vector return without AVX512F enabled
changes the ABI [-Wpsabi]
   13 |   *p = _mm512_setzero_ps ();
      |   ~~~^~~~~~~~~~~~~~~~~~~~~~
[hjl@gnu-cfl-2 gcc]$

If you look at the generated code:

vpxor %xmm0, %xmm0, %xmm0
vmovdqa %xmm0, (%rdi)
vmovdqa %xmm0, 16(%rdi)
vmovdqa %xmm0, 32(%rdi)
vmovdqa %xmm0, 48(%rdi)
ret

The ABI change warning is on _mm512_setzero_ps.   Since it is inlined,
there is no wrong code here.  I don't believe my patch will cause the wrong
code nor ICE.

-- 
H.J.
H.J. Lu via Gcc-patches March 25, 2021, 1:30 p.m. | #8
On Thu, Mar 25, 2021 at 2:25 PM H.J. Lu via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>

> On Thu, Mar 25, 2021 at 6:13 AM Jakub Jelinek <jakub@redhat.com> wrote:

> >

> > On Thu, Mar 25, 2021 at 02:02:16PM +0100, Uros Bizjak wrote:

> > > > Aren't *intrin.h system headers too?

> > >

> > > I was under impression that they are not, since they live outside of

> > > /usr/include.

> >

> > Yes, they aren't in /usr/include, but they are still system headers.

> > If I preprocess something that #include <x86intrin.h> with my system

> > compiler, I get:

> > # 1 "/usr/lib/gcc/x86_64-redhat-linux/10/include/x86intrin.h" 1 3 4

> > where that 3 stands for system header.

> >

> > My preference would be a new attribute that for always_inline says it is ok

> > to inline even when there are target or optimization mismatches (and

> > effectively get the target/optimization options from the caller for the

> > body) and start using that new attribute in glibc headers (for

> > -D_FORTIFY_SOURCE wrappers at least, those really don't have any target

> > dependencies nor anything floating point that might e.g. depend on

> > -ffast-math etc.) and perhaps the __rdtsc and similar intrinsics in

> > *intrin.h.

> > Even that can be a can of worms, because some target or optimization options

> > are used already in the FE processing or during the GIMPLE passes before

> > inlining, and while it might work somehow if e.g. during those passes we

> > treat it like -ffast-math and after inlining not like that or vice versa,

> > there is a risk that we e.g. fold/lower something with some assumptions and

> > later assume that (with different options) such constructs can't appear in

> > the IL.

> >

> > > If the patch does not differentiate between system and user headers,

> > > then please revert it.

> >

> > It does but intrinsic headers are system headers.

> >

> >         Jakub

> >

>

> Before my patch:

>

> [hjl@gnu-cfl-2 gcc]$ cat y.c

> #include <x86intrin.h>

>

> #ifdef FOO

> void

> foo (__m512 *p)

> {

>   *p = _mm512_setzero_ps ();

> }

> #else

> __attribute__((target ("avx"))) void

> bar (__m512 *p)

> {

>   *p = _mm512_setzero_ps ();

> }

> #endif

> [hjl@gnu-cfl-2 gcc]$ gcc -O2 -S y.c

> y.c: In function ‘bar’:

> y.c:13:6: warning: AVX512F vector return without AVX512F enabled

> changes the ABI [-Wpsabi]

>    13 |   *p = _mm512_setzero_ps ();

>       |   ~~~^~~~~~~~~~~~~~~~~~~~~~

> In file included from

> /usr/lib/gcc/x86_64-redhat-linux/10/include/immintrin.h:55,

>                  from

> /usr/lib/gcc/x86_64-redhat-linux/10/include/x86intrin.h:32,

>                  from y.c:1:

> /usr/lib/gcc/x86_64-redhat-linux/10/include/avx512fintrin.h:310:1:

> error: inlining failed in call to ‘always_inline’ ‘_mm512_setzero_ps’:

> target specific option mismatch

>   310 | _mm512_setzero_ps (void)

>       | ^~~~~~~~~~~~~~~~~

> y.c:13:8: note: called from here

>    13 |   *p = _mm512_setzero_ps ();

>       |        ^~~~~~~~~~~~~~~~~~~~

> [hjl@gnu-cfl-2 gcc]$ gcc -O2 -S y.c -DFOO

> y.c: In function ‘foo’:

> y.c:7:6: warning: AVX512F vector return without AVX512F enabled

> changes the ABI [-Wpsabi]

>     7 |   *p = _mm512_setzero_ps ();

>       |   ~~~^~~~~~~~~~~~~~~~~~~~~~

> In file included from

> /usr/lib/gcc/x86_64-redhat-linux/10/include/immintrin.h:55,

>                  from

> /usr/lib/gcc/x86_64-redhat-linux/10/include/x86intrin.h:32,

>                  from y.c:1:

> /usr/lib/gcc/x86_64-redhat-linux/10/include/avx512fintrin.h:310:1:

> error: inlining failed in call to ‘always_inline’ ‘_mm512_setzero_ps’:

> target specific option mismatch

>   310 | _mm512_setzero_ps (void)

>       | ^~~~~~~~~~~~~~~~~

> y.c:7:8: note: called from here

>     7 |   *p = _mm512_setzero_ps ();

>       |        ^~~~~~~~~~~~~~~~~~~~

> [hjl@gnu-cfl-2 gcc]$

>

> After my patch,

>

> [hjl@gnu-cfl-2 gcc]$ ./xgcc -B./ -O2 -S y.c -DFOO

> y.c: In function ‘foo’:

> y.c:7:6: warning: AVX512F vector return without AVX512F enabled

> changes the ABI [-Wpsabi]

>     7 |   *p = _mm512_setzero_ps ();

>       |   ~~~^~~~~~~~~~~~~~~~~~~~~~

> In file included from ./include/immintrin.h:49,

>                  from ./include/x86intrin.h:32,

>                  from y.c:1:

> ./include/avx512fintrin.h:305:1: error: inlining failed in call to

> ‘always_inline’ ‘_mm512_setzero_ps’: target specific option mismatch

>   305 | _mm512_setzero_ps (void)

>       | ^~~~~~~~~~~~~~~~~

> y.c:7:8: note: called from here

>     7 |   *p = _mm512_setzero_ps ();

>       |        ^~~~~~~~~~~~~~~~~~~~

> [hjl@gnu-cfl-2 gcc]$ ./xgcc -B./ -O2 -S y.c

> y.c: In function ‘bar’:

> y.c:13:6: warning: AVX512F vector return without AVX512F enabled

> changes the ABI [-Wpsabi]

>    13 |   *p = _mm512_setzero_ps ();

>       |   ~~~^~~~~~~~~~~~~~~~~~~~~~

> [hjl@gnu-cfl-2 gcc]$

>

> If you look at the generated code:

>

> vpxor %xmm0, %xmm0, %xmm0

> vmovdqa %xmm0, (%rdi)

> vmovdqa %xmm0, 16(%rdi)

> vmovdqa %xmm0, 32(%rdi)

> vmovdqa %xmm0, 48(%rdi)

> ret

>

> The ABI change warning is on _mm512_setzero_ps.   Since it is inlined,

> there is no wrong code here.  I don't believe my patch will cause the wrong

> code nor ICE.


The ICE chance is that we fail to expand some __builtin_ia32_* or that
we expand it but will not recognize the used insn because it is gated on
a not enabled architecture feature.  The fix would of course be to
fail expansion with a proper diagnostic here, but not sure if we reliably
do this.

That said, I agree with the direction of the patch but I'd have removed
the system header check entirely - I'd even have done this change in
the middle-end and avoid having the target particicpate in inlining decisions
of always-inline functions.

Richard.

>

> --

> H.J.
H.J. Lu via Gcc-patches March 25, 2021, 1:39 p.m. | #9
On Thu, Mar 25, 2021 at 02:21:21PM +0100, Richard Biener wrote:
> Err, but _which_ mismatches do you ignore with such new attribute?


We'd need to define it.

> If I have __rdtsc and compile that into a -mno-rdtsc unit/function would

> that be OK?


There is no -mno-rdtsc or -mrdtsc, rdtsc insn is always available.
So from that POV, having the new attribute on __rdtsc is fine.

The problem with H.J.'s patch is that we'll now accept a lot of code that
should be rejected, we relied for years on the _mm* intrinsics implemented
as always_inline and for it being rejected if users try to use it from
incompatible callers.
Some of them will be still rejected but with different diagnostics (because
users can use the (unsupported) builtins directly, we don't have a guarantee
they aren't used in code with arbitrary ISA flags), some of them will ICE
(if we didn't catch such uses of unsupported builtins, something to be fixed
for sure), and others that are implemented without builtins will be
accepted, which means people will have broken code in the wild that might
then not compile with clang or icc but will compile with gcc.

always_inline simply sometimes means always inline except when it is address
taken and not called directly and at other times ... or when there is a
target or optimization specific option mismatch.

Perhaps a way to make it work most of the time would be to amend
H.J.'s patch to do it only if the callee target options are the default
ones.
That would mean we keep the existing behavior for the
#pragma GCC push_options
#pragma GCC target("avx2")
inline __attribute__((always_inline)) ...
#pragma GCC pop_options
cases and not for the 
__fortify_function int
open (const char *__path, int __oflag, ...)
{
...
}
cases.  But right now that wouldn't work reliably, because we add those
push_options/target ... only if the currently selected target (e.g. from
command line options) doesn't already include those.
So, we would let the
__attribute__((target ("no-sse3")))
void bar (void) { _mm256_*(); }
cases through.

Another question is what H.J.'s patch will do for LTO with
-fno-early-inlining.  If the always_inline functions are only inlined
during LTO inlining, == comparisons with default target option node are just
weird.

	Jakub
H.J. Lu via Gcc-patches March 25, 2021, 1:45 p.m. | #10
On Thu, Mar 25, 2021 at 7:59 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>

> On Wed, Mar 24, 2021 at 6:23 PM H.J. Lu <hjl.tools@gmail.com> wrote:

> >

> > For always_inline in system headers, we don't know if caller's ISAs are

> > compatible with callee's ISAs until much later.  Skip ISA check for

> > always_inline in system headers if caller has target attribute.

> >

> > gcc/

> >

> >         PR target/98209

> >         PR target/99744

> >         * config/i386/i386.c (ix86_can_inline_p): Don't check ISA for

> >         always_inline in system headers.

> >

> > gcc/testsuite/

> >

> >         PR target/98209

> >         PR target/99744

> >         * gcc.target/i386/pr98209.c: New test.

> >         * gcc.target/i386/pr99744-1.c: Likewise.

> >         * gcc.target/i386/pr99744-2.c: Likewise.

>

> LGTM.


It looks to me that this patch needs some more discussion, and the
solution isn't clear at all.

HJ, can you please revert the patch?

Thanks,
Uros.
H.J. Lu via Gcc-patches March 25, 2021, 2:24 p.m. | #11
On Thu, Mar 25, 2021 at 2:39 PM Jakub Jelinek <jakub@redhat.com> wrote:
>

> On Thu, Mar 25, 2021 at 02:21:21PM +0100, Richard Biener wrote:

> > Err, but _which_ mismatches do you ignore with such new attribute?

>

> We'd need to define it.

>

> > If I have __rdtsc and compile that into a -mno-rdtsc unit/function would

> > that be OK?

>

> There is no -mno-rdtsc or -mrdtsc, rdtsc insn is always available.

> So from that POV, having the new attribute on __rdtsc is fine.

>

> The problem with H.J.'s patch is that we'll now accept a lot of code that

> should be rejected, we relied for years on the _mm* intrinsics implemented

> as always_inline and for it being rejected if users try to use it from

> incompatible callers.

> Some of them will be still rejected but with different diagnostics (because

> users can use the (unsupported) builtins directly, we don't have a guarantee

> they aren't used in code with arbitrary ISA flags), some of them will ICE

> (if we didn't catch such uses of unsupported builtins, something to be fixed

> for sure), and others that are implemented without builtins will be

> accepted, which means people will have broken code in the wild that might

> then not compile with clang or icc but will compile with gcc.

>

> always_inline simply sometimes means always inline except when it is address

> taken and not called directly and at other times ... or when there is a

> target or optimization specific option mismatch.


Well, but in all of those cases the program was invalid (and is diagnosed).
So it simply means "always inline".  That we abuse the always-inline
(error) diagnostic to tell users about possible problems (and in HJs case
and the fortify case reject valid programs) is IMHO a bug.

>

> Perhaps a way to make it work most of the time would be to amend

> H.J.'s patch to do it only if the callee target options are the default

> ones.

> That would mean we keep the existing behavior for the

> #pragma GCC push_options

> #pragma GCC target("avx2")

> inline __attribute__((always_inline)) ...

> #pragma GCC pop_options

> cases and not for the

> __fortify_function int

> open (const char *__path, int __oflag, ...)

> {

> ...

> }

> cases.  But right now that wouldn't work reliably, because we add those

> push_options/target ... only if the currently selected target (e.g. from

> command line options) doesn't already include those.

> So, we would let the

> __attribute__((target ("no-sse3")))

> void bar (void) { _mm256_*(); }

> cases through.

>

> Another question is what H.J.'s patch will do for LTO with

> -fno-early-inlining.  If the always_inline functions are only inlined

> during LTO inlining, == comparisons with default target option node are just

> weird.


We perform always-inline inlining early even with -fno-early-inlining.  But I
think we don't reliably diagnose indirect calls or address-taking of
always-inline
functions and will emit them out-of-line if they end up "used".  That's a bug
I think.

static inline __attribute__((always_inline)) void f () {}
void (*p)() = f;

is not diagnosed and 'f' is emitted out of line.

That said, I think comparing with some "default" options isn't a good
way either.

Richard.

>

>         Jakub

>
H.J. Lu via Gcc-patches March 25, 2021, 2:32 p.m. | #12
On Thu, Mar 25, 2021 at 03:24:38PM +0100, Richard Biener wrote:
> Well, but in all of those cases the program was invalid (and is diagnosed).

> So it simply means "always inline".  That we abuse the always-inline

> (error) diagnostic to tell users about possible problems (and in HJs case

> and the fortify case reject valid programs) is IMHO a bug.


No, we don't diagnose the cases where we chose to implement the intrinsics
e.g. using generic vectors anymore with H.J.'s patch.
It is fine to use generic vectors directly, but invalid and non-portable
to call e.g. AVX2 intrinsics from SSE2 functions even if they are
implemented using generic vectors.

> We perform always-inline inlining early even with -fno-early-inlining.  But I

> think we don't reliably diagnose indirect calls or address-taking of

> always-inline

> functions and will emit them out-of-line if they end up "used".  That's a bug

> I think.

> 

> static inline __attribute__((always_inline)) void f () {}

> void (*p)() = f;

> 

> is not diagnosed and 'f' is emitted out of line.


I think we can't do such a change, too much code in the wild relies on
always_inline not diagnosing the indirect call.  Including all of glibc
headers.
Doing
int (*p) (const char *, int, ...) = open;
is perfectly valid C and we must not diagnose that, even when for
fortification glibc has an always_inline wrapper around it.

Yes, it is unfortunate that we in the past called the attribute
always_inline when we really didn't mean (or perhaps some of us meant but
didn't implement) always, but at this point we'd need a new attribute to
mean diagnose non-indirect calls/taking address of except in direct calls as
errors.  And IMHO we need a different attribute to mean inline despite
target/optimization option mismatches.
That way users can choose what exactly they want on a case by case basis.

	Jakub
H.J. Lu via Gcc-patches March 25, 2021, 2:40 p.m. | #13
On Thu, Mar 25, 2021 at 3:32 PM Jakub Jelinek <jakub@redhat.com> wrote:
>

> On Thu, Mar 25, 2021 at 03:24:38PM +0100, Richard Biener wrote:

> > Well, but in all of those cases the program was invalid (and is diagnosed).

> > So it simply means "always inline".  That we abuse the always-inline

> > (error) diagnostic to tell users about possible problems (and in HJs case

> > and the fortify case reject valid programs) is IMHO a bug.

>

> No, we don't diagnose the cases where we chose to implement the intrinsics

> e.g. using generic vectors anymore with H.J.'s patch.

> It is fine to use generic vectors directly, but invalid and non-portable

> to call e.g. AVX2 intrinsics from SSE2 functions even if they are

> implemented using generic vectors.

>

> > We perform always-inline inlining early even with -fno-early-inlining.  But I

> > think we don't reliably diagnose indirect calls or address-taking of

> > always-inline

> > functions and will emit them out-of-line if they end up "used".  That's a bug

> > I think.

> >

> > static inline __attribute__((always_inline)) void f () {}

> > void (*p)() = f;

> >

> > is not diagnosed and 'f' is emitted out of line.

>

> I think we can't do such a change, too much code in the wild relies on

> always_inline not diagnosing the indirect call.  Including all of glibc

> headers.

> Doing

> int (*p) (const char *, int, ...) = open;

> is perfectly valid C and we must not diagnose that, even when for

> fortification glibc has an always_inline wrapper around it.


I think the "proper" way to do this is to have 'open' above end up
refering to the out-of-line 'open' in the DSO, _not_ to emit the
fortification wrapper out-of-line.  But then, yes, it shouldn't
be always-inline then.  It should be like the former extern inline
extension.

> Yes, it is unfortunate that we in the past called the attribute

> always_inline when we really didn't mean (or perhaps some of us meant but

> didn't implement) always, but at this point we'd need a new attribute to

> mean diagnose non-indirect calls/taking address of except in direct calls as

> errors.  And IMHO we need a different attribute to mean inline despite

> target/optimization option mismatches.

> That way users can choose what exactly they want on a case by case basis.


But we have existing issues with [target] options differing and existing old
uses of always_inline (like the fortification wrappers).  Adding a new attribute
will not fix those issues.  Do you propose to not fix them and instead only
fix the new general_regs_only always-inline function glibc wants to add?

IMHO we have to fix the existing always_inline and we need a _new_
attribute to get the desired diagnostics on intrinsics.  Something
like __attribute__((need_target("avx"))) for AVX intrinsics?

Richard.

>         Jakub

>
H.J. Lu via Gcc-patches March 25, 2021, 2:54 p.m. | #14
On Thu, Mar 25, 2021 at 03:40:51PM +0100, Richard Biener wrote:
> I think the "proper" way to do this is to have 'open' above end up

> refering to the out-of-line 'open' in the DSO, _not_ to emit the

> fortification wrapper out-of-line.  But then, yes, it shouldn't

> be always-inline then.  It should be like the former extern inline

> extension.


It is extern inline __attribute__((gnu_inline, always_inline, artificial))
I think.  But the always_inline is completely intentional there,
we don't want the decision whether to inline it or not being done based on
its size, amount of functions already inlined into the caller before,
whether the call is cold or hot, etc.  It is a matter of security.
If it is taking address, we want the library routine in that case, sure.

> But we have existing issues with [target] options differing and existing old

> uses of always_inline (like the fortification wrappers).  Adding a new attribute

> will not fix those issues.  Do you propose to not fix them and instead only

> fix the new general_regs_only always-inline function glibc wants to add?


Yes.
Basically solve the problem for the fortification wrappers and rdtsc or
whatever other always inlines don't really require any specific
target/optimize options.

> IMHO we have to fix the existing always_inline and we need a _new_

> attribute to get the desired diagnostics on intrinsics.  Something

> like __attribute__((need_target("avx"))) for AVX intrinsics?


Or, if we go this route in addition to adding
at least a new attributes for the "diagnose taking address without
direct call", we'd need probably not just that,
but also pragma way to specify it for a lot of functions together,
otherwise it would be a maintainance nightmare.

	Jakub
H.J. Lu via Gcc-patches March 25, 2021, 6:36 p.m. | #15
On Thu, Mar 25, 2021 at 7:54 AM Jakub Jelinek via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>

> On Thu, Mar 25, 2021 at 03:40:51PM +0100, Richard Biener wrote:

> > I think the "proper" way to do this is to have 'open' above end up

> > refering to the out-of-line 'open' in the DSO, _not_ to emit the

> > fortification wrapper out-of-line.  But then, yes, it shouldn't

> > be always-inline then.  It should be like the former extern inline

> > extension.

>

> It is extern inline __attribute__((gnu_inline, always_inline, artificial))

> I think.  But the always_inline is completely intentional there,

> we don't want the decision whether to inline it or not being done based on

> its size, amount of functions already inlined into the caller before,

> whether the call is cold or hot, etc.  It is a matter of security.

> If it is taking address, we want the library routine in that case, sure.

>

> > But we have existing issues with [target] options differing and existing old

> > uses of always_inline (like the fortification wrappers).  Adding a new attribute

> > will not fix those issues.  Do you propose to not fix them and instead only

> > fix the new general_regs_only always-inline function glibc wants to add?

>

> Yes.

> Basically solve the problem for the fortification wrappers and rdtsc or

> whatever other always inlines don't really require any specific

> target/optimize options.

>

> > IMHO we have to fix the existing always_inline and we need a _new_

> > attribute to get the desired diagnostics on intrinsics.  Something

> > like __attribute__((need_target("avx"))) for AVX intrinsics?

>

> Or, if we go this route in addition to adding

> at least a new attributes for the "diagnose taking address without

> direct call", we'd need probably not just that,

> but also pragma way to specify it for a lot of functions together,

> otherwise it would be a maintainance nightmare.

>


How can we move forward with it?  I'd like to resolve it in GCC 11.

Thanks.

-- 
H.J.
H.J. Lu via Gcc-patches March 26, 2021, 8:06 a.m. | #16
On Thu, Mar 25, 2021 at 7:37 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>

> On Thu, Mar 25, 2021 at 7:54 AM Jakub Jelinek via Gcc-patches

> <gcc-patches@gcc.gnu.org> wrote:

> >

> > On Thu, Mar 25, 2021 at 03:40:51PM +0100, Richard Biener wrote:

> > > I think the "proper" way to do this is to have 'open' above end up

> > > refering to the out-of-line 'open' in the DSO, _not_ to emit the

> > > fortification wrapper out-of-line.  But then, yes, it shouldn't

> > > be always-inline then.  It should be like the former extern inline

> > > extension.

> >

> > It is extern inline __attribute__((gnu_inline, always_inline, artificial))

> > I think.  But the always_inline is completely intentional there,

> > we don't want the decision whether to inline it or not being done based on

> > its size, amount of functions already inlined into the caller before,

> > whether the call is cold or hot, etc.  It is a matter of security.

> > If it is taking address, we want the library routine in that case, sure.

> >

> > > But we have existing issues with [target] options differing and existing old

> > > uses of always_inline (like the fortification wrappers).  Adding a new attribute

> > > will not fix those issues.  Do you propose to not fix them and instead only

> > > fix the new general_regs_only always-inline function glibc wants to add?

> >

> > Yes.

> > Basically solve the problem for the fortification wrappers and rdtsc or

> > whatever other always inlines don't really require any specific

> > target/optimize options.

> >

> > > IMHO we have to fix the existing always_inline and we need a _new_

> > > attribute to get the desired diagnostics on intrinsics.  Something

> > > like __attribute__((need_target("avx"))) for AVX intrinsics?

> >

> > Or, if we go this route in addition to adding

> > at least a new attributes for the "diagnose taking address without

> > direct call", we'd need probably not just that,

> > but also pragma way to specify it for a lot of functions together,

> > otherwise it would be a maintainance nightmare.

> >

>

> How can we move forward with it?  I'd like to resolve it in GCC 11.


So I looked closer and we handle target attribute mismatches different
from optimization attribute mismatches (the latter are validated in
can_inline_edge_by_limits_p, the former in can_inline_edge_p).
For optimize attribute differences we're ignoring all (even semantic
differences):

     /* Until GCC 4.9 we did not check the semantics-altering flags
        below and inlined across optimization boundaries.
        Enabling checks below breaks several packages by refusing
        to inline library always_inline functions. See PR65873.
        Disable the check for early inlining for now until better solution
        is found.  */
     if (always_inline && early)
        ;
      /* There are some options that change IL semantics which means
         we cannot inline in these cases for correctness reason.
         Not even for always_inline declared functions.  */
     else if (check_match (flag_wrapv)
...
      /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
      else if (always_inline)
        ;
      /* When user added an attribute to the callee honor it.  */
      else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
               && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
        {
          e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
          inlinable = false;
        }

so the original intent was to do things "correctly" but then as now seen
with target attribute mismatches we run into problems.  Thus now we
allow all always-inlines.

I suppose diagnosing

static inline void __attribute__((target("avx"),always_inline))
foo_avx_optimized () {...}
void bar()
{
  if (cpu_supports ("avx"))
   foo_avx_optimized ();
}

for the missed optimization because of the always-inline
(foo_avx_optimized will inherit
the callers target flags and _not_ be avx optimized) might be nice,
but well, at least this
kind of inlining will not generate wrong code.

Thus we IMHO can do sth like

diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
index f15c4828958..d4d4ac366c8 100644
--- a/gcc/ipa-inline.c
+++ b/gcc/ipa-inline.c
@@ -374,9 +374,14 @@ can_inline_edge_p (struct cgraph_edge *e, bool report,
       e->inline_failed = CIF_UNSPECIFIED;
       inlinable = false;
     }
-  /* Check compatibility of target optimization options.  */
-  else if (!targetm.target_option.can_inline_p (caller->decl,
-                                               callee->decl))
+  /* Check compatibility of target optimization options.  Be consistent with
+     handling of early always-inlines and optimize attribute differences
+     handled in can_inline_edge_by_limits_p.  */
+  else if ((!early
+           || !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
+           || !lookup_attribute ("always_inline",
+                                 DECL_ATTRIBUTES (callee->decl)))
+          && !targetm.target_option.can_inline_p (caller->decl, callee->decl))
     {
       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
       inlinable = false;

alternatively, if we want to give targets the opportunity to disallow some very
specific bad attribute differences, push this logic into all target
(and the default)
hooks (maybe as extra flag to the hook).

Refactoring can_inline_edge_p and can_inline_edge_by_limits_p to put the
option checks next to each other would also be good, this avoids differences
like above.

That said, IMHO always-inline should be what it says.  If we want sth for some
useful diagnostics then that should be a new attribute (need_target
("...") or so).

And to fix possible wrong-code/ICE issues we'd need support for different
options on different code regions more fine grained than function level.  For
optimize attributes this means all semantic changing options should reflect
themselves in the IL, for target options I have no good idea.

Richard.

> Thanks.

>

> --

> H.J.
H.J. Lu via Gcc-patches March 26, 2021, 8:33 a.m. | #17
On Thu, Mar 25, 2021 at 11:36:37AM -0700, H.J. Lu via Gcc-patches wrote:
> How can we move forward with it?  I'd like to resolve it in GCC 11.


I think it is too late for GCC 11 for this.
Especially if the solution would be that we change the behavior of existing
attribute, we would need enough time to test everything in the wild that
we don't break it badly, even if we add new attributes that cover the
previous behavior.  Only if we keep the behavior of existing attribute
and add a new one with the new behavior it would be something that could
be considered for GCC 11 IMNSHO but then you'd need to change the glibc
headers in time too to buy into the new attribute.
We need analysis of all GCC targets with target attribute support and
handle them consistently.

	Jakub
H.J. Lu via Gcc-patches March 26, 2021, 10:13 a.m. | #18
On Fri, Mar 26, 2021 at 9:34 AM Jakub Jelinek via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>

> On Thu, Mar 25, 2021 at 11:36:37AM -0700, H.J. Lu via Gcc-patches wrote:

> > How can we move forward with it?  I'd like to resolve it in GCC 11.

>

> I think it is too late for GCC 11 for this.

> Especially if the solution would be that we change the behavior of existing

> attribute, we would need enough time to test everything in the wild that

> we don't break it badly,


But isn't the suggested change only going to make programs we reject now
with an error accepted or ICEing?  Thus, no program that works right now
should break.

Richard.

> even if we add new attributes that cover the

> previous behavior.  Only if we keep the behavior of existing attribute

> and add a new one with the new behavior it would be something that could

> be considered for GCC 11 IMNSHO but then you'd need to change the glibc

> headers in time too to buy into the new attribute.

> We need analysis of all GCC targets with target attribute support and

> handle them consistently.

>

>         Jakub

>
H.J. Lu via Gcc-patches March 26, 2021, 10:26 a.m. | #19
On Fri, Mar 26, 2021 at 11:13:21AM +0100, Richard Biener wrote:
> On Fri, Mar 26, 2021 at 9:34 AM Jakub Jelinek via Gcc-patches

> <gcc-patches@gcc.gnu.org> wrote:

> >

> > On Thu, Mar 25, 2021 at 11:36:37AM -0700, H.J. Lu via Gcc-patches wrote:

> > > How can we move forward with it?  I'd like to resolve it in GCC 11.

> >

> > I think it is too late for GCC 11 for this.

> > Especially if the solution would be that we change the behavior of existing

> > attribute, we would need enough time to test everything in the wild that

> > we don't break it badly,

> 

> But isn't the suggested change only going to make programs we reject now

> with an error accepted or ICEing?  Thus, no program that works right now

> should break.


That is true, but even
accepts-invalid
and
ice-on-invalid-code
would be important regressions.
Changing the always_inline attribute behavior without at least avoiding
the first of those for our intrinsics would be bad, and we need to look what
people use always_inline in the wild for and what are their expectations.
And for the intrinsics we need something maintainable, we have > 5000
intrinsics on i386 alone, > 4000 on aarch64, > 7000 on arm, > 600 on rs6000,
> 100 on sparc, I bet most of them rely on the current behavior.

I think the world doesn't end if we do it for GCC 12 only, do it right for
everything we are aware of and have many months to figure out what impact it
will have on programs in the wild.

	Jakub
H.J. Lu via Gcc-patches March 26, 2021, 12:09 p.m. | #20
On Fri, Mar 26, 2021 at 11:26 AM Jakub Jelinek <jakub@redhat.com> wrote:
>

> On Fri, Mar 26, 2021 at 11:13:21AM +0100, Richard Biener wrote:

> > On Fri, Mar 26, 2021 at 9:34 AM Jakub Jelinek via Gcc-patches

> > <gcc-patches@gcc.gnu.org> wrote:

> > >

> > > On Thu, Mar 25, 2021 at 11:36:37AM -0700, H.J. Lu via Gcc-patches wrote:

> > > > How can we move forward with it?  I'd like to resolve it in GCC 11.

> > >

> > > I think it is too late for GCC 11 for this.

> > > Especially if the solution would be that we change the behavior of existing

> > > attribute, we would need enough time to test everything in the wild that

> > > we don't break it badly,

> >

> > But isn't the suggested change only going to make programs we reject now

> > with an error accepted or ICEing?  Thus, no program that works right now

> > should break.

>

> That is true, but even

> accepts-invalid

> and

> ice-on-invalid-code

> would be important regressions.

> Changing the always_inline attribute behavior without at least avoiding

> the first of those for our intrinsics would be bad, and we need to look what

> people use always_inline in the wild for and what are their expectations.

> And for the intrinsics we need something maintainable, we have > 5000

> intrinsics on i386 alone, > 4000 on aarch64, > 7000 on arm, > 600 on rs6000,

> > 100 on sparc, I bet most of them rely on the current behavior.

> I think the world doesn't end if we do it for GCC 12 only, do it right for

> everything we are aware of and have many months to figure out what impact it

> will have on programs in the wild.


As said, my opinion is that this fallout doesn't "exist" in the wild
since it can
only exist for code we reject right now which in my definition of
"out in the wild" makes it not exist.  I consider only code accepted by
the compiler as valid "out in the wild" example.

See also the behavior of always-inline with regard to the optimize attribute.

So yes, a better solution would be nice but I can't see any since the
underlying issue is known since a long time and thus the pragmatic
solution is the best (IMHO), also from a QOI perspective.  For intrinsics
it also avoids differences with -O0 vs -O with what we accept and reject.

Richard.

>         Jakub

>
Florian Weimer March 26, 2021, 1:46 p.m. | #21
* Jakub Jelinek via Gcc-patches:

> On Thu, Mar 25, 2021 at 11:36:37AM -0700, H.J. Lu via Gcc-patches wrote:

>> How can we move forward with it?  I'd like to resolve it in GCC 11.

>

> I think it is too late for GCC 11 for this.

> Especially if the solution would be that we change the behavior of existing

> attribute, we would need enough time to test everything in the wild that

> we don't break it badly, even if we add new attributes that cover the

> previous behavior.  Only if we keep the behavior of existing attribute

> and add a new one with the new behavior it would be something that could

> be considered for GCC 11 IMNSHO but then you'd need to change the glibc

> headers in time too to buy into the new attribute.

> We need analysis of all GCC targets with target attribute support and

> handle them consistently.


I think H.J. needs this for a function that isn't even always_inline,
just extern inline __attribute__ ((gnu_inline)).  Is that aspect
something that could be solved for GCC 11?
H.J. Lu via Gcc-patches March 26, 2021, 2:12 p.m. | #22
On Fri, Mar 26, 2021 at 2:49 PM Florian Weimer <fw@deneb.enyo.de> wrote:
>

> * Jakub Jelinek via Gcc-patches:

>

> > On Thu, Mar 25, 2021 at 11:36:37AM -0700, H.J. Lu via Gcc-patches wrote:

> >> How can we move forward with it?  I'd like to resolve it in GCC 11.

> >

> > I think it is too late for GCC 11 for this.

> > Especially if the solution would be that we change the behavior of existing

> > attribute, we would need enough time to test everything in the wild that

> > we don't break it badly, even if we add new attributes that cover the

> > previous behavior.  Only if we keep the behavior of existing attribute

> > and add a new one with the new behavior it would be something that could

> > be considered for GCC 11 IMNSHO but then you'd need to change the glibc

> > headers in time too to buy into the new attribute.

> > We need analysis of all GCC targets with target attribute support and

> > handle them consistently.

>

> I think H.J. needs this for a function that isn't even always_inline,

> just extern inline __attribute__ ((gnu_inline)).  Is that aspect

> something that could be solved for GCC 11?


But that should already work, no?  Yes, it won't inline but also not
error.  Unless glibc lacks the out-of-line definition, that is.

Richard.
Florian Weimer March 26, 2021, 3:20 p.m. | #23
* Richard Biener:

>> I think H.J. needs this for a function that isn't even always_inline,

>> just extern inline __attribute__ ((gnu_inline)).  Is that aspect

>> something that could be solved for GCC 11?

>

> But that should already work, no?  Yes, it won't inline but also not

> error.  Unless glibc lacks the out-of-line definition, that is.


It does not work:

extern double strtod (const char *, char **);

extern __inline __attribute__ ((__gnu_inline__)) double
atof (const char *__nptr)
{
  return strtod (__nptr, (char **) ((void *)0));
}

fails with -mno-sse:

t.c: In function ‘atof’:
t.c:5:1: error: SSE register return with SSE disabled

I don't think we need to support calling atof under these
circumstances (in fact, this is impossible to support because there is
no ABI we could use for the call).  But we need to ignore the inline
function definition, like we ignore function declarations.  Otherwise
we'll have to patch a lot of headers to support -mno-sse.

Or has this already been fixed differently in GCC 11?
H.J. Lu via Gcc-patches March 26, 2021, 3:56 p.m. | #24
On March 26, 2021 4:20:28 PM GMT+01:00, Florian Weimer <fw@deneb.enyo.de> wrote:
>* Richard Biener:

>

>>> I think H.J. needs this for a function that isn't even

>always_inline,

>>> just extern inline __attribute__ ((gnu_inline)).  Is that aspect

>>> something that could be solved for GCC 11?

>>

>> But that should already work, no?  Yes, it won't inline but also not

>> error.  Unless glibc lacks the out-of-line definition, that is.

>

>It does not work:

>

>extern double strtod (const char *, char **);

>

>extern __inline __attribute__ ((__gnu_inline__)) double

>atof (const char *__nptr)

>{

>  return strtod (__nptr, (char **) ((void *)0));

>}

>

>fails with -mno-sse:

>

>t.c: In function ‘atof’:

>t.c:5:1: error: SSE register return with SSE disabled

>

>I don't think we need to support calling atof under these

>circumstances (in fact, this is impossible to support because there is

>no ABI we could use for the call).  But we need to ignore the inline

>function definition, like we ignore function declarations.  Otherwise

>we'll have to patch a lot of headers to support -mno-sse.

>

>Or has this already been fixed differently in GCC 11?


I think that has been fixed differently already.

Richard.

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7c41302c75b..1b4567e34ba 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -578,21 +578,29 @@  ix86_can_inline_p (tree caller, tree callee)
        && lookup_attribute ("always_inline",
 			    DECL_ATTRIBUTES (callee)));
 
+  /* NB: Skip ISA check for always_inline in system headers if caller
+     has target attribute.  */
+  bool skip_isa_check = (always_inline
+			 && caller_tree != target_option_default_node
+			 && DECL_IN_SYSTEM_HEADER (callee));
+
   cgraph_node *callee_node = cgraph_node::get (callee);
   /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
      function can inline a SSE2 function but a SSE2 function can't inline
      a SSE4 function.  */
-  if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
-       != callee_opts->x_ix86_isa_flags)
-      || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
-	  != callee_opts->x_ix86_isa_flags2))
+  if (!skip_isa_check
+      && (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
+	   != callee_opts->x_ix86_isa_flags)
+	  || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
+	      != callee_opts->x_ix86_isa_flags2)))
     ret = false;
 
   /* See if we have the same non-isa options.  */
-  else if ((!always_inline
-	    && caller_opts->x_target_flags != callee_opts->x_target_flags)
-	   || (caller_opts->x_target_flags & ~always_inline_safe_mask)
-	       != (callee_opts->x_target_flags & ~always_inline_safe_mask))
+  else if (!skip_isa_check
+	   && ((!always_inline
+		&& caller_opts->x_target_flags != callee_opts->x_target_flags)
+	       || ((caller_opts->x_target_flags & ~always_inline_safe_mask)
+		   != (callee_opts->x_target_flags & ~always_inline_safe_mask))))
     ret = false;
 
   /* See if arch, tune, etc. are the same.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr98209.c b/gcc/testsuite/gcc.target/i386/pr98209.c
new file mode 100644
index 00000000000..4566d4c7fc3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98209.c
@@ -0,0 +1,13 @@ 
+/* { dg-do run { target *-*-linux* } } */
+/* { dg-options "-O2 -D_FORTIFY_SOURCE=2" } */
+
+#include <stdio.h>
+
+extern int main(int argc, char** argv)
+  __attribute__ ((__target__ ("no-sse,no-mmx")));
+
+int main(int argc, char** argv)
+{
+  printf ("hello!\n");
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr99744-1.c b/gcc/testsuite/gcc.target/i386/pr99744-1.c
new file mode 100644
index 00000000000..92535923a56
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99744-1.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O0" } */
+
+#include <x86intrin.h>
+
+extern unsigned long long int curr_deadline;
+extern void bar (void);
+
+__attribute__ ((target("general-regs-only")))
+void
+foo (void)
+{
+  if (__rdtsc () < curr_deadline)
+    return; 
+  bar ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr99744-2.c b/gcc/testsuite/gcc.target/i386/pr99744-2.c
new file mode 100644
index 00000000000..4bb2a6bb6dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99744-2.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <x86intrin.h>
+
+extern __m128i x, y;
+
+__attribute__ ((target("general-regs-only")))
+void
+foo (void)
+{
+  x = _mm_move_epi64 (y); /* { dg-error "SSE register return with SSE disabled" } */
+}