[v2,aarch64] Add CPU support for Ampere Computing's eMAG.

Message ID 20181120152251.12204-1-christoph.muellner@theobroma-systems.com
State Superseded
Headers show
Series
  • [v2,aarch64] Add CPU support for Ampere Computing's eMAG.
Related show

Commit Message

Christoph Müllner Nov. 20, 2018, 3:22 p.m.
Tested with "make check" and no regressions found.

*** gcc/ChangeLog ***

2018-xx-xx  Christoph Muellner <christoph.muellner@theobroma-systems.com>

	* config/aarch64/aarch64-cores.def: Define emag.
	* config/aarch64/aarch64-tune.md: Regenerated with emag.
	* config/aarch64/aarch64.c (emag_tunings): New struct.
	* doc/invoke.texi: Document mtune value.

Signed-off-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>

---
 gcc/config/aarch64/aarch64-cores.def |  3 +++
 gcc/config/aarch64/aarch64-tune.md   |  2 +-
 gcc/config/aarch64/aarch64.c         | 25 +++++++++++++++++++++++++
 gcc/doc/invoke.texi                  |  2 +-
 4 files changed, 30 insertions(+), 2 deletions(-)

-- 
2.9.5

Comments

Kyrill Tkachov Nov. 20, 2018, 4:14 p.m. | #1
Hi Christoph,

On 20/11/18 15:22, Christoph Muellner wrote:
> Tested with "make check" and no regressions found.

>

> *** gcc/ChangeLog ***

>

> 2018-xx-xx  Christoph Muellner <christoph.muellner@theobroma-system.com>

>

> 	* config/aarch64/aarch64-cores.def: Define emag.

> 	* config/aarch64/aarch64-tune.md: Regenerated with emag.

> 	* config/aarch64/aarch64.c (emag_tunings): New struct.

> 	* doc/invoke.texi: Document mtune value.

>

> Signed-off-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>

> ---

>   gcc/config/aarch64/aarch64-cores.def |  3 +++

>   gcc/config/aarch64/aarch64-tune.md   |  2 +-

>   gcc/config/aarch64/aarch64.c         | 25 +++++++++++++++++++++++++

>   gcc/doc/invoke.texi                  |  2 +-

>   4 files changed, 30 insertions(+), 2 deletions(-)

>

> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def

> index 1f3ac56..8eee97f 100644

> --- a/gcc/config/aarch64/aarch64-cores.def

> +++ b/gcc/config/aarch64/aarch64-cores.def

> @@ -61,6 +61,9 @@ AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,  AARCH64_FL_FOR_ARCH

>   AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a2, -1)

>   AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a3, -1)

>   

> +/* Ampere Computing cores. */

> +AARCH64_CORE("emag",        emag,      xgene1,    8A,  AARCH64_FL_FOR_ARCH8, emag, 0x50, 0x000, 3)

> +


According to your previous reply, the 0x3 should be in the "PART" field, that is
..., 0x50, 0x3, -1)

Thanks,
Kyrill

>   /* APM ('P') cores. */

>   AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)

>   

> diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md

> index fade1d4..2fc7f03 100644

> --- a/gcc/config/aarch64/aarch64-tune.md

> +++ b/gcc/config/aarch64/aarch64-tune.md

> @@ -1,5 +1,5 @@

>   ;; -*- buffer-read-only: t -*-

>   ;; Generated automatically by gentune.sh from aarch64-cores.def

>   (define_attr "tune"

> -	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"

> +	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"

>   	(const (symbol_ref "((enum attr_tune) aarch64_tune)")))

> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

> index f7f88a9..995aafe 100644

> --- a/gcc/config/aarch64/aarch64.c

> +++ b/gcc/config/aarch64/aarch64.c

> @@ -957,6 +957,31 @@ static const struct tune_params xgene1_tunings =

>     &xgene1_prefetch_tune

>   };

>   

> +static const struct tune_params emag_tunings =

> +{

> +  &xgene1_extra_costs,

> +  &xgene1_addrcost_table,

> +  &xgene1_regmove_cost,

> +  &xgene1_vector_cost,

> +  &generic_branch_cost,

> +  &xgene1_approx_modes,

> +  6, /* memmov_cost  */

> +  4, /* issue_rate  */

> +  AARCH64_FUSE_NOTHING, /* fusible_ops  */

> +  "16",	/* function_align.  */

> +  "16",	/* jump_align.  */

> +  "16",	/* loop_align.  */

> +  2,	/* int_reassoc_width.  */

> +  4,	/* fp_reassoc_width.  */

> +  1,	/* vec_reassoc_width.  */

> +  2,	/* min_div_recip_mul_sf.  */

> +  2,	/* min_div_recip_mul_df.  */

> +  17,	/* max_case_values.  */

> +  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */

> +  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS),	/* tune_flags.  */

> +  &xgene1_prefetch_tune

> +};

> +

>   static const struct tune_params qdf24xx_tunings =

>   {

>     &qdf24xx_extra_costs,

> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi

> index e016dce..ac81fb2 100644

> --- a/gcc/doc/invoke.texi

> +++ b/gcc/doc/invoke.texi

> @@ -15288,7 +15288,7 @@ Specify the name of the target processor for which GCC should tune the

>   performance of the code.  Permissible values for this option are:

>   @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},

>   @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},

> -@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{falkor},

> +@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},

>   @samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},

>   @samp{thunderx}, @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},

>   @samp{tsv110}, @samp{thunderxt83}, @samp{thunderx2t99},
Christoph Müllner Nov. 20, 2018, 4:20 p.m. | #2
> On 20.11.2018, at 17:14, Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> wrote:

> 

> Hi Christoph,

> 

> On 20/11/18 15:22, Christoph Muellner wrote:

>> Tested with "make check" and no regressions found.

>> 

>> *** gcc/ChangeLog ***

>> 

>> 2018-xx-xx  Christoph Muellner <christoph.muellner@theobroma-system.com>

>> 

>> 	* config/aarch64/aarch64-cores.def: Define emag.

>> 	* config/aarch64/aarch64-tune.md: Regenerated with emag.

>> 	* config/aarch64/aarch64.c (emag_tunings): New struct.

>> 	* doc/invoke.texi: Document mtune value.

>> 

>> Signed-off-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>

>> ---

>>  gcc/config/aarch64/aarch64-cores.def |  3 +++

>>  gcc/config/aarch64/aarch64-tune.md   |  2 +-

>>  gcc/config/aarch64/aarch64.c         | 25 +++++++++++++++++++++++++

>>  gcc/doc/invoke.texi                  |  2 +-

>>  4 files changed, 30 insertions(+), 2 deletions(-)

>> 

>> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def

>> index 1f3ac56..8eee97f 100644

>> --- a/gcc/config/aarch64/aarch64-cores.def

>> +++ b/gcc/config/aarch64/aarch64-cores.def

>> @@ -61,6 +61,9 @@ AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,  AARCH64_FL_FOR_ARCH

>>  AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a2, -1)

>>  AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a3, -1)

>>  +/* Ampere Computing cores. */

>> +AARCH64_CORE("emag",        emag,      xgene1,    8A,  AARCH64_FL_FOR_ARCH8, emag, 0x50, 0x000, 3)

>> +

> 

> According to your previous reply, the 0x3 should be in the "PART" field, that is

> ..., 0x50, 0x3, -1)


Should have been "variant field" in the email.
The v2 patch is correct and tested:

processor       : 0
BogoMIPS        : 100.00
Features        : fp asimd evtstrm aes pmull sha1 sha2 crc32 cpuid
CPU implementer : 0x50
CPU architecture: 8
CPU variant     : 0x3
CPU part        : 0x000
CPU revision    : 1

gcc -mcpu=native -Q --help=target
  -mcpu=                                emag+crypto+crc+aes+sha2+profile

Thanks,
Christoph



> 

> Thanks,

> Kyrill

> 

>>  /* APM ('P') cores. */

>>  AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)

>>  diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md

>> index fade1d4..2fc7f03 100644

>> --- a/gcc/config/aarch64/aarch64-tune.md

>> +++ b/gcc/config/aarch64/aarch64-tune.md

>> @@ -1,5 +1,5 @@

>>  ;; -*- buffer-read-only: t -*-

>>  ;; Generated automatically by gentune.sh from aarch64-cores.def

>>  (define_attr "tune"

>> -	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"

>> +	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"

>>  	(const (symbol_ref "((enum attr_tune) aarch64_tune)")))

>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

>> index f7f88a9..995aafe 100644

>> --- a/gcc/config/aarch64/aarch64.c

>> +++ b/gcc/config/aarch64/aarch64.c

>> @@ -957,6 +957,31 @@ static const struct tune_params xgene1_tunings =

>>    &xgene1_prefetch_tune

>>  };

>>  +static const struct tune_params emag_tunings =

>> +{

>> +  &xgene1_extra_costs,

>> +  &xgene1_addrcost_table,

>> +  &xgene1_regmove_cost,

>> +  &xgene1_vector_cost,

>> +  &generic_branch_cost,

>> +  &xgene1_approx_modes,

>> +  6, /* memmov_cost  */

>> +  4, /* issue_rate  */

>> +  AARCH64_FUSE_NOTHING, /* fusible_ops  */

>> +  "16",	/* function_align.  */

>> +  "16",	/* jump_align.  */

>> +  "16",	/* loop_align.  */

>> +  2,	/* int_reassoc_width.  */

>> +  4,	/* fp_reassoc_width.  */

>> +  1,	/* vec_reassoc_width.  */

>> +  2,	/* min_div_recip_mul_sf.  */

>> +  2,	/* min_div_recip_mul_df.  */

>> +  17,	/* max_case_values.  */

>> +  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */

>> +  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS),	/* tune_flags.  */

>> +  &xgene1_prefetch_tune

>> +};

>> +

>>  static const struct tune_params qdf24xx_tunings =

>>  {

>>    &qdf24xx_extra_costs,

>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi

>> index e016dce..ac81fb2 100644

>> --- a/gcc/doc/invoke.texi

>> +++ b/gcc/doc/invoke.texi

>> @@ -15288,7 +15288,7 @@ Specify the name of the target processor for which GCC should tune the

>>  performance of the code.  Permissible values for this option are:

>>  @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},

>>  @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},

>> -@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{falkor},

>> +@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},

>>  @samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},

>>  @samp{thunderx}, @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},

>>  @samp{tsv110}, @samp{thunderxt83}, @samp{thunderx2t99},
Kyrill Tkachov Nov. 20, 2018, 4:43 p.m. | #3
On 20/11/18 16:20, Christoph Müllner wrote:
>> On 20.11.2018, at 17:14, Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> wrote:

>>

>> Hi Christoph,

>>

>> On 20/11/18 15:22, Christoph Muellner wrote:

>>> Tested with "make check" and no regressions found.

>>>

>>> *** gcc/ChangeLog ***

>>>

>>> 2018-xx-xx  Christoph Muellner <christoph.muellner@theobroma-system.com>

>>>

>>> 	* config/aarch64/aarch64-cores.def: Define emag.

>>> 	* config/aarch64/aarch64-tune.md: Regenerated with emag.

>>> 	* config/aarch64/aarch64.c (emag_tunings): New struct.

>>> 	* doc/invoke.texi: Document mtune value.

>>>

>>> Signed-off-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>

>>> ---

>>>   gcc/config/aarch64/aarch64-cores.def |  3 +++

>>>   gcc/config/aarch64/aarch64-tune.md   |  2 +-

>>>   gcc/config/aarch64/aarch64.c         | 25 +++++++++++++++++++++++++

>>>   gcc/doc/invoke.texi                  |  2 +-

>>>   4 files changed, 30 insertions(+), 2 deletions(-)

>>>

>>> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def

>>> index 1f3ac56..8eee97f 100644

>>> --- a/gcc/config/aarch64/aarch64-cores.def

>>> +++ b/gcc/config/aarch64/aarch64-cores.def

>>> @@ -61,6 +61,9 @@ AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,  AARCH64_FL_FOR_ARCH

>>>   AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a2, -1)

>>>   AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a3, -1)

>>>   +/* Ampere Computing cores. */

>>> +AARCH64_CORE("emag",        emag,      xgene1,    8A,  AARCH64_FL_FOR_ARCH8, emag, 0x50, 0x000, 3)

>>> +

>> According to your previous reply, the 0x3 should be in the "PART" field, that is

>> ..., 0x50, 0x3, -1)

> Should have been "variant field" in the email.

> The v2 patch is correct and tested:


I see, that does look correct then.

> processor       : 0

> BogoMIPS        : 100.00

> Features        : fp asimd evtstrm aes pmull sha1 sha2 crc32 cpuid


This line says that the CPU supports the "crc" and "crypto" option extensions by default so
the 5th field should include AARCH64_FL_CRC | AARCH64_FL_CRYPTO so that the user gets them
by default when they use -mcpu=emag.

Thanks, this is really helpful.
Kyrill

> CPU implementer : 0x50

> CPU architecture: 8

> CPU variant     : 0x3

> CPU part        : 0x000

> CPU revision    : 1

>

> gcc -mcpu=native -Q --help=target

>    -mcpu=                                emag+crypto+crc+aes+sha2+profile

>

> Thanks,

> Christoph

>

>

>

>> Thanks,

>> Kyrill

>>

>>>   /* APM ('P') cores. */

>>>   AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)

>>>   diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md

>>> index fade1d4..2fc7f03 100644

>>> --- a/gcc/config/aarch64/aarch64-tune.md

>>> +++ b/gcc/config/aarch64/aarch64-tune.md

>>> @@ -1,5 +1,5 @@

>>>   ;; -*- buffer-read-only: t -*-

>>>   ;; Generated automatically by gentune.sh from aarch64-cores.def

>>>   (define_attr "tune"

>>> -	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"

>>> +	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"

>>>   	(const (symbol_ref "((enum attr_tune) aarch64_tune)")))

>>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

>>> index f7f88a9..995aafe 100644

>>> --- a/gcc/config/aarch64/aarch64.c

>>> +++ b/gcc/config/aarch64/aarch64.c

>>> @@ -957,6 +957,31 @@ static const struct tune_params xgene1_tunings =

>>>     &xgene1_prefetch_tune

>>>   };

>>>   +static const struct tune_params emag_tunings =

>>> +{

>>> +  &xgene1_extra_costs,

>>> +  &xgene1_addrcost_table,

>>> +  &xgene1_regmove_cost,

>>> +  &xgene1_vector_cost,

>>> +  &generic_branch_cost,

>>> +  &xgene1_approx_modes,

>>> +  6, /* memmov_cost  */

>>> +  4, /* issue_rate  */

>>> +  AARCH64_FUSE_NOTHING, /* fusible_ops  */

>>> +  "16",	/* function_align.  */

>>> +  "16",	/* jump_align.  */

>>> +  "16",	/* loop_align.  */

>>> +  2,	/* int_reassoc_width.  */

>>> +  4,	/* fp_reassoc_width.  */

>>> +  1,	/* vec_reassoc_width.  */

>>> +  2,	/* min_div_recip_mul_sf.  */

>>> +  2,	/* min_div_recip_mul_df.  */

>>> +  17,	/* max_case_values.  */

>>> +  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */

>>> +  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS),	/* tune_flags.  */

>>> +  &xgene1_prefetch_tune

>>> +};

>>> +

>>>   static const struct tune_params qdf24xx_tunings =

>>>   {

>>>     &qdf24xx_extra_costs,

>>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi

>>> index e016dce..ac81fb2 100644

>>> --- a/gcc/doc/invoke.texi

>>> +++ b/gcc/doc/invoke.texi

>>> @@ -15288,7 +15288,7 @@ Specify the name of the target processor for which GCC should tune the

>>>   performance of the code.  Permissible values for this option are:

>>>   @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},

>>>   @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},

>>> -@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{falkor},

>>> +@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},

>>>   @samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},

>>>   @samp{thunderx}, @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},

>>>   @samp{tsv110}, @samp{thunderxt83}, @samp{thunderx2t99},
Christoph Müllner Nov. 20, 2018, 4:57 p.m. | #4
> On 20.11.2018, at 17:43, Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> wrote:

> 

> On 20/11/18 16:20, Christoph Müllner wrote:

>>> On 20.11.2018, at 17:14, Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> wrote:

>>> 

>>> Hi Christoph,

>>> 

>>> On 20/11/18 15:22, Christoph Muellner wrote:

>>>> Tested with "make check" and no regressions found.

>>>> 

>>>> *** gcc/ChangeLog ***

>>>> 

>>>> 2018-xx-xx  Christoph Muellner <christoph.muellner@theobroma-system.com>

>>>> 

>>>> 	* config/aarch64/aarch64-cores.def: Define emag.

>>>> 	* config/aarch64/aarch64-tune.md: Regenerated with emag.

>>>> 	* config/aarch64/aarch64.c (emag_tunings): New struct.

>>>> 	* doc/invoke.texi: Document mtune value.

>>>> 

>>>> Signed-off-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>

>>>> ---

>>>>  gcc/config/aarch64/aarch64-cores.def |  3 +++

>>>>  gcc/config/aarch64/aarch64-tune.md   |  2 +-

>>>>  gcc/config/aarch64/aarch64.c         | 25 +++++++++++++++++++++++++

>>>>  gcc/doc/invoke.texi                  |  2 +-

>>>>  4 files changed, 30 insertions(+), 2 deletions(-)

>>>> 

>>>> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def

>>>> index 1f3ac56..8eee97f 100644

>>>> --- a/gcc/config/aarch64/aarch64-cores.def

>>>> +++ b/gcc/config/aarch64/aarch64-cores.def

>>>> @@ -61,6 +61,9 @@ AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,  AARCH64_FL_FOR_ARCH

>>>>  AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a2, -1)

>>>>  AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a3, -1)

>>>>  +/* Ampere Computing cores. */

>>>> +AARCH64_CORE("emag",        emag,      xgene1,    8A,  AARCH64_FL_FOR_ARCH8, emag, 0x50, 0x000, 3)

>>>> +

>>> According to your previous reply, the 0x3 should be in the "PART" field, that is

>>> ..., 0x50, 0x3, -1)

>> Should have been "variant field" in the email.

>> The v2 patch is correct and tested:

> 

> I see, that does look correct then.

> 

>> processor       : 0

>> BogoMIPS        : 100.00

>> Features        : fp asimd evtstrm aes pmull sha1 sha2 crc32 cpuid

> 

> This line says that the CPU supports the "crc" and "crypto" option extensions by default so

> the 5th field should include AARCH64_FL_CRC | AARCH64_FL_CRYPTO so that the user gets them

> by default when they use -mcpu=emag.


Good hint!

Thanks,
Christoph

> 

> Thanks, this is really helpful.

> Kyrill

> 

>> CPU implementer : 0x50

>> CPU architecture: 8

>> CPU variant     : 0x3

>> CPU part        : 0x000

>> CPU revision    : 1

>> 

>> gcc -mcpu=native -Q --help=target

>>   -mcpu=                                emag+crypto+crc+aes+sha2+profile

>> 

>> Thanks,

>> Christoph

>> 

>> 

>> 

>>> Thanks,

>>> Kyrill

>>> 

>>>>  /* APM ('P') cores. */

>>>>  AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)

>>>>  diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md

>>>> index fade1d4..2fc7f03 100644

>>>> --- a/gcc/config/aarch64/aarch64-tune.md

>>>> +++ b/gcc/config/aarch64/aarch64-tune.md

>>>> @@ -1,5 +1,5 @@

>>>>  ;; -*- buffer-read-only: t -*-

>>>>  ;; Generated automatically by gentune.sh from aarch64-cores.def

>>>>  (define_attr "tune"

>>>> -	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"

>>>> +	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"

>>>>  	(const (symbol_ref "((enum attr_tune) aarch64_tune)")))

>>>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

>>>> index f7f88a9..995aafe 100644

>>>> --- a/gcc/config/aarch64/aarch64.c

>>>> +++ b/gcc/config/aarch64/aarch64.c

>>>> @@ -957,6 +957,31 @@ static const struct tune_params xgene1_tunings =

>>>>    &xgene1_prefetch_tune

>>>>  };

>>>>  +static const struct tune_params emag_tunings =

>>>> +{

>>>> +  &xgene1_extra_costs,

>>>> +  &xgene1_addrcost_table,

>>>> +  &xgene1_regmove_cost,

>>>> +  &xgene1_vector_cost,

>>>> +  &generic_branch_cost,

>>>> +  &xgene1_approx_modes,

>>>> +  6, /* memmov_cost  */

>>>> +  4, /* issue_rate  */

>>>> +  AARCH64_FUSE_NOTHING, /* fusible_ops  */

>>>> +  "16",	/* function_align.  */

>>>> +  "16",	/* jump_align.  */

>>>> +  "16",	/* loop_align.  */

>>>> +  2,	/* int_reassoc_width.  */

>>>> +  4,	/* fp_reassoc_width.  */

>>>> +  1,	/* vec_reassoc_width.  */

>>>> +  2,	/* min_div_recip_mul_sf.  */

>>>> +  2,	/* min_div_recip_mul_df.  */

>>>> +  17,	/* max_case_values.  */

>>>> +  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */

>>>> +  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS),	/* tune_flags.  */

>>>> +  &xgene1_prefetch_tune

>>>> +};

>>>> +

>>>>  static const struct tune_params qdf24xx_tunings =

>>>>  {

>>>>    &qdf24xx_extra_costs,

>>>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi

>>>> index e016dce..ac81fb2 100644

>>>> --- a/gcc/doc/invoke.texi

>>>> +++ b/gcc/doc/invoke.texi

>>>> @@ -15288,7 +15288,7 @@ Specify the name of the target processor for which GCC should tune the

>>>>  performance of the code.  Permissible values for this option are:

>>>>  @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},

>>>>  @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},

>>>> -@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{falkor},

>>>> +@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},

>>>>  @samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},

>>>>  @samp{thunderx}, @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},

>>>>  @samp{tsv110}, @samp{thunderxt83}, @samp{thunderx2t99},

Patch

diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 1f3ac56..8eee97f 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -61,6 +61,9 @@  AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,  AARCH64_FL_FOR_ARCH
 AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a2, -1)
 AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a3, -1)
 
+/* Ampere Computing cores. */
+AARCH64_CORE("emag",        emag,      xgene1,    8A,  AARCH64_FL_FOR_ARCH8, emag, 0x50, 0x000, 3)
+
 /* APM ('P') cores. */
 AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
 
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index fade1d4..2fc7f03 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@ 
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
+	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
 	(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index f7f88a9..995aafe 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -957,6 +957,31 @@  static const struct tune_params xgene1_tunings =
   &xgene1_prefetch_tune
 };
 
+static const struct tune_params emag_tunings =
+{
+  &xgene1_extra_costs,
+  &xgene1_addrcost_table,
+  &xgene1_regmove_cost,
+  &xgene1_vector_cost,
+  &generic_branch_cost,
+  &xgene1_approx_modes,
+  6, /* memmov_cost  */
+  4, /* issue_rate  */
+  AARCH64_FUSE_NOTHING, /* fusible_ops  */
+  "16",	/* function_align.  */
+  "16",	/* jump_align.  */
+  "16",	/* loop_align.  */
+  2,	/* int_reassoc_width.  */
+  4,	/* fp_reassoc_width.  */
+  1,	/* vec_reassoc_width.  */
+  2,	/* min_div_recip_mul_sf.  */
+  2,	/* min_div_recip_mul_df.  */
+  17,	/* max_case_values.  */
+  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
+  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS),	/* tune_flags.  */
+  &xgene1_prefetch_tune
+};
+
 static const struct tune_params qdf24xx_tunings =
 {
   &qdf24xx_extra_costs,
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e016dce..ac81fb2 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -15288,7 +15288,7 @@  Specify the name of the target processor for which GCC should tune the
 performance of the code.  Permissible values for this option are:
 @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
 @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
-@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{falkor},
+@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},
 @samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},
 @samp{thunderx}, @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
 @samp{tsv110}, @samp{thunderxt83}, @samp{thunderx2t99},