[1/2] Add support for IVOPT

Message ID 1557887990-18668-2-git-send-email-kugan.vivekanandarajah@linaro.org
State New
Headers show
Series
  • [1/2] Add support for IVOPT
Related show

Commit Message

Kugan Vivekanandarajah May 15, 2019, 2:39 a.m.
From: Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>


gcc/ChangeLog:

2019-05-15  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

	PR target/88834
	* tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle
	IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.
	(find_interesting_uses_stmt): Likewise.
	(get_alias_ptr_type_for_ptr_address): Likewise.
	(add_iv_candidate_for_use): Add scaled index candidate if useful.

Change-Id: I8e8151fe2dde2845dedf38b090103694da6fc9d1
---
 gcc/tree-ssa-loop-ivopts.c | 60 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 59 insertions(+), 1 deletion(-)

-- 
2.7.4

Comments

Richard Sandiford May 15, 2019, 6:57 a.m. | #1
Thanks for doing this.

kugan.vivekanandarajah@linaro.org writes:
> From: Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>

>

> gcc/ChangeLog:

>

> 2019-05-15  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

>

> 	PR target/88834

> 	* tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle

> 	IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.

> 	(find_interesting_uses_stmt): Likewise.

> 	(get_alias_ptr_type_for_ptr_address): Likewise.

> 	(add_iv_candidate_for_use): Add scaled index candidate if useful.

>

> Change-Id: I8e8151fe2dde2845dedf38b090103694da6fc9d1

> ---

>  gcc/tree-ssa-loop-ivopts.c | 60 +++++++++++++++++++++++++++++++++++++++++++++-

>  1 file changed, 59 insertions(+), 1 deletion(-)

>

> diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c

> index 9864b59..115a70c 100644

> --- a/gcc/tree-ssa-loop-ivopts.c

> +++ b/gcc/tree-ssa-loop-ivopts.c

> @@ -2451,11 +2451,13 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p)

>    switch (gimple_call_internal_fn (call))

>      {

>      case IFN_MASK_LOAD:

> +    case IFN_MASK_LOAD_LANES:

>        if (op_p == gimple_call_arg_ptr (call, 0))

>  	return TREE_TYPE (gimple_call_lhs (call));

>        return NULL_TREE;

>  

>      case IFN_MASK_STORE:

> +    case IFN_MASK_STORE_LANES:

>        if (op_p == gimple_call_arg_ptr (call, 0))

>  	return TREE_TYPE (gimple_call_arg (call, 3));

>        return NULL_TREE;

> @@ -2545,7 +2547,7 @@ find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)

>  	  return;

>  	}

>  

> -      /* TODO -- we should also handle address uses of type

> +      /* TODO -- we should also handle all address uses of type

>  

>  	 memory = call (whatever);

>  

> @@ -2553,6 +2555,27 @@ find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)

>  

>  	 call (memory).  */

>      }

> +  else if (is_gimple_call (stmt))

> +    {

> +      gcall *call = dyn_cast <gcall *> (stmt);

> +      if (call

> +	  && gimple_call_internal_p (call)

> +	  && (gimple_call_internal_fn (call) == IFN_MASK_LOAD_LANES

> +	      || gimple_call_internal_fn (call) == IFN_MASK_STORE_LANES))

> +	{

> +	  tree *arg = gimple_call_arg_ptr (call, 0);

> +	  struct iv *civ = get_iv (data, *arg);

> +	  tree mem_type = get_mem_type_for_internal_fn (call, arg);

> +	  if (civ && mem_type)

> +	    {

> +	      civ = alloc_iv (data, civ->base, civ->step);

> +	      record_group_use (data, arg, civ, stmt, USE_PTR_ADDRESS,

> +				mem_type);

> +	      return;

> +	    }

> +	}

> +    }

> +


Why do you need to handle this specially?  Does:

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      if (!find_address_like_use (data, stmt, use_p->use, iv))
	find_interesting_uses_op (data, op);
    }

not do the right thing for the load/store lane case?

> @@ -3500,6 +3523,39 @@ add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)

>      basetype = sizetype;

>    record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);

>  

> +  /* Compare the cost of an address with an unscaled index with the cost of

> +    an address with a scaled index and add candidate if useful. */

> +  if (use != NULL && use->type == USE_PTR_ADDRESS)


I think we want this for all address uses.  E.g. for SVE, masked and
unmasked accesses would both benefit.

> +    {

> +      struct mem_address parts = {NULL_TREE, integer_one_node,

> +				  NULL_TREE, NULL_TREE, NULL_TREE};


Might be better to use "= {}" and initialise the fields that matter by
assignment.  As it stands this uses integer_one_node as the base, but I
couldn't tell if that was deliberate.

> +      poly_uint64 temp;

> +      poly_int64 fact;

> +      bool speed = optimize_loop_for_speed_p (data->current_loop);

> +      poly_int64 poly_step = tree_to_poly_int64 (iv->step);


The step could be variable, so we should check whether this holds
using poly_int_tree_p.

> +      machine_mode mem_mode = TYPE_MODE (use->mem_type);

> +      addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));

> +

> +      fact = GET_MODE_SIZE (GET_MODE_INNER (TYPE_MODE (use->mem_type)));


This is simpler as:

  GET_MODE_UNIT_SIZE (TYPE_MODE (use->mem_type));

It's always a compile-time constant, so "fact" can be int/unsigned int
rather than poly_int64.

> +      parts.index = integer_one_node;

> +

> +      if (fact.is_constant ()

> +	  && can_div_trunc_p (poly_step, fact, &temp))


I think it only makes sense to add the candidate if poly_step is an exact
multiple of fact, so I think we should use multiple_p here.

> +	{

> +	  /* Addressing mode "base + index".  */

> +	  rtx addr = addr_for_mem_ref (&parts, as, false);

> +	  unsigned cost = address_cost (addr, mem_mode, as, speed);

> +	  tree step = wide_int_to_tree (sizetype,

> +					exact_div (poly_step, fact));


The multiple_p mentioned above would provide this result too.
We only need to calculate "step" if we decided to add the candidate,
so I think it should be in the "if" below.

> +	  parts.step = wide_int_to_tree (sizetype, fact);

> +	  /* Addressing mode "base + index << scale".  */

> +	  addr = addr_for_mem_ref (&parts, as, false);

> +	  unsigned new_cost = address_cost (addr, mem_mode, as, speed);

> +	  if (new_cost < cost)


I think it'd be worth splitting the guts of this check out into a helper,
since it's something that could be reusable.  Maybe:

  unsigned int preferred_mem_scalar_factor (machine_mode);

with the only supported values for now being 1 and GET_MODE_INNER_SIZE.
(Could be extended later if a target needs it.)

Thanks,
Richard
Richard Sandiford May 16, 2019, 7:25 a.m. | #2
Richard Sandiford <richard.sandiford@arm.com> writes:
> kugan.vivekanandarajah@linaro.org writes:

>> +	  parts.step = wide_int_to_tree (sizetype, fact);

>> +	  /* Addressing mode "base + index << scale".  */

>> +	  addr = addr_for_mem_ref (&parts, as, false);

>> +	  unsigned new_cost = address_cost (addr, mem_mode, as, speed);

>> +	  if (new_cost < cost)

>

> I think it'd be worth splitting the guts of this check out into a helper,

> since it's something that could be reusable.  Maybe:

>

>   unsigned int preferred_mem_scalar_factor (machine_mode);

>

> with the only supported values for now being 1 and GET_MODE_INNER_SIZE.

> (Could be extended later if a target needs it.)


Er, I did of course mean "scale_factor" :-)
Richard Biener May 16, 2019, 11:14 a.m. | #3
On Wed, May 15, 2019 at 4:40 AM <kugan.vivekanandarajah@linaro.org> wrote:
>

> From: Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>

>

> gcc/ChangeLog:

>

> 2019-05-15  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

>

>         PR target/88834

>         * tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle

>         IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.

>         (find_interesting_uses_stmt): Likewise.

>         (get_alias_ptr_type_for_ptr_address): Likewise.

>         (add_iv_candidate_for_use): Add scaled index candidate if useful.

>

> Change-Id: I8e8151fe2dde2845dedf38b090103694da6fc9d1

> ---

>  gcc/tree-ssa-loop-ivopts.c | 60 +++++++++++++++++++++++++++++++++++++++++++++-

>  1 file changed, 59 insertions(+), 1 deletion(-)

>

> diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c

> index 9864b59..115a70c 100644

> --- a/gcc/tree-ssa-loop-ivopts.c

> +++ b/gcc/tree-ssa-loop-ivopts.c

> @@ -2451,11 +2451,13 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p)

>    switch (gimple_call_internal_fn (call))

>      {

>      case IFN_MASK_LOAD:

> +    case IFN_MASK_LOAD_LANES:

>        if (op_p == gimple_call_arg_ptr (call, 0))

>         return TREE_TYPE (gimple_call_lhs (call));

>        return NULL_TREE;

>

>      case IFN_MASK_STORE:

> +    case IFN_MASK_STORE_LANES:

>        if (op_p == gimple_call_arg_ptr (call, 0))

>         return TREE_TYPE (gimple_call_arg (call, 3));

>        return NULL_TREE;

> @@ -2545,7 +2547,7 @@ find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)

>           return;

>         }

>

> -      /* TODO -- we should also handle address uses of type

> +      /* TODO -- we should also handle all address uses of type

>

>          memory = call (whatever);

>

> @@ -2553,6 +2555,27 @@ find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)

>

>          call (memory).  */

>      }

> +  else if (is_gimple_call (stmt))

> +    {

> +      gcall *call = dyn_cast <gcall *> (stmt);

> +      if (call


that's testing things twice, just do

       else if (gcall *call = dyn_cast <gcall *> (stmt))
         {
...

no other comments besides why do you need _LANES handling here where
the w/o _LANES handling didn't need anything.

> +         && gimple_call_internal_p (call)

> +         && (gimple_call_internal_fn (call) == IFN_MASK_LOAD_LANES

> +             || gimple_call_internal_fn (call) == IFN_MASK_STORE_LANES))

> +       {

> +         tree *arg = gimple_call_arg_ptr (call, 0);

> +         struct iv *civ = get_iv (data, *arg);

> +         tree mem_type = get_mem_type_for_internal_fn (call, arg);

> +         if (civ && mem_type)

> +           {

> +             civ = alloc_iv (data, civ->base, civ->step);

> +             record_group_use (data, arg, civ, stmt, USE_PTR_ADDRESS,

> +                               mem_type);

> +             return;

> +           }

> +       }

> +    }

> +

>

>    if (gimple_code (stmt) == GIMPLE_PHI

>        && gimple_bb (stmt) == data->current_loop->header)

> @@ -3500,6 +3523,39 @@ add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)

>      basetype = sizetype;

>    record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);

>

> +  /* Compare the cost of an address with an unscaled index with the cost of

> +    an address with a scaled index and add candidate if useful. */

> +  if (use != NULL && use->type == USE_PTR_ADDRESS)

> +    {

> +      struct mem_address parts = {NULL_TREE, integer_one_node,

> +                                 NULL_TREE, NULL_TREE, NULL_TREE};

> +      poly_uint64 temp;

> +      poly_int64 fact;

> +      bool speed = optimize_loop_for_speed_p (data->current_loop);

> +      poly_int64 poly_step = tree_to_poly_int64 (iv->step);

> +      machine_mode mem_mode = TYPE_MODE (use->mem_type);

> +      addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));

> +

> +      fact = GET_MODE_SIZE (GET_MODE_INNER (TYPE_MODE (use->mem_type)));

> +      parts.index = integer_one_node;

> +

> +      if (fact.is_constant ()

> +         && can_div_trunc_p (poly_step, fact, &temp))

> +       {

> +         /* Addressing mode "base + index".  */

> +         rtx addr = addr_for_mem_ref (&parts, as, false);

> +         unsigned cost = address_cost (addr, mem_mode, as, speed);

> +         tree step = wide_int_to_tree (sizetype,

> +                                       exact_div (poly_step, fact));

> +         parts.step = wide_int_to_tree (sizetype, fact);

> +         /* Addressing mode "base + index << scale".  */

> +         addr = addr_for_mem_ref (&parts, as, false);

> +         unsigned new_cost = address_cost (addr, mem_mode, as, speed);

> +         if (new_cost < cost)

> +           add_candidate (data, size_int (0), step, true, NULL);

> +       }

> +    }

> +

>    /* Record common candidate with constant offset stripped in base.

>       Like the use itself, we also add candidate directly for it.  */

>    base = strip_offset (iv->base, &offset);

> @@ -7112,6 +7168,8 @@ get_alias_ptr_type_for_ptr_address (iv_use *use)

>      {

>      case IFN_MASK_LOAD:

>      case IFN_MASK_STORE:

> +    case IFN_MASK_LOAD_LANES:

> +    case IFN_MASK_STORE_LANES:

>        /* The second argument contains the correct alias type.  */

>        gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));

>        return TREE_TYPE (gimple_call_arg (call, 1));

> --

> 2.7.4

>
Kugan Vivekanandarajah May 16, 2019, 11:35 p.m. | #4
Hi Richard,

On Wed, 15 May 2019 at 16:57, Richard Sandiford
<richard.sandiford@arm.com> wrote:
>

> Thanks for doing this.

>

> kugan.vivekanandarajah@linaro.org writes:

> > From: Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>

> >

> > gcc/ChangeLog:

> >

> > 2019-05-15  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

> >

> >       PR target/88834

> >       * tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle

> >       IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.

> >       (find_interesting_uses_stmt): Likewise.

> >       (get_alias_ptr_type_for_ptr_address): Likewise.

> >       (add_iv_candidate_for_use): Add scaled index candidate if useful.

> >

> > Change-Id: I8e8151fe2dde2845dedf38b090103694da6fc9d1

> > ---

> >  gcc/tree-ssa-loop-ivopts.c | 60 +++++++++++++++++++++++++++++++++++++++++++++-

> >  1 file changed, 59 insertions(+), 1 deletion(-)

> >

> > diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c

> > index 9864b59..115a70c 100644

> > --- a/gcc/tree-ssa-loop-ivopts.c

> > +++ b/gcc/tree-ssa-loop-ivopts.c

> > @@ -2451,11 +2451,13 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p)

> >    switch (gimple_call_internal_fn (call))

> >      {

> >      case IFN_MASK_LOAD:

> > +    case IFN_MASK_LOAD_LANES:

> >        if (op_p == gimple_call_arg_ptr (call, 0))

> >       return TREE_TYPE (gimple_call_lhs (call));

> >        return NULL_TREE;

> >

> >      case IFN_MASK_STORE:

> > +    case IFN_MASK_STORE_LANES:

> >        if (op_p == gimple_call_arg_ptr (call, 0))

> >       return TREE_TYPE (gimple_call_arg (call, 3));

> >        return NULL_TREE;

> > @@ -2545,7 +2547,7 @@ find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)

> >         return;

> >       }

> >

> > -      /* TODO -- we should also handle address uses of type

> > +      /* TODO -- we should also handle all address uses of type

> >

> >        memory = call (whatever);

> >

> > @@ -2553,6 +2555,27 @@ find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)

> >

> >        call (memory).  */

> >      }

> > +  else if (is_gimple_call (stmt))

> > +    {

> > +      gcall *call = dyn_cast <gcall *> (stmt);

> > +      if (call

> > +       && gimple_call_internal_p (call)

> > +       && (gimple_call_internal_fn (call) == IFN_MASK_LOAD_LANES

> > +           || gimple_call_internal_fn (call) == IFN_MASK_STORE_LANES))

> > +     {

> > +       tree *arg = gimple_call_arg_ptr (call, 0);

> > +       struct iv *civ = get_iv (data, *arg);

> > +       tree mem_type = get_mem_type_for_internal_fn (call, arg);

> > +       if (civ && mem_type)

> > +         {

> > +           civ = alloc_iv (data, civ->base, civ->step);

> > +           record_group_use (data, arg, civ, stmt, USE_PTR_ADDRESS,

> > +                             mem_type);

> > +           return;

> > +         }

> > +     }

> > +    }

> > +

>

> Why do you need to handle this specially?  Does:

>

>   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)

>     {

>       op = USE_FROM_PTR (use_p);

>

>       if (TREE_CODE (op) != SSA_NAME)

>         continue;

>

>       iv = get_iv (data, op);

>       if (!iv)

>         continue;

>

>       if (!find_address_like_use (data, stmt, use_p->use, iv))

>         find_interesting_uses_op (data, op);

>     }

>

> not do the right thing for the load/store lane case?

Right, I initially thought load lanes should be handled differently
but turned out they can be done the same way. I should have removed
it. Done now.

>

> > @@ -3500,6 +3523,39 @@ add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)

> >      basetype = sizetype;

> >    record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);

> >

> > +  /* Compare the cost of an address with an unscaled index with the cost of

> > +    an address with a scaled index and add candidate if useful. */

> > +  if (use != NULL && use->type == USE_PTR_ADDRESS)

>

> I think we want this for all address uses.  E.g. for SVE, masked and

> unmasked accesses would both benefit.

OK.

>

> > +    {

> > +      struct mem_address parts = {NULL_TREE, integer_one_node,

> > +                               NULL_TREE, NULL_TREE, NULL_TREE};

>

> Might be better to use "= {}" and initialise the fields that matter by

> assignment.  As it stands this uses integer_one_node as the base, but I

> couldn't tell if that was deliberate.


I just copied this part from get_address_cost, similar to what is done
there. I have now changed the way you suggested but using the values
used in get_address_cost.
>

> > +      poly_uint64 temp;

> > +      poly_int64 fact;

> > +      bool speed = optimize_loop_for_speed_p (data->current_loop);

> > +      poly_int64 poly_step = tree_to_poly_int64 (iv->step);

>

> The step could be variable, so we should check whether this holds

> using poly_int_tree_p.

OK.

>

> > +      machine_mode mem_mode = TYPE_MODE (use->mem_type);

> > +      addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));

> > +

> > +      fact = GET_MODE_SIZE (GET_MODE_INNER (TYPE_MODE (use->mem_type)));

>

> This is simpler as:

>

>   GET_MODE_UNIT_SIZE (TYPE_MODE (use->mem_type));

>

OK.

> It's always a compile-time constant, so "fact" can be int/unsigned int

> rather than poly_int64.

OK.

>

> > +      parts.index = integer_one_node;

> > +

> > +      if (fact.is_constant ()

> > +       && can_div_trunc_p (poly_step, fact, &temp))

>

> I think it only makes sense to add the candidate if poly_step is an exact

> multiple of fact, so I think we should use multiple_p here.

OK.
>

> > +     {

> > +       /* Addressing mode "base + index".  */

> > +       rtx addr = addr_for_mem_ref (&parts, as, false);

> > +       unsigned cost = address_cost (addr, mem_mode, as, speed);

> > +       tree step = wide_int_to_tree (sizetype,

> > +                                     exact_div (poly_step, fact));

>

> The multiple_p mentioned above would provide this result too.

> We only need to calculate "step" if we decided to add the candidate,

> so I think it should be in the "if" below.

OK.
>

> > +       parts.step = wide_int_to_tree (sizetype, fact);

> > +       /* Addressing mode "base + index << scale".  */

> > +       addr = addr_for_mem_ref (&parts, as, false);

> > +       unsigned new_cost = address_cost (addr, mem_mode, as, speed);

> > +       if (new_cost < cost)

>

> I think it'd be worth splitting the guts of this check out into a helper,

> since it's something that could be reusable.  Maybe:

>

>   unsigned int preferred_mem_scalar_factor (machine_mode);

>

> with the only supported values for now being 1 and GET_MODE_INNER_SIZE.

> (Could be extended later if a target needs it.)

Done in the attached patch. Not tested yet but does this look good if
no regressions?

Thanks,
Kugan

> Thanks,

> Richard
Kugan Vivekanandarajah May 17, 2019, 1:16 a.m. | #5
Hi Richard,

On Thu, 16 May 2019 at 21:14, Richard Biener <richard.guenther@gmail.com> wrote:
>

> On Wed, May 15, 2019 at 4:40 AM <kugan.vivekanandarajah@linaro.org> wrote:

> >

> > From: Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>

> >

> > gcc/ChangeLog:

> >

> > 2019-05-15  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

> >

> >         PR target/88834

> >         * tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle

> >         IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.

> >         (find_interesting_uses_stmt): Likewise.

> >         (get_alias_ptr_type_for_ptr_address): Likewise.

> >         (add_iv_candidate_for_use): Add scaled index candidate if useful.

> >

> > Change-Id: I8e8151fe2dde2845dedf38b090103694da6fc9d1

> > ---

> >  gcc/tree-ssa-loop-ivopts.c | 60 +++++++++++++++++++++++++++++++++++++++++++++-

> >  1 file changed, 59 insertions(+), 1 deletion(-)

> >

> > diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c

> > index 9864b59..115a70c 100644

> > --- a/gcc/tree-ssa-loop-ivopts.c

> > +++ b/gcc/tree-ssa-loop-ivopts.c

> > @@ -2451,11 +2451,13 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p)

> >    switch (gimple_call_internal_fn (call))

> >      {

> >      case IFN_MASK_LOAD:

> > +    case IFN_MASK_LOAD_LANES:

> >        if (op_p == gimple_call_arg_ptr (call, 0))

> >         return TREE_TYPE (gimple_call_lhs (call));

> >        return NULL_TREE;

> >

> >      case IFN_MASK_STORE:

> > +    case IFN_MASK_STORE_LANES:

> >        if (op_p == gimple_call_arg_ptr (call, 0))

> >         return TREE_TYPE (gimple_call_arg (call, 3));

> >        return NULL_TREE;

> > @@ -2545,7 +2547,7 @@ find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)

> >           return;

> >         }

> >

> > -      /* TODO -- we should also handle address uses of type

> > +      /* TODO -- we should also handle all address uses of type

> >

> >          memory = call (whatever);

> >

> > @@ -2553,6 +2555,27 @@ find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)

> >

> >          call (memory).  */

> >      }

> > +  else if (is_gimple_call (stmt))

> > +    {

> > +      gcall *call = dyn_cast <gcall *> (stmt);

> > +      if (call

>

> that's testing things twice, just do

>

>        else if (gcall *call = dyn_cast <gcall *> (stmt))

>          {

> ...

>

> no other comments besides why do you need _LANES handling here where

> the w/o _LANES handling didn't need anything.

Right,  I have now changed this in the revised patch.

Thanks,
Kugan

>

> > +         && gimple_call_internal_p (call)

> > +         && (gimple_call_internal_fn (call) == IFN_MASK_LOAD_LANES

> > +             || gimple_call_internal_fn (call) == IFN_MASK_STORE_LANES))

> > +       {

> > +         tree *arg = gimple_call_arg_ptr (call, 0);

> > +         struct iv *civ = get_iv (data, *arg);

> > +         tree mem_type = get_mem_type_for_internal_fn (call, arg);

> > +         if (civ && mem_type)

> > +           {

> > +             civ = alloc_iv (data, civ->base, civ->step);

> > +             record_group_use (data, arg, civ, stmt, USE_PTR_ADDRESS,

> > +                               mem_type);

> > +             return;

> > +           }

> > +       }

> > +    }

> > +

> >

> >    if (gimple_code (stmt) == GIMPLE_PHI

> >        && gimple_bb (stmt) == data->current_loop->header)

> > @@ -3500,6 +3523,39 @@ add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)

> >      basetype = sizetype;

> >    record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);

> >

> > +  /* Compare the cost of an address with an unscaled index with the cost of

> > +    an address with a scaled index and add candidate if useful. */

> > +  if (use != NULL && use->type == USE_PTR_ADDRESS)

> > +    {

> > +      struct mem_address parts = {NULL_TREE, integer_one_node,

> > +                                 NULL_TREE, NULL_TREE, NULL_TREE};

> > +      poly_uint64 temp;

> > +      poly_int64 fact;

> > +      bool speed = optimize_loop_for_speed_p (data->current_loop);

> > +      poly_int64 poly_step = tree_to_poly_int64 (iv->step);

> > +      machine_mode mem_mode = TYPE_MODE (use->mem_type);

> > +      addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));

> > +

> > +      fact = GET_MODE_SIZE (GET_MODE_INNER (TYPE_MODE (use->mem_type)));

> > +      parts.index = integer_one_node;

> > +

> > +      if (fact.is_constant ()

> > +         && can_div_trunc_p (poly_step, fact, &temp))

> > +       {

> > +         /* Addressing mode "base + index".  */

> > +         rtx addr = addr_for_mem_ref (&parts, as, false);

> > +         unsigned cost = address_cost (addr, mem_mode, as, speed);

> > +         tree step = wide_int_to_tree (sizetype,

> > +                                       exact_div (poly_step, fact));

> > +         parts.step = wide_int_to_tree (sizetype, fact);

> > +         /* Addressing mode "base + index << scale".  */

> > +         addr = addr_for_mem_ref (&parts, as, false);

> > +         unsigned new_cost = address_cost (addr, mem_mode, as, speed);

> > +         if (new_cost < cost)

> > +           add_candidate (data, size_int (0), step, true, NULL);

> > +       }

> > +    }

> > +

> >    /* Record common candidate with constant offset stripped in base.

> >       Like the use itself, we also add candidate directly for it.  */

> >    base = strip_offset (iv->base, &offset);

> > @@ -7112,6 +7168,8 @@ get_alias_ptr_type_for_ptr_address (iv_use *use)

> >      {

> >      case IFN_MASK_LOAD:

> >      case IFN_MASK_STORE:

> > +    case IFN_MASK_LOAD_LANES:

> > +    case IFN_MASK_STORE_LANES:

> >        /* The second argument contains the correct alias type.  */

> >        gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));

> >        return TREE_TYPE (gimple_call_arg (call, 1));

> > --

> > 2.7.4

> >
Richard Sandiford May 17, 2019, 8:47 a.m. | #6
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org> writes:
> [...]

>> > +    {

>> > +      struct mem_address parts = {NULL_TREE, integer_one_node,

>> > +                               NULL_TREE, NULL_TREE, NULL_TREE};

>>

>> Might be better to use "= {}" and initialise the fields that matter by

>> assignment.  As it stands this uses integer_one_node as the base, but I

>> couldn't tell if that was deliberate.

>

> I just copied this part from get_address_cost, similar to what is done

> there.


Ah, sorry :-)

> I have now changed the way you suggested but using the values

> used in get_address_cost.


Thanks.

> [...]

> @@ -3479,6 +3481,35 @@ add_iv_candidate_derived_from_uses (struct ivopts_data *data)

>    data->iv_common_cands.truncate (0);

>  }

>  

> +/* Return the preferred mem scale factor for accessing MEM_MODE

> +   of BASE in LOOP.  */

> +static unsigned int

> +preferred_mem_scale_factor (struct loop *loop,

> +			    tree base, machine_mode mem_mode)


IMO this should live in tree-ssa-address.c instead.

The only use of "loop" is to test for size vs. speed, but other callers
might want to make that decision based on individual blocks, so I think
it would make sense to pass a "speed" bool instead.  Probably also worth
making it the last parameter, so that the order is consistent with
address_cost (though probably then inconsistent with something else :-)).

> [...]

> @@ -3500,6 +3531,28 @@ add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)

>      basetype = sizetype;

>    record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);

>  

> +  /* Compare the cost of an address with an unscaled index with the cost of

> +    an address with a scaled index and add candidate if useful. */

> +  if (use != NULL

> +      && poly_int_tree_p (iv->step)

> +      && tree_fits_poly_int64_p (iv->step)

> +      && address_p (use->type))

> +    {

> +      poly_int64 new_step;

> +      poly_int64 poly_step = tree_to_poly_int64 (iv->step);


This should be:

  poly_int64 step;
  if (use != NULL
      && poly_int_tree_p (iv->step, &step)
      && address_p (use->type))
    {
      poly_int64 new_step;

> +      unsigned int fact

> +	= preferred_mem_scale_factor (data->current_loop,

> +				       use->iv->base,

> +				       TYPE_MODE (use->mem_type));

> +

> +      if ((fact != 1)

> +	  && multiple_p (poly_step, fact, &new_step))


Should be no brackets around "fact != 1".

> [...]


Looks really good to me otherwise, thanks.  Bin, any comments?

Richard
Kugan Vivekanandarajah May 22, 2019, 2:12 a.m. | #7
Hi Richard,


On Fri, 17 May 2019 at 18:47, Richard Sandiford
<richard.sandiford@arm.com> wrote:
>

> Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org> writes:

> > [...]

> >> > +    {

> >> > +      struct mem_address parts = {NULL_TREE, integer_one_node,

> >> > +                               NULL_TREE, NULL_TREE, NULL_TREE};

> >>

> >> Might be better to use "= {}" and initialise the fields that matter by

> >> assignment.  As it stands this uses integer_one_node as the base, but I

> >> couldn't tell if that was deliberate.

> >

> > I just copied this part from get_address_cost, similar to what is done

> > there.

>

> Ah, sorry :-)

>

> > I have now changed the way you suggested but using the values

> > used in get_address_cost.

>

> Thanks.

>

> > [...]

> > @@ -3479,6 +3481,35 @@ add_iv_candidate_derived_from_uses (struct ivopts_data *data)

> >    data->iv_common_cands.truncate (0);

> >  }

> >

> > +/* Return the preferred mem scale factor for accessing MEM_MODE

> > +   of BASE in LOOP.  */

> > +static unsigned int

> > +preferred_mem_scale_factor (struct loop *loop,

> > +                         tree base, machine_mode mem_mode)

>

> IMO this should live in tree-ssa-address.c instead.

>

> The only use of "loop" is to test for size vs. speed, but other callers

> might want to make that decision based on individual blocks, so I think

> it would make sense to pass a "speed" bool instead.  Probably also worth

> making it the last parameter, so that the order is consistent with

> address_cost (though probably then inconsistent with something else :-)).

>

> > [...]

> > @@ -3500,6 +3531,28 @@ add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)

> >      basetype = sizetype;

> >    record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);

> >

> > +  /* Compare the cost of an address with an unscaled index with the cost of

> > +    an address with a scaled index and add candidate if useful. */

> > +  if (use != NULL

> > +      && poly_int_tree_p (iv->step)

> > +      && tree_fits_poly_int64_p (iv->step)

> > +      && address_p (use->type))

> > +    {

> > +      poly_int64 new_step;

> > +      poly_int64 poly_step = tree_to_poly_int64 (iv->step);

>

> This should be:

>

>   poly_int64 step;

>   if (use != NULL

>       && poly_int_tree_p (iv->step, &step)

>       && address_p (use->type))

>     {

>       poly_int64 new_step;

>

> > +      unsigned int fact

> > +     = preferred_mem_scale_factor (data->current_loop,

> > +                                    use->iv->base,

> > +                                    TYPE_MODE (use->mem_type));

> > +

> > +      if ((fact != 1)

> > +       && multiple_p (poly_step, fact, &new_step))

>

> Should be no brackets around "fact != 1".

>

> > [...]

>

> Looks really good to me otherwise, thanks.  Bin, any comments?

Revised patch which handles the above review comments is attached.

Thanks,
Kugan

> Richard
From 6a146662fab39de876de332bacbb1a3300caefb8 Mon Sep 17 00:00:00 2001
From: Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
Date: Wed, 15 May 2019 09:16:43 +1000
Subject: [PATCH 1/2] Add support for IVOPT

gcc/ChangeLog:

2019-05-15  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

	PR target/88834
	* tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle
	IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.
	(get_alias_ptr_type_for_ptr_address): Likewise.
	(add_iv_candidate_for_use): Add scaled index candidate if useful.
	* tree-ssa-address.c (preferred_mem_scale_factor): New.

Change-Id: Ie47b1722dc4fb430f07dadb8a58385759e75df58
---
 gcc/tree-ssa-address.c     | 28 ++++++++++++++++++++++++++++
 gcc/tree-ssa-address.h     |  3 +++
 gcc/tree-ssa-loop-ivopts.c | 26 +++++++++++++++++++++++++-
 3 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-address.c b/gcc/tree-ssa-address.c
index 1c17e93..fdb6619 100644
--- a/gcc/tree-ssa-address.c
+++ b/gcc/tree-ssa-address.c
@@ -1127,6 +1127,34 @@ maybe_fold_tmr (tree ref)
   return new_ref;
 }
 
+/* Return the preferred mem scale factor for accessing MEM_MODE
+   of BASE which is optimized for SPEED.  */
+unsigned int
+preferred_mem_scale_factor (tree base, machine_mode mem_mode,
+			    bool speed)
+{
+  struct mem_address parts = {};
+  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (base));
+  unsigned int fact = GET_MODE_UNIT_SIZE (mem_mode);
+
+  /* Addressing mode "base + index".  */
+  parts.index = integer_one_node;
+  parts.base = integer_one_node;
+  rtx addr = addr_for_mem_ref (&parts, as, false);
+  unsigned cost = address_cost (addr, mem_mode, as, speed);
+
+  /* Addressing mode "base + index << scale".  */
+  parts.step = wide_int_to_tree (sizetype, fact);
+  addr = addr_for_mem_ref (&parts, as, false);
+  unsigned new_cost = address_cost (addr, mem_mode, as, speed);
+
+  /* Compare the cost of an address with an unscaled index with
+     a scaled index and return factor if useful. */
+  if (new_cost < cost)
+    return GET_MODE_UNIT_SIZE (mem_mode);
+  return 1;
+}
+
 /* Dump PARTS to FILE.  */
 
 extern void dump_mem_address (FILE *, struct mem_address *);
diff --git a/gcc/tree-ssa-address.h b/gcc/tree-ssa-address.h
index 6fa4eae..9812f36 100644
--- a/gcc/tree-ssa-address.h
+++ b/gcc/tree-ssa-address.h
@@ -39,4 +39,7 @@ tree create_mem_ref (gimple_stmt_iterator *, tree,
 extern void copy_ref_info (tree, tree);
 tree maybe_fold_tmr (tree);
 
+extern unsigned int preferred_mem_scale_factor (tree base,
+						machine_mode mem_mode,
+						bool speed);
 #endif /* GCC_TREE_SSA_ADDRESS_H */
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 9864b59..e6462fe 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -2451,11 +2451,13 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p)
   switch (gimple_call_internal_fn (call))
     {
     case IFN_MASK_LOAD:
+    case IFN_MASK_LOAD_LANES:
       if (op_p == gimple_call_arg_ptr (call, 0))
 	return TREE_TYPE (gimple_call_lhs (call));
       return NULL_TREE;
 
     case IFN_MASK_STORE:
+    case IFN_MASK_STORE_LANES:
       if (op_p == gimple_call_arg_ptr (call, 0))
 	return TREE_TYPE (gimple_call_arg (call, 3));
       return NULL_TREE;
@@ -3479,7 +3481,7 @@ add_iv_candidate_derived_from_uses (struct ivopts_data *data)
   data->iv_common_cands.truncate (0);
 }
 
-/* Adds candidates based on the value of USE's iv.  */
+  /* Adds candidates based on the value of USE's iv.  */
 
 static void
 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
@@ -3500,6 +3502,26 @@ add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
     basetype = sizetype;
   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
 
+  /* Compare the cost of an address with an unscaled index with the cost of
+    an address with a scaled index and add candidate if useful. */
+  poly_int64 step;
+  if (use != NULL
+      && poly_int_tree_p (iv->step, &step)
+      && address_p (use->type))
+    {
+      poly_int64 new_step;
+      unsigned int fact = preferred_mem_scale_factor
+	(use->iv->base,
+	 TYPE_MODE (use->mem_type),
+	 optimize_loop_for_speed_p (data->current_loop));
+
+      if (fact != 1
+	  && multiple_p (step, fact, &new_step))
+	add_candidate (data, size_int (0),
+		       wide_int_to_tree (sizetype, new_step),
+		       true, NULL);
+    }
+
   /* Record common candidate with constant offset stripped in base.
      Like the use itself, we also add candidate directly for it.  */
   base = strip_offset (iv->base, &offset);
@@ -7112,6 +7134,8 @@ get_alias_ptr_type_for_ptr_address (iv_use *use)
     {
     case IFN_MASK_LOAD:
     case IFN_MASK_STORE:
+    case IFN_MASK_LOAD_LANES:
+    case IFN_MASK_STORE_LANES:
       /* The second argument contains the correct alias type.  */
       gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
       return TREE_TYPE (gimple_call_arg (call, 1));

Patch

diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 9864b59..115a70c 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -2451,11 +2451,13 @@  get_mem_type_for_internal_fn (gcall *call, tree *op_p)
   switch (gimple_call_internal_fn (call))
     {
     case IFN_MASK_LOAD:
+    case IFN_MASK_LOAD_LANES:
       if (op_p == gimple_call_arg_ptr (call, 0))
 	return TREE_TYPE (gimple_call_lhs (call));
       return NULL_TREE;
 
     case IFN_MASK_STORE:
+    case IFN_MASK_STORE_LANES:
       if (op_p == gimple_call_arg_ptr (call, 0))
 	return TREE_TYPE (gimple_call_arg (call, 3));
       return NULL_TREE;
@@ -2545,7 +2547,7 @@  find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
 	  return;
 	}
 
-      /* TODO -- we should also handle address uses of type
+      /* TODO -- we should also handle all address uses of type
 
 	 memory = call (whatever);
 
@@ -2553,6 +2555,27 @@  find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
 
 	 call (memory).  */
     }
+  else if (is_gimple_call (stmt))
+    {
+      gcall *call = dyn_cast <gcall *> (stmt);
+      if (call
+	  && gimple_call_internal_p (call)
+	  && (gimple_call_internal_fn (call) == IFN_MASK_LOAD_LANES
+	      || gimple_call_internal_fn (call) == IFN_MASK_STORE_LANES))
+	{
+	  tree *arg = gimple_call_arg_ptr (call, 0);
+	  struct iv *civ = get_iv (data, *arg);
+	  tree mem_type = get_mem_type_for_internal_fn (call, arg);
+	  if (civ && mem_type)
+	    {
+	      civ = alloc_iv (data, civ->base, civ->step);
+	      record_group_use (data, arg, civ, stmt, USE_PTR_ADDRESS,
+				mem_type);
+	      return;
+	    }
+	}
+    }
+
 
   if (gimple_code (stmt) == GIMPLE_PHI
       && gimple_bb (stmt) == data->current_loop->header)
@@ -3500,6 +3523,39 @@  add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
     basetype = sizetype;
   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
 
+  /* Compare the cost of an address with an unscaled index with the cost of
+    an address with a scaled index and add candidate if useful. */
+  if (use != NULL && use->type == USE_PTR_ADDRESS)
+    {
+      struct mem_address parts = {NULL_TREE, integer_one_node,
+				  NULL_TREE, NULL_TREE, NULL_TREE};
+      poly_uint64 temp;
+      poly_int64 fact;
+      bool speed = optimize_loop_for_speed_p (data->current_loop);
+      poly_int64 poly_step = tree_to_poly_int64 (iv->step);
+      machine_mode mem_mode = TYPE_MODE (use->mem_type);
+      addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
+
+      fact = GET_MODE_SIZE (GET_MODE_INNER (TYPE_MODE (use->mem_type)));
+      parts.index = integer_one_node;
+
+      if (fact.is_constant ()
+	  && can_div_trunc_p (poly_step, fact, &temp))
+	{
+	  /* Addressing mode "base + index".  */
+	  rtx addr = addr_for_mem_ref (&parts, as, false);
+	  unsigned cost = address_cost (addr, mem_mode, as, speed);
+	  tree step = wide_int_to_tree (sizetype,
+					exact_div (poly_step, fact));
+	  parts.step = wide_int_to_tree (sizetype, fact);
+	  /* Addressing mode "base + index << scale".  */
+	  addr = addr_for_mem_ref (&parts, as, false);
+	  unsigned new_cost = address_cost (addr, mem_mode, as, speed);
+	  if (new_cost < cost)
+	    add_candidate (data, size_int (0), step, true, NULL);
+	}
+    }
+
   /* Record common candidate with constant offset stripped in base.
      Like the use itself, we also add candidate directly for it.  */
   base = strip_offset (iv->base, &offset);
@@ -7112,6 +7168,8 @@  get_alias_ptr_type_for_ptr_address (iv_use *use)
     {
     case IFN_MASK_LOAD:
     case IFN_MASK_STORE:
+    case IFN_MASK_LOAD_LANES:
+    case IFN_MASK_STORE_LANES:
       /* The second argument contains the correct alias type.  */
       gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
       return TREE_TYPE (gimple_call_arg (call, 1));