[01/11] Schedule SLP earlier

Message ID 87zhy9dkfd.fsf@arm.com
State New
Headers show
Series
  • Add a vec_basic_block of scalar statements
Related show

Commit Message

Richard Sandiford July 30, 2018, 11:36 a.m.
vect_transform_loop used to call vect_schedule_slp lazily when it
came across the first SLP statement, but it seems easier to do it
before the main loop.


2018-07-30  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-vect-loop.c (vect_transform_loop_stmt): Remove slp_scheduled
	argument.
	(vect_transform_loop): Update calls accordingly.  Schedule SLP
	instances before the main loop, if any exist.

Comments

Richard Biener Aug. 1, 2018, 12:48 p.m. | #1
On Mon, Jul 30, 2018 at 1:37 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>

> vect_transform_loop used to call vect_schedule_slp lazily when it

> came across the first SLP statement, but it seems easier to do it

> before the main loop.


Indeed.

OK.
Richard.

>

> 2018-07-30  Richard Sandiford  <richard.sandiford@arm.com>

>

> gcc/

>         * tree-vect-loop.c (vect_transform_loop_stmt): Remove slp_scheduled

>         argument.

>         (vect_transform_loop): Update calls accordingly.  Schedule SLP

>         instances before the main loop, if any exist.

>

> Index: gcc/tree-vect-loop.c

> ===================================================================

> --- gcc/tree-vect-loop.c        2018-07-30 12:32:15.000000000 +0100

> +++ gcc/tree-vect-loop.c        2018-07-30 12:32:16.190624704 +0100

> @@ -8199,14 +8199,12 @@ scale_profile_for_vect_loop (struct loop

>  }

>

>  /* Vectorize STMT_INFO if relevant, inserting any new instructions before GSI.

> -   When vectorizing STMT_INFO as a store, set *SEEN_STORE to its stmt_vec_info.

> -   *SLP_SCHEDULE is a running record of whether we have called

> -   vect_schedule_slp.  */

> +   When vectorizing STMT_INFO as a store, set *SEEN_STORE to its

> +   stmt_vec_info.  */

>

>  static void

>  vect_transform_loop_stmt (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,

> -                         gimple_stmt_iterator *gsi,

> -                         stmt_vec_info *seen_store, bool *slp_scheduled)

> +                         gimple_stmt_iterator *gsi, stmt_vec_info *seen_store)

>  {

>    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

>    poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

> @@ -8237,24 +8235,10 @@ vect_transform_loop_stmt (loop_vec_info

>         dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");

>      }

>

> -  /* SLP.  Schedule all the SLP instances when the first SLP stmt is

> -     reached.  */

> -  if (slp_vect_type slptype = STMT_SLP_TYPE (stmt_info))

> -    {

> -

> -      if (!*slp_scheduled)

> -       {

> -         *slp_scheduled = true;

> -

> -         DUMP_VECT_SCOPE ("scheduling SLP instances");

> -

> -         vect_schedule_slp (loop_vinfo);

> -       }

> -

> -      /* Hybrid SLP stmts must be vectorized in addition to SLP.  */

> -      if (slptype == pure_slp)

> -       return;

> -    }

> +  /* Pure SLP statements have already been vectorized.  We still need

> +     to apply loop vectorization to hybrid SLP statements.  */

> +  if (PURE_SLP_STMT (stmt_info))

> +    return;

>

>    if (dump_enabled_p ())

>      dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");

> @@ -8284,7 +8268,6 @@ vect_transform_loop (loop_vec_info loop_

>    tree niters_vector_mult_vf = NULL_TREE;

>    poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

>    unsigned int lowest_vf = constant_lower_bound (vf);

> -  bool slp_scheduled = false;

>    gimple *stmt;

>    bool check_profitability = false;

>    unsigned int th;

> @@ -8390,6 +8373,14 @@ vect_transform_loop (loop_vec_info loop_

>      /* This will deal with any possible peeling.  */

>      vect_prepare_for_masked_peels (loop_vinfo);

>

> +  /* Schedule the SLP instances first, then handle loop vectorization

> +     below.  */

> +  if (!loop_vinfo->slp_instances.is_empty ())

> +    {

> +      DUMP_VECT_SCOPE ("scheduling SLP instances");

> +      vect_schedule_slp (loop_vinfo);

> +    }

> +

>    /* FORNOW: the vectorizer supports only loops which body consist

>       of one basic block (header + empty latch). When the vectorizer will

>       support more involved loop forms, the order by which the BBs are

> @@ -8468,16 +8459,15 @@ vect_transform_loop (loop_vec_info loop_

>                           stmt_vec_info pat_stmt_info

>                             = loop_vinfo->lookup_stmt (gsi_stmt (subsi));

>                           vect_transform_loop_stmt (loop_vinfo, pat_stmt_info,

> -                                                   &si, &seen_store,

> -                                                   &slp_scheduled);

> +                                                   &si, &seen_store);

>                         }

>                       stmt_vec_info pat_stmt_info

>                         = STMT_VINFO_RELATED_STMT (stmt_info);

>                       vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, &si,

> -                                               &seen_store, &slp_scheduled);

> +                                               &seen_store);

>                     }

>                   vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,

> -                                           &seen_store, &slp_scheduled);

> +                                           &seen_store);

>                 }

>               gsi_next (&si);

>               if (seen_store)

Patch

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	2018-07-30 12:32:15.000000000 +0100
+++ gcc/tree-vect-loop.c	2018-07-30 12:32:16.190624704 +0100
@@ -8199,14 +8199,12 @@  scale_profile_for_vect_loop (struct loop
 }
 
 /* Vectorize STMT_INFO if relevant, inserting any new instructions before GSI.
-   When vectorizing STMT_INFO as a store, set *SEEN_STORE to its stmt_vec_info.
-   *SLP_SCHEDULE is a running record of whether we have called
-   vect_schedule_slp.  */
+   When vectorizing STMT_INFO as a store, set *SEEN_STORE to its
+   stmt_vec_info.  */
 
 static void
 vect_transform_loop_stmt (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
-			  gimple_stmt_iterator *gsi,
-			  stmt_vec_info *seen_store, bool *slp_scheduled)
+			  gimple_stmt_iterator *gsi, stmt_vec_info *seen_store)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
@@ -8237,24 +8235,10 @@  vect_transform_loop_stmt (loop_vec_info
 	dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
     }
 
-  /* SLP.  Schedule all the SLP instances when the first SLP stmt is
-     reached.  */
-  if (slp_vect_type slptype = STMT_SLP_TYPE (stmt_info))
-    {
-
-      if (!*slp_scheduled)
-	{
-	  *slp_scheduled = true;
-
-	  DUMP_VECT_SCOPE ("scheduling SLP instances");
-
-	  vect_schedule_slp (loop_vinfo);
-	}
-
-      /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
-      if (slptype == pure_slp)
-	return;
-    }
+  /* Pure SLP statements have already been vectorized.  We still need
+     to apply loop vectorization to hybrid SLP statements.  */
+  if (PURE_SLP_STMT (stmt_info))
+    return;
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
@@ -8284,7 +8268,6 @@  vect_transform_loop (loop_vec_info loop_
   tree niters_vector_mult_vf = NULL_TREE;
   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   unsigned int lowest_vf = constant_lower_bound (vf);
-  bool slp_scheduled = false;
   gimple *stmt;
   bool check_profitability = false;
   unsigned int th;
@@ -8390,6 +8373,14 @@  vect_transform_loop (loop_vec_info loop_
     /* This will deal with any possible peeling.  */
     vect_prepare_for_masked_peels (loop_vinfo);
 
+  /* Schedule the SLP instances first, then handle loop vectorization
+     below.  */
+  if (!loop_vinfo->slp_instances.is_empty ())
+    {
+      DUMP_VECT_SCOPE ("scheduling SLP instances");
+      vect_schedule_slp (loop_vinfo);
+    }
+
   /* FORNOW: the vectorizer supports only loops which body consist
      of one basic block (header + empty latch). When the vectorizer will
      support more involved loop forms, the order by which the BBs are
@@ -8468,16 +8459,15 @@  vect_transform_loop (loop_vec_info loop_
 			  stmt_vec_info pat_stmt_info
 			    = loop_vinfo->lookup_stmt (gsi_stmt (subsi));
 			  vect_transform_loop_stmt (loop_vinfo, pat_stmt_info,
-						    &si, &seen_store,
-						    &slp_scheduled);
+						    &si, &seen_store);
 			}
 		      stmt_vec_info pat_stmt_info
 			= STMT_VINFO_RELATED_STMT (stmt_info);
 		      vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, &si,
-						&seen_store, &slp_scheduled);
+						&seen_store);
 		    }
 		  vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,
-					    &seen_store, &slp_scheduled);
+					    &seen_store);
 		}
 	      gsi_next (&si);
 	      if (seen_store)