[4/7] S/390: Change handling of group end.

Message ID 20190311125327.20093-5-rdapp@linux.ibm.com
State New
Headers show
Series
  • S/390: Rework instruction scheduling.
Related show

Commit Message

Robin Dapp March 11, 2019, 12:53 p.m.
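This patch adds a scheduling state struct (group_state and side) that
replaces the integer s390_sched_state and the separate current_side
variable, and changes the handling of end-group conditions: the
S390_SCHED_STATE_NORMAL and S390_SCHED_STATE_CRACKED states are removed,
and instead of flipping the side when an insn starts a group,
s390_sched_variable_issue now resets the group state and flips the side
after an insn that ends a group.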

---
 gcc/config/s390/s390.c | 158 ++++++++++++++++++-----------------------
 1 file changed, 68 insertions(+), 90 deletions(-)

-- 
2.17.0

Patch

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 15926ec88cd..249df00268a 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -348,7 +348,6 @@  static rtx_insn *last_scheduled_insn;
 static int last_scheduled_unit_distance[MAX_SCHED_UNITS];
 
 #define NUM_SIDES 2
-static int current_side = 1;
 
 /* Estimate of number of cycles a long-running insn occupies an
    execution unit.  */
@@ -14261,17 +14260,15 @@  s390_bb_fallthru_entry_likely (basic_block bb)
   return true;
 }
 
-/* The s390_sched_state variable tracks the state of the current or
-   the last instruction group.
-
-   0,1,2 number of instructions scheduled in the current group
-   3     the last group is complete - normal insns
-   4     the last group was a cracked/expanded insn */
-
-static int s390_sched_state = 0;
+struct s390_sched_state
+{
+  /* Number of insns issued into the current group so far.  */
+  int group_state;
+  /* Execution side (0 or 1) of the current group.  */
+  int side;
+};
 
-#define S390_SCHED_STATE_NORMAL  3
-#define S390_SCHED_STATE_CRACKED 4
+static struct s390_sched_state sched_state = {0, 1};
 
 #define S390_SCHED_ATTR_MASK_CRACKED    0x1
 #define S390_SCHED_ATTR_MASK_EXPANDED   0x2
@@ -14369,14 +14366,14 @@  s390_is_longrunning (rtx_insn *insn)
 
 /* Return the scheduling score for INSN.  The higher the score the
    better.  The score is calculated from the OOO scheduling attributes
-   of INSN and the scheduling state s390_sched_state.  */
+   of INSN and the scheduling state sched_state.  */
 static int
 s390_sched_score (rtx_insn *insn)
 {
   unsigned int mask = s390_get_sched_attrmask (insn);
   int score = 0;
 
-  switch (s390_sched_state)
+  switch (sched_state.group_state)
     {
     case 0:
       /* Try to put insns into the first slot which would otherwise
@@ -14408,21 +14405,6 @@  s390_sched_score (rtx_insn *insn)
       if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
 	score += 10;
       break;
-    case S390_SCHED_STATE_NORMAL:
-      /* Prefer not cracked insns if the last was not cracked.  */
-      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
-	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0)
-	score += 5;
-      if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
-	score += 10;
-      break;
-    case S390_SCHED_STATE_CRACKED:
-      /* Try to keep cracked insns together to prevent them from
-	 interrupting groups.  */
-      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
-	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
-	score += 5;
-      break;
     }
 
   if (s390_tune >= PROCESSOR_2964_Z13)
@@ -14442,46 +14424,46 @@  s390_sched_score (rtx_insn *insn)
 	  score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
 		    MAX_SCHED_MIX_DISTANCE);
 
-      int other_side = 1 - current_side;
+      int other_side = 1 - sched_state.side;
 
       /* Try to delay long-running insns when side is busy.  */
       if (s390_is_longrunning (insn))
 	{
 	  if (s390_tune == PROCESSOR_2964_Z13)
 	    {
-	      if (get_attr_z13_unit_fxd (insn) && fxd_longrunning[current_side]
-		  && fxd_longrunning[other_side] <= fxd_longrunning[current_side])
+	      if (get_attr_z13_unit_fxd (insn) && fxd_longrunning[sched_state.side]
+		  && fxd_longrunning[other_side] <= fxd_longrunning[sched_state.side])
 		score = MAX (0, score - 10);
 
 	      if (get_attr_z13_unit_fxd (insn)
-		  && fxd_longrunning[other_side] >= fxd_longrunning[current_side])
+		  && fxd_longrunning[other_side] >= fxd_longrunning[sched_state.side])
 		score += 10;
 
-	      if (get_attr_z13_unit_fpd (insn) && fpd_longrunning[current_side]
-		  && fpd_longrunning[other_side] <= fpd_longrunning[current_side])
+	      if (get_attr_z13_unit_fpd (insn) && fpd_longrunning[sched_state.side]
+		  && fpd_longrunning[other_side] <= fpd_longrunning[sched_state.side])
 		score = MAX (0, score - 10);
 
 	      if (get_attr_z13_unit_fpd (insn)
-		  && fpd_longrunning[other_side] >= fpd_longrunning[current_side])
+		  && fpd_longrunning[other_side] >= fpd_longrunning[sched_state.side])
 		score += 10;
 	    }
 
 	  if (s390_tune == PROCESSOR_3906_Z14)
 	    {
-	      if (get_attr_z14_unit_fxd (insn) && fxd_longrunning[current_side]
-		  && fxd_longrunning[other_side] <= fxd_longrunning[current_side])
+	      if (get_attr_z14_unit_fxd (insn) && fxd_longrunning[sched_state.side]
+		  && fxd_longrunning[other_side] <= fxd_longrunning[sched_state.side])
 		score = MAX (0, score - 10);
 
 	      if (get_attr_z14_unit_fxd (insn)
-		  && fxd_longrunning[other_side] >= fxd_longrunning[current_side])
+		  && fxd_longrunning[other_side] >= fxd_longrunning[sched_state.side])
 		score += 10;
 
-	      if (get_attr_z14_unit_fpd (insn) && fpd_longrunning[current_side]
-		  && fpd_longrunning[other_side] <= fpd_longrunning[current_side])
+	      if (get_attr_z14_unit_fpd (insn) && fpd_longrunning[sched_state.side]
+		  && fpd_longrunning[other_side] <= fpd_longrunning[sched_state.side])
 		score = MAX (0, score - 10);
 
 	      if (get_attr_z14_unit_fpd (insn)
-		  && fpd_longrunning[other_side] >= fpd_longrunning[current_side])
+		  && fpd_longrunning[other_side] >= fpd_longrunning[sched_state.side])
 		score += 10;
 	    }
 	}
@@ -14554,7 +14536,7 @@  s390_sched_reorder (FILE *file, int verbose,
       if (verbose > 5)
 	{
 	  fprintf (file, "ready list ooo attributes - sched state: %d\n",
-		   s390_sched_state);
+		   sched_state.group_state);
 
 	  for (i = last_index; i >= 0; i--)
 	    {
@@ -14605,7 +14587,7 @@  s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
 {
   last_scheduled_insn = insn;
 
-  bool starts_group = false;
+  bool ends_group = false;
 
   if (s390_tune >= PROCESSOR_2827_ZEC12
       && reload_completed
@@ -14613,37 +14595,21 @@  s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
     {
       unsigned int mask = s390_get_sched_attrmask (insn);
 
-      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
-	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
-	  || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
-	starts_group = true;
+      /* Longrunning and side bookkeeping.  */
+      for (int i = 0; i < 2; i++)
+	{
+	  fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1);
+	  fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1);
+	}
 
-      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
-	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
-	s390_sched_state = S390_SCHED_STATE_CRACKED;
-      else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
-	       || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
-	s390_sched_state = S390_SCHED_STATE_NORMAL;
-      else
+      unsigned latency = insn_default_latency (insn);
+      if (s390_is_longrunning (insn))
 	{
-	  /* Only normal insns are left (mask == 0).  */
-	  switch (s390_sched_state)
-	    {
-	    case 0:
-	      starts_group = true;
-	      /* fallthrough */
-	    case 1:
-	    case 2:
-	      s390_sched_state++;
-	      break;
-	    case S390_SCHED_STATE_NORMAL:
-	      starts_group = true;
-	      s390_sched_state = 1;
-	      break;
-	    case S390_SCHED_STATE_CRACKED:
-	      s390_sched_state = S390_SCHED_STATE_NORMAL;
-	      break;
-	    }
+	  if (get_attr_z13_unit_fxd (insn)
+	      || get_attr_z14_unit_fxd (insn))
+	    fxd_longrunning[sched_state.side] = latency;
+	  else
+	    fpd_longrunning[sched_state.side] = latency;
 	}
 
       if (s390_tune >= PROCESSOR_2964_Z13)
@@ -14661,24 +14627,29 @@  s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
 	      last_scheduled_unit_distance[i]++;
 	}
 
-      /* If this insn started a new group, the side flipped.  */
-      if (starts_group)
-	current_side = current_side ? 0 : 1;
-
-      for (int i = 0; i < 2; i++)
+      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
+	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
+	  || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0
+	  || (mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
 	{
-	  fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1);
-	  fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1);
+	  sched_state.group_state = 0;
+	  ends_group = true;
 	}
-
-      unsigned latency = insn_default_latency (insn);
-      if (s390_is_longrunning (insn))
+      else
 	{
-	  if (get_attr_z13_unit_fxd (insn)
-	      || get_attr_z14_unit_fxd (insn))
-	    fxd_longrunning[current_side] = latency;
-	  else
-	    fpd_longrunning[current_side] = latency;
+	  switch (sched_state.group_state)
+	    {
+	    case 0:
+	      sched_state.group_state++;
+	      break;
+	    case 1:
+	      sched_state.group_state++;
+	      break;
+	    case 2:
+	      sched_state.group_state++;
+	      ends_group = true;
+	      break;
+	    }
 	}
 
       if (verbose > 5)
@@ -14707,7 +14678,7 @@  s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
 		  fprintf (file, " %d", j);
 	      fprintf (file, ")");
 	    }
-	  fprintf (file, " sched state: %d\n", s390_sched_state);
+	  fprintf (file, " sched state: %d\n", sched_state.group_state);
 
 	  if (s390_tune >= PROCESSOR_2964_Z13)
 	    {
@@ -14721,6 +14692,13 @@  s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
 	      fprintf (file, "\n");
 	    }
 	}
+
+      /* If this insn ended a group, the next will be on the other side.  */
+      if (ends_group)
+	{
+	  sched_state.group_state = 0;
+	  sched_state.side = sched_state.side ? 0 : 1;
+	}
     }
 
   if (GET_CODE (PATTERN (insn)) != USE
@@ -14741,7 +14719,7 @@  s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
   /* If the next basic block is most likely entered via a fallthru edge
      we keep the last sched state.  Otherwise we start a new group.
      The scheduler traverses basic blocks in "instruction stream" ordering
-     so if we see a fallthru edge here, s390_sched_state will be of its
+     so if we see a fallthru edge here, sched_state will be of its
      source block.
 
      current_sched_info->prev_head is the insn before the first insn of the
@@ -14751,7 +14729,7 @@  s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
     ? NEXT_INSN (current_sched_info->prev_head) : NULL;
   basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
   if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
-    s390_sched_state = 0;
+    sched_state.group_state = 0;
 }
 
 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates