Message ID  DB5PR0801MB2742BC616B50F1CF7CAC3637E7FB0@DB5PR0801MB2742.eurprd08.prod.outlook.com 

State  New 
Headers  show 
Series 

Related  show 
Hi! Honza, do you think you could have a look at this P1 fix? Thanks. On Wed, Jan 31, 2018 at 10:03:51AM +0000, Bin Cheng wrote: > Hi, > This patch fixes invalid profile count information in vectorization peeling. > Current implementation is a bit confusing for me since it tries to compute > an overall probability based on scaling probability and change of estimated > niters. This patch does it in two steps. Firstly it does the scaling, then > it adjusts to new estimated niters by simply adjusting loop's latch count > information; scaling the loop's count information by the proportion > new_estimated_niters/old_estimate_niters. Of course we have to adjust loop > latch's count information back after scaling. > > Bootstrap and test on x86_64 and AArch64. gcc.dg/vect/pr79347.c is fixed > for both PR82965 and PR83991. Is this OK? > > Thanks, > bin > > 20180130 Bin Cheng <bin.cheng@arm.com> > > PR treeoptimization/82965 > PR treeoptimization/83991 > * cfgloopmanip.c (scale_loop_profile): Further scale loop's profile > information if the loop was predicted to iterate too many times. > diff git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c > index b9b76d8..1f560b8 100644 >  a/gcc/cfgloopmanip.c > +++ b/gcc/cfgloopmanip.c > @@ 509,7 +509,7 @@ scale_loop_profile (struct loop *loop, profile_probability p, > gcov_type iteration_bound) > { > gcov_type iterations = expected_loop_iterations_unbounded (loop); >  edge e; > + edge e, preheader_e; > edge_iterator ei; > > if (dump_file && (dump_flags & TDF_DETAILS)) > @@ 521,77 +521,66 @@ scale_loop_profile (struct loop *loop, profile_probability p, > (int)iteration_bound, (int)iterations); > } > > + /* Scale the probabilities. */ > + scale_loop_frequencies (loop, p); > + > /* See if loop is predicted to iterate too many times. */ >  if (iteration_bound && iterations > 0 >  && p.apply (iterations) > iteration_bound) > + if (iteration_bound == 0  iterations <= 0 > +  p.apply (iterations) <= iteration_bound) > + return; > + > + e = single_exit (loop); > + preheader_e = loop_preheader_edge (loop); > + profile_count count_in = preheader_e>count (); > + if (e && preheader_e > + && count_in > profile_count::zero () > + && loop>header>count.initialized_p ()) > { >  /* Fixing loop profile for different trip count is not trivial; the exit >  probabilities has to be updated to match and frequencies propagated down >  to the loop body. >  >  We fully update only the simple case of loop with single exit that is >  either from the latch or BB just before latch and leads from BB with >  simple conditional jump. This is OK for use in vectorizer. */ >  e = single_exit (loop); >  if (e) >  { >  edge other_e; >  profile_count count_delta; > + edge other_e; > + profile_count count_delta; > >  FOR_EACH_EDGE (other_e, ei, e>src>succs) >  if (!(other_e>flags & (EDGE_ABNORMAL  EDGE_FAKE)) >  && e != other_e) >  break; > + FOR_EACH_EDGE (other_e, ei, e>src>succs) > + if (!(other_e>flags & (EDGE_ABNORMAL  EDGE_FAKE)) > + && e != other_e) > + break; > >  /* Probability of exit must be 1/iterations. */ >  count_delta = e>count (); >  e>probability = profile_probability::always () > + /* Probability of exit must be 1/iterations. */ > + count_delta = e>count (); > + e>probability = profile_probability::always () > .apply_scale (1, iteration_bound); >  other_e>probability = e>probability.invert (); >  count_delta = e>count (); >  >  /* If latch exists, change its count, since we changed >  probability of exit. Theoretically we should update everything from >  source of exit edge to latch, but for vectorizer this is enough. */ >  if (loop>latch >  && loop>latch != e>src) >  { >  loop>latch>count += count_delta; >  } >  } > + other_e>probability = e>probability.invert (); > > /* Roughly speaking we want to reduce the loop body profile by the > difference of loop iterations. We however can do better if > we look at the actual profile, if it is available. */ >  p = p.apply_scale (iteration_bound, iterations); >  >  if (loop>header>count.initialized_p ()) >  { >  profile_count count_in = profile_count::zero (); > + p = profile_probability::always (); > >  FOR_EACH_EDGE (e, ei, loop>header>preds) >  if (e>src != loop>latch) >  count_in += e>count (); >  >  if (count_in > profile_count::zero () ) >  { >  p = count_in.probability_in (loop>header>count.apply_scale >  (iteration_bound, 1)); >  } >  } > + count_in = count_in.apply_scale (iteration_bound, 1); > + p = count_in.probability_in (loop>header>count); > if (!(p > profile_probability::never ())) > p = profile_probability::very_unlikely (); >  } > >  if (p >= profile_probability::always () >   !p.initialized_p ()) >  return; > + if (p >= profile_probability::always () > +  !p.initialized_p ()) > + return; > >  /* Scale the actual probabilities. */ >  scale_loop_frequencies (loop, p); >  if (dump_file && (dump_flags & TDF_DETAILS)) >  fprintf (dump_file, ";; guessed iterations are now %i\n", >  (int)expected_loop_iterations_unbounded (loop)); > + /* If latch exists, change its count, since we changed > + probability of exit. Theoretically we should update everything from > + source of exit edge to latch, but for vectorizer this is enough. */ > + if (loop>latch && loop>latch != e>src) > + loop>latch>count += count_delta; > + > + /* Scale the probabilities. */ > + scale_loop_frequencies (loop, p); > + > + /* Change latch's count back. */ > + if (loop>latch && loop>latch != e>src) > + loop>latch>count = count_delta; > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, ";; guessed iterations are now %i\n", > + (int)expected_loop_iterations_unbounded (loop)); > + } > } > > /* Recompute dominance information for basic blocks outside LOOP. */ Jakub
Ping^2 Thanks, bin On Mon, Feb 19, 2018 at 5:14 PM, Jakub Jelinek <jakub@redhat.com> wrote: > Hi! > > Honza, do you think you could have a look at this P1 fix? > > Thanks. > > On Wed, Jan 31, 2018 at 10:03:51AM +0000, Bin Cheng wrote: >> Hi, >> This patch fixes invalid profile count information in vectorization peeling. >> Current implementation is a bit confusing for me since it tries to compute >> an overall probability based on scaling probability and change of estimated >> niters. This patch does it in two steps. Firstly it does the scaling, then >> it adjusts to new estimated niters by simply adjusting loop's latch count >> information; scaling the loop's count information by the proportion >> new_estimated_niters/old_estimate_niters. Of course we have to adjust loop >> latch's count information back after scaling. >> >> Bootstrap and test on x86_64 and AArch64. gcc.dg/vect/pr79347.c is fixed >> for both PR82965 and PR83991. Is this OK? >> >> Thanks, >> bin >> >> 20180130 Bin Cheng <bin.cheng@arm.com> >> >> PR treeoptimization/82965 >> PR treeoptimization/83991 >> * cfgloopmanip.c (scale_loop_profile): Further scale loop's profile >> information if the loop was predicted to iterate too many times. > >> diff git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c >> index b9b76d8..1f560b8 100644 >>  a/gcc/cfgloopmanip.c >> +++ b/gcc/cfgloopmanip.c >> @@ 509,7 +509,7 @@ scale_loop_profile (struct loop *loop, profile_probability p, >> gcov_type iteration_bound) >> { >> gcov_type iterations = expected_loop_iterations_unbounded (loop); >>  edge e; >> + edge e, preheader_e; >> edge_iterator ei; >> >> if (dump_file && (dump_flags & TDF_DETAILS)) >> @@ 521,77 +521,66 @@ scale_loop_profile (struct loop *loop, profile_probability p, >> (int)iteration_bound, (int)iterations); >> } >> >> + /* Scale the probabilities. */ >> + scale_loop_frequencies (loop, p); >> + >> /* See if loop is predicted to iterate too many times. */ >>  if (iteration_bound && iterations > 0 >>  && p.apply (iterations) > iteration_bound) >> + if (iteration_bound == 0  iterations <= 0 >> +  p.apply (iterations) <= iteration_bound) >> + return; >> + >> + e = single_exit (loop); >> + preheader_e = loop_preheader_edge (loop); >> + profile_count count_in = preheader_e>count (); >> + if (e && preheader_e >> + && count_in > profile_count::zero () >> + && loop>header>count.initialized_p ()) >> { >>  /* Fixing loop profile for different trip count is not trivial; the exit >>  probabilities has to be updated to match and frequencies propagated down >>  to the loop body. >>  >>  We fully update only the simple case of loop with single exit that is >>  either from the latch or BB just before latch and leads from BB with >>  simple conditional jump. This is OK for use in vectorizer. */ >>  e = single_exit (loop); >>  if (e) >>  { >>  edge other_e; >>  profile_count count_delta; >> + edge other_e; >> + profile_count count_delta; >> >>  FOR_EACH_EDGE (other_e, ei, e>src>succs) >>  if (!(other_e>flags & (EDGE_ABNORMAL  EDGE_FAKE)) >>  && e != other_e) >>  break; >> + FOR_EACH_EDGE (other_e, ei, e>src>succs) >> + if (!(other_e>flags & (EDGE_ABNORMAL  EDGE_FAKE)) >> + && e != other_e) >> + break; >> >>  /* Probability of exit must be 1/iterations. */ >>  count_delta = e>count (); >>  e>probability = profile_probability::always () >> + /* Probability of exit must be 1/iterations. */ >> + count_delta = e>count (); >> + e>probability = profile_probability::always () >> .apply_scale (1, iteration_bound); >>  other_e>probability = e>probability.invert (); >>  count_delta = e>count (); >>  >>  /* If latch exists, change its count, since we changed >>  probability of exit. Theoretically we should update everything from >>  source of exit edge to latch, but for vectorizer this is enough. */ >>  if (loop>latch >>  && loop>latch != e>src) >>  { >>  loop>latch>count += count_delta; >>  } >>  } >> + other_e>probability = e>probability.invert (); >> >> /* Roughly speaking we want to reduce the loop body profile by the >> difference of loop iterations. We however can do better if >> we look at the actual profile, if it is available. */ >>  p = p.apply_scale (iteration_bound, iterations); >>  >>  if (loop>header>count.initialized_p ()) >>  { >>  profile_count count_in = profile_count::zero (); >> + p = profile_probability::always (); >> >>  FOR_EACH_EDGE (e, ei, loop>header>preds) >>  if (e>src != loop>latch) >>  count_in += e>count (); >>  >>  if (count_in > profile_count::zero () ) >>  { >>  p = count_in.probability_in (loop>header>count.apply_scale >>  (iteration_bound, 1)); >>  } >>  } >> + count_in = count_in.apply_scale (iteration_bound, 1); >> + p = count_in.probability_in (loop>header>count); >> if (!(p > profile_probability::never ())) >> p = profile_probability::very_unlikely (); >>  } >> >>  if (p >= profile_probability::always () >>   !p.initialized_p ()) >>  return; >> + if (p >= profile_probability::always () >> +  !p.initialized_p ()) >> + return; >> >>  /* Scale the actual probabilities. */ >>  scale_loop_frequencies (loop, p); >>  if (dump_file && (dump_flags & TDF_DETAILS)) >>  fprintf (dump_file, ";; guessed iterations are now %i\n", >>  (int)expected_loop_iterations_unbounded (loop)); >> + /* If latch exists, change its count, since we changed >> + probability of exit. Theoretically we should update everything from >> + source of exit edge to latch, but for vectorizer this is enough. */ >> + if (loop>latch && loop>latch != e>src) >> + loop>latch>count += count_delta; >> + >> + /* Scale the probabilities. */ >> + scale_loop_frequencies (loop, p); >> + >> + /* Change latch's count back. */ >> + if (loop>latch && loop>latch != e>src) >> + loop>latch>count = count_delta; >> + >> + if (dump_file && (dump_flags & TDF_DETAILS)) >> + fprintf (dump_file, ";; guessed iterations are now %i\n", >> + (int)expected_loop_iterations_unbounded (loop)); >> + } >> } >> >> /* Recompute dominance information for basic blocks outside LOOP. */ > > > Jakub
It's looks fixed https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82965#c12 on mips64el. Thanks. On Mon, Feb 26, 2018 at 8:02 PM, Bin.Cheng <amker.cheng@gmail.com> wrote: > Ping^2 > > Thanks, > bin > > On Mon, Feb 19, 2018 at 5:14 PM, Jakub Jelinek <jakub@redhat.com> wrote: >> Hi! >> >> Honza, do you think you could have a look at this P1 fix? >> >> Thanks. >> >> On Wed, Jan 31, 2018 at 10:03:51AM +0000, Bin Cheng wrote: >>> Hi, >>> This patch fixes invalid profile count information in vectorization peeling. >>> Current implementation is a bit confusing for me since it tries to compute >>> an overall probability based on scaling probability and change of estimated >>> niters. This patch does it in two steps. Firstly it does the scaling, then >>> it adjusts to new estimated niters by simply adjusting loop's latch count >>> information; scaling the loop's count information by the proportion >>> new_estimated_niters/old_estimate_niters. Of course we have to adjust loop >>> latch's count information back after scaling. >>> >>> Bootstrap and test on x86_64 and AArch64. gcc.dg/vect/pr79347.c is fixed >>> for both PR82965 and PR83991. Is this OK? >>> >>> Thanks, >>> bin >>> >>> 20180130 Bin Cheng <bin.cheng@arm.com> >>> >>> PR treeoptimization/82965 >>> PR treeoptimization/83991 >>> * cfgloopmanip.c (scale_loop_profile): Further scale loop's profile >>> information if the loop was predicted to iterate too many times. >> >>> diff git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c >>> index b9b76d8..1f560b8 100644 >>>  a/gcc/cfgloopmanip.c >>> +++ b/gcc/cfgloopmanip.c >>> @@ 509,7 +509,7 @@ scale_loop_profile (struct loop *loop, profile_probability p, >>> gcov_type iteration_bound) >>> { >>> gcov_type iterations = expected_loop_iterations_unbounded (loop); >>>  edge e; >>> + edge e, preheader_e; >>> edge_iterator ei; >>> >>> if (dump_file && (dump_flags & TDF_DETAILS)) >>> @@ 521,77 +521,66 @@ scale_loop_profile (struct loop *loop, profile_probability p, >>> (int)iteration_bound, (int)iterations); >>> } >>> >>> + /* Scale the probabilities. */ >>> + scale_loop_frequencies (loop, p); >>> + >>> /* See if loop is predicted to iterate too many times. */ >>>  if (iteration_bound && iterations > 0 >>>  && p.apply (iterations) > iteration_bound) >>> + if (iteration_bound == 0  iterations <= 0 >>> +  p.apply (iterations) <= iteration_bound) >>> + return; >>> + >>> + e = single_exit (loop); >>> + preheader_e = loop_preheader_edge (loop); >>> + profile_count count_in = preheader_e>count (); >>> + if (e && preheader_e >>> + && count_in > profile_count::zero () >>> + && loop>header>count.initialized_p ()) >>> { >>>  /* Fixing loop profile for different trip count is not trivial; the exit >>>  probabilities has to be updated to match and frequencies propagated down >>>  to the loop body. >>>  >>>  We fully update only the simple case of loop with single exit that is >>>  either from the latch or BB just before latch and leads from BB with >>>  simple conditional jump. This is OK for use in vectorizer. */ >>>  e = single_exit (loop); >>>  if (e) >>>  { >>>  edge other_e; >>>  profile_count count_delta; >>> + edge other_e; >>> + profile_count count_delta; >>> >>>  FOR_EACH_EDGE (other_e, ei, e>src>succs) >>>  if (!(other_e>flags & (EDGE_ABNORMAL  EDGE_FAKE)) >>>  && e != other_e) >>>  break; >>> + FOR_EACH_EDGE (other_e, ei, e>src>succs) >>> + if (!(other_e>flags & (EDGE_ABNORMAL  EDGE_FAKE)) >>> + && e != other_e) >>> + break; >>> >>>  /* Probability of exit must be 1/iterations. */ >>>  count_delta = e>count (); >>>  e>probability = profile_probability::always () >>> + /* Probability of exit must be 1/iterations. */ >>> + count_delta = e>count (); >>> + e>probability = profile_probability::always () >>> .apply_scale (1, iteration_bound); >>>  other_e>probability = e>probability.invert (); >>>  count_delta = e>count (); >>>  >>>  /* If latch exists, change its count, since we changed >>>  probability of exit. Theoretically we should update everything from >>>  source of exit edge to latch, but for vectorizer this is enough. */ >>>  if (loop>latch >>>  && loop>latch != e>src) >>>  { >>>  loop>latch>count += count_delta; >>>  } >>>  } >>> + other_e>probability = e>probability.invert (); >>> >>> /* Roughly speaking we want to reduce the loop body profile by the >>> difference of loop iterations. We however can do better if >>> we look at the actual profile, if it is available. */ >>>  p = p.apply_scale (iteration_bound, iterations); >>>  >>>  if (loop>header>count.initialized_p ()) >>>  { >>>  profile_count count_in = profile_count::zero (); >>> + p = profile_probability::always (); >>> >>>  FOR_EACH_EDGE (e, ei, loop>header>preds) >>>  if (e>src != loop>latch) >>>  count_in += e>count (); >>>  >>>  if (count_in > profile_count::zero () ) >>>  { >>>  p = count_in.probability_in (loop>header>count.apply_scale >>>  (iteration_bound, 1)); >>>  } >>>  } >>> + count_in = count_in.apply_scale (iteration_bound, 1); >>> + p = count_in.probability_in (loop>header>count); >>> if (!(p > profile_probability::never ())) >>> p = profile_probability::very_unlikely (); >>>  } >>> >>>  if (p >= profile_probability::always () >>>   !p.initialized_p ()) >>>  return; >>> + if (p >= profile_probability::always () >>> +  !p.initialized_p ()) >>> + return; >>> >>>  /* Scale the actual probabilities. */ >>>  scale_loop_frequencies (loop, p); >>>  if (dump_file && (dump_flags & TDF_DETAILS)) >>>  fprintf (dump_file, ";; guessed iterations are now %i\n", >>>  (int)expected_loop_iterations_unbounded (loop)); >>> + /* If latch exists, change its count, since we changed >>> + probability of exit. Theoretically we should update everything from >>> + source of exit edge to latch, but for vectorizer this is enough. */ >>> + if (loop>latch && loop>latch != e>src) >>> + loop>latch>count += count_delta; >>> + >>> + /* Scale the probabilities. */ >>> + scale_loop_frequencies (loop, p); >>> + >>> + /* Change latch's count back. */ >>> + if (loop>latch && loop>latch != e>src) >>> + loop>latch>count = count_delta; >>> + >>> + if (dump_file && (dump_flags & TDF_DETAILS)) >>> + fprintf (dump_file, ";; guessed iterations are now %i\n", >>> + (int)expected_loop_iterations_unbounded (loop)); >>> + } >>> } >>> >>> /* Recompute dominance information for basic blocks outside LOOP. */ >> >> >> Jakub
On Fri, Mar 9, 2018 at 10:25 AM, Paul Hua <paul.hua.gm@gmail.com> wrote: > It's looks fixed > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82965#c12 on mips64el. Hmm, is it fixed? or is it exposed now on mips64el? I read the latter from the comment. I think the issue is like explained, but haven't dug into when/why it is triggered in vect_peeling only for some targets. Honza asked couple questions about my patch offline, I will revisit the patch, see how to address his concern. Thanks, bin > > Thanks. > > On Mon, Feb 26, 2018 at 8:02 PM, Bin.Cheng <amker.cheng@gmail.com> wrote: >> Ping^2 >> >> Thanks, >> bin >> >> On Mon, Feb 19, 2018 at 5:14 PM, Jakub Jelinek <jakub@redhat.com> wrote: >>> Hi! >>> >>> Honza, do you think you could have a look at this P1 fix? >>> >>> Thanks. >>> >>> On Wed, Jan 31, 2018 at 10:03:51AM +0000, Bin Cheng wrote: >>>> Hi, >>>> This patch fixes invalid profile count information in vectorization peeling. >>>> Current implementation is a bit confusing for me since it tries to compute >>>> an overall probability based on scaling probability and change of estimated >>>> niters. This patch does it in two steps. Firstly it does the scaling, then >>>> it adjusts to new estimated niters by simply adjusting loop's latch count >>>> information; scaling the loop's count information by the proportion >>>> new_estimated_niters/old_estimate_niters. Of course we have to adjust loop >>>> latch's count information back after scaling. >>>> >>>> Bootstrap and test on x86_64 and AArch64. gcc.dg/vect/pr79347.c is fixed >>>> for both PR82965 and PR83991. Is this OK? >>>> >>>> Thanks, >>>> bin >>>> >>>> 20180130 Bin Cheng <bin.cheng@arm.com> >>>> >>>> PR treeoptimization/82965 >>>> PR treeoptimization/83991 >>>> * cfgloopmanip.c (scale_loop_profile): Further scale loop's profile >>>> information if the loop was predicted to iterate too many times. >>> >>>> diff git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c >>>> index b9b76d8..1f560b8 100644 >>>>  a/gcc/cfgloopmanip.c >>>> +++ b/gcc/cfgloopmanip.c >>>> @@ 509,7 +509,7 @@ scale_loop_profile (struct loop *loop, profile_probability p, >>>> gcov_type iteration_bound) >>>> { >>>> gcov_type iterations = expected_loop_iterations_unbounded (loop); >>>>  edge e; >>>> + edge e, preheader_e; >>>> edge_iterator ei; >>>> >>>> if (dump_file && (dump_flags & TDF_DETAILS)) >>>> @@ 521,77 +521,66 @@ scale_loop_profile (struct loop *loop, profile_probability p, >>>> (int)iteration_bound, (int)iterations); >>>> } >>>> >>>> + /* Scale the probabilities. */ >>>> + scale_loop_frequencies (loop, p); >>>> + >>>> /* See if loop is predicted to iterate too many times. */ >>>>  if (iteration_bound && iterations > 0 >>>>  && p.apply (iterations) > iteration_bound) >>>> + if (iteration_bound == 0  iterations <= 0 >>>> +  p.apply (iterations) <= iteration_bound) >>>> + return; >>>> + >>>> + e = single_exit (loop); >>>> + preheader_e = loop_preheader_edge (loop); >>>> + profile_count count_in = preheader_e>count (); >>>> + if (e && preheader_e >>>> + && count_in > profile_count::zero () >>>> + && loop>header>count.initialized_p ()) >>>> { >>>>  /* Fixing loop profile for different trip count is not trivial; the exit >>>>  probabilities has to be updated to match and frequencies propagated down >>>>  to the loop body. >>>>  >>>>  We fully update only the simple case of loop with single exit that is >>>>  either from the latch or BB just before latch and leads from BB with >>>>  simple conditional jump. This is OK for use in vectorizer. */ >>>>  e = single_exit (loop); >>>>  if (e) >>>>  { >>>>  edge other_e; >>>>  profile_count count_delta; >>>> + edge other_e; >>>> + profile_count count_delta; >>>> >>>>  FOR_EACH_EDGE (other_e, ei, e>src>succs) >>>>  if (!(other_e>flags & (EDGE_ABNORMAL  EDGE_FAKE)) >>>>  && e != other_e) >>>>  break; >>>> + FOR_EACH_EDGE (other_e, ei, e>src>succs) >>>> + if (!(other_e>flags & (EDGE_ABNORMAL  EDGE_FAKE)) >>>> + && e != other_e) >>>> + break; >>>> >>>>  /* Probability of exit must be 1/iterations. */ >>>>  count_delta = e>count (); >>>>  e>probability = profile_probability::always () >>>> + /* Probability of exit must be 1/iterations. */ >>>> + count_delta = e>count (); >>>> + e>probability = profile_probability::always () >>>> .apply_scale (1, iteration_bound); >>>>  other_e>probability = e>probability.invert (); >>>>  count_delta = e>count (); >>>>  >>>>  /* If latch exists, change its count, since we changed >>>>  probability of exit. Theoretically we should update everything from >>>>  source of exit edge to latch, but for vectorizer this is enough. */ >>>>  if (loop>latch >>>>  && loop>latch != e>src) >>>>  { >>>>  loop>latch>count += count_delta; >>>>  } >>>>  } >>>> + other_e>probability = e>probability.invert (); >>>> >>>> /* Roughly speaking we want to reduce the loop body profile by the >>>> difference of loop iterations. We however can do better if >>>> we look at the actual profile, if it is available. */ >>>>  p = p.apply_scale (iteration_bound, iterations); >>>>  >>>>  if (loop>header>count.initialized_p ()) >>>>  { >>>>  profile_count count_in = profile_count::zero (); >>>> + p = profile_probability::always (); >>>> >>>>  FOR_EACH_EDGE (e, ei, loop>header>preds) >>>>  if (e>src != loop>latch) >>>>  count_in += e>count (); >>>>  >>>>  if (count_in > profile_count::zero () ) >>>>  { >>>>  p = count_in.probability_in (loop>header>count.apply_scale >>>>  (iteration_bound, 1)); >>>>  } >>>>  } >>>> + count_in = count_in.apply_scale (iteration_bound, 1); >>>> + p = count_in.probability_in (loop>header>count); >>>> if (!(p > profile_probability::never ())) >>>> p = profile_probability::very_unlikely (); >>>>  } >>>> >>>>  if (p >= profile_probability::always () >>>>   !p.initialized_p ()) >>>>  return; >>>> + if (p >= profile_probability::always () >>>> +  !p.initialized_p ()) >>>> + return; >>>> >>>>  /* Scale the actual probabilities. */ >>>>  scale_loop_frequencies (loop, p); >>>>  if (dump_file && (dump_flags & TDF_DETAILS)) >>>>  fprintf (dump_file, ";; guessed iterations are now %i\n", >>>>  (int)expected_loop_iterations_unbounded (loop)); >>>> + /* If latch exists, change its count, since we changed >>>> + probability of exit. Theoretically we should update everything from >>>> + source of exit edge to latch, but for vectorizer this is enough. */ >>>> + if (loop>latch && loop>latch != e>src) >>>> + loop>latch>count += count_delta; >>>> + >>>> + /* Scale the probabilities. */ >>>> + scale_loop_frequencies (loop, p); >>>> + >>>> + /* Change latch's count back. */ >>>> + if (loop>latch && loop>latch != e>src) >>>> + loop>latch>count = count_delta; >>>> + >>>> + if (dump_file && (dump_flags & TDF_DETAILS)) >>>> + fprintf (dump_file, ";; guessed iterations are now %i\n", >>>> + (int)expected_loop_iterations_unbounded (loop)); >>>> + } >>>> } >>>> >>>> /* Recompute dominance information for basic blocks outside LOOP. */ >>> >>> >>> Jakub
On Fri, Mar 9, 2018 at 10:51 PM, Bin.Cheng <amker.cheng@gmail.com> wrote: > On Fri, Mar 9, 2018 at 10:25 AM, Paul Hua <paul.hua.gm@gmail.com> wrote: >> It's looks fixed >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82965#c12 on mips64el. > Hmm, is it fixed? or is it exposed now on mips64el? I read the latter > from the comment. It is fixed, Bootstrap and test passed on mips64el. Thanks, Paul Hua > I think the issue is like explained, but haven't dug into when/why it > is triggered in vect_peeling only for some targets. > Honza asked couple questions about my patch offline, I will revisit > the patch, see how to address > his concern. > > Thanks, > bin >> >> Thanks. >> >> On Mon, Feb 26, 2018 at 8:02 PM, Bin.Cheng <amker.cheng@gmail.com> wrote: >>> Ping^2 >>> >>> Thanks, >>> bin >>> >>> On Mon, Feb 19, 2018 at 5:14 PM, Jakub Jelinek <jakub@redhat.com> wrote: >>>> Hi! >>>> >>>> Honza, do you think you could have a look at this P1 fix? >>>> >>>> Thanks. >>>> >>>> On Wed, Jan 31, 2018 at 10:03:51AM +0000, Bin Cheng wrote: >>>>> Hi, >>>>> This patch fixes invalid profile count information in vectorization peeling. >>>>> Current implementation is a bit confusing for me since it tries to compute >>>>> an overall probability based on scaling probability and change of estimated >>>>> niters. This patch does it in two steps. Firstly it does the scaling, then >>>>> it adjusts to new estimated niters by simply adjusting loop's latch count >>>>> information; scaling the loop's count information by the proportion >>>>> new_estimated_niters/old_estimate_niters. Of course we have to adjust loop >>>>> latch's count information back after scaling. >>>>> >>>>> Bootstrap and test on x86_64 and AArch64. gcc.dg/vect/pr79347.c is fixed >>>>> for both PR82965 and PR83991. Is this OK? >>>>> >>>>> Thanks, >>>>> bin >>>>> >>>>> 20180130 Bin Cheng <bin.cheng@arm.com> >>>>> >>>>> PR treeoptimization/82965 >>>>> PR treeoptimization/83991 >>>>> * cfgloopmanip.c (scale_loop_profile): Further scale loop's profile >>>>> information if the loop was predicted to iterate too many times. >>>> >>>>> diff git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c >>>>> index b9b76d8..1f560b8 100644 >>>>>  a/gcc/cfgloopmanip.c >>>>> +++ b/gcc/cfgloopmanip.c >>>>> @@ 509,7 +509,7 @@ scale_loop_profile (struct loop *loop, profile_probability p, >>>>> gcov_type iteration_bound) >>>>> { >>>>> gcov_type iterations = expected_loop_iterations_unbounded (loop); >>>>>  edge e; >>>>> + edge e, preheader_e; >>>>> edge_iterator ei; >>>>> >>>>> if (dump_file && (dump_flags & TDF_DETAILS)) >>>>> @@ 521,77 +521,66 @@ scale_loop_profile (struct loop *loop, profile_probability p, >>>>> (int)iteration_bound, (int)iterations); >>>>> } >>>>> >>>>> + /* Scale the probabilities. */ >>>>> + scale_loop_frequencies (loop, p); >>>>> + >>>>> /* See if loop is predicted to iterate too many times. */ >>>>>  if (iteration_bound && iterations > 0 >>>>>  && p.apply (iterations) > iteration_bound) >>>>> + if (iteration_bound == 0  iterations <= 0 >>>>> +  p.apply (iterations) <= iteration_bound) >>>>> + return; >>>>> + >>>>> + e = single_exit (loop); >>>>> + preheader_e = loop_preheader_edge (loop); >>>>> + profile_count count_in = preheader_e>count (); >>>>> + if (e && preheader_e >>>>> + && count_in > profile_count::zero () >>>>> + && loop>header>count.initialized_p ()) >>>>> { >>>>>  /* Fixing loop profile for different trip count is not trivial; the exit >>>>>  probabilities has to be updated to match and frequencies propagated down >>>>>  to the loop body. >>>>>  >>>>>  We fully update only the simple case of loop with single exit that is >>>>>  either from the latch or BB just before latch and leads from BB with >>>>>  simple conditional jump. This is OK for use in vectorizer. */ >>>>>  e = single_exit (loop); >>>>>  if (e) >>>>>  { >>>>>  edge other_e; >>>>>  profile_count count_delta; >>>>> + edge other_e; >>>>> + profile_count count_delta; >>>>> >>>>>  FOR_EACH_EDGE (other_e, ei, e>src>succs) >>>>>  if (!(other_e>flags & (EDGE_ABNORMAL  EDGE_FAKE)) >>>>>  && e != other_e) >>>>>  break; >>>>> + FOR_EACH_EDGE (other_e, ei, e>src>succs) >>>>> + if (!(other_e>flags & (EDGE_ABNORMAL  EDGE_FAKE)) >>>>> + && e != other_e) >>>>> + break; >>>>> >>>>>  /* Probability of exit must be 1/iterations. */ >>>>>  count_delta = e>count (); >>>>>  e>probability = profile_probability::always () >>>>> + /* Probability of exit must be 1/iterations. */ >>>>> + count_delta = e>count (); >>>>> + e>probability = profile_probability::always () >>>>> .apply_scale (1, iteration_bound); >>>>>  other_e>probability = e>probability.invert (); >>>>>  count_delta = e>count (); >>>>>  >>>>>  /* If latch exists, change its count, since we changed >>>>>  probability of exit. Theoretically we should update everything from >>>>>  source of exit edge to latch, but for vectorizer this is enough. */ >>>>>  if (loop>latch >>>>>  && loop>latch != e>src) >>>>>  { >>>>>  loop>latch>count += count_delta; >>>>>  } >>>>>  } >>>>> + other_e>probability = e>probability.invert (); >>>>> >>>>> /* Roughly speaking we want to reduce the loop body profile by the >>>>> difference of loop iterations. We however can do better if >>>>> we look at the actual profile, if it is available. */ >>>>>  p = p.apply_scale (iteration_bound, iterations); >>>>>  >>>>>  if (loop>header>count.initialized_p ()) >>>>>  { >>>>>  profile_count count_in = profile_count::zero (); >>>>> + p = profile_probability::always (); >>>>> >>>>>  FOR_EACH_EDGE (e, ei, loop>header>preds) >>>>>  if (e>src != loop>latch) >>>>>  count_in += e>count (); >>>>>  >>>>>  if (count_in > profile_count::zero () ) >>>>>  { >>>>>  p = count_in.probability_in (loop>header>count.apply_scale >>>>>  (iteration_bound, 1)); >>>>>  } >>>>>  } >>>>> + count_in = count_in.apply_scale (iteration_bound, 1); >>>>> + p = count_in.probability_in (loop>header>count); >>>>> if (!(p > profile_probability::never ())) >>>>> p = profile_probability::very_unlikely (); >>>>>  } >>>>> >>>>>  if (p >= profile_probability::always () >>>>>   !p.initialized_p ()) >>>>>  return; >>>>> + if (p >= profile_probability::always () >>>>> +  !p.initialized_p ()) >>>>> + return; >>>>> >>>>>  /* Scale the actual probabilities. */ >>>>>  scale_loop_frequencies (loop, p); >>>>>  if (dump_file && (dump_flags & TDF_DETAILS)) >>>>>  fprintf (dump_file, ";; guessed iterations are now %i\n", >>>>>  (int)expected_loop_iterations_unbounded (loop)); >>>>> + /* If latch exists, change its count, since we changed >>>>> + probability of exit. Theoretically we should update everything from >>>>> + source of exit edge to latch, but for vectorizer this is enough. */ >>>>> + if (loop>latch && loop>latch != e>src) >>>>> + loop>latch>count += count_delta; >>>>> + >>>>> + /* Scale the probabilities. */ >>>>> + scale_loop_frequencies (loop, p); >>>>> + >>>>> + /* Change latch's count back. */ >>>>> + if (loop>latch && loop>latch != e>src) >>>>> + loop>latch>count = count_delta; >>>>> + >>>>> + if (dump_file && (dump_flags & TDF_DETAILS)) >>>>> + fprintf (dump_file, ";; guessed iterations are now %i\n", >>>>> + (int)expected_loop_iterations_unbounded (loop)); >>>>> + } >>>>> } >>>>> >>>>> /* Recompute dominance information for basic blocks outside LOOP. */ >>>> >>>> >>>> Jakub
diff git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c index b9b76d8..1f560b8 100644  a/gcc/cfgloopmanip.c +++ b/gcc/cfgloopmanip.c @@ 509,7 +509,7 @@ scale_loop_profile (struct loop *loop, profile_probability p, gcov_type iteration_bound) { gcov_type iterations = expected_loop_iterations_unbounded (loop);  edge e; + edge e, preheader_e; edge_iterator ei; if (dump_file && (dump_flags & TDF_DETAILS)) @@ 521,77 +521,66 @@ scale_loop_profile (struct loop *loop, profile_probability p, (int)iteration_bound, (int)iterations); } + /* Scale the probabilities. */ + scale_loop_frequencies (loop, p); + /* See if loop is predicted to iterate too many times. */  if (iteration_bound && iterations > 0  && p.apply (iterations) > iteration_bound) + if (iteration_bound == 0  iterations <= 0 +  p.apply (iterations) <= iteration_bound) + return; + + e = single_exit (loop); + preheader_e = loop_preheader_edge (loop); + profile_count count_in = preheader_e>count (); + if (e && preheader_e + && count_in > profile_count::zero () + && loop>header>count.initialized_p ()) {  /* Fixing loop profile for different trip count is not trivial; the exit  probabilities has to be updated to match and frequencies propagated down  to the loop body.   We fully update only the simple case of loop with single exit that is  either from the latch or BB just before latch and leads from BB with  simple conditional jump. This is OK for use in vectorizer. */  e = single_exit (loop);  if (e)  {  edge other_e;  profile_count count_delta; + edge other_e; + profile_count count_delta;  FOR_EACH_EDGE (other_e, ei, e>src>succs)  if (!(other_e>flags & (EDGE_ABNORMAL  EDGE_FAKE))  && e != other_e)  break; + FOR_EACH_EDGE (other_e, ei, e>src>succs) + if (!(other_e>flags & (EDGE_ABNORMAL  EDGE_FAKE)) + && e != other_e) + break;  /* Probability of exit must be 1/iterations. */  count_delta = e>count ();  e>probability = profile_probability::always () + /* Probability of exit must be 1/iterations. */ + count_delta = e>count (); + e>probability = profile_probability::always () .apply_scale (1, iteration_bound);  other_e>probability = e>probability.invert ();  count_delta = e>count ();   /* If latch exists, change its count, since we changed  probability of exit. Theoretically we should update everything from  source of exit edge to latch, but for vectorizer this is enough. */  if (loop>latch  && loop>latch != e>src)  {  loop>latch>count += count_delta;  }  } + other_e>probability = e>probability.invert (); /* Roughly speaking we want to reduce the loop body profile by the difference of loop iterations. We however can do better if we look at the actual profile, if it is available. */  p = p.apply_scale (iteration_bound, iterations);   if (loop>header>count.initialized_p ())  {  profile_count count_in = profile_count::zero (); + p = profile_probability::always ();  FOR_EACH_EDGE (e, ei, loop>header>preds)  if (e>src != loop>latch)  count_in += e>count ();   if (count_in > profile_count::zero () )  {  p = count_in.probability_in (loop>header>count.apply_scale  (iteration_bound, 1));  }  } + count_in = count_in.apply_scale (iteration_bound, 1); + p = count_in.probability_in (loop>header>count); if (!(p > profile_probability::never ())) p = profile_probability::very_unlikely ();  }  if (p >= profile_probability::always ()   !p.initialized_p ())  return; + if (p >= profile_probability::always () +  !p.initialized_p ()) + return;  /* Scale the actual probabilities. */  scale_loop_frequencies (loop, p);  if (dump_file && (dump_flags & TDF_DETAILS))  fprintf (dump_file, ";; guessed iterations are now %i\n",  (int)expected_loop_iterations_unbounded (loop)); + /* If latch exists, change its count, since we changed + probability of exit. Theoretically we should update everything from + source of exit edge to latch, but for vectorizer this is enough. */ + if (loop>latch && loop>latch != e>src) + loop>latch>count += count_delta; + + /* Scale the probabilities. */ + scale_loop_frequencies (loop, p); + + /* Change latch's count back. */ + if (loop>latch && loop>latch != e>src) + loop>latch>count = count_delta; + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, ";; guessed iterations are now %i\n", + (int)expected_loop_iterations_unbounded (loop)); + } } /* Recompute dominance information for basic blocks outside LOOP. */