[1/4] qsort_chk: call from gcc_qsort instead of wrapping it

Message ID alpine.LNX.2.20.13.1808281157510.10521@monopod.intra.ispras.ru
State New
Headers show
Series
  • [1/4] qsort_chk: call from gcc_qsort instead of wrapping it
Related show

Commit Message

Alexander Monakov Aug. 28, 2018, 9:03 a.m.
Hi,

This is an independently useful patch that makes it easier to introduce
gcc_stablesort.

Swap responsibilities of gcc_qsort and qsort_chk: call the checking function
from the sorting function instead of wrapping gcc_qsort with qsort_chk.

	* gcc/sort.cc (gcc_qsort) [CHECKING_P]: Call qsort_chk.
	* gcc/system.h (qsort): Always redirect to gcc_qsort.  Update comment.
	* gcc/vec.c (qsort_chk): Do not call gcc_qsort.  Update comment.

-- 
2.13.3

Comments

Alexander Monakov Aug. 28, 2018, 9:14 a.m. | #1
This adds a stable sort to sort.cc: mergesort implementation is stable, so
we just need network sort limited to sizes 2-3 to get the complete sort stable.

As I don't want to duplicate code for this, I've chosen to have gcc_qsort
accept bit-inverted 'size' parameter to request stable sorting.

	* gcc/sort.cc (struct sort_ctx): New field 'nlim'. Use it...
	(mergesort): ... here as maximum count for using netsort.
	(gcc_qsort): Set nlim to 3 if stable sort is requested.
	(gcc_stablesort): New.
	* gcc/system.h (gcc_stablesort): Declare.

diff --git a/gcc/sort.cc b/gcc/sort.cc
index 9f8ee12e13b..b3be1eac72b 100644
--- a/gcc/sort.cc
+++ b/gcc/sort.cc
@@ -55,6 +55,7 @@ struct sort_ctx
   char   *out; // output buffer
   size_t n;    // number of elements
   size_t size; // element size
+  size_t nlim; // limit for network sort
 };
 
 /* Helper for netsort. Permute, possibly in-place, 2 or 3 elements,
@@ -178,7 +179,7 @@ do {                                  \
 static void
 mergesort (char *in, sort_ctx *c, size_t n, char *out, char *tmp)
 {
-  if (likely (n <= 5))
+  if (likely (n <= c->nlim))
     {
       c->out = out;
       c->n = n;
@@ -221,8 +222,12 @@ gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp)
 {
   if (n < 2)
     return;
+  size_t nlim = 5;
+  bool stable = (ssize_t) size < 0;
+  if (stable)
+    nlim = 3, size = ~size;
   char *base = (char *)vbase;
-  sort_ctx c = {cmp, base, n, size};
+  sort_ctx c = {cmp, base, n, size, nlim};
   long long scratch[32];
   size_t bufsz = (n / 2) * size;
   void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz);
@@ -233,3 +238,9 @@ gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp)
   qsort_chk (vbase, n, size, cmp);
 #endif
 }
+
+void
+gcc_stablesort (void *vbase, size_t n, size_t size, cmp_fn *cmp)
+{
+  gcc_qsort (vbase, n, ~size, cmp);
+}
diff --git a/gcc/system.h b/gcc/system.h
index 203c6a4f0cf..100feb567c9 100644
--- a/gcc/system.h
+++ b/gcc/system.h
@@ -1202,6 +1202,8 @@ helper_const_non_const_cast (const char *p)
    corresponding to vec::qsort (cmp): they use C qsort internally anyway.  */
 void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *));
 void gcc_qsort (void *, size_t, size_t, int (*)(const void *, const void *));
+void gcc_stablesort (void *, size_t, size_t,
+		     int (*)(const void *, const void *));
 #define PP_5th(a1, a2, a3, a4, a5, ...) a5
 #undef qsort
 #define qsort(...) PP_5th (__VA_ARGS__, gcc_qsort, 3, 2, qsort, 0) (__VA_ARGS__)
-- 
2.13.3
Alexander Monakov Aug. 28, 2018, 9:17 a.m. | #2
This converts std::stable_sort use in tree-loop-distribution to gcc_stablesort.

	* gcc/tree-loop-distribution.c (offset_cmp): Convert to C-qsort-style
	tri-state comparator.
	(fuse_memset_builtins): Change std::stable_sort to  gcc_stablesort .

diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 120661447f0..d8db03b545b 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -90,7 +90,6 @@ along with GCC; see the file COPYING3.  If not see
 	data reuse.  */
 
 #include "config.h"
-#define INCLUDE_ALGORITHM /* stable_sort */
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
@@ -2561,12 +2560,14 @@ version_for_distribution_p (vec<struct partition *> *partitions,
 
 /* Compare base offset of builtin mem* partitions P1 and P2.  */
 
-static bool
-offset_cmp (struct partition *p1, struct partition *p2)
+static int
+offset_cmp (const void *vp1, const void *vp2)
 {
-  gcc_assert (p1 != NULL && p1->builtin != NULL);
-  gcc_assert (p2 != NULL && p2->builtin != NULL);
-  return p1->builtin->dst_base_offset < p2->builtin->dst_base_offset;
+  struct partition *p1 = *(struct partition *const *) vp1;
+  struct partition *p2 = *(struct partition *const *) vp2;
+  unsigned HOST_WIDE_INT o1 = p1->builtin->dst_base_offset;
+  unsigned HOST_WIDE_INT o2 = p2->builtin->dst_base_offset;
+  return (o2 < o1) - (o1 < o2);
 }
 
 /* Fuse adjacent memset builtin PARTITIONS if possible.  This is a special
@@ -2618,8 +2619,8 @@ fuse_memset_builtins (vec<struct partition *> *partitions)
 	}
 
       /* Stable sort is required in order to avoid breaking dependence.  */
-      std::stable_sort (&(*partitions)[i],
-			&(*partitions)[i] + j - i, offset_cmp);
+      gcc_stablesort (&(*partitions)[i], j - i, sizeof (*partitions)[i],
+		      offset_cmp);
       /* Continue with next partition.  */
       i = j;
     }
-- 
2.13.3
Richard Biener Aug. 28, 2018, 9:22 a.m. | #3
On Tue, Aug 28, 2018 at 11:03 AM Alexander Monakov <amonakov@ispras.ru> wrote:
>

> Hi,

>

> This is an independently useful patch that makes it easier to introduce

> gcc_stablesort.

>

> Swap responsibilities of gcc_qsort and qsort_chk: call the checking function

> from the sorting function instead of wrapping gcc_qsort with qsort_chk.


OK.

>         * gcc/sort.cc (gcc_qsort) [CHECKING_P]: Call qsort_chk.

>         * gcc/system.h (qsort): Always redirect to gcc_qsort.  Update comment.

>         * gcc/vec.c (qsort_chk): Do not call gcc_qsort.  Update comment.

>

> diff --git a/gcc/sort.cc b/gcc/sort.cc

> index 293e2058f89..9f8ee12e13b 100644

> --- a/gcc/sort.cc

> +++ b/gcc/sort.cc

> @@ -229,4 +229,7 @@ gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp)

>    mergesort (base, &c, n, base, (char *)buf);

>    if (buf != scratch)

>      free (buf);

> +#if CHECKING_P

> +  qsort_chk (vbase, n, size, cmp);

> +#endif

>  }

> diff --git a/gcc/system.h b/gcc/system.h

> index f87fbaae69e..203c6a4f0cf 100644

> --- a/gcc/system.h

> +++ b/gcc/system.h

> @@ -1197,17 +1197,13 @@ helper_const_non_const_cast (const char *p)

>  /* Get definitions of HOST_WIDE_INT.  */

>  #include "hwint.h"

>

> -/* qsort comparator consistency checking: except in release-checking compilers,

> -   redirect 4-argument qsort calls to qsort_chk; keep 1-argument invocations

> +/* GCC qsort API-compatible functions: except in release-checking compilers,

> +   redirect 4-argument qsort calls to gcc_qsort; keep 1-argument invocations

>     corresponding to vec::qsort (cmp): they use C qsort internally anyway.  */

>  void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *));

>  void gcc_qsort (void *, size_t, size_t, int (*)(const void *, const void *));

>  #define PP_5th(a1, a2, a3, a4, a5, ...) a5

>  #undef qsort

> -#if CHECKING_P

> -#define qsort(...) PP_5th (__VA_ARGS__, qsort_chk, 3, 2, qsort, 0) (__VA_ARGS__)

> -#else

>  #define qsort(...) PP_5th (__VA_ARGS__, gcc_qsort, 3, 2, qsort, 0) (__VA_ARGS__)

> -#endif

>

>  #endif /* ! GCC_SYSTEM_H */

> diff --git a/gcc/vec.c b/gcc/vec.c

> index beb857fd838..ac3226b5fcb 100644

> --- a/gcc/vec.c

> +++ b/gcc/vec.c

> @@ -201,21 +201,12 @@ qsort_chk_error (const void *p1, const void *p2, const void *p3,

>    internal_error ("qsort checking failed");

>  }

>

> -/* Wrapper around qsort with checking that CMP is consistent on given input.

> -

> -   Strictly speaking, passing invalid (non-transitive, non-anti-commutative)

> -   comparators to libc qsort can result in undefined behavior.  Therefore we

> -   should ideally perform consistency checks prior to invoking qsort, but in

> -   order to do that optimally we'd need to sort the array ourselves beforehand

> -   with a sorting routine known to be "safe".  Instead, we expect that most

> -   implementations in practice will still produce some permutation of input

> -   array even for invalid comparators, which enables us to perform checks on

> -   the output array.  */

> +/* Verify anti-symmetry and transitivity for comparator CMP on sorted array

> +   of N SIZE-sized elements pointed to by BASE.  */

>  void

>  qsort_chk (void *base, size_t n, size_t size,

>            int (*cmp)(const void *, const void *))

>  {

> -  gcc_qsort (base, n, size, cmp);

>  #if 0

>  #define LIM(n) (n)

>  #else

> --

> 2.13.3

>
Alexander Monakov Aug. 28, 2018, 9:22 a.m. | #4
This converts the use in bb-reorder.  I had to get a little bit creative with
the comparator as profile_count::operator> does not implement a strict weak
order.

	* gcc/bb-reorder.c (edge_order): Convert to C-qsort-style
	tri-state comparator.
	(reorder_basic_blocks_simple): Change std::stable_sort to gcc_stablesort.

diff --git a/gcc/bb-reorder.c b/gcc/bb-reorder.c
index 57edde62c62..7c80609fbf4 100644
--- a/gcc/bb-reorder.c
+++ b/gcc/bb-reorder.c
@@ -91,7 +91,6 @@
 */
 
 #include "config.h"
-#define INCLUDE_ALGORITHM /* stable_sort */
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
@@ -2351,13 +2350,17 @@ reorder_basic_blocks_software_trace_cache (void)
   FREE (bbd);
 }
 
-/* Return true if edge E1 is more desirable as a fallthrough edge than
-   edge E2 is.  */
+/* Order edges by execution frequency, higher first.  */
 
-static bool
-edge_order (edge e1, edge e2)
+static int
+edge_order (const void *ve1, const void *ve2)
 {
-  return e1->count () > e2->count ();
+  edge e1 = *(const edge *) ve1;
+  edge e2 = *(const edge *) ve2;
+  profile_count c1 = e1->count ();
+  profile_count c2 = e2->count ();
+  profile_count m = c1.max (c2);
+  return (m == c2) - (m == c1);
 }
 
 /* Reorder basic blocks using the "simple" algorithm.  This tries to
@@ -2410,7 +2413,7 @@ reorder_basic_blocks_simple (void)
      all edges are equally desirable.  */
 
   if (optimize_function_for_speed_p (cfun))
-    std::stable_sort (edges, edges + n, edge_order);
+    gcc_stablesort (edges, n, sizeof *edges, edge_order);
 
   /* Now decide which of those edges to make fallthrough edges.  We set
      BB_VISITED if a block already has a fallthrough successor assigned
-- 
2.13.3
Richard Biener Aug. 28, 2018, 1:25 p.m. | #5
On Tue, Aug 28, 2018 at 11:14 AM Alexander Monakov <amonakov@ispras.ru> wrote:
>

> This adds a stable sort to sort.cc: mergesort implementation is stable, so

> we just need network sort limited to sizes 2-3 to get the complete sort stable.

>

> As I don't want to duplicate code for this, I've chosen to have gcc_qsort

> accept bit-inverted 'size' parameter to request stable sorting.


OK.

>         * gcc/sort.cc (struct sort_ctx): New field 'nlim'. Use it...

>         (mergesort): ... here as maximum count for using netsort.

>         (gcc_qsort): Set nlim to 3 if stable sort is requested.

>         (gcc_stablesort): New.

>         * gcc/system.h (gcc_stablesort): Declare.

>

> diff --git a/gcc/sort.cc b/gcc/sort.cc

> index 9f8ee12e13b..b3be1eac72b 100644

> --- a/gcc/sort.cc

> +++ b/gcc/sort.cc

> @@ -55,6 +55,7 @@ struct sort_ctx

>    char   *out; // output buffer

>    size_t n;    // number of elements

>    size_t size; // element size

> +  size_t nlim; // limit for network sort

>  };

>

>  /* Helper for netsort. Permute, possibly in-place, 2 or 3 elements,

> @@ -178,7 +179,7 @@ do {                                  \

>  static void

>  mergesort (char *in, sort_ctx *c, size_t n, char *out, char *tmp)

>  {

> -  if (likely (n <= 5))

> +  if (likely (n <= c->nlim))

>      {

>        c->out = out;

>        c->n = n;

> @@ -221,8 +222,12 @@ gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp)

>  {

>    if (n < 2)

>      return;

> +  size_t nlim = 5;

> +  bool stable = (ssize_t) size < 0;

> +  if (stable)

> +    nlim = 3, size = ~size;

>    char *base = (char *)vbase;

> -  sort_ctx c = {cmp, base, n, size};

> +  sort_ctx c = {cmp, base, n, size, nlim};

>    long long scratch[32];

>    size_t bufsz = (n / 2) * size;

>    void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz);

> @@ -233,3 +238,9 @@ gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp)

>    qsort_chk (vbase, n, size, cmp);

>  #endif

>  }

> +

> +void

> +gcc_stablesort (void *vbase, size_t n, size_t size, cmp_fn *cmp)

> +{

> +  gcc_qsort (vbase, n, ~size, cmp);

> +}

> diff --git a/gcc/system.h b/gcc/system.h

> index 203c6a4f0cf..100feb567c9 100644

> --- a/gcc/system.h

> +++ b/gcc/system.h

> @@ -1202,6 +1202,8 @@ helper_const_non_const_cast (const char *p)

>     corresponding to vec::qsort (cmp): they use C qsort internally anyway.  */

>  void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *));

>  void gcc_qsort (void *, size_t, size_t, int (*)(const void *, const void *));

> +void gcc_stablesort (void *, size_t, size_t,

> +                    int (*)(const void *, const void *));

>  #define PP_5th(a1, a2, a3, a4, a5, ...) a5

>  #undef qsort

>  #define qsort(...) PP_5th (__VA_ARGS__, gcc_qsort, 3, 2, qsort, 0) (__VA_ARGS__)

> --

> 2.13.3

>
Richard Biener Aug. 28, 2018, 1:26 p.m. | #6
On Tue, Aug 28, 2018 at 11:17 AM Alexander Monakov <amonakov@ispras.ru> wrote:
>

> This converts std::stable_sort use in tree-loop-distribution to gcc_stablesort.

>

>         * gcc/tree-loop-distribution.c (offset_cmp): Convert to C-qsort-style

>         tri-state comparator.

>         (fuse_memset_builtins): Change std::stable_sort to  gcc_stablesort .


OK.

Richard.

> diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c

> index 120661447f0..d8db03b545b 100644

> --- a/gcc/tree-loop-distribution.c

> +++ b/gcc/tree-loop-distribution.c

> @@ -90,7 +90,6 @@ along with GCC; see the file COPYING3.  If not see

>         data reuse.  */

>

>  #include "config.h"

> -#define INCLUDE_ALGORITHM /* stable_sort */

>  #include "system.h"

>  #include "coretypes.h"

>  #include "backend.h"

> @@ -2561,12 +2560,14 @@ version_for_distribution_p (vec<struct partition *> *partitions,

>

>  /* Compare base offset of builtin mem* partitions P1 and P2.  */

>

> -static bool

> -offset_cmp (struct partition *p1, struct partition *p2)

> +static int

> +offset_cmp (const void *vp1, const void *vp2)

>  {

> -  gcc_assert (p1 != NULL && p1->builtin != NULL);

> -  gcc_assert (p2 != NULL && p2->builtin != NULL);

> -  return p1->builtin->dst_base_offset < p2->builtin->dst_base_offset;

> +  struct partition *p1 = *(struct partition *const *) vp1;

> +  struct partition *p2 = *(struct partition *const *) vp2;

> +  unsigned HOST_WIDE_INT o1 = p1->builtin->dst_base_offset;

> +  unsigned HOST_WIDE_INT o2 = p2->builtin->dst_base_offset;

> +  return (o2 < o1) - (o1 < o2);

>  }

>

>  /* Fuse adjacent memset builtin PARTITIONS if possible.  This is a special

> @@ -2618,8 +2619,8 @@ fuse_memset_builtins (vec<struct partition *> *partitions)

>         }

>

>        /* Stable sort is required in order to avoid breaking dependence.  */

> -      std::stable_sort (&(*partitions)[i],

> -                       &(*partitions)[i] + j - i, offset_cmp);

> +      gcc_stablesort (&(*partitions)[i], j - i, sizeof (*partitions)[i],

> +                     offset_cmp);

>        /* Continue with next partition.  */

>        i = j;

>      }

> --

> 2.13.3

>
Richard Biener Aug. 28, 2018, 1:33 p.m. | #7
On Tue, Aug 28, 2018 at 11:22 AM Alexander Monakov <amonakov@ispras.ru> wrote:
>

> This converts the use in bb-reorder.  I had to get a little bit creative with

> the comparator as profile_count::operator> does not implement a strict weak

> order.


So the previously used comparator was invalid?  I think your proposed one
warrants some comments.  Maybe trade speed for some clearer code?
The comments on the operator<> implementations are odd as well:

  /* Comparsions are three-state and conservative.  False is returned if
     the inequality can not be decided.  */
  bool operator< (const profile_count &other) const
    {

but bool can only represent a single state?  Are you supposed to do

  bool gt = count1 > count2;
  bool le = count1 <= count2
  if (!gt && !le)
    /* unordered */;
  else if (gt)
    ...
  else if (le)
    ...

?  And what do we want to do for the unordered case in bb-reorder?
Tie with current order, thus return zero?

Richard.

>         * gcc/bb-reorder.c (edge_order): Convert to C-qsort-style

>         tri-state comparator.

>         (reorder_basic_blocks_simple): Change std::stable_sort to gcc_stablesort.

>

> diff --git a/gcc/bb-reorder.c b/gcc/bb-reorder.c

> index 57edde62c62..7c80609fbf4 100644

> --- a/gcc/bb-reorder.c

> +++ b/gcc/bb-reorder.c

> @@ -91,7 +91,6 @@

>  */

>

>  #include "config.h"

> -#define INCLUDE_ALGORITHM /* stable_sort */

>  #include "system.h"

>  #include "coretypes.h"

>  #include "backend.h"

> @@ -2351,13 +2350,17 @@ reorder_basic_blocks_software_trace_cache (void)

>    FREE (bbd);

>  }

>

> -/* Return true if edge E1 is more desirable as a fallthrough edge than

> -   edge E2 is.  */

> +/* Order edges by execution frequency, higher first.  */

>

> -static bool

> -edge_order (edge e1, edge e2)

> +static int

> +edge_order (const void *ve1, const void *ve2)

>  {

> -  return e1->count () > e2->count ();

> +  edge e1 = *(const edge *) ve1;

> +  edge e2 = *(const edge *) ve2;

> +  profile_count c1 = e1->count ();

> +  profile_count c2 = e2->count ();

> +  profile_count m = c1.max (c2);

> +  return (m == c2) - (m == c1);

>  }

>

>  /* Reorder basic blocks using the "simple" algorithm.  This tries to

> @@ -2410,7 +2413,7 @@ reorder_basic_blocks_simple (void)

>       all edges are equally desirable.  */

>

>    if (optimize_function_for_speed_p (cfun))

> -    std::stable_sort (edges, edges + n, edge_order);

> +    gcc_stablesort (edges, n, sizeof *edges, edge_order);

>

>    /* Now decide which of those edges to make fallthrough edges.  We set

>       BB_VISITED if a block already has a fallthrough successor assigned

> --

> 2.13.3

>
Alexander Monakov Aug. 28, 2018, 4 p.m. | #8
On Tue, 28 Aug 2018, Richard Biener wrote:

> On Tue, Aug 28, 2018 at 11:22 AM Alexander Monakov <amonakov@ispras.ru> wrote:

> >

> > This converts the use in bb-reorder.  I had to get a little bit creative with

> > the comparator as profile_count::operator> does not implement a strict weak

> > order.

> 

> So the previously used comparator was invalid?


Yes, when one of profile_count values is !initialized_p () (which happens in
testsuite).

> I think your proposed one

> warrants some comments.  Maybe trade speed for some clearer code?


I think it's not too complicated, but how about adding this comment:

  profile_count m = c1.max (c2);
  /* Return 0 if counts are equal, -1 if E1 has the larger count.  */
  return (m == c2) - (m == c1);

Or, alternatively, employ more branchy checks:

  if (c1 == c2)
    return 0;
  if (c1 == c1.max (c2))
    return -1;
  return 1;

> The comments on the operator<> implementations are odd as well:


(an aside, but: it also has strange redundant code like

      if (*this  == profile_count::zero ())
        return false;
      if (other == profile_count::zero ())
        return !(*this == profile_count::zero ());

where the second return could have been 'return true;')

>   /* Comparsions are three-state and conservative.  False is returned if

>      the inequality can not be decided.  */

>   bool operator< (const profile_count &other) const

>     {

> 

> but bool can only represent a single state?  Are you supposed to do

> 

>   bool gt = count1 > count2;

>   bool le = count1 <= count2

>   if (!gt && !le)

>     /* unordered */;

>   else if (gt)

>     ...

>   else if (le)

>     ...

> 

> ? 


*shudder*  I assume your question is directed to Honza, but IMO profile_count
comparison APIs could be easier to understand if there was a single function
implementing the meat of comparison and returning one of {unordered, equal,
less, greater}, and remaining operations would simply wrap it.

> And what do we want to do for the unordered case in bb-reorder?

> Tie with current order, thus return zero?


No, that wouldn't be valid for sorting.  My patch restores the previous
treatment (uninit/0-frequency edges are worse than any other).

Alexander
Michael Matz Aug. 28, 2018, 7:44 p.m. | #9
Hi,

On Tue, 28 Aug 2018, Alexander Monakov wrote:

> > I think your proposed one

> > warrants some comments.  Maybe trade speed for some clearer code?

> 

> I think it's not too complicated, but how about adding this comment:

> 

>   profile_count m = c1.max (c2);

>   /* Return 0 if counts are equal, -1 if E1 has the larger count.  */

>   return (m == c2) - (m == c1);

> 

> Or, alternatively, employ more branchy checks:

> 

>   if (c1 == c2)

>     return 0;

>   if (c1 == c1.max (c2))

>     return -1;

>   return 1;


You want to at least comment on why a tradition </>/== check isn't 
working, namely because of the uninitialized counts.  (Though I also think 
that comparing uninitialized counts is just non-sense and hence should 
actually be ruled out from the whole comparison business, at which point a 
normal <= check can be done again).

> > The comments on the operator<> implementations are odd as well:

> 

> (an aside, but: it also has strange redundant code like

> 

>       if (*this  == profile_count::zero ())

>         return false;

>       if (other == profile_count::zero ())

>         return !(*this == profile_count::zero ());

> 

> where the second return could have been 'return true;')


Oh my.


Ciao,
Michael.
Alexander Monakov Aug. 29, 2018, 2:07 p.m. | #10
On Tue, 28 Aug 2018, Michael Matz wrote:
> > I think it's not too complicated, but how about adding this comment:

> > 

> >   profile_count m = c1.max (c2);

> >   /* Return 0 if counts are equal, -1 if E1 has the larger count.  */

> >   return (m == c2) - (m == c1);

> > 

> > Or, alternatively, employ more branchy checks:

> > 

> >   if (c1 == c2)

> >     return 0;

> >   if (c1 == c1.max (c2))

> >     return -1;

> >   return 1;

> 

> You want to at least comment on why a tradition </>/== check isn't 

> working, namely because of the uninitialized counts.


Alright, so how about this comment:

  /* Using 'max', as profile_count::operator< does not establish
     a strict weak order when uninitialized counts may be present.  */
  profile_count m = c1.max (c2);
  return (m == c2) - (m == c1);

Or the same comment in the "branchy" alternative.

Thanks.
Alexander
Alexander Monakov Sept. 3, 2018, 11:17 a.m. | #11
On Wed, 29 Aug 2018, Alexander Monakov wrote:
> On Tue, 28 Aug 2018, Michael Matz wrote:

> > > I think it's not too complicated, but how about adding this comment:

> > > 

> > >   profile_count m = c1.max (c2);

> > >   /* Return 0 if counts are equal, -1 if E1 has the larger count.  */

> > >   return (m == c2) - (m == c1);

> > > 

> > > Or, alternatively, employ more branchy checks:

> > > 

> > >   if (c1 == c2)

> > >     return 0;

> > >   if (c1 == c1.max (c2))

> > >     return -1;

> > >   return 1;

> > 

> > You want to at least comment on why a tradition </>/== check isn't 

> > working, namely because of the uninitialized counts.

> 

> Alright, so how about this comment:

> 

>   /* Using 'max', as profile_count::operator< does not establish

>      a strict weak order when uninitialized counts may be present.  */

>   profile_count m = c1.max (c2);

>   return (m == c2) - (m == c1);

> 

> Or the same comment in the "branchy" alternative.


Ping, would any of proposed variants clarify the situation, and were there
any other issues with the patch?

Thanks.
Alexander
Richard Biener Sept. 3, 2018, 11:52 a.m. | #12
On Mon, Sep 3, 2018 at 1:17 PM Alexander Monakov <amonakov@ispras.ru> wrote:
>

> On Wed, 29 Aug 2018, Alexander Monakov wrote:

> > On Tue, 28 Aug 2018, Michael Matz wrote:

> > > > I think it's not too complicated, but how about adding this comment:

> > > >

> > > >   profile_count m = c1.max (c2);

> > > >   /* Return 0 if counts are equal, -1 if E1 has the larger count.  */

> > > >   return (m == c2) - (m == c1);

> > > >

> > > > Or, alternatively, employ more branchy checks:

> > > >

> > > >   if (c1 == c2)

> > > >     return 0;

> > > >   if (c1 == c1.max (c2))

> > > >     return -1;

> > > >   return 1;

> > >

> > > You want to at least comment on why a tradition </>/== check isn't

> > > working, namely because of the uninitialized counts.

> >

> > Alright, so how about this comment:

> >

> >   /* Using 'max', as profile_count::operator< does not establish

> >      a strict weak order when uninitialized counts may be present.  */

> >   profile_count m = c1.max (c2);

> >   return (m == c2) - (m == c1);


This variant works for me if you clarify that max () makes uninitialized
counts less.

> > Or the same comment in the "branchy" alternative.

>

> Ping, would any of proposed variants clarify the situation, and were there

> any other issues with the patch?


OK with the above variant.

Richard.

> Thanks.

> Alexander

Patch

diff --git a/gcc/sort.cc b/gcc/sort.cc
index 293e2058f89..9f8ee12e13b 100644
--- a/gcc/sort.cc
+++ b/gcc/sort.cc
@@ -229,4 +229,7 @@  gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp)
   mergesort (base, &c, n, base, (char *)buf);
   if (buf != scratch)
     free (buf);
+#if CHECKING_P
+  qsort_chk (vbase, n, size, cmp);
+#endif
 }
diff --git a/gcc/system.h b/gcc/system.h
index f87fbaae69e..203c6a4f0cf 100644
--- a/gcc/system.h
+++ b/gcc/system.h
@@ -1197,17 +1197,13 @@  helper_const_non_const_cast (const char *p)
 /* Get definitions of HOST_WIDE_INT.  */
 #include "hwint.h"
 
-/* qsort comparator consistency checking: except in release-checking compilers,
-   redirect 4-argument qsort calls to qsort_chk; keep 1-argument invocations
+/* GCC qsort API-compatible functions: except in release-checking compilers,
+   redirect 4-argument qsort calls to gcc_qsort; keep 1-argument invocations
    corresponding to vec::qsort (cmp): they use C qsort internally anyway.  */
 void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *));
 void gcc_qsort (void *, size_t, size_t, int (*)(const void *, const void *));
 #define PP_5th(a1, a2, a3, a4, a5, ...) a5
 #undef qsort
-#if CHECKING_P
-#define qsort(...) PP_5th (__VA_ARGS__, qsort_chk, 3, 2, qsort, 0) (__VA_ARGS__)
-#else
 #define qsort(...) PP_5th (__VA_ARGS__, gcc_qsort, 3, 2, qsort, 0) (__VA_ARGS__)
-#endif
 
 #endif /* ! GCC_SYSTEM_H */
diff --git a/gcc/vec.c b/gcc/vec.c
index beb857fd838..ac3226b5fcb 100644
--- a/gcc/vec.c
+++ b/gcc/vec.c
@@ -201,21 +201,12 @@  qsort_chk_error (const void *p1, const void *p2, const void *p3,
   internal_error ("qsort checking failed");
 }
 
-/* Wrapper around qsort with checking that CMP is consistent on given input.
-
-   Strictly speaking, passing invalid (non-transitive, non-anti-commutative)
-   comparators to libc qsort can result in undefined behavior.  Therefore we
-   should ideally perform consistency checks prior to invoking qsort, but in
-   order to do that optimally we'd need to sort the array ourselves beforehand
-   with a sorting routine known to be "safe".  Instead, we expect that most
-   implementations in practice will still produce some permutation of input
-   array even for invalid comparators, which enables us to perform checks on
-   the output array.  */
+/* Verify anti-symmetry and transitivity for comparator CMP on sorted array
+   of N SIZE-sized elements pointed to by BASE.  */
 void
 qsort_chk (void *base, size_t n, size_t size,
 	   int (*cmp)(const void *, const void *))
 {
-  gcc_qsort (base, n, size, cmp);
 #if 0
 #define LIM(n) (n)
 #else