qsort issues

Message ID 20191014060944.GD10114@bubble.grove.modra.org
State New
Headers show
Series
  • qsort issues
Related show

Commit Message

Alan Modra Oct. 14, 2019, 6:09 a.m.
qsort isn't guaranteed to be a stable sort, that is, elements
comparing equal according to the comparison function may be reordered
relative to their original ordering.  Of course sometimes you may not
care, but even in those cases it is good to force some ordering
(ie. not have the comparison function return 0) so that linker output
is reproducible over different libc qsort implementations.

One way to make qsort stable (which the glibc manual incorrectly says
is the only way) is to augment the elements being sorted with a
monotonic counter of some kind, and use that counter as the final
arbiter of ordering in the comparison function.

Another way is to set up an array of pointers into the array of
elements, first pointer to first element, second pointer to second
element and so so, and sort the pointer array rather than the element
array.  Final arbiter in the comparison function then is the pointer
difference.  This works well with, for example, the symbol pointers
returned by _bfd_elf_canonicalize_symtab which point into a symbol
array.

This patch fixes a few places where sorting by symbol pointers is
appropriate, and adds comments where qsort stability is a non-issue.

	* elf-strtab.c (strrevcmp): Comment.
	* merge.c (strrevcmp): Likewise.
	* elf64-ppc.c (compare_symbols): Correct final pointer comparison.
	Comment on why comparing pointers ensures a stable sort.
	* elflink.c (struct elf_symbol): Add void* to union.
	(elf_sort_elf_symbol): Ensure a stable sort with pointer comparison.
	(elf_sym_name_compare): Likewise.
	(bfd_elf_match_symbols_in_sections): Style fix.
	(elf_link_sort_cmp1): Comment.


-- 
Alan Modra
Australia Development Lab, IBM

Patch

diff --git a/bfd/elf-strtab.c b/bfd/elf-strtab.c
index cc1dcb3f50..55e138a631 100644
--- a/bfd/elf-strtab.c
+++ b/bfd/elf-strtab.c
@@ -334,7 +334,9 @@  _bfd_elf_strtab_emit (register bfd *abfd, struct elf_strtab_hash *tab)
   return TRUE;
 }
 
-/* Compare two elf_strtab_hash_entry structures.  Called via qsort.  */
+/* Compare two elf_strtab_hash_entry structures.  Called via qsort.
+   Won't ever return zero as all entries differ, so there is no issue
+   with qsort stability here.  */
 
 static int
 strrevcmp (const void *a, const void *b)
diff --git a/bfd/elf64-ppc.c b/bfd/elf64-ppc.c
index 32ed81d98e..2731e78618 100644
--- a/bfd/elf64-ppc.c
+++ b/bfd/elf64-ppc.c
@@ -2101,7 +2101,17 @@  compare_symbols (const void *ap, const void *bp)
   if ((a->flags & BSF_DYNAMIC) == 0 && (b->flags & BSF_DYNAMIC) != 0)
     return 1;
 
-  return a > b;
+  /* Finally, sort on where the symbol is in memory.  The symbols will
+     be in at most two malloc'd blocks, one for static syms, one for
+     dynamic syms, and we distinguish the two blocks above by testing
+     BSF_DYNAMIC.  Since we are sorting the symbol pointers which were
+     originally in the same order as the symbols (and we're not
+     sorting the symbols themselves), this ensures a stable sort.  */
+  if (a < b)
+    return -1;
+  if (a > b)
+    return 1;
+  return 0;
 }
 
 /* Search SYMS for a symbol of the given VALUE.  */
diff --git a/bfd/elflink.c b/bfd/elflink.c
index d0f70cb6f5..9d8dcff9a3 100644
--- a/bfd/elflink.c
+++ b/bfd/elflink.c
@@ -7862,6 +7862,7 @@  struct elf_symbol
     {
       Elf_Internal_Sym *isym;
       struct elf_symbuf_symbol *ssym;
+      void *p;
     } u;
   const char *name;
 };
@@ -7874,7 +7875,13 @@  elf_sort_elf_symbol (const void *arg1, const void *arg2)
   const Elf_Internal_Sym *s1 = *(const Elf_Internal_Sym **) arg1;
   const Elf_Internal_Sym *s2 = *(const Elf_Internal_Sym **) arg2;
 
-  return s1->st_shndx - s2->st_shndx;
+  if (s1->st_shndx != s2->st_shndx)
+    return s1->st_shndx > s2->st_shndx ? 1 : -1;
+  /* Final sort by the address of the sym in the symbuf ensures
+     a stable sort.  */
+  if (s1 != s2)
+    return s1 > s2 ? 1 : -1;
+  return 0;
 }
 
 static int
@@ -7882,7 +7889,12 @@  elf_sym_name_compare (const void *arg1, const void *arg2)
 {
   const struct elf_symbol *s1 = (const struct elf_symbol *) arg1;
   const struct elf_symbol *s2 = (const struct elf_symbol *) arg2;
-  return strcmp (s1->name, s2->name);
+  int ret = strcmp (s1->name, s2->name);
+  if (ret != 0)
+    return ret;
+  if (s1->u.p != s2->u.p)
+    return s1->u.p > s2->u.p ? 1 : -1;
+  return 0;
 }
 
 static struct elf_symbuf_head *
@@ -8005,8 +8017,10 @@  bfd_elf_match_symbols_in_sections (asection *sec1, asection *sec2,
 	goto done;
 
       if (!info->reduce_memory_overheads)
-	elf_tdata (bfd1)->symbuf = ssymbuf1
-	  = elf_create_symbuf (symcount1, isymbuf1);
+	{
+	  ssymbuf1 = elf_create_symbuf (symcount1, isymbuf1);
+	  elf_tdata (bfd1)->symbuf = ssymbuf1;
+	}
     }
 
   if (ssymbuf1 == NULL || ssymbuf2 == NULL)
@@ -8017,8 +8031,10 @@  bfd_elf_match_symbols_in_sections (asection *sec1, asection *sec2,
 	goto done;
 
       if (ssymbuf1 != NULL && !info->reduce_memory_overheads)
-	elf_tdata (bfd2)->symbuf = ssymbuf2
-	  = elf_create_symbuf (symcount2, isymbuf2);
+	{
+	  ssymbuf2 = elf_create_symbuf (symcount2, isymbuf2);
+	  elf_tdata (bfd2)->symbuf = ssymbuf2;
+	}
     }
 
   if (ssymbuf1 != NULL && ssymbuf2 != NULL)
@@ -9060,6 +9076,15 @@  struct elf_link_sort_rela
   Elf_Internal_Rela rela[1];
 };
 
+/* qsort stability here and for cmp2 is only an issue if multiple
+   dynamic relocations are emitted at the same address.  But targets
+   that apply a series of dynamic relocations each operating on the
+   result of the prior relocation can't use -z combreloc as
+   implemented anyway.  Such schemes tend to be broken by sorting on
+   symbol index.  That leaves dynamic NONE relocs as the only other
+   case where ld might emit multiple relocs at the same address, and
+   those are only emitted due to target bugs.  */
+
 static int
 elf_link_sort_cmp1 (const void *A, const void *B)
 {
diff --git a/bfd/merge.c b/bfd/merge.c
index 632c852390..11be669c2c 100644
--- a/bfd/merge.c
+++ b/bfd/merge.c
@@ -555,6 +555,9 @@  error_return:
   return FALSE;
 }
 
+/* qsort comparison function.  Won't ever return zero as all entries
+   differ, so there is no issue with qsort stability here.  */
+
 static int
 strrevcmp (const void *a, const void *b)
 {