[06/19] libctf: hashing

Message ID 20190430225706.159422-7-nick.alcock@oracle.com
State Superseded
Headers show
Series
  • libctf, and CTF support for objdump and readelf
Related show

Commit Message

Nick Alcock April 30, 2019, 10:56 p.m.
libctf maintains two distinct hash ADTs, one (ctf_dynhash) for wrapping
dynamically-generated unknown-sized hashes during CTF file construction,
one (ctf_hash) for wrapping unchanging hashes whose size is known at
creation time for reading CTF files that were previously created.

In the binutils implementation, these are both fairly thin wrappers
around libiberty hashtab.

Unusually, this code is not kept synchronized with libdtrace-ctf,
due to its dependence on libiberty hashtab.

libctf/
	* ctf-hash.c: New file.
	* ctf-impl.h: New declarations.
---
 libctf/ctf-hash.c | 277 ++++++++++++++++++++++++++++++++++++++++++++++
 libctf/ctf-impl.h |  29 +++++
 2 files changed, 306 insertions(+)
 create mode 100644 libctf/ctf-hash.c

-- 
2.21.0.237.gd0cfaa883d

Comments

Nick Clifton May 2, 2019, 4:16 p.m. | #1
Hi Nick,

> In the binutils implementation, these are both fairly thin wrappers

> around libiberty hashtab.


I could not think of anything useful to say about this patch, so this
is just a comment to let you know that I have looked over it.  I think
that this may well be the case with other patches in this series, so
from now on I will only post replies to patches where I actually have
something to say.  I will send an "end of comments" email as well
though, so that you know that I have finished.

Cheers
  Nick
Nick Alcock May 3, 2019, 7:33 p.m. | #2
On 2 May 2019, Nick Clifton uttered the following:

> Hi Nick,

>

>> In the binutils implementation, these are both fairly thin wrappers

>> around libiberty hashtab.

>

> I could not think of anything useful to say about this patch, so this

> is just a comment to let you know that I have looked over it.  I think


Thank you for this! Excellent reviews: I'm responding only slowly
because of the volume of changes your comments have generated. :)

 15 files changed, 345 insertions(+), 85 deletions(-)

so far... I can only hope that, one day many years from now, I can
review code I've never seen before as well as this.

Patch

diff --git a/libctf/ctf-hash.c b/libctf/ctf-hash.c
new file mode 100644
index 0000000000..fccc90f2cf
--- /dev/null
+++ b/libctf/ctf-hash.c
@@ -0,0 +1,277 @@ 
+/* Interface to hashtable implementations.
+   Copyright (C) 2006-2019 Free Software Foundation, Inc.
+
+   This file is part of libctf.
+
+   libctf is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 2, or (at your option) any later
+   version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+   See the GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <ctf-impl.h>
+#include <string.h>
+#include "libiberty.h"
+#include "hashtab.h"
+
+/* We have two hashtable implementations: one, ctf_dynhash_*(), is an interface to
+   a dynamically-expanding hash with unknown size that should support addition
+   of large numbers of items, and removal as well, and is used only at
+   type-insertion time; the other, ctf_dynhash_*(), is an interface to a
+   fixed-size hash from const char * -> ctf_id_t with number of elements
+   specified at creation time, that should support addition of items but need
+   not support removal.  These can be implemented by the same underlying hashmap
+   if you wish.  */
+
+typedef struct ctf_helem
+{
+  void *key;			 /* Either a pointer, or a coerced ctf_id_t.  */
+  void *value;			 /* The value (possibly a coerced int).  */
+  ctf_hash_free_fun key_free;
+  ctf_hash_free_fun value_free;
+} ctf_helem_t;
+
+struct ctf_dynhash
+{
+  struct htab *htab;
+  ctf_hash_free_fun key_free;
+  ctf_hash_free_fun value_free;
+};
+
+/* Hash functions. */
+
+unsigned int
+ctf_hash_integer (const void *ptr)
+{
+  ctf_helem_t *hep = (ctf_helem_t *) ptr;
+
+  return htab_hash_pointer (hep->key);
+}
+
+int
+ctf_hash_eq_integer (const void *a, const void *b)
+{
+  ctf_helem_t *hep_a = (ctf_helem_t *) a;
+  ctf_helem_t *hep_b = (ctf_helem_t *) b;
+
+  return htab_eq_pointer (hep_a->key, hep_b->key);
+}
+
+unsigned int
+ctf_hash_string (const void *ptr)
+{
+  ctf_helem_t *hep = (ctf_helem_t *) ptr;
+
+  return htab_hash_string (hep->key);
+}
+
+int
+ctf_hash_eq_string (const void *a, const void *b)
+{
+  ctf_helem_t *hep_a = (ctf_helem_t *) a;
+  ctf_helem_t *hep_b = (ctf_helem_t *) b;
+
+  return !strcmp((const char *) hep_a->key, (const char *) hep_b->key);
+}
+
+/* The dynhash, used for hashes whose size is not known at creation time. */
+
+/* Free a single ctf_helem.  */
+
+static void
+ctf_dynhash_item_free (void *item)
+{
+  ctf_helem_t *helem = item;
+
+  if (helem->key_free && helem->key)
+    helem->key_free (helem->key);
+  if (helem->value_free && helem->value)
+    helem->value_free (helem->value);
+  free (helem);
+}
+
+ctf_dynhash_t *
+ctf_dynhash_create (ctf_hash_fun hash_fun, ctf_hash_eq_fun eq_fun,
+                    ctf_hash_free_fun key_free, ctf_hash_free_fun value_free)
+{
+  ctf_dynhash_t *dynhash;
+
+  dynhash = malloc (sizeof (ctf_dynhash_t));
+  if (!dynhash)
+    return NULL;
+
+  /* 7 is arbitrary and untested for now..  */
+  if ((dynhash->htab = htab_create_alloc (7, (htab_hash) hash_fun, eq_fun,
+                                          ctf_dynhash_item_free, xcalloc, free)) == NULL)
+    {
+      free (dynhash);
+      return NULL;
+    }
+
+  dynhash->key_free = key_free;
+  dynhash->value_free = value_free;
+
+  return dynhash;
+}
+
+static ctf_helem_t **
+ctf_hashtab_lookup (struct htab *htab, const void *key, enum insert_option insert)
+{
+  ctf_helem_t tmp = { .key = (void *) key };
+  return (ctf_helem_t **) htab_find_slot (htab, &tmp, insert);
+}
+
+static ctf_helem_t *
+ctf_hashtab_insert (struct htab *htab, void *key, void *value)
+{
+  ctf_helem_t **slot;
+
+  slot = ctf_hashtab_lookup (htab, key, INSERT);
+
+  if (!slot)
+    {
+      errno = -ENOMEM;
+      return NULL;
+    }
+
+  if (!*slot)
+    {
+      *slot = malloc (sizeof (ctf_helem_t));
+      if (!*slot)
+	return NULL;
+      (*slot)->key = key;
+    }
+  (*slot)->value = value;
+  return *slot;
+}
+
+int
+ctf_dynhash_insert (ctf_dynhash_t *hp, void *key, void *value)
+{
+  ctf_helem_t *slot;
+
+  slot = ctf_hashtab_insert (hp->htab, key, value);
+
+  if (!slot)
+    return errno;
+
+  /* We need to keep the key_free and value_free around in each item because the
+     del function has no visiblity into the hash as a whole, only into the
+     individual items.  */
+
+  slot->key_free = hp->key_free;
+  slot->value_free = hp->value_free;
+
+  return 0;
+}
+
+void
+ctf_dynhash_remove (ctf_dynhash_t *hp, const void *key)
+{
+  htab_remove_elt (hp->htab, (void *) key);
+}
+
+void *
+ctf_dynhash_lookup (ctf_dynhash_t *hp, const void *key)
+{
+  ctf_helem_t **slot;
+
+  slot = ctf_hashtab_lookup (hp->htab, key, NO_INSERT);
+
+  if (slot)
+    return (*slot)->value;
+
+  return NULL;
+}
+
+void
+ctf_dynhash_destroy (ctf_dynhash_t *hp)
+{
+  if (hp != NULL)
+    htab_delete (hp->htab);
+  free (hp);
+}
+
+/* ctf_hash, used for fixed-size maps from const char * -> ctf_id_t without
+   removal.  This is a straight cast of a hashtab.  */
+
+ctf_hash_t *
+ctf_hash_create (unsigned long nelems, ctf_hash_fun hash_fun,
+		 ctf_hash_eq_fun eq_fun)
+{
+  return (ctf_hash_t *) htab_create_alloc (nelems, (htab_hash) hash_fun,
+					   eq_fun, free, xcalloc, free);
+}
+
+uint32_t
+ctf_hash_size (const ctf_hash_t *hp)
+{
+  return htab_elements ((struct htab *) hp);
+}
+
+int
+ctf_hash_insert_type (ctf_hash_t *hp, ctf_file_t *fp, uint32_t type,
+		      uint32_t name)
+{
+  ctf_strs_t *ctsp = &fp->ctf_str[CTF_NAME_STID (name)];
+  const char *str = ctsp->cts_strs + CTF_NAME_OFFSET (name);
+
+  if (type == 0)
+    return EINVAL;
+
+  if (ctsp->cts_strs == NULL)
+    return ECTF_STRTAB;
+
+  if (ctsp->cts_len <= CTF_NAME_OFFSET (name))
+    return ECTF_BADNAME;
+
+  if (str[0] == '\0')
+    return 0;		   /* Just ignore empty strings on behalf of caller.  */
+
+  if (ctf_hashtab_insert ((struct htab *) hp, (char *) str,
+			  (void *) (ptrdiff_t) type) != NULL)
+    return 0;
+  return errno;
+}
+
+/* if the key is already in the hash, override the previous definition with
+   this new official definition. If the key is not present, then call
+   ctf_hash_insert_type() and hash it in.  */
+int
+ctf_hash_define_type (ctf_hash_t *hp, ctf_file_t *fp, uint32_t type,
+                      uint32_t name)
+{
+  /* This matches the semantics of ctf_hash_insert_type() in this
+     implementation anyway.  */
+
+  return ctf_hash_insert_type (hp, fp, type, name);
+}
+
+ctf_id_t
+ctf_hash_lookup_type (ctf_hash_t *hp, ctf_file_t *fp __attribute__ ((__unused__)),
+		      const char *key)
+{
+  ctf_helem_t **slot;
+
+  slot = ctf_hashtab_lookup ((struct htab *) hp, key, NO_INSERT);
+
+  if (slot)
+    return (ctf_id_t) ((*slot)->value);
+
+  return 0;
+}
+
+void
+ctf_hash_destroy (ctf_hash_t *hp)
+{
+  if (hp != NULL)
+    htab_delete ((struct htab *) hp);
+}
diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h
index d4d71fbfdc..32f4bfeaef 100644
--- a/libctf/ctf-impl.h
+++ b/libctf/ctf-impl.h
@@ -59,12 +59,41 @@  extern "C"
 
 #endif
 
+/* libctf in-memory state.  */
+
+typedef struct ctf_fixed_hash ctf_hash_t; /* Private to ctf-hash.c.  */
+typedef struct ctf_dynhash ctf_dynhash_t; /* Private to ctf-hash.c.  */
+
 typedef struct ctf_list
 {
   struct ctf_list *l_prev;	/* Previous pointer or tail pointer.  */
   struct ctf_list *l_next;	/* Next pointer or head pointer.  */
 } ctf_list_t;
 
+typedef unsigned int (*ctf_hash_fun) (const void *ptr);
+extern unsigned int ctf_hash_integer (const void *ptr);
+extern unsigned int ctf_hash_string (const void *ptr);
+
+typedef int (*ctf_hash_eq_fun) (const void *, const void *);
+extern int ctf_hash_eq_integer (const void *, const void *);
+extern int ctf_hash_eq_string (const void *, const void *);
+
+typedef void (*ctf_hash_free_fun) (void *);
+
+extern ctf_hash_t *ctf_hash_create (unsigned long, ctf_hash_fun, ctf_hash_eq_fun);
+extern int ctf_hash_insert_type (ctf_hash_t *, ctf_file_t *, uint32_t, uint32_t);
+extern int ctf_hash_define_type (ctf_hash_t *, ctf_file_t *, uint32_t, uint32_t);
+extern ctf_id_t ctf_hash_lookup_type (ctf_hash_t *, ctf_file_t *, const char *);
+extern uint32_t ctf_hash_size (const ctf_hash_t *);
+extern void ctf_hash_destroy (ctf_hash_t *);
+
+extern ctf_dynhash_t *ctf_dynhash_create (ctf_hash_fun, ctf_hash_eq_fun,
+					  ctf_hash_free_fun, ctf_hash_free_fun);
+extern int ctf_dynhash_insert (ctf_dynhash_t *, void *, void *);
+extern void ctf_dynhash_remove (ctf_dynhash_t *, const void *);
+extern void *ctf_dynhash_lookup (ctf_dynhash_t *, const void *);
+extern void ctf_dynhash_destroy (ctf_dynhash_t *);
+
 #define	ctf_list_prev(elem)	((void *)(((ctf_list_t *)(elem))->l_prev))
 #define	ctf_list_next(elem)	((void *)(((ctf_list_t *)(elem))->l_next))