[09/19] libctf: support getting strings from the ELF strtab

Message ID 20190716180420.236506-10-nick.alcock@oracle.com
State New
Headers show
Series
  • CTF linking support
Related show

Commit Message

Nick Alcock July 16, 2019, 6:04 p.m.
The CTF file format has always supported references into an "external
strtab": internally, these are strtab offsets with their MSB on.  Such
refs get their strings from the strtab passed in at CTF file open time.
This is usually intended to be the ELF strtab, and that's what this
implementation is meant to support, though in theory the external
strtab could come from anywhere.

This commit adds support for these external strings in the ctf-string.c
strtab tracking layer.  It's quite easy: we just add a field csa_offset
to the atoms in the table that tracks all strings.  This field holds the
offset of the string in the ELF strtab (with its MSB already on, courtesy
of a new macro CTF_SET_STID).  We also add a new function that sets the
csa_offset to the specified offset (plus MSB).  Then we just need to
avoid writing strings that have csa_offset set out to the internal
strtab, and note that the internal strtab is shorter than it might
otherwise be.

(We could in theory save a little more time here by eschewing sorting
such strings, since we never actually write the strings out anywhere,
but that would mean storing them separately and it's just not worth the
complexity cost until profiling shows it's worth doing.)

include/
	* ctf.h (CTF_SET_STID): New.

libctf/
	* ctf-impl.h (struct ctf_str_atom): New field csa_offset.
	(ctf_str_add_ref): Name the last arg.
	(ctf_str_add_external): New.
	* ctf-string.c (ctf_str_add_ref_internal): Return the atom, not the
	string.
	(ctf_str_add): Adjust accordingly.
	(ctf_str_add_ref): Likewise.  Move up in the file.
	(ctf_str_add_external): New: update the csa_offset.
	(ctf_str_count_strtab): Only account for strings with no csa_offset
	in the internal strtab length.
	(ctf_str_write_strtab): If the csa_offset is set, update the
	string's refs without writing the string out.
---
 include/ctf.h       |  1 +
 libctf/ctf-impl.h   |  4 +-
 libctf/ctf-string.c | 90 ++++++++++++++++++++++++++++++++-------------
 3 files changed, 68 insertions(+), 27 deletions(-)

-- 
2.22.0.238.g049a27acdc

Patch

diff --git a/include/ctf.h b/include/ctf.h
index f371cd73c9..ff3204b9aa 100644
--- a/include/ctf.h
+++ b/include/ctf.h
@@ -353,6 +353,7 @@  union
 
 #define CTF_NAME_STID(name)		((name) >> 31)
 #define CTF_NAME_OFFSET(name)		((name) & CTF_MAX_NAME)
+#define CTF_SET_STID(name, stid)	((name) | ((uint32_t) (stid) << 31))
 
 /* V2 only. */
 #define CTF_TYPE_INFO(kind, isroot, vlen) \
diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h
index 5b331cbc6d..1c243d758c 100644
--- a/libctf/ctf-impl.h
+++ b/libctf/ctf-impl.h
@@ -192,6 +192,7 @@  typedef struct ctf_str_atom
 {
   const char *csa_str;		/* Backpointer to string (hash key).  */
   ctf_list_t csa_refs;		/* This string's refs.  */
+  uint32_t csa_offset;		/* External strtab offset, if any.  */
   unsigned long csa_snapshot_id; /* Snapshot ID at time of creation.  */
 } ctf_str_atom_t;
 
@@ -380,7 +381,8 @@  extern const char *ctf_strptr (ctf_file_t *, uint32_t);
 extern int ctf_str_create_atoms (ctf_file_t *);
 extern void ctf_str_free_atoms (ctf_file_t *);
 extern const char *ctf_str_add (ctf_file_t *, const char *);
-extern const char *ctf_str_add_ref (ctf_file_t *, const char *, uint32_t *);
+extern const char *ctf_str_add_ref (ctf_file_t *, const char *, uint32_t *ref);
+extern const char *ctf_str_add_external (ctf_file_t *, const char *, uint32_t offset);
 extern void ctf_str_rollback (ctf_file_t *, ctf_snapshot_id_t);
 extern void ctf_str_purge_refs (ctf_file_t *);
 extern ctf_strs_writable_t ctf_str_write_strtab (ctf_file_t *);
diff --git a/libctf/ctf-string.c b/libctf/ctf-string.c
index 27bd7c2bba..4d063bb5e6 100644
--- a/libctf/ctf-string.c
+++ b/libctf/ctf-string.c
@@ -88,11 +88,11 @@  ctf_str_free_atoms (ctf_file_t *fp)
   ctf_dynhash_destroy (fp->ctf_str_atoms);
 }
 
-/* Add a string to the atoms table and return it, or return an existing string
-   if present, copying the passed-in string.  Returns NULL only when out of
-   memory (and do not touch the passed-in string in that case).  Possibly
-   augment the ref list with the passed-in ref.  */
-static const char *
+/* Add a string to the atoms table, copying the passed-in string, or find the
+   existing atom if already present.  Return the atom, or NULL only when out
+   of memory (and do not touch the passed-in string in that case).  Possibly
+   augment the ref list with the passed-in ref.  */
+static ctf_str_atom_t *
 ctf_str_add_ref_internal (ctf_file_t *fp, const char *str,
 			  int add_ref, uint32_t *ref)
 {
@@ -116,7 +116,7 @@  ctf_str_add_ref_internal (ctf_file_t *fp, const char *str,
 	  ctf_list_append (&atom->csa_refs, aref);
 	  fp->ctf_str_num_refs++;
 	}
-      return atom->csa_str;
+      return atom;
     }
 
   if ((atom = ctf_alloc (sizeof (struct ctf_str_atom))) == NULL)
@@ -136,7 +136,7 @@  ctf_str_add_ref_internal (ctf_file_t *fp, const char *str,
       ctf_list_append (&atom->csa_refs, aref);
       fp->ctf_str_num_refs++;
     }
-  return newstr;
+  return atom;
 
  oom:
   ctf_free (atom);
@@ -150,9 +150,48 @@  ctf_str_add_ref_internal (ctf_file_t *fp, const char *str,
 const char *
 ctf_str_add (ctf_file_t *fp, const char *str)
 {
-  if (str)
-    return ctf_str_add_ref_internal (fp, str, FALSE, 0);
-  return NULL;
+  ctf_str_atom_t *atom;
+  if (!str)
+    return NULL;
+
+  atom = ctf_str_add_ref_internal (fp, str, FALSE, 0);
+  if (!atom)
+    return NULL;
+
+  return atom->csa_str;
+}
+
+/* Like ctf_str_add(), but additionally augment the atom's refs list with the
+   passed-in ref, whether or not the string is already present.  There is no
+   attempt to deduplicate the refs list (but duplicates are harmless).  */
+const char *
+ctf_str_add_ref (ctf_file_t *fp, const char *str, uint32_t *ref)
+{
+  ctf_str_atom_t *atom;
+  if (!str)
+    return NULL;
+
+  atom = ctf_str_add_ref_internal (fp, str, TRUE, ref);
+  if (!atom)
+    return NULL;
+
+  return atom->csa_str;
+}
+
+/* Add an external strtab reference at OFFSET.  */
+const char *
+ctf_str_add_external (ctf_file_t *fp, const char *str, uint32_t offset)
+{
+  ctf_str_atom_t *atom;
+  if (!str)
+    return NULL;
+
+  atom = ctf_str_add_ref_internal (fp, str, FALSE, 0);
+  if (!atom)
+    return NULL;
+
+  atom->csa_offset = CTF_SET_STID (offset, CTF_STRTAB_1);
+  return atom->csa_str;
 }
 
 /* A ctf_dynhash_iter_remove() callback that removes atoms later than a given
@@ -173,17 +212,6 @@  ctf_str_rollback (ctf_file_t *fp, ctf_snapshot_id_t id)
   ctf_dynhash_iter_remove (fp->ctf_str_atoms, ctf_str_rollback_atom, &id);
 }
 
-/* Like ctf_str_add(), but additionally augment the atom's refs list with the
-   passed-in ref, whether or not the string is already present.  There is no
-   attempt to deduplicate the refs list (but duplicates are harmless).  */
-const char *
-ctf_str_add_ref (ctf_file_t *fp, const char *str, uint32_t *ref)
-{
-  if (str)
-    return ctf_str_add_ref_internal (fp, str, TRUE, ref);
-  return NULL;
-}
-
 /* An adaptor around ctf_purge_atom_refs.  */
 static void
 ctf_str_purge_one_atom_refs (void *key _libctf_unused_, void *value,
@@ -238,7 +266,11 @@  ctf_str_count_strtab (void *key _libctf_unused_, void *value,
   ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
   ctf_strtab_write_state_t *s = (ctf_strtab_write_state_t *) arg;
 
-  s->strtab->cts_len += strlen (atom->csa_str) + 1;
+  /* We only factor in the length of items that have no offset:
+     other items are in the external strtab.  They still contribute to the
+     total count, though, because we still have to sort them.  */
+  if (!atom->csa_offset)
+    s->strtab->cts_len += strlen (atom->csa_str) + 1;
   s->strtab_count++;
 }
 
@@ -317,12 +349,18 @@  ctf_str_write_strtab (ctf_file_t *fp)
       return strtab;
     }
 
-  /* Update the strtab, and all refs.  */
+  /* Update all refs: also update the strtab if this is not an external strtab
+     pointer.  */
   for (i = 0; i < s.strtab_count; i++)
     {
-      strcpy (&strtab.cts_strs[cur_stroff], sorttab[i]->csa_str);
-      ctf_str_update_refs (sorttab[i], cur_stroff);
-      cur_stroff += strlen (sorttab[i]->csa_str) + 1;
+      if (sorttab[i]->csa_offset)
+	ctf_str_update_refs (sorttab[i], sorttab[i]->csa_offset);
+      else
+	{
+	  ctf_str_update_refs (sorttab[i], cur_stroff);
+	  strcpy (&strtab.cts_strs[cur_stroff], sorttab[i]->csa_str);
+	  cur_stroff += strlen (sorttab[i]->csa_str) + 1;
+	}
     }
   free (sorttab);