[v4,17/35,CHANGED] libctf: add CU-mapping machinery

Message ID 20190924135131.441906-18-nick.alcock@oracle.com
State New
Headers show
Series
  • CTF linking support
Related show

Commit Message

Nick Alcock Sept. 24, 2019, 1:51 p.m.
Once the deduplicator is capable of actually detecting conflicting types
with the same name (i.e., not yet) we will place such conflicting types,
and types that depend on them, into CTF dictionaries that are children
of the main dictionary we usually emit: currently, this will lead to the
.ctf section becoming a CTF archive rather than a single dictionary,
with the default-named archive member (_CTF_SECTION, or NULL) being the
main shared dictionary with most of the types in it.

By default, the sections are named after the compilation unit they come
from (complete path and all), with the cuname field in the CTF header
providing further evidence of the name without requiring the caller to
engage in tiresome parsing.  But some callers may not wish the mapping
from input CU to output sub-dictionary to be purely CU-based.

The machinery here allows this to be freely changed, in two ways:

 - callers can call ctf_link_add_cu_mapping to specify that a single
   input compilation unit should have its types placed in some other CU
   if they conflict: the CU will always be created, even if empty, so
   the consuming program can depend on its existence.  You can map
   multiple input CUs to one output CU to force all their types to be
   merged together: if some of *those* types conflict, the behaviour is
   currently unspecified (the new deduplicator will specify it).

 - callers can call ctf_link_set_memb_name_changer to provide a function
   which is passed every CTF sub-dictionary name in turn (including
   _CTF_SECTION) and can return a new name, or NULL if no change is
   desired.  The mapping from input to output names should not map two
   input names to the same output name: if this happens, the two are not
   merged but will result in an archive with two members with the same
   name (technically valid, but it's hard to access the second
   same-named member: you have to do an iteration over archive members).

This is used by the kernel's ctfarchive machinery (not yet upstream) to
encode CTF under member names like {module name}.ctf rather than
.ctf.CU, but it is anticipated that other large projects may wish to
have their own storage for CTF outside of .ctf sections and may wish to
have new naming schemes that suit their special-purpose consumers.

New in v3.
v4: check for strdup failure.

include/
	* ctf-api.h (ctf_link_add_cu_mapping): New.
	(ctf_link_memb_name_changer_f): New.
	(ctf_link_set_memb_name_changer): New.

libctf/
	* ctf-impl.h (ctf_file_t) <ctf_link_cu_mapping>: New.
	<ctf_link_memb_name_changer>: Likewise.
	<ctf_link_memb_name_changer_arg>: Likewise.
	* ctf-create.c (ctf_update): Update accordingly.
	* ctf-open.c (ctf_file_close): Likewise.
	* ctf-link.c (ctf_create_per_cu): Apply the cu mapping.
	(ctf_link_add_cu_mapping): New.
	(ctf_link_set_memb_name_changer): Likewise.
        (ctf_change_parent_name): New.
	(ctf_name_list_accum_cb_arg_t) <dynames>: New, storage for names
	allocated by the caller's ctf_link_memb_name_changer.
	<ndynames>: Likewise.
	(ctf_accumulate_archive_names): Call the ctf_link_memb_name_changer.
	(ctf_link_write): Likewise (for _CTF_SECTION only): also call
        ctf_change_parent_name.  Free any resulting names.
---
 include/ctf-api.h   |  10 +++
 libctf/ctf-create.c |   4 ++
 libctf/ctf-impl.h   |   4 ++
 libctf/ctf-link.c   | 172 ++++++++++++++++++++++++++++++++++++++++++--
 libctf/ctf-open.c   |   1 +
 5 files changed, 186 insertions(+), 5 deletions(-)

-- 
2.23.0.239.g28aa4420fd

Patch

diff --git a/include/ctf-api.h b/include/ctf-api.h
index 4130a2ecd19..4ac5fea8bc6 100644
--- a/include/ctf-api.h
+++ b/include/ctf-api.h
@@ -421,6 +421,16 @@  extern int ctf_link_shuffle_syms (ctf_file_t *, ctf_link_iter_symbol_f *,
 extern unsigned char *ctf_link_write (ctf_file_t *, size_t *size,
 				      size_t threshold);
 
+/* Specialist linker functions.  These functions are not used by ld, but can be
+   used by other programs making use of the linker machinery for other purposes
+   to customize its output.  */
+extern int ctf_link_add_cu_mapping (ctf_file_t *, const char *from,
+				    const char *to);
+typedef char *ctf_link_memb_name_changer_f (ctf_file_t *,
+					    const char *, void *);
+extern void ctf_link_set_memb_name_changer
+  (ctf_file_t *, ctf_link_memb_name_changer_f *, void *);
+
 extern void ctf_setdebug (int debug);
 extern int ctf_getdebug (void);
 
diff --git a/libctf/ctf-create.c b/libctf/ctf-create.c
index 3cb7d5b08ba..c1cf55fbd24 100644
--- a/libctf/ctf-create.c
+++ b/libctf/ctf-create.c
@@ -473,7 +473,10 @@  ctf_update (ctf_file_t *fp)
   nfp->ctf_link_inputs = fp->ctf_link_inputs;
   nfp->ctf_link_outputs = fp->ctf_link_outputs;
   nfp->ctf_syn_ext_strtab = fp->ctf_syn_ext_strtab;
+  nfp->ctf_link_cu_mapping = fp->ctf_link_cu_mapping;
   nfp->ctf_link_type_mapping = fp->ctf_link_type_mapping;
+  nfp->ctf_link_memb_name_changer = fp->ctf_link_memb_name_changer;
+  nfp->ctf_link_memb_name_changer_arg = fp->ctf_link_memb_name_changer_arg;
 
   nfp->ctf_snapshot_lu = fp->ctf_snapshots;
 
@@ -486,6 +489,7 @@  ctf_update (ctf_file_t *fp)
   fp->ctf_link_inputs = NULL;
   fp->ctf_link_outputs = NULL;
   fp->ctf_syn_ext_strtab = NULL;
+  fp->ctf_link_cu_mapping = NULL;
   fp->ctf_link_type_mapping = NULL;
 
   fp->ctf_dvhash = NULL;
diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h
index d4fa30e4f3b..c08400c45d4 100644
--- a/libctf/ctf-impl.h
+++ b/libctf/ctf-impl.h
@@ -281,6 +281,10 @@  struct ctf_file
   ctf_dynhash_t *ctf_link_inputs; /* Inputs to this link.  */
   ctf_dynhash_t *ctf_link_outputs; /* Additional outputs from this link.  */
   ctf_dynhash_t *ctf_link_type_mapping; /* Map input types to output types.  */
+  ctf_dynhash_t *ctf_link_cu_mapping;	/* Map CU names to CTF dict names.  */
+  /* Allow the caller to change the name of link archive members.  */
+  ctf_link_memb_name_changer_f *ctf_link_memb_name_changer;
+  void *ctf_link_memb_name_changer_arg; /* Argument for it.  */
   char *ctf_tmp_typeslice;	  /* Storage for slicing up type names.  */
   size_t ctf_tmp_typeslicelen;	  /* Size of the typeslice.  */
   void *ctf_specific;		  /* Data for ctf_get/setspecific().  */
diff --git a/libctf/ctf-link.c b/libctf/ctf-link.c
index e545194fbb6..1c0d79c990c 100644
--- a/libctf/ctf-link.c
+++ b/libctf/ctf-link.c
@@ -182,9 +182,26 @@  static ctf_file_t *
 ctf_create_per_cu (ctf_file_t *fp, const char *filename, const char *cuname)
 {
   ctf_file_t *cu_fp;
+  const char *ctf_name = NULL;
   char *dynname = NULL;
 
-  if ((cu_fp = ctf_dynhash_lookup (fp->ctf_link_outputs, filename)) == NULL)
+  /* First, check the mapping table and translate the per-CU name we use
+     accordingly.  We check both the input filename and the CU name.  Only if
+     neither are set do we fall back to the input filename as the per-CU
+     dictionary name.  We prefer the filename because this is easier for likely
+     callers to determine.  */
+
+  if (fp->ctf_link_cu_mapping)
+    {
+      if (((ctf_name = ctf_dynhash_lookup (fp->ctf_link_cu_mapping, filename)) == NULL) &&
+	  ((ctf_name = ctf_dynhash_lookup (fp->ctf_link_cu_mapping, cuname)) == NULL))
+	ctf_name = filename;
+    }
+
+  if (ctf_name == NULL)
+    ctf_name = filename;
+
+  if ((cu_fp = ctf_dynhash_lookup (fp->ctf_link_outputs, ctf_name)) == NULL)
     {
       int err;
 
@@ -197,7 +214,7 @@  ctf_create_per_cu (ctf_file_t *fp, const char *filename, const char *cuname)
 	  return NULL;
 	}
 
-      if ((dynname = strdup (filename)) == NULL)
+      if ((dynname = strdup (ctf_name)) == NULL)
 	goto oom;
       if (ctf_dynhash_insert (fp->ctf_link_outputs, dynname, cu_fp) < 0)
 	goto oom;
@@ -215,6 +232,79 @@  ctf_create_per_cu (ctf_file_t *fp, const char *filename, const char *cuname)
   return NULL;
 }
 
+/* Add a mapping directing that the CU named FROM should have its
+   conflicting/non-duplicate types (depending on link mode) go into a container
+   named TO.  Many FROMs can share a TO: in this case, the effect on conflicting
+   types is not yet defined (but in time an auto-renaming algorithm will be
+   added: ugly, but there is really no right thing one can do in this
+   situation).
+
+   We forcibly add a container named TO in every case, even though it may well
+   wind up empty, because clients that use this facility usually expect to find
+   every TO container present, even if empty, and malfunction otherwise.  */
+
+int
+ctf_link_add_cu_mapping (ctf_file_t *fp, const char *from, const char *to)
+{
+  int err;
+  char *f, *t;
+
+  if (fp->ctf_link_cu_mapping == NULL)
+    fp->ctf_link_cu_mapping = ctf_dynhash_create (ctf_hash_string,
+						  ctf_hash_eq_string, free,
+						  free);
+  if (fp->ctf_link_cu_mapping == NULL)
+    return ctf_set_errno (fp, ENOMEM);
+
+  if (fp->ctf_link_outputs == NULL)
+    fp->ctf_link_outputs = ctf_dynhash_create (ctf_hash_string,
+					       ctf_hash_eq_string, free,
+					       ctf_file_close_thunk);
+
+  if (fp->ctf_link_outputs == NULL)
+    return ctf_set_errno (fp, ENOMEM);
+
+  f = strdup (from);
+  t = strdup (to);
+  if (!f || !t)
+    goto oom;
+
+  if (ctf_create_per_cu (fp, t, t) == NULL)
+    goto oom_noerrno;				/* Errno is set for us.  */
+
+  err = ctf_dynhash_insert (fp->ctf_link_cu_mapping, f, t);
+  if (err)
+    {
+      ctf_set_errno (fp, err);
+      goto oom_noerrno;
+    }
+
+  return 0;
+
+ oom:
+  ctf_set_errno (fp, errno);
+ oom_noerrno:
+  free (f);
+  free (t);
+  return -1;
+}
+
+/* Set a function which is called to transform the names of archive members.
+   This is useful for applying regular transformations to many names, where
+   ctf_link_add_cu_mapping applies arbitrarily irregular changes to single
+   names.  The member name changer is applied at ctf_link_write time, so it
+   cannot conflate multiple CUs into one the way ctf_link_add_cu_mapping can.
+   The changer function accepts a name and should return a new
+   dynamically-allocated name, or NULL if the name should be left unchanged.  */
+void
+ctf_link_set_memb_name_changer (ctf_file_t *fp,
+				ctf_link_memb_name_changer_f *changer,
+				void *arg)
+{
+  fp->ctf_link_memb_name_changer = changer;
+  fp->ctf_link_memb_name_changer_arg = arg;
+}
+
 typedef struct ctf_link_in_member_cb_arg
 {
   ctf_file_t *out_fp;
@@ -228,7 +318,6 @@  typedef struct ctf_link_in_member_cb_arg
   int in_input_cu_file;
 } ctf_link_in_member_cb_arg_t;
 
-
 /* Link one type into the link.  We rely on ctf_add_type() to detect
    duplicates.  This is not terribly reliable yet (unnmamed types will be
    mindlessly duplicated), but will improve shortly.  */
@@ -267,7 +356,7 @@  ctf_link_one_type (ctf_id_t type, int isroot _libctf_unused_, void *arg_)
       ctf_set_errno (arg->out_fp, 0);
     }
 
-  if ((per_cu_out_fp = ctf_create_per_cu (arg->out_fp, arg->arcname,
+  if ((per_cu_out_fp = ctf_create_per_cu (arg->out_fp, arg->file_name,
 					  arg->cu_name)) == NULL)
     return -1;	 				/* Errno is set for us.  */
 
@@ -348,7 +437,7 @@  ctf_link_one_variable (const char *name, ctf_id_t type, void *arg_)
      type only present in the child.  Try adding to the child, creating if need
      be.  */
 
-  if ((per_cu_out_fp = ctf_create_per_cu (arg->out_fp, arg->arcname,
+  if ((per_cu_out_fp = ctf_create_per_cu (arg->out_fp, arg->file_name,
 					  arg->cu_name)) == NULL)
     return -1;	 				/* Errno is set for us.  */
 
@@ -590,6 +679,8 @@  typedef struct ctf_name_list_accum_cb_arg
   ctf_file_t *fp;
   ctf_file_t **files;
   size_t i;
+  char **dynames;
+  size_t ndynames;
 } ctf_name_list_accum_cb_arg_t;
 
 /* Accumulate the names and a count of the names in the link output hash,
@@ -623,12 +714,51 @@  ctf_accumulate_archive_names (void *key, void *value, void *arg_)
       ctf_set_errno (arg->fp, ENOMEM);
       return;
     }
+
+  /* Allow the caller to get in and modify the name at the last minute.  If the
+     caller *does* modify the name, we have to stash away the new name the
+     caller returned so we can free it later on.  (The original name is the key
+     of the ctf_link_outputs hash and is freed by the dynhash machinery.)  */
+
+  if (fp->ctf_link_memb_name_changer)
+    {
+      char **dynames;
+      char *dyname;
+      void *nc_arg = fp->ctf_link_memb_name_changer_arg;
+
+      dyname = fp->ctf_link_memb_name_changer (fp, name, nc_arg);
+
+      if (dyname != NULL)
+	{
+	  if ((dynames = realloc (arg->dynames,
+				  sizeof (char *) * ++(arg->ndynames))) == NULL)
+	    {
+	      (arg->ndynames)--;
+	      ctf_set_errno (arg->fp, ENOMEM);
+	      return;
+	    }
+	    arg->dynames = dynames;
+	    name = (const char *) dyname;
+	}
+    }
+
   arg->names = names;
   arg->names[(arg->i) - 1] = (char *) name;
   arg->files = files;
   arg->files[(arg->i) - 1] = fp;
 }
 
+/* Change the name of the parent CTF section, if the name transformer has got to
+   it.  */
+static void
+ctf_change_parent_name (void *key _libctf_unused_, void *value, void *arg)
+{
+  ctf_file_t *fp = (ctf_file_t *) value;
+  const char *name = (const char *) arg;
+
+  ctf_parent_name_set (fp, name);
+}
+
 /* Write out a CTF archive (if there are per-CU CTF files) or a CTF file
    (otherwise) into a new dynamically-allocated string, and return it.
    Members with sizes above THRESHOLD are compressed.  */
@@ -637,6 +767,7 @@  ctf_link_write (ctf_file_t *fp, size_t *size, size_t threshold)
 {
   ctf_name_list_accum_cb_arg_t arg;
   char **names;
+  char *transformed_name = NULL;
   ctf_file_t **files;
   FILE *f = NULL;
   int err;
@@ -676,7 +807,22 @@  ctf_link_write (ctf_file_t *fp, size_t *size, size_t threshold)
     }
   arg.names = names;
   memmove (&(arg.names[1]), arg.names, sizeof (char *) * (arg.i));
+
   arg.names[0] = (char *) _CTF_SECTION;
+  if (fp->ctf_link_memb_name_changer)
+    {
+      void *nc_arg = fp->ctf_link_memb_name_changer_arg;
+
+      transformed_name = fp->ctf_link_memb_name_changer (fp, _CTF_SECTION,
+							 nc_arg);
+
+      if (transformed_name != NULL)
+	{
+	  arg.names[0] = transformed_name;
+	  ctf_dynhash_iter (fp->ctf_link_outputs, ctf_change_parent_name,
+			    transformed_name);
+	}
+    }
 
   if ((files = realloc (arg.files,
 			sizeof (struct ctf_file *) * (arg.i + 1))) == NULL)
@@ -737,6 +883,14 @@  ctf_link_write (ctf_file_t *fp, size_t *size, size_t threshold)
   *size = fsize;
   free (arg.names);
   free (arg.files);
+  free (transformed_name);
+  if (arg.ndynames)
+    {
+      size_t i;
+      for (i = 0; i < arg.ndynames; i++)
+	free (arg.dynames[i]);
+      free (arg.dynames);
+    }
   return buf;
 
  err_no:
@@ -747,6 +901,14 @@  ctf_link_write (ctf_file_t *fp, size_t *size, size_t threshold)
     fclose (f);
   free (arg.names);
   free (arg.files);
+  free (transformed_name);
+  if (arg.ndynames)
+    {
+      size_t i;
+      for (i = 0; i < arg.ndynames; i++)
+	free (arg.dynames[i]);
+      free (arg.dynames);
+    }
   ctf_dprintf ("Cannot write archive in link: %s failure: %s\n", errloc,
 	       ctf_errmsg (ctf_errno (fp)));
   return NULL;
diff --git a/libctf/ctf-open.c b/libctf/ctf-open.c
index 8d04940db0e..7f9504c68ce 100644
--- a/libctf/ctf-open.c
+++ b/libctf/ctf-open.c
@@ -1629,6 +1629,7 @@  ctf_file_close (ctf_file_t *fp)
   ctf_dynhash_destroy (fp->ctf_link_inputs);
   ctf_dynhash_destroy (fp->ctf_link_outputs);
   ctf_dynhash_destroy (fp->ctf_link_type_mapping);
+  ctf_dynhash_destroy (fp->ctf_link_cu_mapping);
 
   ctf_free (fp->ctf_sxlate);
   ctf_free (fp->ctf_txlate);