[v3,11/19] libctf: ELF file opening via BFD

Message ID 20190524201046.427681-12-nick.alcock@oracle.com
State New
Headers show
Series
  • libctf, and CTF support for objdump and readelf
Related show

Commit Message

Nick Alcock May 24, 2019, 8:10 p.m.
These functions let you open an ELF file with a customarily-named CTF
section in it, automatically opening the CTF file or archive and
associating the symbol and string tables in the ELF file with the CTF
container, so that you can look up the types of symbols in the ELF file
via ctf_lookup_by_symbol(), and so that strings can be shared between
the ELF file and CTF container, to save space.

It uses BFD machinery to do so.  This has now been lightly tested and
seems to work.  In particular, if you already have a bfd, you can pass
it in to ctf_bfdopen(); if you want a bfd made for you, you can call
ctf_open() or ctf_fdopen(), optionally specifying a target (or try
once without a target, and then again with one if you get
ECTF_BFD_AMBIGUOUS back).

We use a forward declaration for the struct bfd in ctf-api.h, so that
ctf-api.h users are not required to pull in <bfd.h>.  (This is mostly
for the sake of readelf.)

Changes from v2:
 - ctf_bfdopen(), ctf_fdopen() and ctf_open() now return a
   ctf_archive_t, not a ctf_file_t.  ctf_fdopen() and ctf_open() can now
   be called on ELF files, raw CTF files or CTF archives, and the CTF
   section which ctf_open() reads (".ctf") can be a CTF archive itself.
 - ctf_bfdopen_ctfsect() is now public.
 - ctf_fdopen() and ctf_open() now call bfd_check_format() correctly,
   diagnose ambiguous files, and allow the caller to pass in a target
   name.
 - Many things (ctf_abfd, ctf_*_alloced, ctf_bfd_close) migrated from
   the struct ctf_impl into the struct ctf_archive_internal to allow for
   this.
 - Arrange to associate symtabs, strtabs, and CTF section data with the
   new fields in the struct ctf_archive_internal.
 - ctf_arc_open() migrated from ctf-archive.c into ctf-open-bfd.c and
   this commit, since it is a BFD user (it calls ctf_open(), which will,
   via ctf_fdopen(), eventually open raw archives).  ctf_close()
   moved into this commit because it is the converse of ctf_open(),
   which belongs here.
 - A little motion of code into prior commits in this series.

Changes from v1:
 - Correct erroneous license (GPLv2+ -> v3+) and reset copyright years.
 - Move out of ctf_lib.c; functions now based on BFD, and located in
   ctf-open-bfd.c.
 - New ctf_bfdopen() to do the low-level opening given a bfd.
 - New ctf_bfdopen_ctfsect() to do the low-level opening given a bfd and
   a separately-specified CTF section (will later be used to open
   archives using a bfd).

libctf/
	* ctf-open-bfd.c: New file.
	* ctf-open.c (ctf_close): New.
	* ctf-impl.h: Include bfd.h.
	(ctf_file): New members ctf_data_mmapped, ctf_data_mmapped_len.
	(ctf_archive_internal): New members ctfi_abfd, ctfi_data,
	ctfi_bfd_close.
	(ctf_bfdopen_ctfsect): New declaration.
	(_CTF_SECTION): Likewise.

include/
	* ctf-api.h (struct bfd): New forward.
	(ctf_fdopen): New.
	(ctf_bfdopen): Likewise.
	(ctf_bfdopen_ctfsect): Likewise.
	(ctf_open): Likewise.
	(ctf_close): Likewise.
	(ctf_arc_open): Likewise.
---
 include/ctf-api.h     |  20 +++
 libctf/ctf-impl.h     |   6 +
 libctf/ctf-open-bfd.c | 351 ++++++++++++++++++++++++++++++++++++++++++
 libctf/ctf-open.c     |   8 +
 4 files changed, 385 insertions(+)
 create mode 100644 libctf/ctf-open-bfd.c

-- 
2.21.0.237.gd0cfaa883d

Patch

diff --git a/include/ctf-api.h b/include/ctf-api.h
index 4cac635e57..ee68efefe3 100644
--- a/include/ctf-api.h
+++ b/include/ctf-api.h
@@ -45,6 +45,11 @@  typedef struct ctf_file ctf_file_t;
 typedef struct ctf_archive_internal ctf_archive_t;
 typedef long ctf_id_t;
 
+/* This opaque definition allows libctf to accept BFD data structures without
+   importing all the BFD noise into users' namespaces.  */
+
+struct bfd;
+
 /* If the debugger needs to provide the CTF library with a set of raw buffers
    for use as the CTF data, symbol table, and string table, it can do so by
    filling in ctf_sect_t structures and passing them to ctf_bufopen().
@@ -205,8 +210,23 @@  typedef int ctf_archive_member_f (ctf_file_t *fp, const char *name, void *arg);
 typedef int ctf_archive_raw_member_f (const char *name, const void *content,
 				      size_t len, void *arg);
 
+/* Opening.  Most of these return an abstraction over both CTF files and CTF
+   archives, so they can be used to open either.  A CTF file appears as an
+   archive with one member named '.ctf'.  The low-level functions
+   ctf_simple_open() and ctf_bufopen() return ctf_file_t's directly, and cannot
+   be used on CTF archives.  */
+
+extern ctf_archive_t *ctf_bfdopen (struct bfd *, int *);
+extern ctf_archive_t *ctf_bfdopen_ctfsect (struct bfd *, const ctf_sect_t *,
+					   int *);
+extern ctf_archive_t *ctf_fdopen (int fd, const char *filename,
+				  const char *target, int *errp);
+extern ctf_archive_t *ctf_open (const char *filename,
+				const char *target, int *errp);
+extern void ctf_close (ctf_archive_t *);
 extern ctf_sect_t ctf_getdatasect (const ctf_file_t *);
 extern ctf_archive_t *ctf_get_arc (const ctf_file_t *);
+extern ctf_archive_t *ctf_arc_open (const char *, int *);
 extern void ctf_arc_close (ctf_archive_t *);
 extern ctf_file_t *ctf_arc_open_by_name (const ctf_archive_t *,
 					 const char *, int *);
diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h
index 898be8ce3a..8522a032dd 100644
--- a/libctf/ctf-impl.h
+++ b/libctf/ctf-impl.h
@@ -31,6 +31,7 @@ 
 #include <limits.h>
 #include <ctype.h>
 #include <elf.h>
+#include <bfd.h>
 
 #ifdef	__cplusplus
 extern "C"
@@ -188,6 +189,8 @@  struct ctf_file
   ctf_sect_t ctf_data;		    /* CTF data from object file.  */
   ctf_sect_t ctf_symtab;	    /* Symbol table from object file.  */
   ctf_sect_t ctf_strtab;	    /* String table from object file.  */
+  void *ctf_data_mmapped;	    /* CTF data we mmapped, to free later.  */
+  size_t ctf_data_mmapped_len;	    /* Length of CTF data we mmapped.  */
   ctf_hash_t *ctf_structs;	    /* Hash table of struct types.  */
   ctf_hash_t *ctf_unions;	    /* Hash table of union types.  */
   ctf_hash_t *ctf_enums;	    /* Hash table of enum types.  */
@@ -240,6 +243,8 @@  struct ctf_archive_internal
   ctf_sect_t ctfi_symsect;
   ctf_sect_t ctfi_strsect;
   void *ctfi_data;
+  bfd *ctfi_abfd;		    /* Optional source of section data.  */
+  void (*ctfi_bfd_close) (struct ctf_archive_internal *);
 };
 
 /* Return x rounded up to an alignment boundary.
@@ -358,6 +363,7 @@  extern Elf64_Sym *ctf_sym_to_elf64 (const Elf32_Sym *src, Elf64_Sym *dst);
 
 /* Variables, all underscore-prepended. */
 
+extern const char _CTF_SECTION[];	/* name of CTF ELF section */
 extern const char _CTF_NULLSTR[];	/* empty string */
 
 extern int _libctf_debug;	/* debugging messages enabled */
diff --git a/libctf/ctf-open-bfd.c b/libctf/ctf-open-bfd.c
new file mode 100644
index 0000000000..d6d442ea42
--- /dev/null
+++ b/libctf/ctf-open-bfd.c
@@ -0,0 +1,351 @@ 
+/* Opening CTF files with BFD.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+
+   This file is part of libctf.
+
+   libctf is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 3, or (at your option) any later
+   version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+   See the GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <ctf-impl.h>
+#include <stddef.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <elf.h>
+#include <bfd.h>
+
+#include "elf-bfd.h"
+
+/* Wrap either a ctf_archive (IS_ARCHIVE nonzero: ARC is used) or a ctf_file
+   (IS_ARCHIVE zero: FP is used) in a freshly-allocated struct
+   ctf_archive_internal.  If SYMSECT and/or STRSECT are non-NULL, the section
+   descriptors they point to are copied by value into the wrapper.
+
+   If the wrapper cannot be allocated, the wrapped ARC or FP is closed and
+   the result of ctf_set_open_errno (ERRP, errno) is returned.  */
+
+static struct ctf_archive_internal *
+ctf_new_archive_internal (int is_archive, struct ctf_archive *arc,
+			  ctf_file_t *fp, const ctf_sect_t *symsect,
+			  const ctf_sect_t *strsect,
+			  int *errp)
+{
+  struct ctf_archive_internal *arci;
+
+  arci = calloc (1, sizeof (*arci));
+  if (arci == NULL)
+    {
+      /* Capture errno before cleaning up: the close routines below may
+	 overwrite it before we get to report it.  */
+      int err = errno;
+
+      if (is_archive)
+	ctf_arc_close_internal (arc);
+      else
+	ctf_file_close (fp);
+      return (ctf_set_open_errno (errp, err));
+    }
+
+  arci->ctfi_is_archive = is_archive;
+  if (is_archive)
+    arci->ctfi_archive = arc;
+  else
+    arci->ctfi_file = fp;
+
+  if (symsect != NULL)
+    memcpy (&arci->ctfi_symsect, symsect, sizeof (arci->ctfi_symsect));
+  if (strsect != NULL)
+    memcpy (&arci->ctfi_strsect, strsect, sizeof (arci->ctfi_strsect));
+
+  return arci;
+}
+
+/* Close the BFD attached to ARCI, if there is one.  ctf_fdopen() installs
+   this function as the ctfi_bfd_close hook on archives whose BFD it opened
+   itself.  A failure to close the BFD is only logged.  */
+
+static void
+ctf_bfdclose (struct ctf_archive_internal *arci)
+{
+  if (arci->ctfi_abfd == NULL)
+    return;
+
+  if (!bfd_close_all_done (arci->ctfi_abfd))
+    ctf_dprintf ("Cannot close BFD: %s\n", bfd_errmsg (bfd_get_error()));
+}
+
+/* Open the CTF data held in the _CTF_SECTION section of ABFD.
+
+   The section contents are read into a malloced buffer.  On success that
+   buffer is recorded in the returned archive's ctfi_data; on failure it is
+   freed here.  Returns NULL if ABFD has no such section (ECTF_NOCTFDATA), if
+   its contents cannot be read (ECTF_FMT), or if ctf_bfdopen_ctfsect()
+   fails.  */
+
+ctf_archive_t *
+ctf_bfdopen (struct bfd *abfd, int *errp)
+{
+  asection *sect;
+  bfd_byte *buf;
+  ctf_sect_t ctfsect;
+  ctf_archive_t *arci;
+
+  libctf_init_debug();
+
+  sect = bfd_get_section_by_name (abfd, _CTF_SECTION);
+  if (sect == NULL)
+    return (ctf_set_open_errno (errp, ECTF_NOCTFDATA));
+
+  if (!bfd_malloc_and_get_section (abfd, sect, &buf))
+    {
+      ctf_dprintf ("ctf_bfdopen(): cannot malloc CTF section: %s\n",
+		   bfd_errmsg (bfd_get_error()));
+      return (ctf_set_open_errno (errp, ECTF_FMT));
+    }
+
+  /* Describe the buffer we just read as a CTF section.  */
+  ctfsect.cts_data = buf;
+  ctfsect.cts_size = bfd_section_size (abfd, sect);
+  ctfsect.cts_name = _CTF_SECTION;
+  ctfsect.cts_type = SHT_PROGBITS;
+  ctfsect.cts_flags = 0;
+  ctfsect.cts_entsize = 1;
+  ctfsect.cts_offset = 0;
+
+  arci = ctf_bfdopen_ctfsect (abfd, &ctfsect, errp);
+  if (arci == NULL)
+    {
+      free (buf);
+      return NULL;			/* errno is set for us.  */
+    }
+
+  /* Remember the buffer in the archive now that the open has succeeded.  */
+  arci->ctfi_data = (void *) ctfsect.cts_data;
+  return arci;
+}
+
+/* Open a CTF file given the specified BFD and CTF section (which may contain
+   a CTF archive or a single CTF file).
+
+   If ABFD has an ELF symbol table with a valid linked string table, both are
+   read into malloced buffers and passed along with CTFSECT when opening.
+   CTFSECT's data is checked for the CTFA_MAGIC archive magic to decide
+   whether to open it as an archive or as a single CTF file.
+
+   This function does not itself free CTFSECT's data: ctf_bfdopen() frees
+   its buffer when this function fails, and records it in ctfi_data when it
+   succeeds.
+
+   ERRP may be NULL.  Returns NULL on failure.
+
+   NOTE(review): the elf_* accessors used here presume ABFD is an ELF BFD;
+   confirm callers guarantee that.  */
+
+ctf_archive_t *
+ctf_bfdopen_ctfsect (struct bfd *abfd, const ctf_sect_t *ctfsect, int *errp)
+{
+  struct ctf_archive *arc = NULL;
+  ctf_archive_t *arci;
+  ctf_file_t *fp = NULL;
+  ctf_sect_t *symsectp = NULL;
+  ctf_sect_t *strsectp = NULL;
+  const char *bfderrstr = NULL;
+  int is_archive = 0;
+  int local_err = 0;
+
+  asection *sym_asect;
+  ctf_sect_t symsect, strsect;
+  /* TODO: handle SYMTAB_SHNDX.  */
+
+  /* Zero both descriptors so that the cleanup path can free their cts_data
+     unconditionally, even when no symbol or string table was read.  */
+  memset (&symsect, 0, sizeof (symsect));
+  memset (&strsect, 0, sizeof (strsect));
+
+  /* ERRP is dereferenced below for diagnostics, so give it somewhere to
+     point if the caller is not interested in the error code.  */
+  if (errp == NULL)
+    errp = &local_err;
+
+  if ((sym_asect = bfd_section_from_elf_index (abfd,
+					       elf_onesymtab (abfd))) != NULL)
+    {
+      Elf_Internal_Shdr *symhdr = &elf_symtab_hdr (abfd);
+      Elf_Internal_Shdr *strhdr = NULL;
+      asection *str_asect = NULL;
+      bfd_byte *contents = NULL;
+
+      /* Only follow sh_link if it is a valid section index: the bound is
+	 exclusive, and the section header array must not be indexed
+	 otherwise.  */
+      if (symhdr->sh_link != SHN_UNDEF
+	  && symhdr->sh_link < elf_numsections (abfd))
+	{
+	  str_asect = bfd_section_from_elf_index (abfd, symhdr->sh_link);
+	  strhdr = elf_elfsections (abfd)[symhdr->sh_link];
+	}
+
+      if (str_asect != NULL && strhdr != NULL)
+	{
+	  if (!bfd_malloc_and_get_section (abfd, str_asect, &contents))
+	    {
+	      bfderrstr = "Cannot malloc string table";
+	      free (contents);
+	      goto err;
+	    }
+	  strsect.cts_data = contents;
+	  strsect.cts_name = (char *) strsect.cts_data + strhdr->sh_name;
+	  strsect.cts_type = strhdr->sh_type;
+	  strsect.cts_flags = strhdr->sh_flags;
+	  strsect.cts_size = bfd_section_size (abfd, str_asect);
+	  strsect.cts_entsize = strhdr->sh_entsize;
+	  strsect.cts_offset = strhdr->sh_offset;
+	  strsectp = &strsect;
+
+	  /* The string table buffer is now owned by strsect.  */
+	  contents = NULL;
+
+	  if (!bfd_malloc_and_get_section (abfd, sym_asect, &contents))
+	    {
+	      bfderrstr = "Cannot malloc symbol table";
+	      free (contents);
+	      goto err;
+	    }
+
+	  /* NOTE(review): sh_name is used as an offset into the symbol
+	     string table here, as in the original code; confirm that this
+	     is the intended table for section names.  */
+	  symsect.cts_name = (char *) strsect.cts_data + symhdr->sh_name;
+	  symsect.cts_type = symhdr->sh_type;
+	  symsect.cts_flags = symhdr->sh_flags;
+	  symsect.cts_size = bfd_section_size (abfd, sym_asect);
+	  symsect.cts_entsize = symhdr->sh_entsize;
+	  symsect.cts_data = contents;
+	  symsect.cts_offset = symhdr->sh_offset;
+	  symsectp = &symsect;
+	}
+    }
+
+  /* Peek at the leading magic number to see if this is a CTF archive.  Copy
+     it out rather than casting, since CTFSECT's data comes from the caller
+     and need not be suitably aligned.  */
+  if (ctfsect->cts_size > sizeof (uint64_t))
+    {
+      uint64_t arc_magic;
+
+      memcpy (&arc_magic, ctfsect->cts_data, sizeof (arc_magic));
+      is_archive = (arc_magic == CTFA_MAGIC);
+    }
+
+  if (is_archive)
+    {
+      if ((arc = ctf_arc_bufopen ((void *) ctfsect->cts_data,
+				  ctfsect->cts_size, errp)) == NULL)
+	goto err;
+    }
+  else
+    {
+      if ((fp = ctf_bufopen (ctfsect, symsectp, strsectp, errp)) == NULL)
+	{
+	  ctf_dprintf ("ctf_internal_open(): cannot open CTF: %s\n",
+		       ctf_errmsg (*errp));
+	  goto err;
+	}
+    }
+
+  /* On failure this closes ARC or FP for us, so only the symbol and string
+     table buffers remain to be released below.  */
+  arci = ctf_new_archive_internal (is_archive, arc, fp, symsectp, strsectp,
+				   errp);
+
+  if (arci)
+    return arci;
+
+ err:
+  free ((void *) symsect.cts_data);
+  free ((void *) strsect.cts_data);
+  if (bfderrstr)
+    {
+      ctf_dprintf ("ctf_bfdopen(): %s: %s\n", bfderrstr,
+		   bfd_errmsg (bfd_get_error()));
+      ctf_set_open_errno (errp, ECTF_FMT);
+    }
+  return NULL;
+}
+
+/* Open the specified file descriptor and return a pointer to a CTF archive that
+   contains one or more CTF containers.  The file can be an ELF file, a raw CTF
+   file, or a CTF archive.  The caller is responsible for closing the file
+   descriptor when it is no longer needed: it is dup()ed before being handed
+   to BFD.  If this is an ELF file, TARGET, if non-NULL, should be the name of
+   a suitable BFD target.  FILENAME is used in diagnostics, and to reopen the
+   file if it turns out to be a CTF archive.
+
+   Returns NULL on error.  ECTF_BFD_AMBIGUOUS is reported if BFD recognizes
+   the file ambiguously; other BFD failures are reported as ECTF_FMT.  */
+
+ctf_archive_t *
+ctf_fdopen (int fd, const char *filename, const char *target, int *errp)
+{
+  ctf_archive_t *arci;
+  bfd *abfd;
+  int nfd;
+
+  struct stat st;
+  ssize_t nbytes;
+
+  ctf_preamble_t ctfhdr;
+  uint64_t arc_magic;
+
+  memset (&ctfhdr, 0, sizeof (ctfhdr));
+
+  libctf_init_debug();
+
+  if (fstat (fd, &st) == -1)
+    return (ctf_set_open_errno (errp, errno));
+
+  if ((nbytes = ctf_pread (fd, &ctfhdr, sizeof (ctfhdr), 0)) <= 0)
+    return (ctf_set_open_errno (errp, nbytes < 0 ? errno : ECTF_FMT));
+
+  /* If we have read enough bytes to form a CTF header and the magic
+     string matches, attempt to interpret the file as raw CTF.  */
+
+  if ((size_t) nbytes >= sizeof (ctf_preamble_t) &&
+      ctfhdr.ctp_magic == CTF_MAGIC)
+    {
+      ctf_file_t *fp = NULL;
+      void *data;
+
+      if (ctfhdr.ctp_version > CTF_VERSION)
+	return (ctf_set_open_errno (errp, ECTF_CTFVERS));
+
+      if ((data = ctf_mmap (st.st_size, 0, fd)) == NULL)
+	return (ctf_set_open_errno (errp, errno));
+
+      if ((fp = ctf_simple_open (data, (size_t) st.st_size, NULL, 0, 0,
+				 NULL, 0, errp)) == NULL)
+	{
+	  /* Nothing was opened: drop the mapping and bail out, rather than
+	     going on to dereference the null FP.  */
+	  ctf_munmap (data, (size_t) st.st_size);
+	  return NULL;			/* errno is set for us.  */
+	}
+
+      /* Record the mapping in the container.  */
+      fp->ctf_data_mmapped = data;
+      fp->ctf_data_mmapped_len = (size_t) st.st_size;
+
+      return ctf_new_archive_internal (0, NULL, fp, NULL, NULL, errp);
+    }
+
+  if ((nbytes = ctf_pread (fd, &arc_magic, sizeof (arc_magic), 0)) <= 0)
+    return (ctf_set_open_errno (errp, nbytes < 0 ? errno : ECTF_FMT));
+
+  if ((size_t) nbytes >= sizeof (uint64_t) && arc_magic == CTFA_MAGIC)
+    {
+      struct ctf_archive *arc;
+
+      if ((arc = ctf_arc_open_internal (filename, errp)) == NULL)
+	return NULL;			/* errno is set for us.  */
+
+      return ctf_new_archive_internal (1, arc, NULL, NULL, NULL, errp);
+    }
+
+  /* Attempt to open the file with BFD.  We must dup the fd first, since bfd
+     takes ownership of the passed fd.  */
+
+  if ((nfd = dup (fd)) < 0)
+      return (ctf_set_open_errno (errp, errno));
+
+  /* NOTE(review): NFD is not closed here on failure, on the understanding
+     that BFD closes the descriptor itself when the open fails; confirm
+     against the BFD documentation for bfd_fopen()/bfd_fdopenr().  */
+  if ((abfd = bfd_fdopenr (filename, target, nfd)) == NULL)
+    {
+      ctf_dprintf ("Cannot open BFD from %s: %s\n",
+		   filename ? filename : "(unknown file)",
+		   bfd_errmsg (bfd_get_error()));
+      return (ctf_set_open_errno (errp, ECTF_FMT));
+    }
+
+  if (!bfd_check_format (abfd, bfd_object))
+    {
+      /* Latch the BFD error before closing, since closing may change it.  */
+      bfd_error_type bfderr = bfd_get_error ();
+
+      ctf_dprintf ("BFD format problem in %s: %s\n",
+		   filename ? filename : "(unknown file)",
+		   bfd_errmsg (bfderr));
+
+      /* Release the BFD, and with it the dup()ed descriptor it owns, so
+	 that neither leaks on this error path.  */
+      if (!bfd_close_all_done (abfd))
+	ctf_dprintf ("Cannot close BFD: %s\n", bfd_errmsg (bfd_get_error()));
+
+      if (bfderr == bfd_error_file_ambiguously_recognized)
+	return (ctf_set_open_errno (errp, ECTF_BFD_AMBIGUOUS));
+      else
+	return (ctf_set_open_errno (errp, ECTF_FMT));
+    }
+
+  if ((arci = ctf_bfdopen (abfd, errp)) == NULL)
+    {
+      if (!bfd_close_all_done (abfd))
+	ctf_dprintf ("Cannot close BFD: %s\n", bfd_errmsg (bfd_get_error()));
+      return NULL;			/* errno is set for us.  */
+    }
+
+  /* We opened this BFD ourselves, so arrange for it to be closed along with
+     the archive.  */
+  arci->ctfi_bfd_close = ctf_bfdclose;
+  arci->ctfi_abfd = abfd;
+
+  return arci;
+}
+
+/* Convenience wrapper around ctf_fdopen(): open FILENAME read-only, pass the
+   resulting descriptor to ctf_fdopen() together with FILENAME and TARGET,
+   then close the descriptor again and return whatever ctf_fdopen() returned.
+   If FILENAME cannot be opened, NULL is returned, and errno is stored in
+   *ERRP when ERRP is non-NULL.  */
+
+ctf_archive_t *
+ctf_open (const char *filename, const char *target, int *errp)
+{
+  ctf_archive_t *result;
+  int fd = open (filename, O_RDONLY);
+
+  if (fd == -1)
+    {
+      if (errp != NULL)
+	*errp = errno;
+      return NULL;
+    }
+
+  /* ctf_fdopen() leaves closing FD to its caller, so do that here.  */
+  result = ctf_fdopen (fd, filename, target, errp);
+  (void) close (fd);
+
+  return result;
+}
+
+/* Public entry point: open the CTF archive or CTF file named FILENAME.  This
+   calls ctf_open() without specifying a BFD target and returns its result;
+   ERRP is passed straight through.  Despite the fact that this uses CTF
+   archives, it must be in this file to avoid dragging in BFD into
+   non-BFD-using programs.  */
+ctf_archive_t *
+ctf_arc_open (const char *filename, int *errp)
+{
+  const char *no_target = NULL;
+
+  return ctf_open (filename, no_target, errp);
+}
diff --git a/libctf/ctf-open.c b/libctf/ctf-open.c
index 8c6294a89b..5230d09a97 100644
--- a/libctf/ctf-open.c
+++ b/libctf/ctf-open.c
@@ -1572,6 +1572,14 @@  ctf_file_close (ctf_file_t *fp)
   ctf_free (fp);
 }
 
+/* The converse of ctf_open().  ctf_open() disguises whatever it opens as an
+   archive, so closing one is just like closing an archive: ARC is simply
+   passed on to ctf_arc_close().  */
+void
+ctf_close (ctf_archive_t *arc)
+{
+  ctf_arc_close (arc);
+}
+
 /* Get the CTF archive from which this ctf_file_t is derived.  */
 ctf_archive_t *
 ctf_get_arc (const ctf_file_t *fp)