[2/2] Speedup part #2

Message ID 1595645.tF5Z5aUmPa@polaris
State New
Headers show
Series
  • Speed up direct linking with DLLs on Windows
Related show

Commit Message

Eric Botcazou March 13, 2018, 5:59 p.m.
The second patch deals with the generation of the import library on the fly.

The implementation is inefficient because the linker makes a lot of calls to
realloc and memmove when importing the symbols in order to maintain a sorted
list of symbols.

This is fixable by relying on the fact that, for every linked DLL, the list of
symbols it exports is already sorted so you can import them en masse once you
have found the insertion point.


2018-03-13  Eric Botcazou  <ebotcazou@adacore.com>

ld/
	* deffile.h (def_file_add_import_from): Declare.
	(def_file_add_import_at): Likewise.
	* deffilep.y (fill_in_import): New function extracted from...
	(def_file_add_import): ...here.  Call it.
	(def_file_add_import_from): New function.
	(def_file_add_import_at): Likewise.
	* pe-dll.c (pe_implied_import_dll): Use an optimized version of the
	insertion loop for imported symbols if possible.


---
 ld/deffile.h  |  10 ++++++
 ld/deffilep.y | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++------
 ld/pe-dll.c   |  35 ++++++++++++++++++++
 3 files changed, 133 insertions(+), 12 deletions(-)

-- 
Eric Botcazou

Comments

Nick Clifton March 19, 2018, 5:35 p.m. | #1
Hi Eric,

> 2018-03-13  Eric Botcazou  <ebotcazou@adacore.com>

> 

> ld/

> 	* deffile.h (def_file_add_import_from): Declare.

> 	(def_file_add_import_at): Likewise.

> 	* deffilep.y (fill_in_import): New function extracted from...

> 	(def_file_add_import): ...here.  Call it.

> 	(def_file_add_import_from): New function.

> 	(def_file_add_import_at): Likewise.

> 	* pe-dll.c (pe_implied_import_dll): Use an optimized version of the

> 	insertion loop for imported symbols if possible.


Approved - please apply.

Cheers
  Nick

Patch

commit fc67f4db5b88875e346d5327ac37b44358761464
Author: Eric Botcazou <ebotcazou@gcc.gnu.org>
Date:   Mon Mar 5 12:15:11 2018 +0100

    Speedup part #2.

diff --git a/ld/deffile.h b/ld/deffile.h
index 9a1e539667..4275e7143c 100644
--- a/ld/deffile.h
+++ b/ld/deffile.h
@@ -108,6 +108,16 @@  extern def_file_export *def_file_add_export (def_file *, const char *,
 extern def_file_import *def_file_add_import (def_file *, const char *,
 					     const char *, int, const char *,
 					     const char *, int *);
+extern int def_file_add_import_from (def_file *fdef,
+				     int num_imports,
+				     const char *name,
+				     const char *module,
+				     int ordinal,
+				     const char *internal_name,
+				     const char *its_name);
+extern def_file_import *def_file_add_import_at (def_file *, int, const char *,
+						const char *, int, const char *,
+					        const char *);
 extern void def_file_add_directive (def_file *, const char *, int);
 extern def_file_module *def_get_module (def_file *, const char *);
 #ifdef DEF_FILE_PRINT
diff --git a/ld/deffilep.y b/ld/deffilep.y
index 1931c00b9e..1aebdf65a6 100644
--- a/ld/deffilep.y
+++ b/ld/deffilep.y
@@ -816,6 +816,26 @@  find_import_in_list (def_file_import *b, int max,
   return l;
 }
 
+static void
+fill_in_import (def_file_import *i,
+		const char *name,
+		def_file_module *module,
+		int ordinal,
+		const char *internal_name,
+		const char *its_name)
+{
+  memset (i, 0, sizeof (def_file_import)); /* Clear every field of *I first.  */
+  if (name)
+    i->name = xstrdup (name); /* Keep a duplicate of NAME, not the pointer.  */
+  i->module = module;
+  i->ordinal = ordinal;
+  if (internal_name)
+    i->internal_name = xstrdup (internal_name);
+  else
+    i->internal_name = i->name; /* Alias i->name (possibly NULL), no copy.  */
+  i->its_name = (its_name ? xstrdup (its_name) : NULL);
+}
+
 def_file_import *
 def_file_add_import (def_file *fdef,
 		     const char *name,
@@ -850,18 +870,74 @@  def_file_add_import (def_file *fdef,
     }
   i = fdef->imports + pos;
   if (pos != fdef->num_imports)
-    memmove (&i[1], i, (sizeof (def_file_import) * (fdef->num_imports - pos)));
-  memset (i, 0, sizeof (def_file_import));
-  if (name)
-    i->name = xstrdup (name);
-  if (module)
-    i->module = def_stash_module (fdef, module);
-  i->ordinal = ordinal;
-  if (internal_name)
-    i->internal_name = xstrdup (internal_name);
-  else
-    i->internal_name = i->name;
-  i->its_name = (its_name ? xstrdup (its_name) : NULL);
+    memmove (i + 1, i, sizeof (def_file_import) * (fdef->num_imports - pos));
+
+  fill_in_import (i, name, def_stash_module (fdef, module), ordinal,
+		  internal_name, its_name);
+  fdef->num_imports++;
+
+  return i;
+}
+
+int
+def_file_add_import_from (def_file *fdef,
+			  int num_imports,
+			  const char *name,
+			  const char *module,
+			  int ordinal,
+			  const char *internal_name,
+			  const char *its_name ATTRIBUTE_UNUSED)
+{
+  def_file_import *i;
+  int is_dup;
+  int pos;
+  int max_imports = ROUND_UP (fdef->num_imports, 16);
+
+  /* We need to avoid duplicates here.  */
+  is_dup = 0;
+  pos = find_import_in_list (fdef->imports, fdef->num_imports,
+			     name, internal_name ? internal_name : name,
+			     module, ordinal, &is_dup);
+  if (is_dup != 0)
+    return -1; /* Flagged as a duplicate: reserve nothing.  */
+  if (fdef->imports && pos != fdef->num_imports)
+    {
+      i = fdef->imports + pos;
+      if (i->module && strcmp (i->module->name, module) == 0)
+	return -1; /* Entry at POS already belongs to MODULE.  */
+    }
+
+  if (fdef->num_imports + num_imports - 1 >= max_imports)
+    {
+      max_imports = ROUND_UP (fdef->num_imports + num_imports, 16);
+
+      if (fdef->imports)
+	fdef->imports = xrealloc (fdef->imports,
+				 max_imports * sizeof (def_file_import));
+      else
+	fdef->imports = xmalloc (max_imports * sizeof (def_file_import));
+    }
+  i = fdef->imports + pos;
+  if (pos != fdef->num_imports)
+    memmove (i + num_imports, i, /* Open a gap of NUM_IMPORTS entries at POS.  */
+	     sizeof (def_file_import) * (fdef->num_imports - pos));
+
+  return pos; /* Gap is left unfilled; fdef->num_imports is not updated here.  */
+}
+
+def_file_import *
+def_file_add_import_at (def_file *fdef,
+			int pos,
+			const char *name,
+			const char *module,
+			int ordinal,
+			const char *internal_name,
+			const char *its_name)
+{
+  def_file_import *i = fdef->imports + pos;
+
+  fill_in_import (i, name, def_stash_module (fdef, module), ordinal,
+		  internal_name, its_name);
   fdef->num_imports++;
 
   return i;
diff --git a/ld/pe-dll.c b/ld/pe-dll.c
index ad0ffcffea..efb75f2e3e 100644
--- a/ld/pe-dll.c
+++ b/ld/pe-dll.c
@@ -3317,6 +3317,7 @@  pe_implied_import_dll (const char *filename)
   bfd_vma rdata_end = 0;
   bfd_vma bss_start = 1;
   bfd_vma bss_end = 0;
+  int from;
 
   /* No, I can't use bfd here.  kernel32.dll puts its export table in
      the middle of the .rdata section.  */
@@ -3457,6 +3458,40 @@  pe_implied_import_dll (const char *filename)
       return TRUE;
     }
 
+  /* This is an optimized version of the insertion loop, which avoids lots of
+     calls to realloc and memmove from def_file_add_import.  */
+  if ((from = def_file_add_import_from (pe_def_file, nexp,
+					erva + pe_as32 (erva + name_rvas),
+					dllname, 0, NULL, NULL)) >= 0)
+    {
+      for (i = 0; i < nexp; i++)
+	{
+	  /* Pointer to the names vector.  */
+	  bfd_vma name_rva = pe_as32 (erva + name_rvas + i * 4);
+	  def_file_import *imp;
+	  /* Pointer to the function address vector.  */
+	  bfd_vma func_rva = pe_as32 (erva + exp_funcbase + i * 4);
+	  /* is_data is true if the address is in the data, rdata or bss
+	     segment.  */
+	  const int is_data =
+	    (func_rva >= data_start && func_rva < data_end)
+	    || (func_rva >= rdata_start && func_rva < rdata_end)
+	    || (func_rva >= bss_start && func_rva < bss_end);
+
+	  imp = def_file_add_import_at (pe_def_file, from + i, erva + name_rva,
+					dllname, i, NULL, NULL);
+	  /* Mark symbol type.  */
+	  imp->data = is_data;
+
+	  if (pe_dll_extra_pe_debug)
+	    printf ("%s dll-name: %s sym: %s addr: 0x%lx %s\n",
+		    __FUNCTION__, dllname, erva + name_rva,
+		    (unsigned long) func_rva, is_data ? "(data)" : "");
+	}
+
+      return TRUE;
+    }
+
   /* Iterate through the list of symbols.  */
   for (i = 0; i < nexp; i++)
     {