[v5,0/2] Improve surplus TLS accounting

Message ID cover.1592841472.git.szabolcs.nagy@arm.com
Headers show
Series
  • Improve surplus TLS accounting
Related show

Message

Szabolcs Nagy June 22, 2020, 4:20 p.m.
Addressed the review comments, the tricky static TLS accounting
bits are unchanged. These are still outstanding:

> Subsequent followup after committing this:

> - We need to fix tst-auditmany.

> - We should be able to count how many spaces we need based on LD_AUDIT

>   or DT_AUDIT and enable up to that amount.


Reran the tests on aarch64 and x86.

Szabolcs Nagy (2):
  rtld: Add rtld.nns tunable for the number of supported namespaces
  rtld: Avoid using up static TLS surplus for optimizations [BZ #25051]

 csu/libc-tls.c             |  31 +++++-----
 elf/Makefile               |  29 +++++++++-
 elf/dl-reloc.c             |  37 +++++++++---
 elf/dl-tls.c               |  56 ++++++++++++++++--
 elf/dl-tunables.list       |  14 +++++
 elf/dynamic-link.h         |   5 +-
 elf/rtld.c                 |   3 +
 elf/tst-tls-ie-dlmopen.c   | 114 +++++++++++++++++++++++++++++++++++++
 elf/tst-tls-ie-mod.h       |  40 +++++++++++++
 elf/tst-tls-ie-mod0.c      |   4 ++
 elf/tst-tls-ie-mod1.c      |   4 ++
 elf/tst-tls-ie-mod2.c      |   4 ++
 elf/tst-tls-ie-mod3.c      |   4 ++
 elf/tst-tls-ie-mod4.c      |   4 ++
 elf/tst-tls-ie-mod5.c      |   4 ++
 elf/tst-tls-ie-mod6.c      |   4 ++
 elf/tst-tls-ie.c           | 113 ++++++++++++++++++++++++++++++++++++
 manual/tunables.texi       |  38 +++++++++++++
 sysdeps/generic/ldsodefs.h |  11 ++++
 19 files changed, 487 insertions(+), 32 deletions(-)
 create mode 100644 elf/tst-tls-ie-dlmopen.c
 create mode 100644 elf/tst-tls-ie-mod.h
 create mode 100644 elf/tst-tls-ie-mod0.c
 create mode 100644 elf/tst-tls-ie-mod1.c
 create mode 100644 elf/tst-tls-ie-mod2.c
 create mode 100644 elf/tst-tls-ie-mod3.c
 create mode 100644 elf/tst-tls-ie-mod4.c
 create mode 100644 elf/tst-tls-ie-mod5.c
 create mode 100644 elf/tst-tls-ie-mod6.c
 create mode 100644 elf/tst-tls-ie.c

-- 
2.17.1

Comments

Szabolcs Nagy June 26, 2020, 10:50 a.m. | #1
The 06/22/2020 17:20, Szabolcs Nagy wrote:
> Addressed the review comments, the tricky static TLS accounting

> bits are unchanged. These are still outstanding:

> 

> > Subsequent followup after committing this:

> > - We need to fix tst-auditmany.

> > - We should be able to count how many spaces we need based on LD_AUDIT

> >   or DT_AUDIT and enable up to that amount.

> 

> Reran the tests on aarch64 and x86.

> 

> Szabolcs Nagy (2):

>   rtld: Add rtld.nns tunable for the number of supported namespaces

>   rtld: Avoid using up static TLS surplus for optimizations [BZ #25051]


since Carlos reviewed v4
https://sourceware.org/pipermail/libc-alpha/2020-June/115179.html
i attach the v4 to v5 diff in case that helps the review.

i assume the tst-auditmany fix would be something like

void
_dl_tls_static_surplus_init (size_t naudit)
{
  nns = TUNABLE_GET (nns, size_t, NULL);
  if (nns > DL_NNS)
    nns = DL_NNS; // nns = 1 when !SHARED

  if (DL_NNS - nns < naudit)
    _dl_fatal_printf ("too many auditors");
  nns += naudit;

  GLRO(dl_tls_static_surplus) = nns * X + ...;
}

default nns=4 and DL_NNS=16 allows 12 audit modules
(tst-auditmany needs 9) and auditors don't use up
the surplus tls reserved for the application.
diff --git a/csu/libc-start.c b/csu/libc-start.c
index 2396956266..4005caf84a 100644
--- a/csu/libc-start.c
+++ b/csu/libc-start.c
@@ -188,12 +188,10 @@ LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
   /* Initialize very early so that tunables can use it.  */
   __libc_init_secure ();
 
   __tunables_init (__environ);
 
-  _dl_static_tls_tunables_init ();
-
   ARCH_INIT_CPU_FEATURES ();
 
   /* Perform IREL{,A} relocations.  */
   ARCH_SETUP_IREL ();
 
diff --git a/csu/libc-tls.c b/csu/libc-tls.c
index 62f0b0c8c3..fb77cd94fa 100644
--- a/csu/libc-tls.c
+++ b/csu/libc-tls.c
@@ -127,10 +127,13 @@ __libc_setup_tls (void)
 	  if (phdr->p_align > max_align)
 	    max_align = phdr->p_align;
 	  break;
 	}
 
+  /* Calculate the size of the static TLS surplus.  */
+  _dl_tls_static_surplus_init ();
+
   /* We have to set up the TCB block which also (possibly) contains
      'errno'.  Therefore we avoid 'malloc' which might touch 'errno'.
      Instead we use 'sbrk' which would only uses 'errno' if it fails.
      In this case we are right away out of memory and the user gets
      what she/he deserves.  */
diff --git a/elf/Makefile b/elf/Makefile
index b8bde1f47d..5fadaec27c 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -203,11 +203,11 @@ tests += restest1 preloadtest loadfail multiload origtest resolvfail \
 	 tst-sonamemove-link tst-sonamemove-dlopen tst-dlopen-tlsmodid \
 	 tst-dlopen-self tst-auditmany tst-initfinilazyfail tst-dlopenfail \
 	 tst-dlopenfail-2 \
 	 tst-filterobj tst-filterobj-dlopen tst-auxobj tst-auxobj-dlopen \
 	 tst-audit14 tst-audit15 tst-audit16 \
-	 tst-tls-ie
+	 tst-tls-ie tst-tls-ie-dlmopen
 #	 reldep9
 tests-internal += loadtest unload unload2 circleload1 \
 	 neededtest neededtest2 neededtest3 neededtest4 \
 	 tst-tls3 tst-tls6 tst-tls7 tst-tls8 tst-dlmopen2 \
 	 tst-ptrguard1 tst-stackguard1 tst-libc_dlvsym \
@@ -318,11 +318,12 @@ modules-names = testobj1 testobj2 testobj3 testobj4 testobj5 testobj6 \
 		tst-dlopenfailmod1 tst-dlopenfaillinkmod tst-dlopenfailmod2 \
 		tst-dlopenfailmod3 tst-ldconfig-ld-mod \
 		tst-filterobj-flt tst-filterobj-aux tst-filterobj-filtee \
 		tst-auditlogmod-1 tst-auditlogmod-2 tst-auditlogmod-3 \
 		tst-tls-ie-mod0 tst-tls-ie-mod1 tst-tls-ie-mod2 \
-		tst-tls-ie-mod3 tst-tls-ie-mod4 tst-tls-ie-mod5
+		tst-tls-ie-mod3 tst-tls-ie-mod4 tst-tls-ie-mod5 \
+		tst-tls-ie-mod6
 
 # Most modules build with _ISOMAC defined, but those filtered out
 # depend on internal headers.
 modules-names-tests = $(filter-out ifuncmod% tst-libc_dlvsym-dso tst-tlsmod%,\
 				   $(modules-names))
@@ -1758,6 +1759,17 @@ $(objpfx)tst-tls-ie.out: \
   $(objpfx)tst-tls-ie-mod0.so \
   $(objpfx)tst-tls-ie-mod1.so \
   $(objpfx)tst-tls-ie-mod2.so \
   $(objpfx)tst-tls-ie-mod3.so \
   $(objpfx)tst-tls-ie-mod4.so \
-  $(objpfx)tst-tls-ie-mod5.so
+  $(objpfx)tst-tls-ie-mod5.so \
+  $(objpfx)tst-tls-ie-mod6.so
+
+$(objpfx)tst-tls-ie-dlmopen: $(libdl) $(shared-thread-library)
+$(objpfx)tst-tls-ie-dlmopen.out: \
+  $(objpfx)tst-tls-ie-mod0.so \
+  $(objpfx)tst-tls-ie-mod1.so \
+  $(objpfx)tst-tls-ie-mod2.so \
+  $(objpfx)tst-tls-ie-mod3.so \
+  $(objpfx)tst-tls-ie-mod4.so \
+  $(objpfx)tst-tls-ie-mod5.so \
+  $(objpfx)tst-tls-ie-mod6.so
diff --git a/elf/dl-sysdep.c b/elf/dl-sysdep.c
index 68a780dcbd..854570821c 100644
--- a/elf/dl-sysdep.c
+++ b/elf/dl-sysdep.c
@@ -220,12 +220,10 @@ _dl_sysdep_start (void **start_argptr,
     }
 #endif
 
   __tunables_init (_environ);
 
-  _dl_static_tls_tunables_init ();
-
 #ifdef DL_SYSDEP_INIT
   DL_SYSDEP_INIT;
 #endif
 
 #ifdef DL_PLATFORM_INIT
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index 740e33ea91..af5db12d08 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -27,11 +27,11 @@
 
 #include <tls.h>
 #include <dl-tls.h>
 #include <ldsodefs.h>
 
-#define TUNABLE_NAMESPACE dl
+#define TUNABLE_NAMESPACE rtld
 #include <dl-tunables.h>
 
 /* Surplus static TLS, GLRO(dl_tls_static_surplus), is used for
 
    - IE TLS in libc.so for all dlmopen namespaces except in the initial
@@ -43,25 +43,25 @@
    The maximum number of namespaces is DL_NNS, but to support that many
    namespaces correctly the static TLS allocation should be significantly
    increased, which may cause problems with small thread stacks due to the
    way static TLS is accounted (bug 11787).
 
-   So there is a dl.nns tunable limit on the number of supported namespaces
+   So there is a rtld.nns tunable limit on the number of supported namespaces
    that affects the size of the static TLS and by default it's small enough
    not to cause problems with existing applications. The limit is not
-   enforced or checked: it is the user's responsibility to increase dl.nns
+   enforced or checked: it is the user's responsibility to increase rtld.nns
    if more dlmopen namespaces are used.  */
 
 /* Size of initial-exec TLS in libc.so.  */
 #define LIBC_IE_TLS 192
 /* Size of initial-exec TLS in libraries other than libc.so.
    This should be large enough to cover runtime libraries of the
    compiler such as libgomp and libraries in libc other than libc.so.  */
 #define OTHER_IE_TLS 144
 
 void
-_dl_static_tls_tunables_init (void)
+_dl_tls_static_surplus_init (void)
 {
   size_t nns, opt_tls;
 
 #if HAVE_TUNABLES
   nns = TUNABLE_GET (nns, size_t, NULL);
diff --git a/elf/dl-tunables.h b/elf/dl-tunables.h
index 678f447e09..969e50327b 100644
--- a/elf/dl-tunables.h
+++ b/elf/dl-tunables.h
@@ -126,10 +126,6 @@ tunable_is_name (const char *orig, const char *envname)
   else
     return false;
 }
 
 #endif
-
-/* Initializers of tunables in the dl tunable namespace.  */
-void _dl_static_tls_tunables_init (void);
-
 #endif
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
index ce46f28c7a..35634ef24d 100644
--- a/elf/dl-tunables.list
+++ b/elf/dl-tunables.list
@@ -125,18 +125,19 @@ glibc {
       type: INT_32
       default: 3
     }
   }
 
-  dl {
+  rtld {
     nns {
       type: SIZE_T
       minval: 1
       maxval: 16
       default: 4
     }
     optional_static_tls {
       type: SIZE_T
+      minval: 0
       default: 512
     }
   }
 }
diff --git a/elf/rtld.c b/elf/rtld.c
index f4c2602d65..f339f6894f 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -778,10 +778,13 @@ init_tls (void)
 	/* slotinfo[i].gen = 0; */
 	++i;
       }
   assert (i == GL(dl_tls_max_dtv_idx));
 
+  /* Calculate the size of the static TLS surplus.  */
+  _dl_tls_static_surplus_init ();
+
   /* Compute the TLS offsets for the various blocks.  */
   _dl_determine_tlsoffset ();
 
   /* Construct the static TLS block and the dtv for the initial
      thread.  For some platforms this will include allocating memory
diff --git a/elf/tst-tls-ie-dlmopen.c b/elf/tst-tls-ie-dlmopen.c
new file mode 100644
index 0000000000..0be47c7237
--- /dev/null
+++ b/elf/tst-tls-ie-dlmopen.c
@@ -0,0 +1,114 @@
+/* Test dlopen of modules with initial-exec TLS after dlmopen.
+   Copyright (C) 2016-2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* This test tries to check that surplus static TLS is not used up for
+   dynamic TLS optimizations and 4*144 = 576 bytes of static TLS is
+   still available for dlopening modules with initial-exec TLS after 3
+   new dlmopen namespaces are created.  It depends on rtld.nns=4 and
+   rtld.optional_static_tls=512 tunable settings.  */
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static int do_test (void);
+#include <support/xthread.h>
+#include <support/xdlfcn.h>
+#include <support/test-driver.c>
+
+/* Have some big TLS in the main exe: should not use surplus TLS.  */
+__thread char maintls[1000];
+
+static pthread_barrier_t barrier;
+
+/* Forces multi-threaded behaviour.  */
+static void *
+blocked_thread_func (void *closure)
+{
+  xpthread_barrier_wait (&barrier);
+  /* TLS load and access tests run here in the main thread.  */
+  xpthread_barrier_wait (&barrier);
+  return NULL;
+}
+
+static void *
+load_and_access (Lmid_t lmid, const char *mod, const char *func)
+{
+  /* Load module with TLS.  */
+  void *p = xdlmopen (lmid, mod, RTLD_NOW);
+  /* Access the TLS variable to ensure it is allocated.  */
+  void (*f) (void) = (void (*) (void))xdlsym (p, func);
+  f ();
+  return p;
+}
+
+static int
+do_test (void)
+{
+  void *mods[5];
+
+  {
+    int ret = pthread_barrier_init (&barrier, NULL, 2);
+    if (ret != 0)
+      {
+        errno = ret;
+        printf ("error: pthread_barrier_init: %m\n");
+        exit (1);
+      }
+  }
+
+  pthread_t blocked_thread = xpthread_create (NULL, blocked_thread_func, NULL);
+  xpthread_barrier_wait (&barrier);
+
+  printf ("maintls[%zu]:\t %p .. %p\n",
+	   sizeof maintls, maintls, maintls + sizeof maintls);
+  memset (maintls, 1, sizeof maintls);
+
+  /* Load modules with dynamic TLS (use surplus static TLS for libc
+     in new namespaces and possibly for TLS optimizations too).  */
+  mods[0] = load_and_access (LM_ID_BASE, "tst-tls-ie-mod0.so", "access0");
+  mods[1] = load_and_access (LM_ID_NEWLM, "tst-tls-ie-mod1.so", "access1");
+  mods[2] = load_and_access (LM_ID_NEWLM, "tst-tls-ie-mod2.so", "access2");
+  mods[3] = load_and_access (LM_ID_NEWLM, "tst-tls-ie-mod3.so", "access3");
+  /* Load modules with initial-exec TLS (can only use surplus static TLS).  */
+  mods[4] = load_and_access (LM_ID_BASE, "tst-tls-ie-mod6.so", "access6");
+
+  /* Here 576 bytes + 3 * libc use of surplus static TLS is in use so less
+     than 1024 bytes are available (exact number depends on TLS optimizations
+     and the libc TLS use).  */
+  printf ("The next dlmopen should fail...\n");
+  void *p = dlmopen (LM_ID_BASE, "tst-tls-ie-mod4.so", RTLD_NOW);
+  if (p != NULL)
+    {
+      printf ("error: expected dlmopen to fail because there is "
+	      "not enough surplus static TLS.\n");
+      exit (1);
+    }
+  printf ("...OK failed with: %s.\n", dlerror ());
+
+  xpthread_barrier_wait (&barrier);
+  xpthread_join (blocked_thread);
+
+  /* Close the modules.  */
+  for (int i = 0; i < 5; ++i)
+    xdlclose (mods[i]);
+
+  return 0;
+}
diff --git a/elf/tst-tls-ie-mod6.c b/elf/tst-tls-ie-mod6.c
new file mode 100644
index 0000000000..c736bf0684
--- /dev/null
+++ b/elf/tst-tls-ie-mod6.c
@@ -0,0 +1,4 @@
+#define N 6
+#define SIZE 576
+#define MODEL "initial-exec"
+#include "tst-tls-ie-mod.h"
diff --git a/elf/tst-tls-ie.c b/elf/tst-tls-ie.c
index 2f00a2936d..c06454c50c 100644
--- a/elf/tst-tls-ie.c
+++ b/elf/tst-tls-ie.c
@@ -17,11 +17,11 @@
    <https://www.gnu.org/licenses/>.  */
 
 /* This test tries to check that surplus static TLS is not used up for
    dynamic TLS optimizations and 3*192 + 4*144 = 1152 bytes of static
    TLS is available for dlopening modules with initial-exec TLS.  It
-   depends on dl.nns=4 and dl.optional_static_tls=512 tunable setting.  */
+   depends on rtld.nns=4 and rtld.optional_static_tls=512 tunable setting.  */
 
 #include <errno.h>
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -78,19 +78,32 @@ do_test (void)
 
   printf ("maintls[%zu]:\t %p .. %p\n",
 	   sizeof maintls, maintls, maintls + sizeof maintls);
   memset (maintls, 1, sizeof maintls);
 
-  /* Load modules with dynamic TLS (may use surplus TLS opportunistically).  */
+  /* Load modules with dynamic TLS (may use surplus static TLS
+     opportunistically).  */
   mods[0] = load_and_access ("tst-tls-ie-mod0.so", "access0");
   mods[1] = load_and_access ("tst-tls-ie-mod1.so", "access1");
   mods[2] = load_and_access ("tst-tls-ie-mod2.so", "access2");
   mods[3] = load_and_access ("tst-tls-ie-mod3.so", "access3");
-  /* Load modules with initial-exec TLS (can only use surplus TLS).  */
+  /* Load modules with initial-exec TLS (can only use surplus static TLS).  */
   mods[4] = load_and_access ("tst-tls-ie-mod4.so", "access4");
   mods[5] = load_and_access ("tst-tls-ie-mod5.so", "access5");
 
+  /* Here 1152 bytes of surplus static TLS is in use and at most 512 bytes
+     are available (depending on TLS optimizations).  */
+  printf ("The next dlopen should fail...\n");
+  void *p = dlopen ("tst-tls-ie-mod6.so", RTLD_NOW);
+  if (p != NULL)
+    {
+      printf ("error: expected dlopen to fail because there is "
+	      "not enough surplus static TLS.\n");
+      exit (1);
+    }
+  printf ("...OK failed with: %s.\n", dlerror ());
+
   xpthread_barrier_wait (&barrier);
   xpthread_join (blocked_thread);
 
   /* Close the modules.  */
   for (int i = 0; i < 6; ++i)
diff --git a/manual/tunables.texi b/manual/tunables.texi
index 437fdadff0..7f891c2710 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -228,33 +228,43 @@ passed to @code{malloc} for the largest bin size to enable.
 @end deftp
 
 @node Dynamic Linking Tunables
 @section Dynamic Linking Tunables
 @cindex dynamic linking tunables
-@cindex dl tunables
+@cindex rtld tunables
 
-@deftp {Tunable namespace} glibc.dl
+@deftp {Tunable namespace} glibc.rtld
 Dynamic linker behavior can be modified by setting the
-following tunables in the @code{dl} namespace:
+following tunables in the @code{rtld} namespace:
+@end deftp
+
+@deftp Tunable glibc.rtld.nns
+Sets the number of supported dynamic link namespaces (see @code{dlmopen}).
+Currently this limit can be set between 1 and 16 inclusive, the default is 4.
+Each link namespace consumes some memory in all threads, and thus raising the
+limit will increase the amount of memory each thread uses.  Raising the limit
+is useful when your application uses more than 4 dynamic linker audit modules,
+e.g. LD_AUDIT, or will use more than 4 dynamic link namespaces as created
+by @code{dlmopen} with an lmid argument of @code{LM_ID_NEWLM}.
+@end deftp
+
+@deftp Tunable glibc.rtld.optional_static_tls
+Sets the amount of surplus static TLS in bytes to allocate at program
+startup.  Every thread created allocates the specified amount of surplus
+static TLS.  This is a minimum value and additional space may be allocated
+for internal purposes including alignment.  Optional static TLS is used for
+optimizing dynamic TLS access for platforms that support such optimizations
+e.g. TLS descriptors or optimized TLS access for POWER (@code{DT_PPC64_OPT}
+and @code{DT_PPC_OPT}).  In order to make the best use of such optimizations
+the value should be as many bytes as would be required to hold all TLS
+variables in all dynamic loaded shared libraries.  The value cannot be known
+by the dynamic loader because it doesn't know the expected set of shared
+libraries which will be loaded.  The existing static TLS space cannot be
+changed once allocated at process startup.  The default allocation of
+optional static TLS is 512 bytes and is allocated in every thread.
 @end deftp
 
-@deftp Tunable glibc.dl.nns
-Sets the number of supported dynamic link namespaces for which enough
-static TLS is allocated (see @code{dlmopen}).  If more namespaces are
-created then static TLS may run out at @code{dlopen} or @code{dlmopen}
-time which is a non-recoverable failure.  Currently this limit can be
-set between 1 and 16 inclusive, the default is 4. If the limit is
-increased then internally more static TLS is allocated to accomodate
-system libraries with initial-exec TLS in all namespaces.
-@end deftp
-
-@deftp Tunable glibc.dl.optional_static_tls
-Sets the amount of surplus static TLS that may be used for optimizing
-dynamic TLS access (only works on certain platforms, e.g. TLSDESC can
-be optimized this way). The internal allocation of static TLS is
-increased by this amount, the default is 512.
-@end deftp
 
 @node Elision Tunables
 @section Elision Tunables
 @cindex elision tunables
 @cindex tunables, elision
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index f631684583..997084fb4b 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -1107,10 +1107,13 @@ extern size_t _dl_next_tls_modid (void) attribute_hidden;
 extern size_t _dl_count_modids (void) attribute_hidden;
 
 /* Calculate offset of the TLS blocks in the static TLS block.  */
 extern void _dl_determine_tlsoffset (void) attribute_hidden;
 
+/* Calculate the size of the static TLS surplus.  */
+void _dl_tls_static_surplus_init (void) attribute_hidden;
+
 #ifndef SHARED
 /* Set up the TCB for statically linked applications.  This is called
    early during startup because we always use TLS (for errno and the
    stack protector, among other things).  */
 void __libc_setup_tls (void);